# Visualizations Notebook

The purpose of this notebook is to show visualizations of the state of education in South America.

### Set up of libraries

In [15]:
import pandas as pd
import altair as alt
import requests

In [16]:
from utils import filters_countries_cols, translate_english_degrees

In [17]:
# Read the raw enrollment file. With sep=None the python engine detects the
# delimiter on its own; the file is decoded as latin1.
raw = pd.read_csv(
    "../data/raw/matricula.csv",
    sep=None,
    engine="python",
    encoding="latin1",
)


In [18]:
# Display the loaded frame (pandas truncates the view to the first and last rows)
raw

Unnamed: 0,AÑO,TOTAL MATRÍCULA,TOTAL MATRÍCULA MUJERES,TOTAL MATRÍCULA HOMBRES,TOTAL MATRÍCULA NO BINARIOS O INDEFINIDOS,TOTAL MATRÍCULA PRIMER AÑO,TOTAL MATRÍCULA MUJERES PRIMER AÑO,TOTAL MATRÍCULA HOMBRES PRIMER AÑO,TOTAL MATRÍCULA NO BINARIOS O INDEFINIDOS PRIMER AÑO,CLASIFICACIÓN INSTITUCIÓN NIVEL 1,...,TES PARTICULAR SUBVENCIONADO,TES PARTICULAR PAGADO,TES CORP. DE ADMINISTRACIÓN DELEGADA,TES SERVICIO LOCAL EDUCACION,TOTAL TES,% COBERTURA TES,TIPO ESTABLECIMIENTO HC,TIPO ESTABLECIMIENTO TP,CLAS_EST ADULTO,CLAS_EST JOVEN
0,MAT_2025,245,179.0,66.0,,94.0,67.0,27.0,,Institutos Profesionales,...,162.0,2.0,11.0,7.0,228,"93,10%",142.0,86.0,43.0,185.0
1,MAT_2025,86,18.0,68.0,,29.0,3.0,26.0,,Institutos Profesionales,...,58.0,2.0,,,74,"86,00%",44.0,30.0,12.0,62.0
2,MAT_2025,15,3.0,12.0,,,,,,Institutos Profesionales,...,7.0,,1.0,,12,"80,00%",8.0,4.0,5.0,7.0
3,MAT_2025,18,14.0,4.0,,6.0,5.0,1.0,,Universidades,...,10.0,2.0,,,15,"83,30%",14.0,1.0,,15.0
4,MAT_2025,114,19.0,95.0,,28.0,4.0,24.0,,Institutos Profesionales,...,51.0,1.0,1.0,1.0,103,"90,40%",49.0,54.0,23.0,80.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264253,MAT_2007,18,13.0,5.0,,18.0,13.0,5.0,,Universidades,...,5.0,2.0,,,10,"55,60%",9.0,1.0,,10.0
264254,MAT_2007,13,7.0,6.0,,13.0,7.0,6.0,,Universidades,...,4.0,1.0,,,7,"53,80%",5.0,2.0,,7.0
264255,MAT_2007,16,10.0,6.0,,,,,,Universidades,...,5.0,,,,8,"50,00%",6.0,2.0,1.0,7.0
264256,MAT_2007,123,61.0,62.0,,44.0,23.0,21.0,,Universidades,...,33.0,8.0,,,58,"47,20%",50.0,8.0,4.0,54.0


In [19]:
# Lowercase the column names first. Doing this before touching the year column
# makes the cell safe to re-run: lowercasing is a no-op the second time, and the
# year column is always addressed as "año".
cols = [col.lower() for col in raw.columns]
raw.columns = cols

# The year arrives as text such as "MAT_2025": keep the last four characters and
# convert them to int. Casting through str means an already-converted integer
# column passes through unchanged on a re-run.
raw["año"] = raw["año"].astype(str).str[-4:].astype(int)

In [20]:
# Distinct values found in the "carrera clasificación nivel 2" column
raw["carrera clasificación nivel 2"].unique()

array(['Carreras Técnicas', 'Doctorado', 'Carreras Profesionales',
       'Postítulo', 'Magister'], dtype=object)

# **First Chart: Evolution of total undergraduate and graduate students, 2007-2025**

This stacked area chart shows the total number of enrolled undergraduate, graduate and diploma students
in Chile for the period 2007-2025

In [21]:
# Total enrollment per year and global degree level
cols_1 = ["año", "nivel global"]
df_1 = raw.groupby(cols_1)["total matrícula"].sum().reset_index()

# Relabel the degree levels with the project helper (presumably a Spanish ->
# English translation; see utils), then expose the column under an English name
# so the legend reads "Degree Type".
df_1["nivel global"] = df_1.apply(translate_english_degrees, axis=1)
df_1 = df_1.rename(columns={"nivel global": "Degree Type"})

# Area chart of enrollment by degree type.
# - The x channel is built with alt.X so the channel class matches the channel.
# - No custom scale is set: alt.Scale only takes keyword arguments meaningfully
#   (positional values bind to `align`/`base`, not to a domain), and an ordinal
#   axis already shows exactly the years present in the data.
chart_1 = (
    alt.Chart(df_1)
    .mark_area()
    .encode(
        x=alt.X("año:O", axis=alt.Axis(title="Year")),
        y=alt.Y("total matrícula:Q", axis=alt.Axis(title="Total enrolled")),
        color="Degree Type:N",
    )
    .properties(title="Total enrolled students by Degree Type for 2007-2025")
)

chart_1

# **Second Chart: Share of professional and technical degrees among undergraduates**

The following chart shows, among undergraduates, the share of students enrolled in
technical degrees versus professional degrees


In [22]:
# Helper that translates degree-type labels to English
def translates_professional_undegrads(row):
    """Translate a Spanish degree-type label into its English display name.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must expose the key "Degree type of undergrads" holding the Spanish label.

    Returns
    -------
    str
        The English label. Any label that is not listed falls back to
        "Masters", which preserves the behaviour of the previous if/elif chain.
    """
    translations = {
        "Carreras Profesionales": "Professionals",
        "Carreras Técnicas": "Technical",
        # The dataset spells this value "Doctorado"; the plural form is kept
        # as well so that callers relying on the old spelling still work.
        "Doctorado": "PhD",
        "Doctorados": "PhD",
        "Postítulo": "Diplomas",
        "Magister": "Masters",
    }
    return translations.get(row["Degree type of undergrads"], "Masters")

# Degree types to keep, spelled as they appear in the data
degrees = ["Carreras Profesionales", "Carreras Técnicas"]

# Total enrollment per year and degree classification
cols_2 = ["año", "carrera clasificación nivel 2"]
df_2 = raw.groupby(cols_2)["total matrícula"].sum().reset_index()

# Rename the classification column to its display name, keep only the selected
# degree types, and translate the labels. The explicit .copy() makes the
# filtered frame independent, so the column assignment below does not write
# into a slice (which can raise SettingWithCopyWarning).
df_2 = df_2.rename(columns={"carrera clasificación nivel 2": "Degree type of undergrads"})
df_2 = df_2[df_2["Degree type of undergrads"].isin(degrees)].copy()
df_2["Degree type of undergrads"] = df_2.apply(translates_professional_undegrads, axis=1)

# Bars normalised to 100% per year, so each bar shows the share of each degree type
chart_2 = (
    alt.Chart(df_2)
    .mark_bar()
    .encode(
        x=alt.X("año:O", axis=alt.Axis(title="Year")),
        y=alt.Y("sum(total matrícula)", axis=alt.Axis(title="Total enrolled")).stack("normalize"),
        color="Degree type of undergrads",
    )
    .properties(title="Share of Technical and Professional enrolled undergrads")
)

chart_2


# **Third Chart: Evolution of total male and female enrollment**

The following chart shows the growth in the number of male and female students during 2007-2025


In [23]:
cols_3 = ["año"]

# Sum the men and women enrollment columns per year, then reshape to long
# format: one row per (year, source column), with the amount in "total matrícula".
df_3 = (
    raw.groupby(cols_3)[["total matrícula hombres", "total matrícula mujeres"]]
    .sum()
    .reset_index()
    .melt(
        id_vars="año",
        value_vars=["total matrícula hombres", "total matrícula mujeres"],
        var_name="enrolled type",
        value_name="total matrícula",
    )
)

# One line per enrollment column, coloured by "enrolled type"
chart_3 = (
    alt.Chart(df_3)
    .mark_line()
    .encode(
        x=alt.X("año:O", axis=alt.Axis(title="Year")),
        y=alt.Y("sum(total matrícula)", axis=alt.Axis(title="Total enrolled")),
        color="enrolled type",
    )
)

chart_3


# **Fourth Chart: Differences in study areas between men and women**

The following charts show the ranking of knowledge areas by number of enrolled
students, comparing men and women for the 2007-2025 period

In [24]:
# Women's enrollment per year and knowledge area
cols_4 = ["año", "área del conocimiento"]
col_add = "total matrícula mujeres"
df_4 = raw.groupby(cols_4)[col_add].sum().reset_index()

# Rank the knowledge areas within each year (rank 1 = highest enrollment).
# A grouped rank replaces the per-year loop with repeated pd.concat onto an
# empty frame: it gives the same ranks, keeps the "año" dtype intact and avoids
# concatenating with an empty DataFrame.
df_4_final = df_4.assign(
    rank=df_4.groupby("año")[col_add].rank(ascending=False)
)

chart_4 = (
    alt.Chart(df_4_final)
    .mark_line()
    .encode(
        x=alt.X("año:O").title("Year"),
        y=alt.Y("rank").title("Rank"),
        color="área del conocimiento",
    )
    .properties(title="Ranking of knowledge areas by enrolled women, 2007-2025")
)
chart_4

In [25]:
# Men's enrollment per year and knowledge area
cols_44 = ["año", "área del conocimiento"]
col_add_44 = "total matrícula hombres"
df_44 = raw.groupby(cols_44)[col_add_44].sum().reset_index()

# Rank the knowledge areas within each year (rank 1 = highest enrollment).
# A grouped rank replaces the per-year loop with repeated pd.concat onto an
# empty frame: it gives the same ranks, keeps the "año" dtype intact and avoids
# concatenating with an empty DataFrame.
df_44_final = df_44.assign(
    rank=df_44.groupby("año")[col_add_44].rank(ascending=False)
)

# Axis titles match the women's ranking chart so the two can be compared directly
chart_44 = (
    alt.Chart(df_44_final)
    .mark_line()
    .encode(
        x=alt.X("año:O").title("Year"),
        y=alt.Y("rank").title("Rank"),
        color="área del conocimiento",
    )
    .properties(title="Ranking of knowledge areas by enrolled men, 2007-2025")
)
chart_44

# **Fifth Chart: Where in Chile students are enrolled in 2025**

The following chart shows the number of university students in each region of
Chile, ordered from north to south, in 2025

In [26]:
cols_5 = ["año", "región"]
value_5 = "total matrícula"

# Sum enrollment per (year, region) and keep only the rows for 2025
df_5 = raw.groupby(cols_5)[value_5].sum().reset_index()
df_5 = df_5.loc[lambda totals: totals["año"] == 2025]

# Display order for the y axis (labelled "from North to South" on the chart)
order_regions = [
    "Arica y Parinacota",
    "Tarapacá",
    "Antofagasta",
    "Atacama",
    "Coquimbo",
    "Valparaíso",
    "Metropolitana",
    "Lib. Gral. B. O'Higgins",
    "Maule",
    "Ñuble",
    "Biobío",
    "La Araucanía",
    "Los Ríos",
    "Los Lagos",
    "Aysén",
    "Magallanes",
]

# Horizontal bars: one per region, sorted by the explicit list above
chart_5 = (
    alt.Chart(df_5)
    .mark_bar()
    .encode(
        x=alt.X("total matrícula").title("Total enrolled"),
        y=alt.Y("región", sort=order_regions).title("Regions of Chile from North to South"),
    )
    .properties(title="Total enrolled university students by region for 2025")
)
chart_5

# **Sixth Chart: Heatmap of degree duration by knowledge area**

The following chart shows a heatmap of degree duration by knowledge area.

In [13]:
# Display the frame again to inspect the available columns
raw

Unnamed: 0,año,total matrícula,total matrícula mujeres,total matrícula hombres,total matrícula no binarios o indefinidos,total matrícula primer año,total matrícula mujeres primer año,total matrícula hombres primer año,total matrícula no binarios o indefinidos primer año,clasificación institución nivel 1,...,tes particular subvencionado,tes particular pagado,tes corp. de administración delegada,tes servicio local educacion,total tes,% cobertura tes,tipo establecimiento hc,tipo establecimiento tp,clas_est adulto,clas_est joven
0,2025,245,179.0,66.0,,94.0,67.0,27.0,,Institutos Profesionales,...,162.0,2.0,11.0,7.0,228,"93,10%",142.0,86.0,43.0,185.0
1,2025,86,18.0,68.0,,29.0,3.0,26.0,,Institutos Profesionales,...,58.0,2.0,,,74,"86,00%",44.0,30.0,12.0,62.0
2,2025,15,3.0,12.0,,,,,,Institutos Profesionales,...,7.0,,1.0,,12,"80,00%",8.0,4.0,5.0,7.0
3,2025,18,14.0,4.0,,6.0,5.0,1.0,,Universidades,...,10.0,2.0,,,15,"83,30%",14.0,1.0,,15.0
4,2025,114,19.0,95.0,,28.0,4.0,24.0,,Institutos Profesionales,...,51.0,1.0,1.0,1.0,103,"90,40%",49.0,54.0,23.0,80.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264253,2007,18,13.0,5.0,,18.0,13.0,5.0,,Universidades,...,5.0,2.0,,,10,"55,60%",9.0,1.0,,10.0
264254,2007,13,7.0,6.0,,13.0,7.0,6.0,,Universidades,...,4.0,1.0,,,7,"53,80%",5.0,2.0,,7.0
264255,2007,16,10.0,6.0,,,,,,Universidades,...,5.0,,,,8,"50,00%",6.0,2.0,1.0,7.0
264256,2007,123,61.0,62.0,,44.0,23.0,21.0,,Universidades,...,33.0,8.0,,,58,"47,20%",50.0,8.0,4.0,54.0
