In [1]:
## Import Needed Libraries

import pandas as pd
import numpy as np
import seaborn as sns
import math
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt

In [2]:
# Load the UNESCO tertiary-graduates CSV. The path is relative, so the file
# must sit in the notebook's working directory.
file_path = "UNESCO_Tertiary_Graduates_by_field.csv"
df = pd.read_csv(file_path)

# Print the raw (rows, columns) shape, then let the column index render as the
# cell's output. (A bare df.head() in the middle of a cell displays nothing,
# so it is not used here.)
print(df.shape)

df.columns

(520, 9)


Index(['NATMON_IND', 'Indicator', 'LOCATION', 'Country', 'TIME', 'Time',
       'Value', 'Flag Codes', 'Flags'],
      dtype='object')

In [3]:
# Code/metadata columns that are not needed for the maps; after the drop only
# Country, Time and Value remain.
columns_to_drop = ['NATMON_IND', 'Indicator', 'LOCATION', 'TIME', 'Flag Codes', 'Flags']

# errors="ignore" keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop() would raise KeyError.
df = df.drop(columns=columns_to_drop, errors="ignore")

# Round values to 2 decimal places (assign returns a new frame instead of
# writing into the existing one).
df = df.assign(Value=df['Value'].round(2))

# Preview the cleaned frame as the cell's output.
df.head(30)

Unnamed: 0,Country,Time,Value
0,Cambodia,2019,23.2
1,Argentina,2018,16.05
2,Argentina,2019,15.42
3,Argentina,2020,14.15
4,Argentina,2021,15.04
5,Mozambique,2017,9.03
6,Mozambique,2018,9.56
7,Republic of Korea,2017,29.34
8,Republic of Korea,2019,29.61
9,Republic of Korea,2020,30.18


In [4]:
# Slice the frame into one subset per reporting year and print each subset's
# (rows, columns) shape, newest year first.
subsets_by_year = {}
for yr in (2023, 2022, 2021, 2020, 2019, 2018, 2017):
    subsets_by_year[yr] = df.loc[df['Time'] == yr]
    print(f"{yr}:", subsets_by_year[yr].shape)

# Bind the per-year names (df_17 is used by the single-year 2017 map); dicts
# preserve insertion order, so the values unpack newest year first.
df_23, df_22, df_21, df_20, df_19, df_18, df_17 = subsets_by_year.values()

2023: (4, 3)
2022: (48, 3)
2021: (86, 3)
2020: (101, 3)
2019: (101, 3)
2018: (93, 3)
2017: (87, 3)


## Percentage of Graduates from STEM Programmes in Tertiary Education by Country and Year

In [9]:


# Years to plot, one choropleth per year.
years = [2017, 2018, 2019, 2020, 2021, 2022] # dropped 2023 because of not enough data

# Subplot grid: fixed column count, as many rows as needed to fit every year.
num_cols = 2  # Number of columns
num_rows = math.ceil(len(years) / num_cols)  # Ceiling division to get number of rows

# One colour range shared by every map. Without it each trace autoscales to
# its own year's min/max, so the same colour means different percentages in
# different panels and the single displayed colourbar only describes the last
# year.
df_plotted = df[df['Time'].isin(years)]
z_min = df_plotted['Value'].min()
z_max = df_plotted['Value'].max()

# Create subplots. Each spec dict is built separately; `[[spec] * cols] * rows`
# would reuse one dict object (and one row list) for every grid cell.
fig = make_subplots(
    rows=num_rows,
    cols=num_cols,
    subplot_titles=[str(year) for year in years],
    specs=[[{"type": "choropleth"} for _ in range(num_cols)] for _ in range(num_rows)],
)

# Create a map for each year, filling the grid left-to-right, top-to-bottom.
last_index = len(years) - 1
for i, year in enumerate(years):
    df_year = df[df['Time'] == year]
    choropleth = go.Choropleth(
        locations=df_year['Country'],
        locationmode="country names",
        z=df_year['Value'],
        zmin=z_min,
        zmax=z_max,
        colorscale="Plasma",
        colorbar_title="Value",
        # All traces share one range, so a single colourbar (on the last
        # subplot) is enough.
        showscale=(i == last_index),
    )
    # Plotly rows/cols are 1-based.
    row = i // num_cols + 1
    col = i % num_cols + 1
    fig.add_trace(choropleth, row=row, col=col)

# Update layout
fig.update_layout(
    title_text="Percentage of Graduates from STEM Programmes in Tertiary Education by Country and Year",
    height=600 * num_rows,  # height based on the number of rows
    width=1000,
)

fig.show()


In [None]:

# Stand-alone world map for 2017: countries are matched by name and shaded by
# the 'Value' column of df_17 on the sequential Plasma scale.
map_2017_options = {
    "locations": "Country",
    "locationmode": "country names",
    "color": "Value",
    "hover_name": "Country",
    "color_continuous_scale": px.colors.sequential.Plasma,
    "title": "Percentage Values by Country for 2017",
    "labels": {'Value': 'Percentage Value'},
}
fig = px.choropleth(df_17, **map_2017_options)

fig.show()

In [None]:
# Display the list of years used for the multi-year choropleth grid.
years