In [2]:
# Load packages
import pandas as pd
import altair as alt
import numpy as np
from datetime import datetime, time

# Read the region table from the local JSON file (path is relative to this notebook)
# and preview its first rows.
regions_json = "../map/data/regions.json"
region_count = pd.read_json(regions_json)
region_count.head()

Unnamed: 0,name,value
0,AFG,1
1,ALB,2
2,DZA,4
3,ASM,5
4,AND,2


In [3]:
# Display names for the numeric region codes stored in the 'value' column.
region_names = {
    1: 'South Asia',
    2: 'Europe & Central Asia',
    3: 'Latin America & Caribbean',
    4: 'Middle East & North Africa',
    5: 'East Asia & Pacific',
    6: 'Sub-Saharan Africa',
    7: 'North America',
}

# Summarise the table as two columns: 'region' (the code, then its display name) and
# 'count' (how many rows carry that code).
# NOTE: this rebinds region_count from the loaded table to the summary, so re-running
# this cell without re-running the load cell fails — the summary has no 'value' column.
region_count = (
    region_count['value']
    .value_counts()
    .rename_axis('region')
    .reset_index(name='count')
    .assign(region=lambda counts: counts['region'].replace(region_names))
)

region_count

Unnamed: 0,region,count
0,Europe & Central Asia,58
1,Sub-Saharan Africa,48
2,Latin America & Caribbean,42
3,East Asia & Pacific,37
4,Middle East & North Africa,21
5,South Asia,8
6,North America,3


In [4]:
# Load the wide indicator table; per the preview below it has one column per year.
overtime = pd.read_csv("../cleaned_data/filtered_linked_plot_data.csv")

# The three female gross-enrolment indicators to keep, mapped to the identifier-style
# labels used by the plotting cells.
indicator_labels = {
    'Gross enrolment ratio, primary, female (%)': 'Gross_enrolment_ratio_primary_female_pct',
    'Gross enrolment ratio, secondary, female (%)': 'Gross_enrolment_ratio_secondary_female_pct',
    'Gross enrolment ratio, tertiary, female (%)': 'Gross_enrolment_ratio_tertiary_female_pct',
}

# Drop the code column, keep only the selected indicators, then relabel them.
# Doing this as one chain builds a new frame at each step, so the relabelling never
# assigns into a filtered slice (which is what raises SettingWithCopyWarning).
overtime = (
    overtime
    .drop(columns=['Country Code'])
    .loc[lambda df: df['Indicator Name'].isin(list(indicator_labels))]
    .assign(**{'Indicator Name': lambda df: df['Indicator Name'].replace(indicator_labels)})
)

overtime.head()

Unnamed: 0,Country Name,Indicator Name,1970,1971,1972,1973,1974,1975,1976,1977,...,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
19,Arab World,Gross_enrolment_ratio_primary_female_pct,52.192139,52.178249,53.66478,55.03175,55.691349,57.210602,59.0625,60.388969,...,88.470802,89.40609,90.549637,91.305931,92.448387,93.156799,93.979622,93.215538,95.375809,94.077911
21,Arab World,Gross_enrolment_ratio_secondary_female_pct,17.85453,17.76573,18.527479,19.38983,20.314341,21.43166,23.073299,24.960899,...,62.899658,63.38303,63.553699,62.551121,63.070641,64.340599,65.474861,68.150383,69.083763,67.672501
23,Arab World,Gross_enrolment_ratio_tertiary_female_pct,4.22603,4.19567,4.3734,4.62362,4.92048,5.3174,5.8386,6.12443,...,19.93504,21.27,22.141029,22.31143,23.95093,24.60832,25.289089,24.82563,26.558371,27.82209
49,East Asia & Pacific,Gross_enrolment_ratio_primary_female_pct,,,,95.340248,96.94413,103.485222,106.407097,104.285347,...,105.999313,102.404228,103.18116,105.970963,107.841438,108.558922,108.766289,108.365738,108.165413,107.407242
51,East Asia & Pacific,Gross_enrolment_ratio_secondary_female_pct,30.257,34.790771,38.464439,41.481781,40.309841,41.083889,45.030788,46.829559,...,65.36779,66.847008,69.119431,73.019508,75.792267,79.192047,81.339211,84.264297,86.537781,88.965439


In [5]:
# Reshape wide -> long: every column other than the two id columns becomes a
# ('Year', 'Value') pair on its own row.
over_melted = pd.melt(
    overtime,
    id_vars=['Country Name', 'Indicator Name'],
    var_name='Year',
    value_name='Value',
)

In [6]:
# Keep only the rows whose 'Country Name' is one of the seven regions below; any other
# entry (e.g. 'Arab World', seen in the preview above) is dropped.
regions = [
    "East Asia & Pacific",
    "Europe & Central Asia",
    'Latin America & Caribbean',
    'Middle East & North Africa',
    'North America',
    'South Asia',
    'Sub-Saharan Africa',
]
is_region = over_melted['Country Name'].isin(regions)
over_melted = over_melted.loc[is_region]

In [7]:
# Sanity check of the long-format schema; a bare last expression is displayed by the
# notebook, so no print() is needed.
over_melted.columns

Index(['Country Name', 'Indicator Name', 'Year', 'Value'], dtype='object')


In [8]:
# List the distinct indicator labels that survived filtering, in order of first appearance.
pd.unique(over_melted['Indicator Name'])

array(['Gross_enrolment_ratio_primary_female_pct',
       'Gross_enrolment_ratio_secondary_female_pct',
       'Gross_enrolment_ratio_tertiary_female_pct'], dtype=object)

In [9]:
import altair as alt
# Use the VegaFusion data transformer for the charts built below.
alt.data_transformers.enable("vegafusion")

# Custom colour palette shared by every chart in this cell.
color_scheme = [
    '#0099FF', '#009643', '#CB4349', '#FF818C', '#FCC92B', '#FD5109', '#CE6DD3',
    '#FA8F38', '#8538B1', '#4983F8', '#A9DDD6', '#A2F17D', '#0C0582', '#960505',
]

# Bar chart of the per-region counts; axis titles are declared on the channels directly.
region_axis = alt.X('region', title='Region')
count_axis = alt.Y('count', title='Count')
region_color = alt.Color('region', scale=alt.Scale(range=color_scheme), title="Region")

bar1 = (
    alt.Chart(region_count)
    .mark_bar()
    .encode(x=region_axis, y=count_axis, color=region_color)
    .properties(title="Region Count", width=450, height=225)
)

def create_scatter(melted_df, indicator, title):
    """Build a Year-vs-Value scatter chart for a single indicator.

    Parameters
    ----------
    melted_df : pandas.DataFrame
        Long-format frame with 'Country Name', 'Indicator Name', 'Year' and 'Value' columns.
    indicator : str
        Value of 'Indicator Name' whose rows are plotted.
    title : str
        Used as both the chart title and the y-axis title.

    Returns
    -------
    alt.Chart
        Circle-mark chart, 450 wide by 167 high, coloured by 'Country Name' using the
        module-level ``color_scheme``.
    """
    # Rows belonging to the requested indicator only.
    rows = melted_df.loc[melted_df['Indicator Name'] == indicator]

    # The x domain is fixed so that charts built by this function share the same year range.
    year_axis = alt.X('Year:Q', title="Year", scale=alt.Scale(domain=[1968, 2015]))
    value_axis = alt.Y('Value:Q', title=title)
    entity_color = alt.Color(
        'Country Name',
        scale=alt.Scale(range=color_scheme),
        title="Country Name",
    )

    points = alt.Chart(rows).mark_circle(size=45).encode(
        x=year_axis,
        y=value_axis,
        color=entity_color,
    )
    return points.properties(title=title, width=450, height=167)

# One scatter per education level. Indicator label and title are both derived from the
# level, so all three titles follow the same pattern (the primary chart previously
# showed the raw identifier as its title and y-axis label).
scatter1, scatter2, scatter3 = (
    create_scatter(
        over_melted,
        f'Gross_enrolment_ratio_{level}_female_pct',
        f'Gross Enrolment Ratio, {level.capitalize()}, Female (%) Over Time',
    )
    for level in ('primary', 'secondary', 'tertiary')
)

# Combine the charts: the bar chart on the left, the three scatters stacked on the right.
chart1 = alt.vconcat(bar1)
chart2 = alt.vconcat(scatter1, scatter2, scatter3)
final_chart = (
    alt.hconcat(chart1, chart2, spacing=5)
    .configure(background="#F1F0DA")
    .configure_title(fontSize=15)
)

final_chart

In [11]:
import altair as alt
import pandas as pd

# Custom colour palette used by the charts defined in this cell.
color_scheme = [
    '#0099FF', '#009643', '#CB4349', '#FF818C', '#FCC92B', '#FD5109', '#CE6DD3',
    '#FA8F38', '#8538B1', '#4983F8', '#A9DDD6', '#A2F17D', '#0C0582', '#960505',
]

# Sample dataframes
# region_count = pd.DataFrame({'region': ['A', 'B', 'C'], 'count': [10, 20, 30]})
# over_melted = pd.DataFrame({'Year': [2010, 2011, 2012], 'Value': [100, 200, 300], 'Country Name': ['USA', 'UK', 'Canada'], 'Indicator Name': ['Gross_enrolment_ratio_primary_female_pct']*3})

# Create bar charts for region counts
def create_bar_chart(data, title):
    """Build a bar chart of counts per region.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'region' column (x axis and colour) and a 'count' column (bar height).
    title : str
        Chart title.

    Returns
    -------
    alt.Chart
        Bar chart, 450 wide by 225 high, coloured by 'region' using the module-level
        ``color_scheme``.
    """
    return alt.Chart(data).mark_bar().encode(
        # Explicit axis titles, matching the earlier bar chart in this notebook;
        # without them the axes show the raw column names.
        x=alt.X('region', title='Region'),
        y=alt.Y('count', title='Count'),
        color=alt.Color('region', scale=alt.Scale(range=color_scheme), title="Region")
    ).properties(
        title=title,
        width=450,
        height=225
    )

# Define the interaction: a multi-value point selection on the 'region' field, driven by
# clicks on the legend. selection_point / add_params are the Altair 5 replacements for
# the deprecated selection_multi / add_selection.
selection = alt.selection_point(fields=['region'], bind='legend')

# Attach the selection to the bar chart and fade the bars that are not selected, so the
# legend interaction has a visible effect.
bar1 = (
    create_bar_chart(region_count, "Region Count")
    .encode(opacity=alt.condition(selection, alt.value(1.0), alt.value(0.2)))
    .add_params(selection)
)

def create_scatter(data, indicator, title):
    """Build a Year-vs-Value scatter for one indicator, linked to the shared selection.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format frame with 'Country Name', 'Indicator Name', 'Year' and 'Value' columns.
    indicator : str
        Value of 'Indicator Name' whose rows are plotted.
    title : str
        Used as both the chart title and the y-axis title.

    Returns
    -------
    alt.Chart
        Circle-mark chart, 450 wide by 167 high, with the module-level ``selection``
        attached and driving point opacity.
    """
    # Filter the DataFrame for the specified indicator. The shared selection is projected
    # on the field 'region' (the bar chart's column name), so expose 'Country Name' under
    # that name here; rename() returns a new frame and leaves `data` untouched.
    filtered_df = (
        data.loc[data['Indicator Name'] == indicator]
        .rename(columns={'Country Name': 'region'})
    )

    # Create the scatter plot with the fixed axis range.
    return alt.Chart(filtered_df).mark_circle(size=45).encode(
        x=alt.X('Year:Q', title="Year", scale=alt.Scale(domain=[1968, 2015])),
        y=alt.Y('Value:Q', title=title),
        color=alt.Color('region:N', scale=alt.Scale(range=color_scheme), title="Country Name"),
        # Selected points keep Vega-Lite's default circle opacity (0.7); the rest fade out.
        opacity=alt.condition(selection, alt.value(0.7), alt.value(0.1)),
    ).properties(
        title=title,
        width=450,
        height=167
    ).add_params(selection)

# One scatter per education level. Indicator label and title are both derived from the
# level, so all three titles follow the same pattern (the primary chart previously
# showed the raw identifier as its title and y-axis label).
scatter1, scatter2, scatter3 = (
    create_scatter(
        over_melted,
        f'Gross_enrolment_ratio_{level}_female_pct',
        f'Gross Enrolment Ratio, {level.capitalize()}, Female (%) Over Time',
    )
    for level in ('primary', 'secondary', 'tertiary')
)

# Combine the charts: the bar chart on the left, the three scatters stacked on the right.
chart1 = alt.vconcat(bar1)
chart2 = alt.vconcat(scatter1, scatter2, scatter3)
final_chart = (
    alt.hconcat(chart1, chart2, spacing=5)
    .configure(background="#F1F0DA")
    .configure_title(fontSize=15)
)

final_chart


