# The Great Divergence
## Get the data

In [5]:
!wget https://www.rug.nl/ggdc/historicaldevelopment/maddison/data/mpd2020.xlsx -P ../data/

--2025-04-10 13:25:12--  https://www.rug.nl/ggdc/historicaldevelopment/maddison/data/mpd2020.xlsx
Resolving www.rug.nl (www.rug.nl)... 129.125.2.51
Connecting to www.rug.nl (www.rug.nl)|129.125.2.51|:443... connected.
HTTP request sent, awaiting response... 200 
Length: 1764793 (1.7M) [application/vnd.openxmlformats-officedocument.spreadsheetml.sheet]
Saving to: ‘../data/mpd2020.xlsx’


2025-04-10 13:25:13 (1.21 MB/s) - ‘../data/mpd2020.xlsx’ saved [1764793/1764793]



## Time series
### Regional data
#### GDP per capita

In [1]:
import pandas as pd

df = pd.read_excel('../data/mpd2020.xlsx', sheet_name='Regional data', header=1)
display(df)

Unnamed: 0,Region,Western Europe,Eastern Europe,Western Offshoots,Latin America,Asia (East),Asia (South and South-East),Middle East,Sub-Sahara Africa,Western Europe.1,Western Offshoots.1,Eastern Europe.1,Latin America.1,Asia (South and South-East).1,Asia (East).1,Middle East.1,Sub-Sahara Africa.1,World,World GDP pc
0,Year,,,,,,,,,,,,,,,,,,
1,1820,2306.96669,818.0,2513.045418,952.816821,1088.584951,929.0,974.0,800.0,132371.0,11231.0,90785.0,20099.0,255695.0,427757.0,35600.0,60000.0,1033538.0,1101.565459
2,1830,2384.314573,942.0,,,,,,,,,,,,,,,,
3,1840,2579.534884,907.0,,,,,,,,,,,,,,,,
4,1850,2678.0,985.0,3474.410149,1080.657319,899.657658,929.0,1000.0,800.0,165348.0,26760.0,117661.0,29485.0,278706.0,455774.0,42000.0,65000.0,1180734.0,1225.081067
5,1860,3034.0,1358.0,4214.440703,1588.0,,,,,,,,,,,,,,
6,1870,3301.304919,1575.0,4647.453901,1318.541091,989.477165,850.346572,1165.0,800.0,186566.0,46088.0,141626.0,37692.0,327692.0,409153.0,49839.0,70000.0,1268656.0,1497.982325
7,1880,3585.154363,1886.0,6019.123928,,,,,,,,,,,,,,,
8,1890,4079.313847,2204.0,6480.955612,1672.812962,,950.999236,,,,,,,,,,,,
9,1900,4724.295785,2700.0,7740.850451,1750.626743,1086.328445,994.419007,1300.0,850.0,232433.0,86396.0,194693.0,61004.0,366060.0,457058.0,56000.0,86000.0,1539644.0,2212.043316


In [2]:
df_regional_gdp = df.copy()
df_regional_gdp = df_regional_gdp.iloc[:, :9]  # Keep only the gdp per capita data
df_regional_gdp = df_regional_gdp.rename(columns={df_regional_gdp.columns[0]: 'Year'})

display(df_regional_gdp)

Unnamed: 0,Year,Western Europe,Eastern Europe,Western Offshoots,Latin America,Asia (East),Asia (South and South-East),Middle East,Sub-Sahara Africa
0,Year,,,,,,,,
1,1820,2306.96669,818.0,2513.045418,952.816821,1088.584951,929.0,974.0,800.0
2,1830,2384.314573,942.0,,,,,,
3,1840,2579.534884,907.0,,,,,,
4,1850,2678.0,985.0,3474.410149,1080.657319,899.657658,929.0,1000.0,800.0
5,1860,3034.0,1358.0,4214.440703,1588.0,,,,
6,1870,3301.304919,1575.0,4647.453901,1318.541091,989.477165,850.346572,1165.0,800.0
7,1880,3585.154363,1886.0,6019.123928,,,,,
8,1890,4079.313847,2204.0,6480.955612,1672.812962,,950.999236,,
9,1900,4724.295785,2700.0,7740.850451,1750.626743,1086.328445,994.419007,1300.0,850.0


In [3]:
import altair as alt

# First, let's create a visualization that shows all regions without filtering
chart = alt.Chart(
    df_regional_gdp.reset_index()
    .melt(id_vars='Year', var_name='Region', value_name='GDP_per_capita')
    .query("Region != 'index'")
).mark_line(
    strokeWidth=3
).encode(
    x=alt.X('Year:Q', title='Year'),
    y=alt.Y('GDP_per_capita:Q', title='GDP per Capita (in 2011 US$)'),
    color=alt.Color(
        'Region:N',
        legend=alt.Legend(
            orient='right',
            title='Region',
            labelFontSize=12
        )
    ),
    tooltip=['Year', 'Region', 'GDP_per_capita']
).properties(
    width=800,
    height=400,
    title={
        'text': 'GDP per Capita by Region Over Time',
        'fontSize': 20,
        'anchor': 'middle'
    }
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_view(
    strokeWidth=0
).configure_legend(
    strokeColor='gray',
    padding=10,
    cornerRadius=5
)

chart


#### Population


In [5]:
first_col = df.iloc[:, 0]
region_col_names = df.columns[9:]

region_cols = df[region_col_names]

df_regional_population = pd.concat([first_col, region_cols], axis=1)
df_regional_population.drop(columns=["World", "World GDP pc"], inplace=True)
df_regional_population = df_regional_population.rename(columns={df_regional_population.columns[0]: 'Year'})
display(df_regional_population)

Unnamed: 0,Year,Western Europe.1,Western Offshoots.1,Eastern Europe.1,Latin America.1,Asia (South and South-East).1,Asia (East).1,Middle East.1,Sub-Sahara Africa.1
0,Year,,,,,,,,
1,1820,132371.0,11231.0,90785.0,20099.0,255695.0,427757.0,35600.0,60000.0
2,1830,,,,,,,,
3,1840,,,,,,,,
4,1850,165348.0,26760.0,117661.0,29485.0,278706.0,455774.0,42000.0,65000.0
5,1860,,,,,,,,
6,1870,186566.0,46088.0,141626.0,37692.0,327692.0,409153.0,49839.0,70000.0
7,1880,,,,,,,,
8,1890,,,,,,,,
9,1900,232433.0,86396.0,194693.0,61004.0,366060.0,457058.0,56000.0,86000.0


In [6]:
# Fill NaN values using forward fill strategy
df_regional_population = df_regional_population.ffill()
# Create the population visualization with filled data and linear y-axis
population_chart = alt.Chart(
    df_regional_population.reset_index()
    .melt(id_vars='Year', var_name='Region', value_name='Population')
    .query("Region != 'index'")
).mark_line(
    strokeWidth=3
).encode(
    x=alt.X('Year:Q',
            title='Year',
            scale=alt.Scale(domain=[1800, 2020])),
    y=alt.Y('Population:Q',
            title='Population (millions)'),  # Removed log scale
    color=alt.Color(
        'Region:N',
        legend=alt.Legend(
            orient='right',
            title='Region',
            labelFontSize=12
        )
    ),
    tooltip=['Year', 'Region', 'Population']
).properties(
    width=800,
    height=400,
    title={
        'text': 'Population by Region Over Time (1820-2018)',
        'fontSize': 20,
        'anchor': 'middle'
    }
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_view(
    strokeWidth=0
)

population_chart

# Countries

In [7]:
country_df = pd.read_excel('../data/mpd2020.xlsx', sheet_name='Full data', header=1)
display(country_df)

Unnamed: 0,AFG,Afghanistan,1820,Unnamed: 3,3280
0,AFG,Afghanistan,1870,,4207.00000
1,AFG,Afghanistan,1913,,5730.00000
2,AFG,Afghanistan,1950,1156.0000,8150.00000
3,AFG,Afghanistan,1951,1170.0000,8284.00000
4,AFG,Afghanistan,1952,1189.0000,8425.00000
...,...,...,...,...,...
21676,ZWE,Zimbabwe,2014,1594.0000,13313.99205
21677,ZWE,Zimbabwe,2015,1560.0000,13479.13812
21678,ZWE,Zimbabwe,2016,1534.0000,13664.79457
21679,ZWE,Zimbabwe,2017,1582.3662,13870.26413


In [8]:
# Rename the columns in the country dataframe
country_df.columns = ['Country code', 'Country', 'Year', 'GDP', 'Population']

# Display the dataframe with the new column names
display(country_df)

Unnamed: 0,Country code,Country,Year,GDP,Population
0,AFG,Afghanistan,1870,,4207.00000
1,AFG,Afghanistan,1913,,5730.00000
2,AFG,Afghanistan,1950,1156.0000,8150.00000
3,AFG,Afghanistan,1951,1170.0000,8284.00000
4,AFG,Afghanistan,1952,1189.0000,8425.00000
...,...,...,...,...,...
21676,ZWE,Zimbabwe,2014,1594.0000,13313.99205
21677,ZWE,Zimbabwe,2015,1560.0000,13479.13812
21678,ZWE,Zimbabwe,2016,1534.0000,13664.79457
21679,ZWE,Zimbabwe,2017,1582.3662,13870.26413


In [9]:
import altair as alt
import ipywidgets as widgets
from IPython.display import display

# Create a dropdown widget for country selection
countries = sorted(country_df['Country'].unique())
country_selector = widgets.Dropdown(
    options=countries,
    description='Country:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='300px')
)

def create_dual_axis_chart(country_name):
    # Filter data for the selected country
    country_data = country_df[country_df['Country'] == country_name].copy()

    # Fill NaN values using backward fill strategy
    country_data = country_data.bfill()

    # Create the GDP per capita chart (left y-axis)
    gdp_chart = alt.Chart(country_data).mark_line(
        color='#1f77b4',
        strokeWidth=3
    ).encode(
        x=alt.X('Year:Q', title='Year'),
        y=alt.Y('GDP:Q',
                title='GDP per capita (2011 US$)',
                axis=alt.Axis(titleColor='#1f77b4')),
        tooltip=['Year', 'GDP']
    )

    # Create the population chart (right y-axis)
    pop_chart = alt.Chart(country_data).mark_line(
        color='#ff7f0e',
        strokeWidth=3
    ).encode(
        x=alt.X('Year:Q'),
        y=alt.Y('Population:Q',
                axis=alt.Axis(titleColor='#ff7f0e'),
                title='Population'),
        tooltip=['Year', 'Population']
    )

    chart = alt.layer(gdp_chart, pop_chart).resolve_scale(
        y='independent'
    ).properties(
        width=800,
        height=400,
        title=f'GDP per Capita and Population for {country_name} Over Time'
    )

    return chart

def update_chart(change):
    with chart_output:
        chart_output.clear_output(wait=True)
        display(create_dual_axis_chart(change['new']))

# Connect the widget to the update function
country_selector.observe(update_chart, names='value')

# Create an output widget to display the chart
chart_output = widgets.Output()

# Display the widget and initial chart
display(country_selector, chart_output)

# Show initial chart with first country
update_chart({'new': countries[0]})

Dropdown(description='Country:', layout=Layout(width='300px'), options=('Afghanistan', 'Albania', 'Algeria', '…

Output()