In [3]:
import warnings
warnings.filterwarnings("ignore")

In [10]:
import pandas as pd
# Read the raw SSP GDP table from disk, then preview its first five rows.
df = pd.read_csv("raw/sspgdp.csv")
df.head(n=5)

Unnamed: 0,Model,Scenario,Region,Variable,Unit,2010,2015,2020,2025,2030,...,2055,2060,2065,2070,2075,2080,2085,2090,2095,2100
0,OECD ENV-Growth 2023,SSP1,Aruba,GDP|PPP,billion USD_2017/yr,,,3.11606,4.60237,4.88357,...,7.47681,8.17492,8.92051,9.6737,10.4262,11.1708,11.892,12.5779,13.2108,13.7626
1,OECD ENV-Growth 2023,SSP1,Angola,GDP|PPP,billion USD_2017/yr,,,201.395,227.037,272.628,...,869.5,1101.9,1383.07,1712.61,2090.41,2511.58,2971.55,3466.53,3989.2,4529.67
2,OECD ENV-Growth 2023,SSP1,Albania,GDP|PPP,billion USD_2017/yr,,,38.6186,48.8271,58.0645,...,103.257,108.274,113.047,117.746,121.665,124.77,127.062,128.632,129.482,129.816
3,OECD ENV-Growth 2023,SSP1,United Arab Emirates,GDP|PPP,billion USD_2017/yr,,,628.455,792.174,983.035,...,1529.24,1563.54,1587.76,1594.23,1579.56,1550.22,1508.37,1459.36,1409.41,1364.09
4,OECD ENV-Growth 2023,SSP1,Argentina,GDP|PPP,billion USD_2017/yr,,,750.58,861.741,976.732,...,1835.6,2056.5,2287.45,2522.03,2751.28,2969.72,3174.74,3363.23,3529.76,3667.27


In [8]:
# Discard the descriptive columns and the 2010/2015 snapshots so that only the
# Scenario/Region keys and the remaining year columns are left.
# NOTE: not idempotent -- re-running this cell raises KeyError because the
# columns are already gone.
columns_to_discard = ['Model', 'Variable', 'Unit', "2010", "2015"]
df = df.drop(columns_to_discard, axis=1)

In [4]:
# Collapse the table to one row per (Scenario, Region) by summing every
# remaining column within each pair.
# NOTE(review): sum() reports 0 for a group whose values are all NaN; confirm
# that is intended (min_count=1 would keep NaN instead).
grouping_keys = ['Scenario', 'Region']
df = df.groupby(grouping_keys, as_index=False).sum()

In [5]:
# Map each scenario label found in the GDP table to its own sub-frame,
# re-indexed from zero.
scenario_dfs = {
    label: df.loc[df['Scenario'].eq(label)].reset_index(drop=True)
    for label in df['Scenario'].unique()
}

In [7]:
# Bind one variable per SSP scenario to its GDP sub-frame.
ssp1_df, ssp2_df, ssp3_df, ssp4_df, ssp5_df = (
    scenario_dfs[label] for label in ('SSP1', 'SSP2', 'SSP3', 'SSP4', 'SSP5')
)

In [9]:
def interpolate_annual(df):
    """Expand a wide table of multi-year snapshots to one column per year.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame holding the identifier columns ``'Scenario'`` and
        ``'Region'``; every other column must be a year label convertible
        with ``int`` (e.g. ``'2020'``) containing numeric values.

    Returns
    -------
    pandas.DataFrame
        One row per (Scenario, Region), sorted by those keys, with the two
        identifier columns first, followed by one string-labelled column for
        every year between the earliest and latest input year (inclusive).
        Missing years are filled by linear interpolation between the
        surrounding known values; gaps after the last known value repeat it,
        and gaps before the first known value stay NaN (pandas'
        forward-direction default).

    Raises
    ------
    ValueError
        If ``df`` has no year columns, or a (Scenario, Region) pair occurs
        more than once.
    """
    id_cols = ['Scenario', 'Region']

    year_cols = [col for col in df.columns if col not in id_cols]
    if not year_cols:
        raise ValueError("interpolate_annual needs at least one year column")

    # Work on a keyed, sorted wide table instead of melt -> groupby.apply ->
    # pivot: that route depends on pandas behaviour that is deprecated
    # (grouping columns visible inside apply, interpolate on object columns)
    # and pays for one merge per group.
    wide = (
        df.dropna(subset=id_cols)  # rows without a key cannot be identified
        .set_index(id_cols, verify_integrity=True)  # duplicate keys -> ValueError
        .sort_index()
    )
    wide.columns = pd.Index([int(col) for col in wide.columns])

    # Insert the missing years as all-NaN columns, then fill along each row.
    full_years = pd.RangeIndex(wide.columns.min(), wide.columns.max() + 1)
    annual = wide.reindex(columns=full_years).interpolate(method='linear', axis=1)

    df_interp = annual.reset_index()
    df_interp.columns.name = None
    df_interp.columns = df_interp.columns.astype(str)
    return df_interp

# Interpolate every scenario's GDP table to annual resolution.
ssp1_interp, ssp2_interp, ssp3_interp, ssp4_interp, ssp5_interp = map(
    interpolate_annual, (ssp1_df, ssp2_df, ssp3_df, ssp4_df, ssp5_df)
)

# Persist each annual GDP table as its own CSV, without the row index.
for csv_path, annual_frame in (
    ("data/ssp1_gdp.csv", ssp1_interp),
    ("data/ssp2_gdp.csv", ssp2_interp),
    ("data/ssp3_gdp.csv", ssp3_interp),
    ("data/ssp4_gdp.csv", ssp4_interp),
    ("data/ssp5_gdp.csv", ssp5_interp),
):
    annual_frame.to_csv(csv_path, index=False)

In [10]:
# Show the first five rows of the annual SSP1 GDP table.
ssp1_interp.head(n=5)

Unnamed: 0,Scenario,Region,2020,2021,2022,2023,2024,2025,2026,2027,...,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100
0,SSP1,Albania,38.6186,40.6603,42.702,44.7437,46.7854,48.8271,50.67458,52.52206,...,128.802,128.972,129.142,129.312,129.482,129.5488,129.6156,129.6824,129.7492,129.816
1,SSP1,Algeria,463.722,479.6352,495.5484,511.4616,527.3748,543.288,553.8082,564.3284,...,3474.34,3555.3,3636.26,3717.22,3798.18,3883.172,3968.164,4053.156,4138.148,4223.14
2,SSP1,Angola,201.395,206.5234,211.6518,216.7802,221.9086,227.037,236.1552,245.2734,...,3571.064,3675.598,3780.132,3884.666,3989.2,4097.294,4205.388,4313.482,4421.576,4529.67
3,SSP1,Antigua and Barbuda,1.74094,1.859352,1.977764,2.096176,2.214588,2.333,2.404894,2.476788,...,11.08038,11.19996,11.31954,11.43912,11.5587,11.66084,11.76298,11.86512,11.96726,12.0694
4,SSP1,Argentina,750.58,772.8122,795.0444,817.2766,839.5088,861.741,884.7392,907.7374,...,3396.536,3429.842,3463.148,3496.454,3529.76,3557.262,3584.764,3612.266,3639.768,3667.27


In [11]:
# Read the raw population table from disk.
dfpop = pd.read_csv("raw/population.csv")

In [12]:
# Discard the descriptive columns; the Scenario/Region keys and the year
# columns remain.
# NOTE: not idempotent -- re-running this cell raises KeyError because the
# columns are already gone.
pop_columns_to_discard = ['Model', 'Variable', 'Unit']
dfpop = dfpop.drop(pop_columns_to_discard, axis=1)

In [13]:
# Collapse the population table to one row per (Scenario, Region) by summing
# every remaining column within each pair.
# NOTE(review): sum() reports 0 for a group whose values are all NaN; confirm
# that is intended (min_count=1 would keep NaN instead).
pop_grouping_keys = ['Scenario', 'Region']
dfpop = dfpop.groupby(pop_grouping_keys, as_index=False).sum()

In [18]:
# Map each scenario label to its population sub-frame, re-indexed from zero.
# The labels are taken from the population table itself (not from the GDP
# frame `df`), so this cell works without the GDP cells having run and cannot
# produce empty or missing entries when the two files list different scenarios.
scenariopop_dfs = {
    scenario: dfpop[dfpop['Scenario'] == scenario].reset_index(drop=True)
    for scenario in dfpop['Scenario'].unique()
}

In [19]:
# Bind one variable per SSP scenario to its population sub-frame.
ssp1_pop, ssp2_pop, ssp3_pop, ssp4_pop, ssp5_pop = (
    scenariopop_dfs[label] for label in ('SSP1', 'SSP2', 'SSP3', 'SSP4', 'SSP5')
)

In [20]:
# Interpolate every scenario's population table to annual resolution.
ssp1p_interp, ssp2p_interp, ssp3p_interp, ssp4p_interp, ssp5p_interp = map(
    interpolate_annual, (ssp1_pop, ssp2_pop, ssp3_pop, ssp4_pop, ssp5_pop)
)

# Persist each annual population table as its own CSV, without the row index.
for csv_path, annual_frame in (
    ("data/ssp1_pop.csv", ssp1p_interp),
    ("data/ssp2_pop.csv", ssp2p_interp),
    ("data/ssp3_pop.csv", ssp3p_interp),
    ("data/ssp4_pop.csv", ssp4p_interp),
    ("data/ssp5_pop.csv", ssp5p_interp),
):
    annual_frame.to_csv(csv_path, index=False)

In [21]:
# Show the first five rows of the annual SSP1 population table.
ssp1p_interp.head(n=5)

Unnamed: 0,Scenario,Region,2020,2021,2022,2023,2024,2025,2026,2027,...,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100
0,SSP1,Afghanistan,38.326031,39.419564,40.513097,41.606629,42.700162,43.793695,44.882668,45.971641,...,79.569255,79.32872,79.088185,78.84765,78.607115,78.282681,77.958248,77.633814,77.309381,76.984947
1,SSP1,Albania,2.87213,2.85619,2.840251,2.824311,2.808372,2.792432,2.776861,2.76129,...,1.491476,1.475693,1.459911,1.444128,1.428346,1.41302,1.397693,1.382367,1.36704,1.351714
2,SSP1,Algeria,43.090992,43.770728,44.450464,45.130199,45.809935,46.489671,47.028835,47.567998,...,54.407648,54.199993,53.992339,53.784684,53.577029,53.35002,53.12301,52.896001,52.668991,52.441982
3,SSP1,Angola,32.899002,33.8996,34.900197,35.900795,36.901392,37.90199,38.858763,39.815535,...,73.69172,73.505867,73.320014,73.134161,72.948308,72.662869,72.37743,72.09199,71.806551,71.521112
4,SSP1,Antigua and Barbuda,0.092375,0.092961,0.093547,0.094133,0.094719,0.095305,0.095772,0.09624,...,0.085351,0.084595,0.08384,0.083084,0.082328,0.081507,0.080687,0.079866,0.079046,0.078225


In [22]:
def compute_growth(df):
    """Compute period-over-period growth rates across the year columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame with the identifier columns ``'Scenario'`` and
        ``'Region'``; every other column is treated as a consecutive period,
        in the order the columns appear.

    Returns
    -------
    pandas.DataFrame
        Same rows and column order as ``df``. Each period column holds
        ``value / previous_column_value - 1``; the first period column is
        NaN because it has no predecessor. ``df`` itself is not modified.

    Notes
    -----
    Missing values are not forward-filled before the change is computed
    (``fill_method=None``), so a gap yields NaN growth rather than a
    fabricated 0 %. This also avoids the deprecated implicit padding of
    ``pct_change``.
    """
    meta_cols = ['Scenario', 'Region']
    year_cols = [col for col in df.columns if col not in meta_cols]

    # Row-wise change between neighbouring columns; returns a new frame.
    growth_rates = df[year_cols].pct_change(axis=1, fill_method=None)

    # Put the identifier columns back in front, in their original order.
    for position, col in enumerate(meta_cols):
        growth_rates.insert(position, col, df[col])

    return growth_rates

In [23]:
# Derive year-over-year growth rates from each scenario's annual GDP table.
ssp1_growth, ssp2_growth, ssp3_growth, ssp4_growth, ssp5_growth = map(
    compute_growth,
    (ssp1_interp, ssp2_interp, ssp3_interp, ssp4_interp, ssp5_interp),
)

In [25]:
# Persist each growth-rate table as its own CSV, without the row index.
for csv_path, growth_frame in (
    ("data/ssp1_growth.csv", ssp1_growth),
    ("data/ssp2_growth.csv", ssp2_growth),
    ("data/ssp3_growth.csv", ssp3_growth),
    ("data/ssp4_growth.csv", ssp4_growth),
    ("data/ssp5_growth.csv", ssp5_growth),
):
    growth_frame.to_csv(csv_path, index=False)