In [1]:
import pandas as pd
import numpy as np

## Generate emissions data:

In [2]:
np.random.seed(42)  # pin the global seed so the noise below is repeatable
years = list(range(2017, 2025))

# Per country: (value in the first year, value in the last year)
trends = {
    "France": (330, 280),
    "Switzerland": (50, 35),
    "Germany": (780, 650)
}

n_years = len(years)
emissions_dict = {}

for country, (first_value, last_value) in trends.items():
    # Straight line between the two endpoints, one point per year
    trend_line = np.linspace(first_value, last_value, n_years)
    # Gaussian perturbation (mean 0, standard deviation 5) for each year
    jitter = np.random.normal(0, 5, n_years)
    # Round to whole numbers and keep plain Python ints in the dictionary
    emissions_dict[country] = np.round(trend_line + jitter).astype(int).tolist()

print("Generated emissions data:")
print(emissions_dict)

Generated emissions data:
{'France': [332, 322, 319, 316, 300, 293, 295, 284], 'Switzerland': [48, 51, 43, 41, 43, 30, 29, 32], 'Germany': [775, 763, 738, 717, 713, 686, 669, 643]}


## Build the dataframe:

In [3]:
# Flatten the {country: [value per year]} mapping into one record per
# (country, year) pair; values are matched to years by position.
# NOTE(review): the same trend values are labelled "[MtCO2]" further down in
# this notebook, so the "[tCO2]" unit in this column name may be a mislabel --
# confirm before relying on the unit.
rows = [
    {"Country": country, "Year": year, "CO2_Emissions[tCO2]": value}
    for country, values in emissions_dict.items()
    for year, value in zip(years, values)
]

df = pd.DataFrame(rows)

print("\nFinal DataFrame:")
print(df)


Final DataFrame:
        Country  Year  CO2_Emissions[tCO2]
0        France  2017                  332
1        France  2018                  322
2        France  2019                  319
3        France  2020                  316
4        France  2021                  300
5        France  2022                  293
6        France  2023                  295
7        France  2024                  284
8   Switzerland  2017                   48
9   Switzerland  2018                   51
10  Switzerland  2019                   43
11  Switzerland  2020                   41
12  Switzerland  2021                   43
13  Switzerland  2022                   30
14  Switzerland  2023                   29
15  Switzerland  2024                   32
16      Germany  2017                  775
17      Germany  2018                  763
18      Germany  2019                  738
19      Germany  2020                  717
20      Germany  2021                  713
21      Germany  2022                  686
22      Germany  2023                  669
23      Germany  2024                  643

## Do the same for population

In [6]:
def generate_df(years, trends, variable_name, add_noise=True, noise_std=5, seed=42):
    """Build a long-format DataFrame of linearly trending values per country.

    For every country in ``trends`` a straight line is drawn from its start
    value to its end value with one point per entry of ``years``. Optionally
    Gaussian noise is added, and the result is rounded to 2 decimals.

    Parameters
    ----------
    years : sequence
        Year labels; its length sets the number of points per country.
    trends : dict
        Maps country name -> ``(start, end)`` values of the linear trend.
    variable_name : str
        Name of the value column in the returned frame.
    add_noise : bool, default True
        If truthy, add zero-mean Gaussian noise to each trend.
    noise_std : float, default 5
        Standard deviation of the noise (only used when ``add_noise`` is truthy).
    seed : int or None, default 42
        Seed of the private random generator. ``None`` lets numpy pick a
        non-reproducible seed.

    Returns
    -------
    pandas.DataFrame
        Columns ``"Country"``, ``"Year"`` and ``variable_name``, one row per
        (country, year), countries in the iteration order of ``trends``.
        The columns are present even when ``years`` or ``trends`` is empty.
    """
    n_years = len(years)

    # A private legacy generator yields the same draws as
    # np.random.seed(seed) followed by np.random.normal(...), but leaves the
    # global numpy RNG state untouched for the rest of the notebook.
    rng = np.random.RandomState(seed)

    rows = []
    for country, (start, end) in trends.items():
        # Linear trend between the two endpoints
        values = np.linspace(start, end, n_years)

        # Noise is drawn country by country, in the iteration order of `trends`
        if add_noise:
            values = values + rng.normal(0, noise_std, n_years)

        # Round, then convert to plain Python floats before building records
        rounded = np.round(values, 2).tolist()
        rows.extend(
            {"Country": country, "Year": year, variable_name: value}
            for year, value in zip(years, rounded)
        )

    # Passing the columns explicitly keeps the schema stable for empty input
    return pd.DataFrame(rows, columns=["Country", "Year", variable_name])


years = list(range(2017, 2025))

# Emissions, rebuilt through the helper (noise is on by default)
trends_emission = {
    "France": (330, 280),
    "Switzerland": (50, 35),
    "Germany": (780, 650),
}
df_emissions = generate_df(
    years=years,
    trends=trends_emission,
    variable_name="CO2_emissions[MtCO2]",
)

# Population: same helper, noise switched off so the trend stays exactly linear
trends_population = {
    "France": (67.0, 68.0),
    "Switzerland": (8.4, 8.9),
    "Germany": (82.8, 83.5),
}
df_population = generate_df(
    years=years,
    trends=trends_population,
    variable_name="population[milpeople]",
    add_noise=False,
)

## Merge two dataframes

In [7]:
# Left join on (Country, Year): every emissions row is kept and receives the
# population value of the matching country and year
df = df_emissions.merge(df_population, how="left", on=["Country", "Year"])

## Create variables / do operations

In [8]:
# Emissions per capita: scale both columns by 1e6 (Mt -> t, million people ->
# people, going by the column labels) and divide
emissions_tonnes = df["CO2_emissions[MtCO2]"] * 1e6
head_count = df["population[milpeople]"] * 1e6
df["CO2_emissions_per_capita[tCO2/person]"] = emissions_tonnes / head_count

# Three subsetting examples; each assignment overwrites df_sub, so only the
# last one (a single column, i.e. a Series) survives the cell
is_france = df["Country"].eq("France")
in_first_two_years = df["Year"].isin([2017, 2018])

df_sub = df.loc[is_france, :]                         # all columns, France rows
df_sub = df.loc[is_france & in_first_two_years, :]    # France, 2017 and 2018 only
df_sub = df.loc[is_france, "population[milpeople]"]   # France, population column

## Wide vs long format

In [9]:
# Wide -> long: every non-id column becomes a ("variable", "value") pair
df_melted = df.melt(id_vars=["Country", "Year"])

# Long -> wide: one column per variable again, with Country/Year restored as
# ordinary columns instead of the index
wide_by_country_year = df_melted.pivot(
    index=["Country", "Year"],
    columns="variable",
    values="value",
)
df_back = wide_by_country_year.reset_index()

## Group by

In [10]:
# Time series: sum every variable over all countries for each year, then
# order the rows by variable and year.
# NOTE(review): the melted frame also carries the per-capita column; summing
# per-capita values across countries is not an aggregate per-capita figure --
# confirm whether that variable should be excluded or recomputed.
df_agg = (
    df_melted
    .groupby(["Year", "variable"], as_index=False)["value"]
    .sum()
    .sort_values(["variable", "Year"])
)

# Back to wide format (one column per variable), with Year as a regular column
wide_by_year = df_agg.pivot(index=["Year"], columns="variable", values="value")
df_agg_pivoted = wide_by_year.reset_index()