In [5]:
import pandas as pd
# The DataFrame 'df' is not displaying the headers correctly due to the initial row misalignment. 
# We need to adjust this first. Let's reload the data and skip the first row which seems to be incorrect.
df_corrected = pd.read_csv('raw_data/co2_pcap_cons.csv', header=None)

# Now we define the first row as the header
df_corrected.columns = df_corrected.iloc[0]
df_corrected = df_corrected.drop(df_corrected.index[0])

# The headers now should be countries, let's transpose the data to make countries as one of the columns
df_transposed = df_corrected.set_index(df_corrected.columns[0]).T
df_transposed.reset_index(inplace=True)
df_transposed.rename(columns={df_transposed.columns[0]: 'year'}, inplace=True)

# Melt the transposed dataframe to get "country, year, GDP per capita" format
df_melted = pd.melt(df_transposed, id_vars=['year'], var_name='country', value_name='co2_per_capita')

In [6]:
df_melted.dtypes

year              object
country           object
co2_per_capita    object
dtype: object

In [7]:
df_melted

Unnamed: 0,year,country,co2_per_capita
0,1800.0,Afghanistan,0.001
1,1801.0,Afghanistan,0.001
2,1802.0,Afghanistan,0.001
3,1803.0,Afghanistan,0.001
4,1804.0,Afghanistan,0.001
...,...,...,...
43257,2018.0,Zimbabwe,0.83
43258,2019.0,Zimbabwe,0.783
43259,2020.0,Zimbabwe,0.737
43260,2021.0,Zimbabwe,0.789


In [None]:
def reformatGDP(gdp):
    gdp_str = str(gdp).lower()
    if 'k' in gdp_str:
        new_str = gdp_str.replace('k', '')
        return float(new_str) * 1000
    return float(gdp)
    

In [None]:
print(reformatGDP(187))

187.0


In [None]:
df_melted['GDP per capita'] = df_melted['GDP per capita'].map(reformatGDP)

In [8]:
df_melted.to_csv('reformatted_data/reformatted_co2.csv', index=False)

In [None]:
df_melted.head()

Unnamed: 0,year,country,GDP per capita
0,1800,Afghanistan,599.0
1,1801,Afghanistan,599.0
2,1802,Afghanistan,599.0
3,1803,Afghanistan,599.0
4,1804,Afghanistan,599.0


In [2]:
import pandas as pd

In [31]:
df = pd.read_csv('reformatted_gender_equality.csv')

In [32]:
df.rename(columns={'life_expectancy': 'gender_ratio_of_mean_years_in_school'}, inplace=True)

In [None]:
def date_to_int(year):
    return int(year)

In [None]:
df['year'] = df_melted['year'].map(date_to_int)

In [19]:
df_2024 = df[df['year']==2024]

In [20]:
df_2024.head()

Unnamed: 0,year,country,life_expectancy
224,2024.0,Afghanistan,64.8
525,2024.0,Angola,66.1
826,2024.0,Albania,79.5
1127,2024.0,Andorra,83.0
1428,2024.0,UAE,74.8


In [33]:
df.to_csv('reformatted_gender_equality.csv', index=False)