In [53]:
import pandas as pd
# Read the wide table with header=None, so the CSV's first row (the column
# labels) arrives as row 0 of the data instead of being used as the header.
df_corrected = pd.read_csv('raw_data/lex.csv', header=None)

# Promote that first row to the column labels and remove it from the data.
header_row = df_corrected.iloc[0]
df_corrected = df_corrected.drop(index=df_corrected.index[0])
df_corrected.columns = header_row

# Index by the first column (the country names) and transpose, so every country
# becomes a column and every former column label becomes a row. The labels are
# then moved back into an ordinary column, which is renamed to 'year'.
df_transposed = df_corrected.set_index(df_corrected.columns[0]).T.reset_index()
df_transposed = df_transposed.rename(columns={df_transposed.columns[0]: 'year'})

# Melt to long format: one (year, country, life_expectancy) row per table cell.
df_melted = df_transposed.melt(id_vars=['year'], var_name='country', value_name='life_expectancy')

In [54]:
# Inspect the dtype of each column of the melted frame.
df_melted.dtypes

year               float64
country             object
life_expectancy    float64
dtype: object

In [55]:
# Display the melted long-format frame.
df_melted

Unnamed: 0,year,country,life_expectancy
0,1800.0,Afghanistan,28.2
1,1801.0,Afghanistan,28.2
2,1802.0,Afghanistan,28.2
3,1803.0,Afghanistan,28.2
4,1804.0,Afghanistan,28.2
...,...,...,...
58991,2096.0,Zimbabwe,70.8
58992,2097.0,Zimbabwe,71.0
58993,2098.0,Zimbabwe,71.1
58994,2099.0,Zimbabwe,71.2


In [14]:
def reformatGDP(gdp):
    """Convert a value such as 187, '599' or '1.5k' to a float.

    A trailing magnitude suffix (case-insensitive) is expanded:
    'k' -> x 1,000, 'm' -> x 1,000,000, 'b' -> x 1,000,000,000.
    Values without a suffix are passed straight to float().

    Parameters
    ----------
    gdp : int, float or str
        A plain number, or a numeric string with an optional suffix.

    Returns
    -------
    float
        The numeric value with any suffix expanded.

    Raises
    ------
    ValueError
        If the text (after removing a trailing suffix) is not numeric.
    TypeError
        If ``gdp`` has no suffix and float() cannot accept its type
        (for example ``None``).
    """
    multipliers = {'k': 1000, 'm': 1000000, 'b': 1000000000}
    gdp_str = str(gdp).strip().lower()
    # Only a trailing suffix counts. A letter anywhere else means the input is
    # malformed, so it is left for float() to reject instead of being stripped.
    if gdp_str and gdp_str[-1] in multipliers:
        return float(gdp_str[:-1]) * multipliers[gdp_str[-1]]
    return float(gdp)
    

In [None]:
# Quick check: a plain number with no 'k' in it comes back as a float.
print(reformatGDP(187))

187.0


In [15]:
# Run every life_expectancy value through reformatGDP, which multiplies
# 'k'-marked values by 1000 and casts everything to float.
# NOTE(review): the helper is named for GDP data — confirm it is meant to be
# applied to the life_expectancy column.
df_melted['life_expectancy'] = df_melted['life_expectancy'].map(reformatGDP)

In [51]:
# Save the long-format table as CSV; index=False leaves the row index out.
df_melted.to_csv('reformatted_data/reformatted_life_expectancy.csv', index=False)

In [58]:
# Preview the first rows of the melted frame.
df_melted.head()

Unnamed: 0,year,country,life_expectancy
0,1800.0,Afghanistan,28.2
1,1801.0,Afghanistan,28.2
2,1802.0,Afghanistan,28.2
3,1803.0,Afghanistan,28.2
4,1804.0,Afghanistan,28.2


In [2]:
import pandas as pd

In [68]:
# Load the reformatted GDP CSV into `df`.
df = pd.read_csv('reformatted_data/reformatted_gdp.csv')

In [69]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,year,country,gdp_per_capita
0,1800,Afghanistan,599.0
1,1801,Afghanistan,599.0
2,1802,Afghanistan,599.0
3,1803,Afghanistan,599.0
4,1804,Afghanistan,599.0


In [23]:
# Scratch check: a year stored as the text '1800.0' has to go through float()
# before int(), because int('1800.0') itself raises ValueError.
value = '1800.0'
int_value = int(float(value))
print(int_value)

1800


In [32]:
# Rename the 'life_expectancy' column of `df` in place.
# NOTE(review): the `df` previewed in this notebook has a 'gdp_per_capita'
# column and no 'life_expectancy'. DataFrame.rename ignores missing labels by
# default, so on that frame this line would silently do nothing — confirm which
# frame this cell was meant for, or whether it is a leftover.
df.rename(columns={'life_expectancy': 'gender_ratio_of_mean_years_in_school'}, inplace=True)

In [56]:
def date_to_int(year):
    """Convert a year given as a number or numeric string to an int.

    The value is parsed with float() first so that text such as '1800.0'
    is accepted; int() then drops the fractional part.
    """
    as_float = float(year)
    return int(as_float)

In [59]:
# Replace the float year values (e.g. 1800.0) with integers, one value at a
# time, via date_to_int.
df_melted['year'] = df_melted['year'].map(date_to_int)

In [70]:
# Keep only the rows whose year is 2024.
is_2024 = df['year'].eq(2024)
df_2024 = df.loc[is_2024]

In [71]:
# Display the 2024-only rows.
df_2024

Unnamed: 0,year,country,gdp_per_capita
224,2024,Afghanistan,1370.0
525,2024,Angola,5820.0
826,2024,Albania,16700.0
1127,2024,Andorra,55200.0
1428,2024,UAE,76900.0
...,...,...,...
57414,2024,Samoa,5650.0
57715,2024,Yemen,1720.0
58016,2024,South Africa,13400.0
58317,2024,Zambia,3370.0


In [72]:
# Write the 2024 slice to CSV without the row index.
# NOTE(review): the other files in this notebook live under
# 'reformatted_data/', while this one goes to the working directory — confirm
# that is intended.
df_2024.to_csv('2024ONLY_reformatted_gdp.csv', index=False)