In [9]:
import pandas as pd
# The first row of the CSV is already the header (one label column followed by one
# column per year), so let read_csv consume it directly. Reading with header=None
# and promoting row 0 afterwards makes pandas type-infer the header cell together
# with the data, which turns the year labels of all-numeric columns into numbers
# while the others stay text. dtype=str keeps every cell as text (e.g. '599',
# '10.8k') so the later numeric conversion sees one uniform type.
df_corrected = pd.read_csv('raw_data/gdp_pcap.csv', dtype=str)

# Transpose so that each country becomes a column and each year a row, then expose
# the year labels as a regular 'year' column. No inplace mutation, so re-running
# this cell always rebuilds the frames from the raw file.
df_transposed = (
    df_corrected
    .set_index(df_corrected.columns[0])
    .T
    .rename_axis(index='year')
    .reset_index()
)

# Melt the transposed dataframe to get the long "year, country, gdp_per_capita" format
df_melted = pd.melt(df_transposed, id_vars=['year'], var_name='country', value_name='gdp_per_capita')

In [10]:
# Check the column dtypes of the melted frame.
df_melted.dtypes

year              object
country           object
gdp_per_capita    object
dtype: object

In [11]:
# Display the melted frame (the rendered output elides the middle rows).
df_melted

Unnamed: 0,year,country,gdp_per_capita
0,1800,Afghanistan,599
1,1801,Afghanistan,599
2,1802,Afghanistan,599
3,1803,Afghanistan,599
4,1804,Afghanistan,599
...,...,...,...
58690,2096,Zimbabwe,10.8k
58691,2097,Zimbabwe,11k
58692,2098,Zimbabwe,11.3k
58693,2099,Zimbabwe,11.6k


In [14]:
def reformatGDP(gdp):
    """Convert a GDP figure that may carry a magnitude suffix into a float.

    Large values in the table are abbreviated as text (e.g. ``'10.8k'``), while
    smaller ones are plain numbers. Numeric inputs, including NaN, are passed
    straight through ``float``.

    Parameters
    ----------
    gdp : str or number
        A value such as ``599``, ``'599'``, ``'10.8k'`` or ``'1.2M'``.
        Suffixes are case-insensitive and surrounding whitespace is ignored.

    Returns
    -------
    float
        The value with any trailing suffix expanded (k = 1e3, M = 1e6, B = 1e9).

    Raises
    ------
    ValueError
        If the text is not a number once a recognised suffix is removed.
    TypeError
        If ``gdp`` is not text and ``float`` cannot convert it (e.g. ``None``).
    """
    if not isinstance(gdp, str):
        # Already numeric (int, float, NaN): nothing to parse.
        return float(gdp)

    # float() rejects the typographic minus sign (U+2212), so map it to ASCII '-'.
    text = gdp.strip().lower().replace('\u2212', '-')

    multipliers = {'k': 1_000, 'm': 1_000_000, 'b': 1_000_000_000}
    # Only a trailing suffix counts; a letter elsewhere is a malformed value and
    # is left for float() to reject.
    suffix = text[-1:]
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]
    return float(text)
    

In [None]:
# Sanity check: a plain integer input should come back as a float.
print(reformatGDP(187))

187.0


In [15]:
# Convert every GDP cell to a float by applying reformatGDP element-wise; the
# column is overwritten on the existing frame.
df_melted['gdp_per_capita'] = df_melted['gdp_per_capita'].map(reformatGDP)

In [17]:
# Write the long-format GDP table to CSV, omitting the row index.
df_melted.to_csv('reformatted_data/reformatted_gdp.csv', index=False)

In [16]:
# Preview the first rows of the converted frame.
df_melted.head()

Unnamed: 0,year,country,gdp_per_capita
0,1800,Afghanistan,599.0
1,1801,Afghanistan,599.0
2,1802,Afghanistan,599.0
3,1803,Afghanistan,599.0
4,1804,Afghanistan,599.0


In [2]:
import pandas as pd

In [27]:
# Load the life-expectancy CSV from the reformatted_data directory.
df = pd.read_csv('reformatted_data/reformatted_life_expectancy.csv')

In [31]:
# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,year,country,life_expectancy
0,1800,Afghanistan,28.2
1,1801,Afghanistan,28.2
2,1802,Afghanistan,28.2
3,1803,Afghanistan,28.2
4,1804,Afghanistan,28.2


In [23]:
# Scratch check: a year written as a float string cannot go straight through
# int(), so route it through float first.
value = '1800.0'
year_as_float = float(value)
int_value = int(year_as_float)
print(int_value)

1800


In [32]:
# Rename the value column on the existing frame.
# NOTE(review): this relabels 'life_expectancy' as a schooling gender-ratio column
# although df was read from the life-expectancy CSV — confirm this is intended.
df.rename(columns={'life_expectancy': 'gender_ratio_of_mean_years_in_school'}, inplace=True)

In [29]:
def date_to_int(year):
    """Return ``year`` as an int, accepting float-like input such as ``'1800.0'``.

    The value is first parsed as a float and then truncated toward zero, so
    both ``2024.0`` and ``'2024.0'`` yield ``2024``.
    """
    as_float = float(year)
    return int(as_float)

In [30]:
# Coerce every entry of the year column to an int via date_to_int.
df['year'] = df['year'].map(date_to_int)

In [19]:
# Keep only the rows whose year equals 2024.
df_2024 = df.loc[df['year'].eq(2024)]

In [20]:
# Preview the first rows of the 2024 subset.
df_2024.head()

Unnamed: 0,year,country,life_expectancy
224,2024.0,Afghanistan,64.8
525,2024.0,Angola,66.1
826,2024.0,Albania,79.5
1127,2024.0,Andorra,83.0
1428,2024.0,UAE,74.8


In [32]:
# Save df to CSV, omitting the row index.
# NOTE(review): this writes to the working directory, whereas the table was read
# from 'reformatted_data/' — confirm the intended output path.
df.to_csv('reformatted_life_expectancy.csv', index=False)