In [45]:
import pandas as pd
# The DataFrame 'df' is not displaying the headers correctly due to the initial row misalignment. 
# We need to adjust this first. Let's reload the data and skip the first row which seems to be incorrect.
df_corrected = pd.read_csv('raw_data/mean_years_in_school_women_percent_men_25_to_34_years.csv', header=None)

# Now we define the first row as the header
df_corrected.columns = df_corrected.iloc[0]
df_corrected = df_corrected.drop(df_corrected.index[0])

# The headers now should be countries, let's transpose the data to make countries as one of the columns
df_transposed = df_corrected.set_index(df_corrected.columns[0]).T
df_transposed.reset_index(inplace=True)
df_transposed.rename(columns={df_transposed.columns[0]: 'year'}, inplace=True)

# Melt the transposed dataframe to get "country, year, GDP per capita" format
df_melted = pd.melt(df_transposed, id_vars=['year'], var_name='country', value_name='gender_ratio_of_mean_years_in_school')

In [46]:
df_melted.dtypes

year                                    float64
country                                  object
gender_ratio_of_mean_years_in_school    float64
dtype: object

In [47]:
df_melted

Unnamed: 0,year,country,gender_ratio_of_mean_years_in_school
0,1970.0,Afghanistan,15.4
1,1971.0,Afghanistan,15.8
2,1972.0,Afghanistan,15.4
3,1973.0,Afghanistan,15.6
4,1974.0,Afghanistan,15.9
...,...,...,...
8643,2011.0,Zimbabwe,90.4
8644,2012.0,Zimbabwe,90.8
8645,2013.0,Zimbabwe,91.2
8646,2014.0,Zimbabwe,91.7


In [14]:
def reformatGDP(gdp):
    gdp_str = str(gdp).lower()
    if 'k' in gdp_str:
        new_str = gdp_str.replace('k', '')
        return float(new_str) * 1000
    return float(gdp)
    

In [None]:
print(reformatGDP(187))

187.0


In [15]:
df_melted['gdp_per_capita'] = df_melted['gdp_per_capita'].map(reformatGDP)

In [17]:
df_melted.to_csv('reformatted_data/reformatted_gdp.csv', index=False)

In [16]:
df_melted.head()

Unnamed: 0,year,country,gdp_per_capita
0,1800,Afghanistan,599.0
1,1801,Afghanistan,599.0
2,1802,Afghanistan,599.0
3,1803,Afghanistan,599.0
4,1804,Afghanistan,599.0


In [2]:
import pandas as pd

In [39]:
df = pd.read_csv('reformatted_data/reformatted_gender_equality.csv')

In [43]:
df.head()

Unnamed: 0,year,country,gender_ratio_of_mean_years_in_school
0,2024,Afghanistan,64.8
1,2024,Angola,66.1
2,2024,Albania,79.5
3,2024,Andorra,83.0
4,2024,UAE,74.8


In [23]:
value = '1800.0'
int_value = int(float(value))
print(int_value)

1800


In [32]:
df.rename(columns={'life_expectancy': 'gender_ratio_of_mean_years_in_school'}, inplace=True)

In [41]:
def date_to_int(year):
    return int(float(year))

In [48]:
df_melted['year'] = df_melted['year'].map(date_to_int)

In [19]:
df_2024 = df[df['year']==2024]

In [49]:
df_melted.head()

Unnamed: 0,year,country,gender_ratio_of_mean_years_in_school
0,1970,Afghanistan,15.4
1,1971,Afghanistan,15.8
2,1972,Afghanistan,15.4
3,1973,Afghanistan,15.6
4,1974,Afghanistan,15.9


In [50]:
df_melted.to_csv('reformatted_gender_equality.csv', index=False)