In [9]:
# Import pandas
import pandas as pd

# Read the combined population / fertility / GDP extract into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='Resources/filtered_population_fertility_gdp_2014_2024.csv',
)

In [59]:
# Step 1: Split the frame by the GDP series named in 'Series Name_y'.

# Per-capita level rows keep every column of the source frame.
gdp_per_capita_df = df.loc[df['Series Name_y'] == 'GDP per capita (current US$)'].copy()

# Annual growth rows keep only the join keys and the value; the value column is
# renamed to 'GDP Percentage' so it does not collide with the per-capita 'GDP'.
gdp_growth_df = (
    df.loc[
        df['Series Name_y'] == 'GDP per capita growth (annual %)',
        ['Country Code', 'Year', 'GDP'],
    ]
    .rename(columns={'GDP': 'GDP Percentage'})
)

In [69]:
# Step 2: Attach the growth rate to each per-capita row, matching on 'Country Code' and 'Year'.
# The growth slice may hold several rows for the same (Country Code, Year); merging it
# as-is would repeat every matching left row once per duplicate. Collapse it to one row
# per key first. keep='first' selects the same growth value that the first merged row
# would have carried, so the rows kept by the later de-duplication step are unchanged.
merged_df = gdp_per_capita_df.merge(
    gdp_growth_df.drop_duplicates(subset=['Country Code', 'Year'], keep='first'),
    on=['Country Code', 'Year'],
    how='left',               # keep per-capita rows even when no growth figure exists
    validate='many_to_one',   # fail loudly if the right side is ever not unique per key
)

# Step 3: Create a new column 'Series Name_z' labelling the 'GDP Percentage' values.
# The label is a constant, so it is set on every row, including rows whose
# 'GDP Percentage' is missing because the left join found no match.
merged_df['Series Name_z'] = 'GDP per capita growth (annual %)'

In [75]:
# Step 4: Drop the 'Series Code' column as requested.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this cell
# re-runnable: executing it a second time no longer raises KeyError once the column
# has already been removed.
merged_df = merged_df.drop(columns=['Series Code'], errors='ignore')

In [79]:
# Step 5: Put the columns in presentation order: identifiers first, then each
# series label immediately followed by its value column.
column_order = [
    'Country Code', 'Country Name', 'Year',
    'Series Name_x', 'Fertility Rate',
    'Series Name_y', 'GDP',
    'Series Name_z', 'GDP Percentage',
]
final_df = merged_df.loc[:, column_order]

In [83]:
# Step 6: Keep only the first row seen for each ('Country Code', 'Year') pair so that
# every country-year combination appears exactly once.
final_df = final_df.loc[~final_df.duplicated(subset=['Country Code', 'Year'], keep='first')]

In [85]:
# Show the de-duplicated frame for a visual check before it is written to disk.
final_df

Unnamed: 0,Country Code,Country Name,Year,Series Name_x,Fertility Rate,Series Name_y,GDP,Series Name_z,GDP Percentage
0,AFG,Afghanistan,2014,"Fertility rate, total (births per woman)",5.560,GDP per capita (current US$),626.512929,GDP per capita growth (annual %),-0.964803
15,ALB,Albania,2014,"Fertility rate, total (births per woman)",1.674,GDP per capita (current US$),4578.633208,GDP per capita growth (annual %),1.985388
30,DZA,Algeria,2014,"Fertility rate, total (births per woman)",3.004,GDP per capita (current US$),6164.644699,GDP per capita growth (annual %),2.060062
45,ASM,American Samoa,2014,"Fertility rate, total (births per woman)",0.000,GDP per capita (current US$),12313.997357,GDP per capita growth (annual %),3.279022
60,AND,Andorra,2014,"Fertility rate, total (births per woman)",0.000,GDP per capita (current US$),45680.534992,GDP per capita growth (annual %),2.140939
...,...,...,...,...,...,...,...,...,...
379365,UZB,Uzbekistan,2023,"Fertility rate, total (births per woman)",0.000,GDP per capita (current US$),2496.107758,GDP per capita growth (annual %),3.764399
379380,VUT,Vanuatu,2023,"Fertility rate, total (births per woman)",0.000,GDP per capita (current US$),3367.094639,GDP per capita growth (annual %),-0.160067
379395,VNM,Viet Nam,2023,"Fertility rate, total (births per woman)",0.000,GDP per capita (current US$),4346.768492,GDP per capita growth (annual %),4.332271
379410,ZMB,Zambia,2023,"Fertility rate, total (births per woman)",0.000,GDP per capita (current US$),1369.129365,GDP per capita growth (annual %),2.992084


In [87]:
# Write the result as CSV; index=False leaves the DataFrame's row labels out of the file.
final_df.to_csv(
    path_or_buf='Resources/fertility_gdp_2014_2024.csv',
    index=False,
)
# Write the same rows as JSON: a list with one object per row, pretty-printed with 4 spaces.
final_df.to_json(
    path_or_buf='Resources/fertility_gdp_2014_2024.json',
    orient='records',
    indent=4,
)