# Coding for Economists - Session 6

***

In [69]:
import pandas as pd
import re
# Enable pandas' copy-on-write mode for every DataFrame used below.
pd.set_option("mode.copy_on_write", True)

In [70]:
# Read the population table and reshape it to long format: 'country' is kept
# as the identifier and every other column header becomes a value of 'year'.
df_pop = pd.read_csv('pop.csv').melt(
    id_vars=['country'],
    var_name='year',
    value_name='population',
)
df_pop.head()

Unnamed: 0,country,year,population
0,Afghanistan,1800,3.28M
1,Angola,1800,1.57M
2,Albania,1800,400k
3,Andorra,1800,2650
4,UAE,1800,40.2k


In [71]:
# Function to convert population values
def convert_column(value):
    """Convert an abbreviated figure such as ``'3.28M'`` or ``'400k'`` to a float.

    Recognised suffixes (case-insensitive, only as the last character):
    ``k`` = thousand, ``m`` = million, ``b`` = billion. Text without a suffix
    is parsed as a plain number.

    Parameters
    ----------
    value : str, int, float or None
        The raw cell. Non-string input is accepted so callers do not have to
        cast the column to ``str`` first.

    Returns
    -------
    float
        The numeric value. ``None`` and empty/whitespace-only strings yield
        ``nan`` (missing data); the text ``'nan'`` parses to ``nan`` as well.

    Raises
    ------
    ValueError
        If the text is not a number with an optional k/m/b suffix.
    """
    if value is None:
        return float('nan')
    if not isinstance(value, str):
        # Already numeric (including float NaN): nothing to parse.
        return float(value)

    # Normalise case and whitespace, and map the Unicode minus sign (U+2212),
    # which float() rejects, to an ASCII hyphen.
    text = value.strip().lower().replace('\u2212', '-')
    if not text:
        return float('nan')

    multipliers = {'k': 1e3, 'm': 1e6, 'b': 1e9}
    # Only the final character can be a suffix; a letter elsewhere in the
    # text is left for float() to reject.
    factor = multipliers.get(text[-1])
    if factor is None:
        return float(text)  # Plain number without suffix
    return float(text[:-1]) * factor

# Normalise every population cell to text, then parse abbreviated figures
# (e.g. '3.28M', '400k') into plain floats.
df_pop['population'] = df_pop['population'].astype(str).map(convert_column)

In [72]:
# Read the GDP-per-capita table and reshape it to long format: 'country' is
# kept as the identifier and every other column header becomes a 'year' value.
df_gdp = pd.read_csv('gdp_pcap.csv').melt(
    id_vars=['country'],
    var_name='year',
    value_name='gdp_per_capita',
)
df_gdp.head()

Unnamed: 0,country,year,gdp_per_capita
0,Afghanistan,1800,481
1,Angola,1800,373
2,Albania,1800,469
3,Andorra,1800,1370
4,UAE,1800,1140


In [73]:
# Normalise every GDP cell to text, then parse it (including any k/M/B
# abbreviated figures) into a plain float.
df_gdp['gdp_per_capita'] = df_gdp['gdp_per_capita'].astype(str).map(convert_column)

In [74]:
# Read the life-expectancy table and reshape it to long format: 'country' is
# kept as the identifier and every other column header becomes a 'year' value.
df_lex = pd.read_csv('lex.csv').melt(
    id_vars=['country'],
    var_name='year',
    value_name='life_expectancy',
)
df_lex.head()

Unnamed: 0,country,year,life_expectancy
0,Afghanistan,1800,28.2
1,Angola,1800,27.0
2,Albania,1800,35.4
3,Andorra,1800,
4,UAE,1800,30.7


In [75]:
# Read the daily-income table and reshape it to long format: 'country' is
# kept as the identifier and every other column header becomes a 'year' value.
df_income = pd.read_csv('mincpcap_cppp.csv').melt(
    id_vars=['country'],
    var_name='year',
    value_name='daily_income',
)
df_income.head()

Unnamed: 0,country,year,daily_income
0,Afghanistan,1800,1.33
1,Angola,1800,0.779
2,Albania,1800,0.919
3,Andorra,1800,1.88
4,UAE,1800,1.65


In [76]:
# Read the child-mortality table and reshape it to long format: 'country' is
# kept as the identifier and every other column header becomes a 'year' value.
df_mortality = pd.read_csv(
    'child_mortality_0_5_year_olds_dying_per_1000_born.csv'
).melt(
    id_vars=['country'],
    var_name='year',
    value_name='child_mortality',
)
df_mortality.head()

Unnamed: 0,country,year,child_mortality
0,Afghanistan,1800,469.0
1,Angola,1800,486.0
2,Albania,1800,375.0
3,Andorra,1800,
4,UAE,1800,434.0


In [77]:
# Read the CO2 table and reshape it to long format: 'country' is kept as the
# identifier and every other column header becomes a 'year' value.
df_co2 = pd.read_csv('co2_pcap_cons.csv').melt(
    id_vars=['country'],
    var_name='year',
    value_name='co2_emission',
)
df_co2.head()

Unnamed: 0,country,year,co2_emission
0,Afghanistan,1800,0.001
1,Angola,1800,0.009
2,Albania,1800,0.001
3,Andorra,1800,0.333
4,UAE,1800,0.063


In [78]:
# Read the fertility table and reshape it to long format: 'country' is kept
# as the identifier and every other column header becomes a 'year' value.
df_bpw = pd.read_csv('children_per_woman_total_fertility.csv').melt(
    id_vars=['country'],
    var_name='year',
    value_name='babies_per_woman',
)
df_bpw.head()

Unnamed: 0,country,year,babies_per_woman
0,Afghanistan,1800,7.0
1,Angola,1800,6.93
2,Albania,1800,4.6
3,Andorra,1800,2.11
4,UAE,1800,6.94


In [79]:
# Combine all indicator tables into one frame keyed on (country, year).
# Outer joins keep a (country, year) pair even when some indicators lack it.
merged_df = (
    df_pop
    .merge(df_gdp, on=['country', 'year'], how='outer')
    .merge(df_lex, on=['country', 'year'], how='outer')
    .merge(df_income, on=['country', 'year'], how='outer')
    .merge(df_mortality, on=['country', 'year'], how='outer')
    .merge(df_co2, on=['country', 'year'], how='outer')
    .merge(df_bpw, on=['country', 'year'], how='outer')
)

In [80]:
# Preview the first five rows of the merged table.
merged_df.head(n=5)

Unnamed: 0,country,year,population,gdp_per_capita,life_expectancy,daily_income,child_mortality,co2_emission,babies_per_woman
0,Afghanistan,1800,3280000.0,481.0,28.2,1.33,469.0,0.001,7.0
1,Afghanistan,1801,3280000.0,481.0,28.2,1.33,469.0,0.001,7.0
2,Afghanistan,1802,3280000.0,481.0,28.2,1.33,469.0,0.001,7.0
3,Afghanistan,1803,3280000.0,481.0,28.2,1.33,469.0,0.001,7.0
4,Afghanistan,1804,3280000.0,481.0,28.2,1.33,469.0,0.001,7.0


In [81]:
# Keep only the first row seen for each country (one row per country).
countries = merged_df.drop_duplicates(subset='country', keep='first')

In [82]:
# One-column table of country names with a fresh 0..n-1 index, saved to disk
# (the index is written as the first, unnamed CSV column).
data_country = countries[['country']].reset_index(drop=True)
data_country.to_csv('country.csv')

In [83]:
# Display all country names as one '", "'-separated string.
'", "'.join(data_country['country'].tolist())

'Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua and Barbuda", "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria", "Burkina Faso", "Burundi", "Cambodia", "Cameroon", "Canada", "Cape Verde", "Central African Republic", "Chad", "Chile", "China", "Colombia", "Comoros", "Congo, Dem. Rep.", "Congo, Rep.", "Costa Rica", "Cote d\'Ivoire", "Croatia", "Cuba", "Cyprus", "Czech Republic", "Denmark", "Djibouti", "Dominica", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Eswatini", "Ethiopia", "Fiji", "Finland", "France", "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada", "Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Holy See", "Honduras", "Hong Kong, China", "Hungary", "Iceland", "India", "Indonesia", "Iran

In [84]:
# Attach a continent label to every row of data_country.
#
# The list is assigned positionally: entry i is written to row i. It must
# therefore contain exactly one entry per row (pandas raises ValueError on a
# length mismatch) and follow the row order of data_country; a reordered or
# changed country list would silently mislabel countries.
# The numbered comments record which country each entry is intended for.
# NOTE(review): assumes data_country still has these 197 countries in this
# order - re-check against country.csv whenever the input files change.
data_country['continent'] = [
    #  1. Afghanistan
    "Asia",
    #  2. Albania
    "Europe",
    #  3. Algeria
    "Africa",
    #  4. Andorra
    "Europe",
    #  5. Angola
    "Africa",
    #  6. Antigua and Barbuda
    "North America",
    #  7. Argentina
    "South America",
    #  8. Armenia
    "Asia",
    #  9. Australia
    "Oceania",
    # 10. Austria
    "Europe",
    # 11. Azerbaijan
    "Asia",
    # 12. Bahamas
    "North America",
    # 13. Bahrain
    "Asia",
    # 14. Bangladesh
    "Asia",
    # 15. Barbados
    "North America",
    # 16. Belarus
    "Europe",
    # 17. Belgium
    "Europe",
    # 18. Belize
    "North America",
    # 19. Benin
    "Africa",
    # 20. Bhutan
    "Asia",
    # 21. Bolivia
    "South America",
    # 22. Bosnia and Herzegovina
    "Europe",
    # 23. Botswana
    "Africa",
    # 24. Brazil
    "South America",
    # 25. Brunei
    "Asia",
    # 26. Bulgaria
    "Europe",
    # 27. Burkina Faso
    "Africa",
    # 28. Burundi
    "Africa",
    # 29. Cambodia
    "Asia",
    # 30. Cameroon
    "Africa",
    # 31. Canada
    "North America",
    # 32. Cape Verde
    "Africa",
    # 33. Central African Republic
    "Africa",
    # 34. Chad
    "Africa",
    # 35. Chile
    "South America",
    # 36. China
    "Asia",
    # 37. Colombia
    "South America",
    # 38. Comoros
    "Africa",
    # 39. Congo, Dem. Rep.
    "Africa",
    # 40. Congo, Rep.
    "Africa",
    # 41. Costa Rica
    "North America",
    # 42. Cote d'Ivoire
    "Africa",
    # 43. Croatia
    "Europe",
    # 44. Cuba
    "North America",
    # 45. Cyprus
    "Europe",
    # 46. Czech Republic
    "Europe",
    # 47. Denmark
    "Europe",
    # 48. Djibouti
    "Africa",
    # 49. Dominica
    "North America",
    # 50. Dominican Republic
    "North America",
    # 51. Ecuador
    "South America",
    # 52. Egypt
    "Africa",
    # 53. El Salvador
    "North America",
    # 54. Equatorial Guinea
    "Africa",
    # 55. Eritrea
    "Africa",
    # 56. Estonia
    "Europe",
    # 57. Eswatini
    "Africa",
    # 58. Ethiopia
    "Africa",
    # 59. Fiji
    "Oceania",
    # 60. Finland
    "Europe",
    # 61. France
    "Europe",
    # 62. Gabon
    "Africa",
    # 63. Gambia
    "Africa",
    # 64. Georgia
    "Asia",
    # 65. Germany
    "Europe",
    # 66. Ghana
    "Africa",
    # 67. Greece
    "Europe",
    # 68. Grenada
    "North America",
    # 69. Guatemala
    "North America",
    # 70. Guinea
    "Africa",
    # 71. Guinea-Bissau
    "Africa",
    # 72. Guyana
    "South America",
    # 73. Haiti
    "North America",
    # 74. Holy See
    "Europe",
    # 75. Honduras
    "North America",
    # 76. Hong Kong, China
    "Asia",
    # 77. Hungary
    "Europe",
    # 78. Iceland
    "Europe",
    # 79. India
    "Asia",
    # 80. Indonesia
    "Asia",
    # 81. Iran
    "Asia",
    # 82. Iraq
    "Asia",
    # 83. Ireland
    "Europe",
    # 84. Israel
    "Asia",
    # 85. Italy
    "Europe",
    # 86. Jamaica
    "North America",
    # 87. Japan
    "Asia",
    # 88. Jordan
    "Asia",
    # 89. Kazakhstan
    "Asia",
    # 90. Kenya
    "Africa",
    # 91. Kiribati
    "Oceania",
    # 92. Kuwait
    "Asia",
    # 93. Kyrgyz Republic
    "Asia",
    # 94. Lao
    "Asia",
    # 95. Latvia
    "Europe",
    # 96. Lebanon
    "Asia",
    # 97. Lesotho
    "Africa",
    # 98. Liberia
    "Africa",
    # 99. Libya
    "Africa",
    # 100. Liechtenstein
    "Europe",
    # 101. Lithuania
    "Europe",
    # 102. Luxembourg
    "Europe",
    # 103. Madagascar
    "Africa",
    # 104. Malawi
    "Africa",
    # 105. Malaysia
    "Asia",
    # 106. Maldives
    "Asia",
    # 107. Mali
    "Africa",
    # 108. Malta
    "Europe",
    # 109. Marshall Islands
    "Oceania",
    # 110. Mauritania
    "Africa",
    # 111. Mauritius
    "Africa",
    # 112. Mexico
    "North America",
    # 113. Micronesia, Fed. Sts.
    "Oceania",
    # 114. Moldova
    "Europe",
    # 115. Monaco
    "Europe",
    # 116. Mongolia
    "Asia",
    # 117. Montenegro
    "Europe",
    # 118. Morocco
    "Africa",
    # 119. Mozambique
    "Africa",
    # 120. Myanmar
    "Asia",
    # 121. Namibia
    "Africa",
    # 122. Nauru
    "Oceania",
    # 123. Nepal
    "Asia",
    # 124. Netherlands
    "Europe",
    # 125. New Zealand
    "Oceania",
    # 126. Nicaragua
    "North America",
    # 127. Niger
    "Africa",
    # 128. Nigeria
    "Africa",
    # 129. North Korea
    "Asia",
    # 130. North Macedonia
    "Europe",
    # 131. Norway
    "Europe",
    # 132. Oman
    "Asia",
    # 133. Pakistan
    "Asia",
    # 134. Palau
    "Oceania",
    # 135. Palestine
    "Asia",
    # 136. Panama
    "North America",
    # 137. Papua New Guinea
    "Oceania",
    # 138. Paraguay
    "South America",
    # 139. Peru
    "South America",
    # 140. Philippines
    "Asia",
    # 141. Poland
    "Europe",
    # 142. Portugal
    "Europe",
    # 143. Qatar
    "Asia",
    # 144. Romania
    "Europe",
    # 145. Russia
    "Europe",
    # 146. Rwanda
    "Africa",
    # 147. Samoa
    "Oceania",
    # 148. San Marino
    "Europe",
    # 149. Sao Tome and Principe
    "Africa",
    # 150. Saudi Arabia
    "Asia",
    # 151. Senegal
    "Africa",
    # 152. Serbia
    "Europe",
    # 153. Seychelles
    "Africa",
    # 154. Sierra Leone
    "Africa",
    # 155. Singapore
    "Asia",
    # 156. Slovak Republic
    "Europe",
    # 157. Slovenia
    "Europe",
    # 158. Solomon Islands
    "Oceania",
    # 159. Somalia
    "Africa",
    # 160. South Africa
    "Africa",
    # 161. South Korea
    "Asia",
    # 162. South Sudan
    "Africa",
    # 163. Spain
    "Europe",
    # 164. Sri Lanka
    "Asia",
    # 165. St. Kitts and Nevis
    "North America",
    # 166. St. Lucia
    "North America",
    # 167. St. Vincent and the Grenadines
    "North America",
    # 168. Sudan
    "Africa",
    # 169. Suriname
    "South America",
    # 170. Sweden
    "Europe",
    # 171. Switzerland
    "Europe",
    # 172. Syria
    "Asia",
    # 173. Taiwan
    "Asia",
    # 174. Tajikistan
    "Asia",
    # 175. Tanzania
    "Africa",
    # 176. Thailand
    "Asia",
    # 177. Timor-Leste
    "Asia",
    # 178. Togo
    "Africa",
    # 179. Tonga
    "Oceania",
    # 180. Trinidad and Tobago
    "North America",
    # 181. Tunisia
    "Africa",
    # 182. Turkey
    "Asia",
    # 183. Turkmenistan
    "Asia",
    # 184. Tuvalu
    "Oceania",
    # 185. UAE
    "Asia",
    # 186. UK
    "Europe",
    # 187. USA
    "North America",
    # 188. Uganda
    "Africa",
    # 189. Ukraine
    "Europe",
    # 190. Uruguay
    "South America",
    # 191. Uzbekistan
    "Asia",
    # 192. Vanuatu
    "Oceania",
    # 193. Venezuela
    "South America",
    # 194. Vietnam
    "Asia",
    # 195. Yemen
    "Asia",
    # 196. Zambia
    "Africa",
    # 197. Zimbabwe
    "Africa"
]

In [85]:
# Preview the first five country/continent pairs.
data_country.head(n=5)

Unnamed: 0,country,continent
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,Andorra,Europe
4,Angola,Africa


In [86]:
# Add the continent column to every (country, year) row; a left join keeps
# all existing rows of merged_df.
merged_df = merged_df.merge(data_country, on='country', how='left')

In [87]:
# Preview the first five rows, now including the continent column.
merged_df.head(n=5)

Unnamed: 0,country,year,population,gdp_per_capita,life_expectancy,daily_income,child_mortality,co2_emission,babies_per_woman,continent
0,Afghanistan,1800,3280000.0,481.0,28.2,1.33,469.0,0.001,7.0,Asia
1,Afghanistan,1801,3280000.0,481.0,28.2,1.33,469.0,0.001,7.0,Asia
2,Afghanistan,1802,3280000.0,481.0,28.2,1.33,469.0,0.001,7.0,Asia
3,Afghanistan,1803,3280000.0,481.0,28.2,1.33,469.0,0.001,7.0,Asia
4,Afghanistan,1804,3280000.0,481.0,28.2,1.33,469.0,0.001,7.0,Asia


In [92]:
# float() cannot parse the Unicode minus sign (U+2212), so swap it for an
# ASCII hyphen before converting the CO2 column to float.
merged_df['co2_emission'] = (
    merged_df['co2_emission']
    .astype(str)
    .str.replace('−', '-', regex=False)
    .astype(float)
)
# Store continent labels as plain Python strings.
merged_df['continent'] = merged_df['continent'].astype(str)

In [93]:
# Display the dtype of each column of merged_df to check the conversions.
merged_df.dtypes

country              object
year                 object
population          float64
gdp_per_capita      float64
life_expectancy     float64
daily_income        float64
child_mortality     float64
co2_emission        float64
babies_per_woman    float64
continent            object
dtype: object

In [89]:
# Write the final merged table to disk, leaving out the row index.
merged_df.to_csv(path_or_buf='Gapminder.csv', index=False)