In [13]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import requests

## World Bank Data Cleaning

In [14]:
# Storing columns to keep in a list to re-iterate over multiple csv's
columns_to_keep = ["Country Name", "Country Code", "Indicator Name", "2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015"]

rows_to_remove = ["ARB", "CEB", "EUU", "EAP", "EAR", "EAS", "ECA", "ECS", "EMU", "FCS", "HIC", "HPC", "IBT", "IBD", "IDB", "IDA", "IDX", "INX", "LAC", "LIC", "LDC", "LCN", "LMC", "LMY", "LTE","LMY", "MIC", "MEA", "MNA", "NAC", "OED", "OSS", "PRE", "PST", "SAS", "SSA", "SSF", "SST", "TEA", "TEC", "TLA", "TMN", "TSA", "TSS", "UMC"]

In [15]:
# Final length of list should be 265 - len(rows_to_remove)
len(rows_to_remove)

45

In [16]:
# Loading raw world bank data and adding to a list
CO2_data = pd.read_csv('raw_data/CO2emissions_data.csv', error_bad_lines=False)

GDP_data = pd.read_csv('raw_data/GDPpercapita-data.csv', error_bad_lines=False)

Renewable_data = pd.read_csv('raw_data/Renewable_energy.csv', error_bad_lines=False)

csv_list = []

csv_list = [CO2_data, GDP_data, Renewable_data]

In [17]:
#Loop through each world bank csv and clean at the same time 
for csv in csv_list:

    # Filtering to only columns to keep 
    csv.drop(columns=[col for col in csv if col not in columns_to_keep], inplace=True)

    # Rename columns 
    csv.rename(columns={"country_name":"Country Name","country_code":"Country Code","indicator_name":"Indicator Name"}, inplace = True)

    # Set Country code as the index 
    csv.set_index("Country Code", inplace=True)
    
    # Drop any indices that are non-countries ex. MIC is for the all Middle Income countr CO2 emissions
    csv.drop(index=["ARB", "CEB", "EUU", "EAP", "EAR", "EAS", "ECA", "ECS", "EMU", "FCS", "HIC", "HPC", "IBT", "IBD", "IDB", "IDA", "IDX", "INX", "LAC", "LIC", "LDC", "LCN", "LMC", "LMY", "LTE","LMY", "MIC", "MEA", "MNA", "NAC", "OED", "OSS", "PRE", "PST", "SAS", "SSA", "SSF", "SST", "TEA", "TEC", "TLA", "TMN", "TSA", "TSS", "UMC"], inplace=True)

In [18]:
#Checking to confirm cleaning is correct
GDP_data.head(5)

Unnamed: 0_level_0,Country Name,Indicator Name,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
ABW,Aruba,GDP per capita growth (annual %),5.427608,-5.107188,-5.405878,-0.152951,6.031742,-0.094958,0.245883,1.41001,-0.224764,-10.6053,-3.88776,3.063882,-1.864168,3.593198,-0.294412,5.125616
AFG,Afghanistan,GDP per capita growth (annual %),,,,3.868362,-2.875184,7.207933,2.253357,11.022774,1.594211,18.515369,11.264133,-2.681081,8.97488,1.974169,-0.665271,-1.622887
AGO,Angola,GDP per capita growth (annual %),-0.267945,0.822114,9.943764,-0.431851,7.187036,11.030836,7.582329,9.890012,7.116873,-2.808634,0.640294,-0.220851,4.706498,1.292041,1.219835,-2.468719
ALB,Albania,GDP per capita growth (annual %),7.633866,9.311124,4.853922,5.92563,5.951881,6.071391,6.570332,6.783927,8.328036,4.048888,4.223038,2.821558,1.585156,1.187204,1.985426,2.516853
AND,Andorra,GDP per capita growth (annual %),1.913452,4.986929,0.504741,4.040888,3.792889,1.892421,2.057546,-0.523712,-6.885786,-5.976668,-1.958707,0.830102,-3.452688,-1.573746,4.524456,2.997046


## Manipulating Cleaned World Bank Data 


In [19]:
# Slice out World CO2 emissions 
world_CO2= CO2_data.loc["WLD"]

world_CO2

Country Name                   World
Indicator Name    CO2 emissions (kt)
2000                 23918172.187422
2001                 24101535.279574
2002                 24634020.255788
2003                 25893535.033145
2004                 27106644.134534
2005                 28043870.884737
2006                 29021469.960864
2007                 29513157.394651
2008                 30680656.021326
2009                 29915812.396866
2010                 31927784.123311
2011                 33090763.411553
2012                 33683606.846031
2013                 33848272.799064
2014                 34103192.880726
2015                 34040671.314996
Name: WLD, dtype: object

In [20]:
# Drop World emissions
CO2_data.drop(index=["WLD"], inplace=True)

# Slice top 20 countries with highest CO2 emissions
top20_CO2df = CO2_data.nlargest(20, '2015')

# Sort Country Names alphabetically 
top20_CO2df.sort_values("Country Name", inplace= True)

top20_CO2df.head(5)

Unnamed: 0_level_0,Country Name,Indicator Name,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
AUS,Australia,CO2 emissions (kt),329443.28,324844.862,341353.696,336271.234,342699.485,350172.831,365346.877,372090.49,385904.079,394792.887,390861.863,391818.95,388126.3,372317.8,361316.8,365332.2
BRA,Brazil,CO2 emissions (kt),327983.814,337433.673,332266.87,321621.569,337826.042,347308.904,347668.27,363212.683,387631.236,367147.374,419754.156,439412.943,470028.7,503677.1,533530.2,504388.5
CAN,Canada,CO2 emissions (kt),534380.909,527926.989,519335.208,553100.944,553357.634,561425.034,571993.328,571846.648,561791.734,532191.71,527263.262,522774.854,517721.7,519188.5,540614.8,549430.3
CHN,China,CO2 emissions (kt),3405179.867,3487566.356,3850269.326,4540417.061,5233538.733,5896957.705,6529291.518,6697654.489,7553070.247,7557789.676,8776040.416,9733538.12,10028570.0,10258010.0,10291930.0,10145000.0
DEU,Germany,CO2 emissions (kt),829977.779,853662.932,829724.756,823003.145,815969.839,797759.517,816721.574,781247.016,779296.172,720547.165,757880.892,729810.007,738141.4,757961.6,720363.8,727045.1


In [21]:
# Store Top 20 country codes in a list 
top20_countrynames_list = top20_CO2df["Country Code"]
top20_countrycodes_list

KeyError: 'Country Code'

In [None]:
# Store Top 20 Countries-GDP chart 
top20_GDP= GDP_data.loc[top20_countrycodes_list]
top20_GDP.sort_values("Country Name", inplace= True)
len(top20_GDP)

In [None]:
# Store Top 20 Countries-Renewable Energy chart 
top20_Renewable= Renewable_data.loc[top20_countrycodes_list]
top20_Renewable.sort_values("Country Name", inplace= True)
len(top20_Renewable)

## Exporting to clean CSVs

In [None]:
top20_CO2df.to_csv("top20_CO2df.csv", index=True)

CO2_data.to_csv("CO2_data.csv", index=True)

top20_GDP.to_csv("top20_GDP.csv", index=True)

GDP_data.to_csv("GDP_data.csv", index=True)

top20_Renewable.to_csv("top20_Renewable.csv", index=True)

Renewable_data.to_csv("Renewable_data.csv", index=True)

world_CO2.to_csv("top20_Renewable.csv", index=True)

top20_countrynames_list.to_csv("top20_countrynames_list.csv", index=True)

top20_countrycodes_list.to_csv("top20_countrycodes_list.csv", index=True)