In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import requests

## World Bank Data Cleaning

In [2]:
# Storing columns to keep in a list to re-iterate over multiple csv's
# columns_to_keep = ["Country Name", "Country Code", "Indicator Name", "2015"]
columns_to_keep = ["Country Name", "Country Code", "Indicator Name", "2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015"]

rows_to_remove = ["ARB", "CEB", "EUU", "EAP", "EAR", "EAS", "ECA", "ECS", "EMU", "FCS", "HIC", "HPC", "IBT", "IBD", "IDB", "IDA", "IDX", "INX", "LAC", "LIC", "LDC", "LCN", "LMC", "LMY", "LTE","LMY", "MIC", "MEA", "MNA", "NAC", "OED", "OSS", "PRE", "PST", "SAS", "SSA", "SSF", "SST", "TEA", "TEC", "TLA", "TMN", "TSA", "TSS", "UMC"]

In [3]:
# Final length of list should be 265 - len(rows_to_remove)
len(rows_to_remove)

45

In [4]:
# Loading raw world bank data and adding to a list
CO2_data = pd.read_csv('raw_data/CO2emissions_data.csv', error_bad_lines=False)

GDP_data = pd.read_csv('raw_data/GDPpercapita-data.csv', error_bad_lines=False)

Renewable_data = pd.read_csv('raw_data/Renewable_energy.csv', error_bad_lines=False)

csv_list = []

csv_list = [CO2_data, GDP_data, Renewable_data]

In [6]:
#Loop through each world bank csv and clean at the same time 
for csv in csv_list:

    # Filtering to only columns to keep 
    csv.drop(columns=[col for col in csv if col not in columns_to_keep], inplace=True)

    # Rename columns 
    csv.rename(columns={"country_name":"Country Name","country_code":"Country Code","indicator_name":"Indicator Name"}, inplace = True)

    # Set Country code as the index 
    csv.set_index("Country Code", inplace=True)
    
    # Drop any indices that are non-countries ex. MIC is for the all Middle Income countr CO2 emissions
    csv.drop(index=["ARB", "CEB", "EUU", "EAP", "EAR", "EAS", "ECA", "ECS", "EMU", "FCS", "HIC", "HPC", "IBT", "IBD", "IDB", "IDA", "IDX", "INX", "LAC", "LIC", "LDC", "LCN", "LMC", "LMY", "LTE","LMY", "MIC", "MEA", "MNA", "NAC", "OED", "OSS", "PRE", "PST", "SAS", "SSA", "SSF", "SST", "TEA", "TEC", "TLA", "TMN", "TSA", "TSS", "UMC"], inplace=True)

In [7]:
#Checking to confirm cleaning is correct
GDP_data

Unnamed: 0_level_0,Country Name,Indicator Name,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
ABW,Aruba,GDP per capita growth (annual %),5.427608,-5.107188,-5.405878,-0.152951,6.031742,-0.094958,0.245883,1.410010,-0.224764,-10.605300,-3.887760,3.063882,-1.864168,3.593198,-0.294412,5.125616
AFG,Afghanistan,GDP per capita growth (annual %),,,,3.868362,-2.875184,7.207933,2.253357,11.022774,1.594211,18.515369,11.264133,-2.681081,8.974880,1.974169,-0.665271,-1.622887
AGO,Angola,GDP per capita growth (annual %),-0.267945,0.822114,9.943764,-0.431851,7.187036,11.030836,7.582329,9.890012,7.116873,-2.808634,0.640294,-0.220851,4.706498,1.292041,1.219835,-2.468719
ALB,Albania,GDP per capita growth (annual %),7.633866,9.311124,4.853922,5.925630,5.951881,6.071391,6.570332,6.783927,8.328036,4.048888,4.223038,2.821558,1.585156,1.187204,1.985426,2.516853
AND,Andorra,GDP per capita growth (annual %),1.913452,4.986929,0.504741,4.040888,3.792889,1.892421,2.057546,-0.523712,-6.885786,-5.976668,-1.958707,0.830102,-3.452688,-1.573746,4.524456,2.997046
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XKX,Kosovo,GDP per capita growth (annual %),,26.887780,-0.768379,5.908225,2.542539,5.961248,3.667026,6.427746,5.455412,2.766627,2.483395,3.482471,1.893956,2.813093,1.520680,5.525764
YEM,"Yemen, Rep.",GDP per capita growth (annual %),3.205962,0.853194,0.974921,0.789305,1.019025,2.612580,0.276736,0.450009,0.761294,0.986330,4.732882,-15.107922,-0.391864,2.014195,-2.802880,-29.827069
ZAF,South Africa,GDP per capita growth (annual %),2.742091,1.339791,2.397943,1.696827,3.289033,3.982017,4.277783,4.008500,1.823488,-2.898731,1.551073,1.720714,0.607949,0.852685,0.247279,-0.341677
ZMB,Zambia,GDP per capita growth (annual %),1.150423,2.595847,1.843714,4.236897,4.309021,4.471509,5.091501,5.497427,4.875226,6.190402,7.129801,2.423912,4.310349,1.809879,1.480524,-0.188153


In [None]:
##

In [None]:
# Slice top 20 countries with highest CO2 emissions -- USE THIS CHART TO FIND TOP 20 CO2 EMISSION DATA 
top20_CO2df = data.nlargest(20, '2015')

top20_CO2df.sort_values("Country Name", inplace= True)

top20_CO2df.head(5)

## Exporting to clean CSVs

In [None]:
CO2_data.to_csv("Cleaned_CO2.csv", index=True)

GDP_data.to_csv("Cleaned_GDP.csv", index=True)

Renewable_data.to_csv("Cleaned_renewable.csv", index=True)
