In [110]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Ingest data: read every HTML table on the Wikipedia page and keep the second
# one (index 1). NOTE(review): the positional index is tied to the current page
# layout -- confirm it still points at the emissions table if the page changes.
tables = pd.read_html("https://en.wikipedia.org/wiki/List_of_countries_by_carbon_dioxide_emissions")
emissions_raw = tables[1]

# Replace the scraped header with short, code-friendly column names.
# pandas raises ValueError here if the table no longer has exactly 10 columns.
emissions_raw.columns = [
    "country",
    "emission1990",
    "emission2005",
    "emission2017",
    "2017percentage",
    "change2017_1990",
    "2017perlandarea",
    "2017percapita",
    "2018incl",
    "2018excl",
]

# Keep only real countries: the first 3 rows and the "European Union" row are
# not countries. The `index > 2` test relies on the default 0..n-1 index that
# read_html produces.
is_country = (emissions_raw.index > 2) & (emissions_raw["country"] != "European Union")

# .copy() makes `emissions` an independent frame, so later cells can add
# columns to it without SettingWithCopyWarning / writing into a slice.
emissions = emissions_raw.loc[is_country, :].copy()

In [None]:
# Graph 1: CO2 emissions of the five largest emitters (ranked by 2017 value).

# The three emission columns and the calendar years they stand for; the years
# are used as x values so the "Year" axis shows 1990/2005/2017 instead of the
# raw column names.
year_columns = ["emission1990", "emission2005", "emission2017"]
years = [1990, 2005, 2017]

# Rank by 2017 emissions, largest first (rows with a missing 2017 value are
# placed last by sort_values).
sortedemissions = emissions.sort_values("emission2017", ascending=False)

# Keep the top 5 rows; select columns by name rather than by position so the
# cell does not depend on column order.
biggestproducers = sortedemissions.head(5).loc[:, ["country", *year_columns]]

# One line per country, drawn on the explicit Axes object.
fig, ax = plt.subplots()

# Label-based access replaces the deprecated positional `row[0]` lookup.
for country, values in zip(biggestproducers["country"],
                           biggestproducers[year_columns].to_numpy()):
    ax.plot(years, values, label=country)

ax.set_xticks(years)
ax.set_title("C02 emissions of the biggest producers")
ax.set_xlabel("Year")
ax.set_ylabel("Mt C02")
# Anchor the legend outside the plot area, to the right of the axes.
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
ax.grid(True)
plt.show()


In [None]:
# Graph 2: the three best and three worst countries by 2017 emissions
# relative to their own 1990 level.

# Add the relative columns (1990 is the baseline = 100). `.assign` returns a
# new frame, so re-running this cell is idempotent and never writes into a
# filtered slice; rebinding `emissions` keeps the new columns available to
# later cells.
emissions = emissions.assign(**{
    "1990Percentage": 100,
    "Change2005": emissions["emission2005"] / emissions["emission1990"] * 100,
    "Change2017": emissions["emission2017"] / emissions["emission1990"] * 100,
})

# Only consider countries whose 1990 emission was larger than 5 Mt CO2, keep
# just the columns needed for the chart, and rank ascending by the 2017 ratio.
# Rows without a 2017 ratio are dropped first: sort_values puts NaN last, so
# they would otherwise be picked up as "worst" performers.
relative_columns = ["country", "1990Percentage", "Change2005", "Change2017"]
relativeemissions = (
    emissions.loc[emissions["emission1990"] > 5.0, relative_columns]
    .dropna(subset=["Change2017"])
    .sort_values("Change2017")
)

# First three rows = largest relative decrease, last three = largest relative
# increase. (The original list used position 3 instead of 2, skipping the
# third-best country.) Raises IndexError if fewer than 3 rows remain.
best_worst_relativeemissions = relativeemissions.iloc[[0, 1, 2, -3, -2, -1], :]

# One line per country, with real calendar years on the x-axis.
value_columns = ["1990Percentage", "Change2005", "Change2017"]
years = [1990, 2005, 2017]

fig, ax = plt.subplots()

# Label-based access replaces the deprecated positional `row[0]` lookup.
for country, values in zip(best_worst_relativeemissions["country"],
                           best_worst_relativeemissions[value_columns].to_numpy()):
    ax.plot(years, values, label=country)

ax.set_xticks(years)
ax.set_title("Best and worst performers C02 emissions")
ax.set_xlabel("Year")
# Plotted values are indexed to 1990 = 100, not a delta from zero.
ax.set_ylabel("Percent change")
# Anchor the legend outside the plot area, to the right of the axes.
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
ax.grid(True)
plt.show()
