In [1]:
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
matplotlib.style.use('ggplot')

In [2]:
# Directory that holds the dataset CSV. It defaults to the original author's
# local checkout; set the CLIMATE_DATA_PATH environment variable to run the
# notebook on another machine without editing this cell.
DATA_PATH = os.environ.get(
    "CLIMATE_DATA_PATH",
    "/home/brandon/Documents/Data Science/Final-Project-Climate-Change/",
)

# os.path.join accepts the directory with or without a trailing slash.
# 'dt' is parsed as a datetime so the year can be extracted from it later.
global_temperatures = pd.read_csv(
    os.path.join(DATA_PATH, "GlobalLandTemperaturesByCountry.csv"),
    parse_dates=['dt'],
)
global_temperatures.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [3]:
# Reduce each timestamp in 'dt' to its calendar year and expose it as 'Year'.
# The vectorised .dt accessor replaces a per-row Python lambda, and the column
# is renamed by name so the other columns cannot be mislabelled if the CSV
# layout ever changes.
# NOTE: re-running this cell without re-running the load cell raises a
# KeyError, because 'dt' has already been renamed to 'Year'.
global_temperatures = (
    global_temperatures
    .assign(dt=lambda frame: frame['dt'].dt.year)
    .rename(columns={'dt': 'Year'})
)

global_temperatures.head()

Unnamed: 0,Year,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743,4.384,2.294,Åland
1,1743,,,Åland
2,1744,,,Åland
3,1744,,,Åland
4,1744,,,Åland


In [4]:
# Collapse the rows to one per (Country, Year) by averaging every remaining
# column; as_index=False keeps Country and Year as ordinary columns.
avg_data = (
    global_temperatures
    .groupby(['Country', 'Year'], as_index=False)
    .mean()
)
avg_data.head()

Unnamed: 0,Country,Year,AverageTemperature,AverageTemperatureUncertainty
0,Afghanistan,1838,18.379571,2.756
1,Afghanistan,1839,,
2,Afghanistan,1840,13.413455,2.502
3,Afghanistan,1841,13.9976,2.4521
4,Afghanistan,1842,15.154667,2.381222


In [5]:
# Change = temperature on the country's NEXT row minus temperature on this row.
# Example:
# ... | AverageTemperature | ... | Change
# ... |         a          | ... |  b - a
# ... |         b          | ... |   ...
#
# np.diff returns one element fewer than its input, so it cannot be used as a
# groupby transform (which must return a result the same length as the group).
# Shifting within each country keeps the result aligned with avg_data's index;
# the last row of every country has no successor and is left as NaN.
next_row_temperature = avg_data.groupby('Country')['AverageTemperature'].shift(-1)
avg_data['Change'] = next_row_temperature - avg_data['AverageTemperature']
avg_data.head()

Unnamed: 0,Country,Year,AverageTemperature,AverageTemperatureUncertainty,Change
0,Afghanistan,1838,18.379571,2.756,
1,Afghanistan,1839,,,
2,Afghanistan,1840,13.413455,2.502,0.584145
3,Afghanistan,1841,13.9976,2.4521,1.157067
4,Afghanistan,1842,15.154667,2.381222,-1.398417


In [6]:
# Average rate of change for each country.
# Only the numeric 'Change' column is aggregated: the grouping key 'Country'
# holds strings and must not be passed to mean(). NaN changes are skipped by
# mean(). The result column is renamed by name rather than by position.
avg_rate = (
    avg_data
    .groupby('Country', as_index=False)['Change']
    .mean()
    .rename(columns={'Change': 'AverageChange'})
)
avg_rate.head()

Unnamed: 0,Country,AverageChange
0,Afghanistan,0.018036
1,Africa,0.013839
2,Albania,0.016188
3,Algeria,-0.057642
4,American Samoa,0.006192


In [8]:
# Ten countries with the largest average change, highest first.
ranked_by_change = avg_rate.sort_values(by='AverageChange', ascending=False)
avg_rate_top10 = ranked_by_change.head(10)
avg_rate_top10

Unnamed: 0,Country,AverageChange
119,Kuwait,0.157133
225,Turkmenistan,0.083038
84,Georgia,0.081638
234,Uzbekistan,0.077606
16,Azerbaijan,0.064173
214,Syria,0.060052
114,Jordan,0.04445
144,Mongolia,0.039008
123,Lebanon,0.038286
83,Gaza Strip,0.037341


### If we take a look at the top ten countries with the highest positive rate of change in average temperature, we notice that they are concentrated in the Middle East, the Caucasus, and Central Asia (Mongolia is the one exception). Our data suggests that the rate of warming is correlated with geography.

In [15]:
# Let's look at the most recent decade (rows with Year >= 2003).
# NOTE: this rebinds avg_data to the filtered rows. Any cell above that is
# re-run after this one will see only the 2003+ data until avg_data is
# rebuilt from global_temperatures.
avg_data = avg_data[avg_data['Year'] >= 2003]

# Same aggregation as for the full history: mean of the numeric 'Change'
# column per country. The string grouping key is not passed to mean().
avg_rate = (
    avg_data
    .groupby('Country', as_index=False)['Change']
    .mean()
    .rename(columns={'Change': 'AverageChange'})
)

# Ten countries with the largest average change in this window, highest first.
avg_rate_top10 = avg_rate.sort_values(['AverageChange'], ascending=False)[:10]
avg_rate_top10

Unnamed: 0,Country,AverageChange
119,Kuwait,1.183792
228,Ukraine,0.981178
142,Moldova,0.935576
179,Romania,0.834383
22,Belarus,0.806746
33,Bulgaria,0.801352
131,Macedonia,0.788792
193,Serbia,0.787705
75,Finland,0.782186
84,Georgia,0.765375


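### If we focus on the most recent decade, we see that the ten countries with the highest average rate of change are instead concentrated in Eastern Europe, with Kuwait, Finland, and Georgia as the exceptions.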