In [2]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np # linear algebra
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go

In [3]:
# Load the per-country temperature records and preview the first rows.
csv_path = 'GlobalLandTemperaturesByCountry.csv'
glob_coun = pd.read_csv(csv_path)
glob_coun.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [4]:
# Summary statistics for the numeric columns; comparing the `count` values
# shows that AverageTemperature has fewer non-null readings than its uncertainty.
glob_coun.describe()

Unnamed: 0,AverageTemperature,AverageTemperatureUncertainty
count,544811.0,545550.0
mean,17.193354,1.019057
std,10.953966,1.20193
min,-37.658,0.052
25%,10.025,0.323
50%,20.901,0.571
75%,25.814,1.206
max,38.842,15.003


As `describe()` showed, the 'AverageTemperature' column is missing some data. While it is possible to fill in the missing values, it is impractical: the dataset is too large to fill in by hand, and filling every gap with a single constant would make no sense. For example, if I set the missing temperatures to 17 (the overall mean), the values would look odd for January or February.

So for this project, I will divide the data into two categories:
    1. The average temperature per year per country
    2. The average temperature per month per year per country

# Average Temperature per Year

In [5]:
# Collapse the monthly records into one mean temperature per (year, country).
#
# The year is extracted directly with '%Y' instead of formatting a full
# 'dd/mm/YYYY' string and slicing it per row. It is deliberately kept as a
# string because later cells filter with comparisons such as
# glob_coun['dt'] == '1900'.
glob_coun['dt'] = pd.to_datetime(glob_coun['dt']).dt.strftime('%Y')

glob_coun = (
    glob_coun
    .groupby(['dt', 'Country'])['AverageTemperature']
    .mean()  # missing readings are skipped; a year with no readings stays NaN
    .reset_index()
)
glob_coun.head()

Unnamed: 0,dt,Country,AverageTemperature
0,1743,Albania,8.62
1,1743,Andorra,7.556
2,1743,Austria,2.482
3,1743,Belarus,0.767
4,1743,Belgium,7.106


In [6]:
# Per country: number of yearly rows (dt) and how many of those rows carry a
# non-null mean temperature (AverageTemperature).
glob_coun.groupby('Country').count() 

Unnamed: 0_level_0,dt,AverageTemperature
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,176,175
Africa,164,159
Albania,271,267
Algeria,227,227
American Samoa,147,139
Andorra,271,267
Angola,157,147
Anguilla,190,175
Antarctica,64,0
Antigua And Barbuda,190,175


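As we can see here, there are 243 distinct entries in the 'Country' column, but not all of them are individual countries: some are continents (e.g. Africa), and some countries appear twice, for example as both 'France' and 'France (Europe)'. We all know that countries like France, Denmark, and the UK belong to Europe, so there is no need for the '(Europe)' suffix.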

In [7]:
# Entries to discard: continent-level rows, plus the un-suffixed duplicates of
# countries that also appear with a "(Europe)" suffix.
# NOTE(review): other continent-level aggregates (e.g. Asia, North America,
# Oceania) may also be present in the data — confirm and extend if needed.
names_to_drop = [
    'Denmark', 'Antarctica', 'France', 'Europe', 'Netherlands',
    'United Kingdom', 'Africa', 'South America',
]
keep_row = ~glob_coun['Country'].isin(names_to_drop)
glob_coun = glob_coun[keep_row]

# Strip the "(Europe)" suffix from the entries that were kept.
suffix_renames = {
    'Denmark (Europe)': 'Denmark',
    'France (Europe)': 'France',
    'Netherlands (Europe)': 'Netherlands',
    'United Kingdom (Europe)': 'United Kingdom',
}
glob_coun = glob_coun.replace(suffix_renames)

glob_coun.head()

Unnamed: 0,dt,Country,AverageTemperature
0,1743,Albania,8.62
1,1743,Andorra,7.556
2,1743,Austria,2.482
3,1743,Belarus,0.767
4,1743,Belgium,7.106


I will try to make a world map so we can see the temperature of each country. I will compare 1900 with 2013. Why 1900? Because it came after the Industrial Revolution period, one of the most polluting periods in human history. I want to see whether that pollution had any impact on the Earth.

In [61]:
# Sorted array of the distinct values in the Country column.
countries = np.unique(glob_coun['Country'])

In [106]:
# Peek at the yearly mean temperatures recorded for 1920 (first 100 rows).
is_1920 = glob_coun['dt'] == '1920'
glob_coun.loc[is_1920, 'AverageTemperature'].head(100)

25487    13.036500
25489    12.697500
25490    23.094333
25491    26.350333
25492    11.419583
25493    21.783833
25494    26.415000
25495    26.139750
25496    14.611333
25497     7.020583
25498    27.625083
25499     7.263667
25500    21.324417
25501     6.830833
25502     9.594083
25503    24.880000
25504    25.633583
25505    25.185500
25506    24.890917
25507    26.040750
25508     6.400667
25509     9.834500
25510    24.884833
25511    27.074500
25512    11.806417
25513    20.524167
25514    26.868583
25515    10.766500
25516    21.943917
25517    24.409917
           ...    
25560    24.504833
25561     3.017500
25563    10.747750
25564    25.372167
25565    25.259000
25566    24.136167
25567    27.337167
25568    18.551583
25569     6.639583
25570     8.615667
25571    26.835667
25572    14.447583
25573   -19.683750
25574    26.346667
25575    25.980333
25576    26.565917
25577    23.007750
25578    11.198583
25579    25.381250
25580    26.567583
25581    25.626083
25582    26.

### 1900

In [108]:
# Choropleth of each country's mean land temperature in 1900.
#
# Locations, hover text and values are all taken from the same filtered rows so
# that every temperature stays paired with its own country. Pairing the year's
# values with the list of *all* country names only lines up when every country
# has a row for the year.
temps_1900 = glob_coun[glob_coun['dt'] == '1900']

data = [dict(
    type = 'choropleth',
    locations = temps_1900['Country'],
    z = temps_1900['AverageTemperature'],
    locationmode = 'country names',
    text = temps_1900['Country'],
    marker = dict(
        line = dict(color = 'rgb(0,0,0)', width = 1)),
    # colorbar is a trace-level key (a sibling of marker, not nested inside it).
    # NOTE(review): autotick looks like a legacy plotly attribute; newer
    # versions use tickmode — confirm against the installed plotly version.
    colorbar = dict(
        autotick = True,
        tickprefix = '',
        title = '# Average\nTemperature,\n°C'),
)]

layout = dict(
    # The title must name the year that is actually plotted.
    title = 'Average land temperature in 1900',
    geo = dict(
        showframe = False,
        showocean = True,
        oceancolor = 'white',
        projection = dict(
            type = 'orthographic',
            rotation = dict(lon = 60, lat = 10),
        ),
        lonaxis = dict(
            showgrid = True,
            gridcolor = 'rgb(102, 102, 102)',
        ),
        lataxis = dict(
            showgrid = True,
            gridcolor = 'rgb(102, 102, 102)',
        ),
    ),
)

fig = dict(data=data, layout=layout)
py.iplot(fig, validate=False, filename='worldmap')

### 2013

In [86]:
# Choropleth of each country's mean land temperature in 2013.
#
# Locations, hover text and values are all taken from the same filtered rows so
# that every temperature stays paired with its own country. Pairing the year's
# values with the list of *all* country names only lines up when every country
# has a row for the year.
# NOTE(review): if the source data does not cover all twelve months of 2013,
# this yearly mean is not directly comparable with earlier years — confirm.
temps_2013 = glob_coun[glob_coun['dt'] == '2013']

data = [dict(
    type = 'choropleth',
    locations = temps_2013['Country'],
    z = temps_2013['AverageTemperature'],
    locationmode = 'country names',
    text = temps_2013['Country'],
    marker = dict(
        line = dict(color = 'rgb(0,0,0)', width = 1)),
    # colorbar is a trace-level key (a sibling of marker, not nested inside it).
    # NOTE(review): autotick looks like a legacy plotly attribute; newer
    # versions use tickmode — confirm against the installed plotly version.
    colorbar = dict(
        autotick = True,
        tickprefix = '',
        title = '# Average\nTemperature,\n°C'),
)]

layout = dict(
    title = 'Average land temperature in 2013',
    geo = dict(
        showframe = False,
        showocean = True,
        oceancolor = 'white',
        projection = dict(
            type = 'orthographic',
            rotation = dict(lon = 60, lat = 10),
        ),
        lonaxis = dict(
            showgrid = True,
            gridcolor = 'rgb(102, 102, 102)',
        ),
        lataxis = dict(
            showgrid = True,
            gridcolor = 'rgb(102, 102, 102)',
        ),
    ),
)

fig = dict(data=data, layout=layout)
py.iplot(fig, validate=False, filename='worldmap')