In [140]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np # linear algebra
import plotly.plotly as py
from plotly.graph_objs import *

In [129]:
# Read the by-country land-temperature table and preview its first rows.
csv_path = 'GlobalLandTemperaturesByCountry.csv'
glob_coun = pd.read_csv(csv_path)
glob_coun.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [130]:
# Summary statistics for the numeric columns. A `count` that differs between
# columns means one of them has more missing (NaN) values than the other.
glob_coun.describe()

Unnamed: 0,AverageTemperature,AverageTemperatureUncertainty
count,544811.0,545550.0
mean,17.193354,1.019057
std,10.953966,1.20193
min,-37.658,0.052
25%,10.025,0.323
50%,20.901,0.571
75%,25.814,1.206
max,38.842,15.003


As `describe()` shows, the `AverageTemperature` column is missing some data (its count is lower than that of `AverageTemperatureUncertainty`). While it is possible to fill in the missing values, it is impractical: the data set is too large to fill in by hand, and imputing a single constant would make no sense. For example, if I set every missing temperature to the overall mean of about 17 °C, it would look odd for January or February.

So for this project, I will divide the data into 2 categories:
    1. The average temperature per year per country
    2. The average temperature per month per year per country

# Average Temperature per Year

In [131]:
# Collapse the dated records into one mean temperature per (year, country).
# The year is kept as a 4-character string because later cells filter with
# string comparisons such as `glob_coun['dt'] == '2013'`.
# `.dt.strftime('%Y')` extracts the year in one vectorised step instead of
# formatting a full date and slicing each string inside `.apply`.
glob_coun['dt'] = pd.to_datetime(glob_coun['dt']).dt.strftime('%Y')
glob_coun = (
    glob_coun
    .groupby(['dt', 'Country'])['AverageTemperature']
    .mean()  # missing readings are skipped when averaging
    .reset_index()
)
glob_coun.head()

Unnamed: 0,dt,Country,AverageTemperature
0,1743,Albania,8.62
1,1743,Andorra,7.556
2,1743,Austria,2.482
3,1743,Belarus,0.767
4,1743,Belgium,7.106


In [132]:
# Number of distinct labels in the Country column.
glob_coun['Country'].nunique()

243

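As we can see here, there are 243 distinct entries in the `Country` column, but not all of them are individual countries: some are continents, and a few countries are listed twice, once with a "(Europe)" suffix. We should all know that countries like France, Denmark, and the UK belong to Europe, so there is no need to clarify that. Removing the eight redundant entries in the next cell should leave 235 countries.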

In [133]:
# Labels to discard: continent-level rows plus the unsuffixed duplicates of
# countries that also appear with a "(Europe)" suffix.
excluded_labels = [
    'Denmark', 'Antarctica', 'France', 'Europe', 'Netherlands',
    'United Kingdom', 'Africa', 'South America',
]
keep_rows = ~glob_coun['Country'].isin(excluded_labels)
glob_coun = glob_coun[keep_rows]

# Give the "(Europe)" variants the plain country name.
europe_renames = {
    'Denmark (Europe)': 'Denmark',
    'France (Europe)': 'France',
    'Netherlands (Europe)': 'Netherlands',
    'United Kingdom (Europe)': 'United Kingdom',
}
glob_coun = glob_coun.replace(to_replace=europe_renames)

glob_coun.head()

Unnamed: 0,dt,Country,AverageTemperature
0,1743,Albania,8.62
1,1743,Andorra,7.556
2,1743,Austria,2.482
3,1743,Belarus,0.767
4,1743,Belgium,7.106


I will try to make a world map, so we can see the temperature of each country. I will compare 1920 with 2013. Why 1920? Because it was after the Industrial Revolution period, one of the most polluted periods in human history. I want to see if that pollution had any impact on the Earth.

In [134]:
# Sorted, de-duplicated country names that have a row in each year of interest.
countries2013, countries1920 = (
    np.unique(glob_coun.loc[glob_coun['dt'] == year, 'Country'])
    for year in ('2013', '1920')
)

### 1920

In [144]:
# Choropleth of each country's mean land temperature in 1920.
# Locations, hover text and z-values all come from the same filtered frame,
# so every temperature stays paired with its own country regardless of sort
# order. The hover text previously referenced a name (`countries`) that is
# never defined in this notebook.
temps_1920 = glob_coun[glob_coun['dt'] == '1920']

data = [dict(
    type='choropleth',
    locations=temps_1920['Country'],
    z=temps_1920['AverageTemperature'],
    locationmode='country names',
    text=temps_1920['Country'],
    marker=dict(line=dict(color='rgb(0,0,0)', width=1)),
    # colorbar is a trace-level key (a sibling of marker, not nested in it).
    # NOTE(review): `autotick` looks like a legacy attribute; confirm it is
    # still accepted by the installed plotly version.
    colorbar=dict(
        autotick=True,
        tickprefix='',
        title='# Average<br>Temperature,<br>°C',
    ),
)]

layout = dict(
    title='Average land temperature in 1920',
    geo=dict(
        showframe=False,
        showocean=True,
        oceancolor='white',
        projection=dict(
            type='orthographic',
            rotation=dict(lon=60, lat=10),
        ),
        lonaxis=dict(showgrid=True, gridcolor='rgb(102, 102, 102)'),
        lataxis=dict(showgrid=True, gridcolor='rgb(102, 102, 102)'),
    ),
)

fig = dict(data=data, layout=layout)
# Year-specific filename: reusing one name for both maps makes the later
# upload replace the earlier one.
py.iplot(fig, validate=False, filename='worldmap1920')

### 2013

In [145]:
# Choropleth of each country's mean land temperature in 2013.
# Locations, hover text and z-values all come from the same filtered frame,
# so every temperature stays paired with its own country regardless of sort
# order. The hover text previously referenced a name (`countries`) that is
# never defined in this notebook.
temps_2013 = glob_coun[glob_coun['dt'] == '2013']

data = [dict(
    type='choropleth',
    locations=temps_2013['Country'],
    z=temps_2013['AverageTemperature'],
    locationmode='country names',
    text=temps_2013['Country'],
    marker=dict(line=dict(color='rgb(0,0,0)', width=1)),
    # colorbar is a trace-level key (a sibling of marker, not nested in it).
    # NOTE(review): `autotick` looks like a legacy attribute; confirm it is
    # still accepted by the installed plotly version.
    colorbar=dict(
        autotick=True,
        tickprefix='',
        title='# Average<br>Temperature,<br>°C',
    ),
)]

layout = dict(
    title='Average land temperature in 2013',
    geo=dict(
        showframe=False,
        showocean=True,
        oceancolor='white',
        projection=dict(
            type='orthographic',
            rotation=dict(lon=60, lat=10),
        ),
        lonaxis=dict(showgrid=True, gridcolor='rgb(102, 102, 102)'),
        lataxis=dict(showgrid=True, gridcolor='rgb(102, 102, 102)'),
    ),
)

fig = dict(data=data, layout=layout)
# Year-specific filename: reusing one name for both maps makes the later
# upload replace the earlier one.
py.iplot(fig, validate=False, filename='worldmap2013')