## Has climate change been hapenning?

## Data visualizations to show how the Earth’s surface temperatures have changed over time 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import seaborn as sns
import time
import warnings
warnings.filterwarnings('ignore')

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/climate-change-earth-surface-temperature-data/GlobalLandTemperaturesByCity.csv
/kaggle/input/climate-change-earth-surface-temperature-data/GlobalLandTemperaturesByCountry.csv
/kaggle/input/climate-change-earth-surface-temperature-data/GlobalLandTemperaturesByState.csv
/kaggle/input/climate-change-earth-surface-temperature-data/GlobalTemperatures.csv
/kaggle/input/climate-change-earth-surface-temperature-data/GlobalLandTemperaturesByMajorCity.csv


In [2]:
global_temp_country = pd.read_csv('../input/climate-change-earth-surface-temperature-data/GlobalLandTemperaturesByCountry.csv')
print(global_temp_country.shape)
global_temp_country.head(10)

(577462, 4)


Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland
5,1744-04-01,1.53,4.68,Åland
6,1744-05-01,6.702,1.789,Åland
7,1744-06-01,11.609,1.577,Åland
8,1744-07-01,15.342,1.41,Åland
9,1744-08-01,,,Åland


## Data Cleaning

In [3]:
# Rename the columns 
global_temp_country.rename(columns={'dt':'Date', 'AverageTemperature':'AvgTem',
                                    'AverageTemperatureUncertainty':'Uncertainty'}, inplace=True)
global_temp_country.head()

Unnamed: 0,Date,AvgTem,Uncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [4]:
# Explore what is in columns
global_temp_country['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla', 'Antarctica',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros',
       'Congo (Democratic Republic Of The)', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark (Europe)', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecu

In [5]:
# As an example see how many column exist per country
global_temp_country[global_temp_country['Country']== 'Canada']

Unnamed: 0,Date,AvgTem,Uncertainty,Country
97255,1768-09-01,5.257,3.107,Canada
97256,1768-10-01,-3.393,2.981,Canada
97257,1768-11-01,-12.829,3.967,Canada
97258,1768-12-01,-20.582,4.622,Canada
97259,1769-01-01,-24.756,4.722,Canada
...,...,...,...,...
100191,2013-05-01,2.926,0.247,Canada
100192,2013-06-01,10.800,0.298,Canada
100193,2013-07-01,13.333,0.354,Canada
100194,2013-08-01,12.632,0.264,Canada


In [6]:
# Group by the countries
global_temp_country[['Country', 'Date']].groupby('Country').count()

Unnamed: 0_level_0,Date
Country,Unnamed: 1_level_1
Afghanistan,2106
Africa,1965
Albania,3239
Algeria,2721
American Samoa,1761
...,...
Western Sahara,2721
Yemen,1653
Zambia,1965
Zimbabwe,1965


#### **The dataset contains a list of reported temperatures from 18th century until 2013 for 243 countries, but there are some duplicates in the list of countries, which need to be replaced.**

In [7]:
#Let's remove the duplicated countries and countries for which no information about the temperature

global_temp_country_clear = global_temp_country[~global_temp_country['Country'].isin(
    ['Denmark', 'Antarctica', 'France', 'Europe', 'Netherlands',
     'United Kingdom', 'Africa', 'South America'])]

global_temp_country_clear = global_temp_country_clear.replace(
   ['Denmark (Europe)', 'France (Europe)', 'Netherlands (Europe)', 'United Kingdom (Europe)'],
   ['Denmark', 'France', 'Netherlands', 'United Kingdom'])

global_temp_country_clear.dropna(axis=0, how='any', inplace=True)

print('The dataframe had {} rows and columns before cleaning and now it has {} rows and columns, respectively.'.format(global_temp_country.shape, global_temp_country_clear.shape))

global_temp_country_clear.head(10)

The dataframe had (577462, 4) rows and columns before cleaning and now it has (526013, 4) rows and columns, respectively.


Unnamed: 0,Date,AvgTem,Uncertainty,Country
0,1743-11-01,4.384,2.294,Åland
5,1744-04-01,1.53,4.68,Åland
6,1744-05-01,6.702,1.789,Åland
7,1744-06-01,11.609,1.577,Åland
8,1744-07-01,15.342,1.41,Åland
10,1744-09-01,11.702,1.517,Åland
11,1744-10-01,5.477,1.862,Åland
12,1744-11-01,3.407,1.425,Åland
13,1744-12-01,-2.181,1.641,Åland
14,1745-01-01,-3.85,1.841,Åland


In [8]:
# Run the same code again to check if the changes were applied: Group by the countries
global_temp_country[['Country', 'Date']].groupby('Country').count()

Unnamed: 0_level_0,Date
Country,Unnamed: 1_level_1
Afghanistan,2106
Africa,1965
Albania,3239
Algeria,2721
American Samoa,1761
...,...
Western Sahara,2721
Yemen,1653
Zambia,1965
Zimbabwe,1965


# The Average Temperature for each country over time

In [9]:
global_temp_country_avg = global_temp_country_clear[['Country', 'AvgTem']].groupby('Country').mean().reset_index()
global_temp_country_avg.head(10)

Unnamed: 0,Country,AvgTem
0,Afghanistan,14.045007
1,Albania,12.610646
2,Algeria,22.985112
3,American Samoa,26.611965
4,Andorra,11.201553
5,Angola,21.824549
6,Anguilla,26.610492
7,Antigua And Barbuda,26.437924
8,Argentina,14.621194
9,Armenia,8.551648


In [10]:
#Create a list of countries and average temperature to be used for ploting

List_Avg_Tem = global_temp_country_avg['AvgTem'].to_list()
List_Countries = global_temp_country_avg['Country'].to_list()


## Mapping of average temperatures in the countries

In [11]:
# Plotting World countries' Temperature 

data = [ dict(
        type = 'choropleth',
        locations = List_Countries,
        z = List_Avg_Tem,
        locationmode = 'country names',
        text = List_Countries,
        marker = dict(
            line = dict(color = 'rgb(0,0,0)', width = 1)),
            colorbar = dict(autotick = True, tickprefix = '', 
            title = '# Average\nTemperature,\n°C')
            )
       ]

layout = dict(
    title = 'Average land temperature in countries',
    geo = dict(
        showframe = False,
        showocean = True,
        oceancolor = 'rgb(0,255,255)',
        projection = dict(
        type = 'orthographic',
            rotation = dict(
                    lon = 60,
                    lat = 10),
        ),
        lonaxis =  dict(
                showgrid = True,
                gridcolor = 'rgb(102, 102, 102)'
            ),
        lataxis = dict(
                showgrid = True,
                gridcolor = 'rgb(102, 102, 102)'
                )
            ),
        )

fig = dict(data=data, layout=layout)
py.iplot(fig, validate=False, filename='worldmap')


## Exploratory Data Analysis

In [12]:
global_temp = pd.read_csv('../input/climate-change-earth-surface-temperature-data/GlobalTemperatures.csv')
print(global_temp.shape)
global_temp.head(10)

(3192, 9)


Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1750-01-01,3.034,3.574,,,,,,
1,1750-02-01,3.083,3.702,,,,,,
2,1750-03-01,5.626,3.076,,,,,,
3,1750-04-01,8.49,2.451,,,,,,
4,1750-05-01,11.573,2.072,,,,,,
5,1750-06-01,12.937,1.724,,,,,,
6,1750-07-01,15.868,1.911,,,,,,
7,1750-08-01,14.75,2.231,,,,,,
8,1750-09-01,11.413,2.637,,,,,,
9,1750-10-01,6.367,2.668,,,,,,


In [14]:
years = np.unique(global_temp['dt'].apply(lambda x: x[:4]))
years

array(['1750', '1751', '1752', '1753', '1754', '1755', '1756', '1757',
       '1758', '1759', '1760', '1761', '1762', '1763', '1764', '1765',
       '1766', '1767', '1768', '1769', '1770', '1771', '1772', '1773',
       '1774', '1775', '1776', '1777', '1778', '1779', '1780', '1781',
       '1782', '1783', '1784', '1785', '1786', '1787', '1788', '1789',
       '1790', '1791', '1792', '1793', '1794', '1795', '1796', '1797',
       '1798', '1799', '1800', '1801', '1802', '1803', '1804', '1805',
       '1806', '1807', '1808', '1809', '1810', '1811', '1812', '1813',
       '1814', '1815', '1816', '1817', '1818', '1819', '1820', '1821',
       '1822', '1823', '1824', '1825', '1826', '1827', '1828', '1829',
       '1830', '1831', '1832', '1833', '1834', '1835', '1836', '1837',
       '1838', '1839', '1840', '1841', '1842', '1843', '1844', '1845',
       '1846', '1847', '1848', '1849', '1850', '1851', '1852', '1853',
       '1854', '1855', '1856', '1857', '1858', '1859', '1860', '1861',
      

In [None]:
mean_temp_world = []
mean_temp_world_uncertainty = []

for year in years:
    mean_temp_world.append(global_temp[global_temp['dt'].apply(
        lambda x: x[:4]) == year]['LandAverageTemperature'].mean())
    mean_temp_world_uncertainty.append(global_temp[global_temp['dt'].apply(
                lambda x: x[:4]) == year]['LandAverageTemperatureUncertainty'].mean())

trace0 = go.Scatter(
    x = years, 
    y = np.array(mean_temp_world) + np.array(mean_temp_world_uncertainty),
    fill= None,
    mode='lines',
    name='Uncertainty top',
    line=dict(
        color='rgb(0, 255, 255)',
    )
)
trace1 = go.Scatter(
    x = years, 
    y = np.array(mean_temp_world) - np.array(mean_temp_world_uncertainty),
    fill='tonexty',
    mode='lines',
    name='Uncertainty bot',
    line=dict(
        color='rgb(0, 255, 255)',
    )
)

trace2 = go.Scatter(
    x = years, 
    y = mean_temp_world,
    name='Average Temperature',
    line=dict(
        color='rgb(199, 121, 093)',
    )
)
data = [trace0, trace1, trace2]

layout = go.Layout(
    xaxis=dict(title='year'),
    yaxis=dict(title='Average Temperature, °C'),
    title='Average land temperature in world',
    showlegend = False)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)