### Get the data

Import the required libraries.

In [97]:
import numpy as np
import pandas as pd
import plotly as py

Load the temperature data.

In [98]:
# Read the land-temperature CSV (path is relative to the notebook's working directory).
df_temp = pd.read_csv(filepath_or_buffer='data/global_land_temp_by_country.csv')

# Preview the first five rows.
df_temp.head(n=5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


Get a summary of the columns, dtypes, and non-null counts.

In [99]:
# Print column names, non-null counts, dtypes and memory usage of the loaded frame.
df_temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 577462 entries, 0 to 577461
Data columns (total 4 columns):
 #   Column                         Non-Null Count   Dtype  
---  ------                         --------------   -----  
 0   dt                             577462 non-null  object 
 1   AverageTemperature             544811 non-null  float64
 2   AverageTemperatureUncertainty  545550 non-null  float64
 3   Country                        577462 non-null  object 
dtypes: float64(2), object(2)
memory usage: 17.6+ MB


### Data Cleaning

drop unused columns

In [100]:
# Drop the uncertainty column, which is not used in the rest of the analysis.
# Reassigning (rather than inplace=True) together with errors='ignore' keeps
# this cell safe to re-run: a second execution is a no-op instead of a KeyError.
df_temp = df_temp.drop(columns=['AverageTemperatureUncertainty'], errors='ignore')

df_temp

Unnamed: 0,dt,AverageTemperature,Country
0,1743-11-01,4.384,Åland
1,1743-12-01,,Åland
2,1744-01-01,,Åland
3,1744-02-01,,Åland
4,1744-03-01,,Åland
...,...,...,...
577457,2013-05-01,19.059,Zimbabwe
577458,2013-06-01,17.613,Zimbabwe
577459,2013-07-01,17.000,Zimbabwe
577460,2013-08-01,19.759,Zimbabwe


rename columns

In [101]:
# Switch to short snake_case column names with a single rename mapping.
df_temp = df_temp.rename(
    columns={
        'dt': 'date',
        'AverageTemperature': 'av_temp',
        'Country': 'country',
    }
)

# Confirm the new column names and dtypes.
df_temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 577462 entries, 0 to 577461
Data columns (total 3 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   date     577462 non-null  object 
 1   av_temp  544811 non-null  float64
 2   country  577462 non-null  object 
dtypes: float64(1), object(2)
memory usage: 13.2+ MB


Check for null values in each column.

In [102]:
# Number of missing entries in each column.
df_temp.isna().sum()

date           0
av_temp    32651
country        0
dtype: int64

check percentage of missing values

In [103]:
# Share of missing entries per column, expressed as a percentage
# (the fraction is rounded to 5 decimals before scaling by 100).
df_temp.isna().mean().round(5).mul(100)

date       0.000
av_temp    5.654
country    0.000
dtype: float64

drop missing values


In [104]:
# Keep only rows without any missing value, then verify that no nulls remain.
df_temp = df_temp.dropna(axis=0, how='any')
df_temp.isnull().sum()

date       0
av_temp    0
country    0
dtype: int64

### Data Preprocessing

Group by `country`, averaging each country's temperature over all recorded dates.

In [105]:
# Mean temperature per country over all remaining records.
# 'av_temp' is selected explicitly because the frame also holds the string
# 'date' column: pandas >= 2.0 raises TypeError when .mean() is applied to a
# non-numeric column, and older versions only dropped it with a warning.
# as_index=False keeps 'country' as a regular column, so no reset_index is needed.
df_countries = df_temp.groupby('country', as_index=False)['av_temp'].mean()
df_countries

Unnamed: 0,country,av_temp
0,Afghanistan,14.045007
1,Africa,24.074203
2,Albania,12.610646
3,Algeria,22.985112
4,American Samoa,26.611965
...,...,...
237,Western Sahara,22.319818
238,Yemen,26.253597
239,Zambia,21.282956
240,Zimbabwe,21.117547


### Data Visualization

Load Plotly Components

In [106]:
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

Creating the visualization

In [107]:
# World choropleth of each country's mean temperature.
fig = go.Figure(
    data=go.Choropleth(
        locations=df_countries['country'],
        locationmode='country names',  # entries in `locations` are matched by country name
        z=df_countries['av_temp'],
        colorscale='Reds',
        marker_line_color='black',
        marker_line_width=0.5,
        colorbar_title='°C',  # label the colour bar with the unit of `z`
    )
)

# Centre the title and hide the map frame and coastlines.
fig.update_layout(
    title_text='Average Temperature in Celsius by Country (1743 - 2013)',
    title_x=0.5,
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular',
    ),
)

fig.show()