# Temperature data per country

In [1]:
import numpy as np
import pickle
import pandas as pd
import os
from sklearn.preprocessing import scale

# Using CRU dataset

In [2]:
# World Bank country names: first column of the header-less, ';'-separated file.
c = pd.read_csv('utils/countries_wb.csv', delimiter=';', header=None, dtype=str)
countries = c[0].tolist()
countries

['Afghanistan',
 'Albania',
 'Algeria',
 'Angola',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas, The',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Comoros',
 'Congo, Dem. Rep.',
 'Congo, Rep.',
 'Costa Rica',
 "Cote d'Ivoire",
 'Croatia',
 'Cuba',
 'Cyprus',
 'Czech Republic',
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'Egypt, Arab Rep.',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia',
 'Ethiopia',
 'Fiji',
 'Finland',
 'France',
 'Gabon',
 'Gambia, The',
 'Georgia',
 'Germany',
 'Ghana',
 'Greece',
 'Greenland',
 'Grenada',
 'Guatemala',
 'Guinea',
 'Guinea-Bissau',
 'Guyana',
 'Haiti',
 'Honduras',
 '

In [3]:
# Load the CRU table of every World Bank country whose name is *exactly* the
# country token of a CRU file name (crucy.v4.04.1901.2019.<token>.tmp.per).
#
# A plain substring test (`country in file`) is ambiguous: 'Guinea' is also
# contained in the Guinea-Bissau / Equatorial_Guinea / Papua_New_Guinea files
# and 'Dominica' in Dominican_Republic, so whichever file os.listdir() happened
# to yield last would silently win. Names without an exact match are left out
# here; they are reported by the next cell and handled manually further down.
TEMP_DIR = 'utils/data/temp_country/'
CRU_PREFIX = 'crucy.v4.04.1901.2019.'
CRU_SUFFIX = '.tmp.per'

# Index the directory once (CRU country token -> file name) instead of
# rescanning it for every country.
cru_files = {}
for file in sorted(os.listdir(TEMP_DIR)):
    if file.startswith(CRU_PREFIX) and file.endswith(CRU_SUFFIX):
        cru_files[file[len(CRU_PREFIX):-len(CRU_SUFFIX)]] = file

countries_monthly = {}

for country in countries:
    file = cru_files.get(country)
    if file is not None:
        # A multi-character delimiter is treated as a regex, hence engine='python'.
        countries_monthly[country] = pd.read_csv(TEMP_DIR + file, header=3, delimiter='    ', engine='python')

In [4]:
# World Bank countries for which no temperature table has been loaded so far
not_included = [name for name in countries if name not in countries_monthly.keys()]

In [5]:
# How many World Bank countries still lack a temperature table, followed by their names
print(len(not_included))
not_included

43


['Antigua and Barbuda',
 'Bahamas, The',
 'Bosnia and Herzegovina',
 'Brunei Darussalam',
 'Burkina Faso',
 'Central African Republic',
 'Congo, Dem. Rep.',
 'Congo, Rep.',
 'Costa Rica',
 "Cote d'Ivoire",
 'Czech Republic',
 'Dominican Republic',
 'Egypt, Arab Rep.',
 'El Salvador',
 'Equatorial Guinea',
 'Gambia, The',
 'Iran, Islamic Rep.',
 "Korea, Dem. People's Rep.",
 'Korea, Rep.',
 'Kyrgyz Republic',
 'Lao PDR',
 'Micronesia, Fed. Sts.',
 'New Zealand',
 'Papua New Guinea',
 'Puerto Rico',
 'Russian Federation',
 'Sao Tome and Principe',
 'Saudi Arabia',
 'Sierra Leone',
 'Slovak Republic',
 'Solomon Islands',
 'South Africa',
 'South Sudan',
 'Sri Lanka',
 'Syrian Arab Republic',
 'Timor-Leste',
 'Trinidad and Tobago',
 'United Arab Emirates',
 'United Kingdom',
 'United States',
 'Vanuatu',
 'Venezuela, RB',
 'Yemen, Rep.']

In [6]:
# manually matching these World Bank country names with the CRU country names
#
# Keys are World Bank names, values are the country token used in the CRU file
# name. The tokens are copied verbatim from the files on disk, including their
# unusual spellings (e.g. 'Bosnia-Herzegovinia', 'Puerto_Rica', 'Vanatu'), so
# do not "correct" them here.
# NOTE(review): 'Micronesia, Fed. Sts.' is reported as missing above but has no
# entry in this mapping -- confirm whether a CRU file exists for it.
cru_name_by_wb_name = {
    'Antigua and Barbuda': 'Antigua_and_Barbuda',
    'Bahamas, The': 'Bahamas',
    'Bosnia and Herzegovina': 'Bosnia-Herzegovinia',
    'Brunei Darussalam': 'Brunei',
    'Burkina Faso': 'Burkina_Faso',
    'Central African Republic': 'Central_African_Rep',
    'Congo, Dem. Rep.': 'DR_Congo',
    'Congo, Rep.': 'Congo',
    'Costa Rica': 'Costa_Rica',
    "Cote d'Ivoire": 'Ivory_Coast',
    'Czech Republic': 'Czech_Republic',
    'Dominican Republic': 'Dominican_Republic',
    'Egypt, Arab Rep.': 'Egypt',
    'El Salvador': 'El_Salvador',
    'Equatorial Guinea': 'Equatorial_Guinea',
    'Gambia, The': 'Gambia',
    'Iran, Islamic Rep.': 'Iran',
    "Korea, Dem. People's Rep.": 'North_Korea',
    'Korea, Rep.': 'South_Korea',
    'Kyrgyz Republic': 'Kyrgyzstan',
    'Lao PDR': 'Laos',
    'New Zealand': 'New_Zealand',
    'Papua New Guinea': 'Papua_New_Guinea',
    'Puerto Rico': 'Puerto_Rica',
    'Russian Federation': 'Russia',
    'Sao Tome and Principe': 'Sao_Tome_+_Principe',
    'Saudi Arabia': 'Saudi_Arabia',
    'Sierra Leone': 'Sierra_Leone',
    'Slovak Republic': 'Slovakia',
    'Solomon Islands': 'Solomon_Isl',
    'South Africa': 'South_Africa',
    'South Sudan': 'South_Sudan',
    'Sri Lanka': 'Sri_Lanka',
    'Syrian Arab Republic': 'Syria',
    'Timor-Leste': 'East_Timor',
    'Trinidad and Tobago': 'Trinidad_and_Tobago',
    'United Arab Emirates': 'United_Arab_Emirates',
    'United Kingdom': 'United_Kingdom',
    'United States': 'USA',
    'Vanuatu': 'Vanatu',
    'Venezuela, RB': 'Venezuela',
    'Yemen, Rep.': 'Yemen',
}

# One read per override; same parser settings as the automatic matching cell.
for wb_name, cru_name in cru_name_by_wb_name.items():
    countries_monthly[wb_name] = pd.read_csv(
        'utils/data/temp_country/crucy.v4.04.1901.2019.{}.tmp.per'.format(cru_name),
        header=3, delimiter='    ', engine='python')

In [7]:
# Remove blanks from the column headers, then store each row's mean over the
# JAN..DEC columns in a new 'AVG' column.
for monthly in countries_monthly.values():
    monthly.columns = monthly.columns.str.replace(' ', '')
    monthly['AVG'] = monthly.loc[:, 'JAN':'DEC'].mean(axis=1)

In [8]:
# check: display the table stored for one country, including the AVG column added above
countries_monthly['Italy']

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,MAM,JJA,SON,DJF,ANN,AVG
0,1901,3.8,2.7,8.0,11.8,14.6,19.9,22.0,21.6,19.1,14.5,8.8,7.2,11.5,21.2,14.1,6.8,12.8,12.833333
1,1902,6.1,7.2,8.4,12.7,12.4,17.9,22.2,21.9,19.1,14.2,8.9,5.7,11.2,20.7,14.1,5.9,13.1,13.058333
2,1903,5.2,6.8,9.1,9.1,15.2,17.7,21.0,21.7,19.0,15.0,9.8,7.0,11.1,20.1,14.6,6.6,13.0,13.050000
3,1904,5.4,7.3,8.7,12.3,16.1,20.2,23.4,22.2,17.4,14.1,8.4,6.4,12.4,21.9,13.3,4.6,13.5,13.491667
4,1905,2.9,4.6,8.7,11.3,14.4,19.0,23.3,22.2,19.8,11.6,10.4,6.8,11.5,21.5,13.9,5.6,12.9,12.916667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,2015,7.0,6.6,9.6,12.4,17.2,21.1,25.4,24.3,20.0,15.6,12.0,8.5,13.1,23.6,15.9,8.4,15.0,14.975000
115,2016,7.3,9.2,9.4,13.7,16.0,20.5,23.6,23.1,20.5,15.5,11.6,7.8,13.0,22.4,15.9,6.9,14.9,14.850000
116,2017,4.3,8.6,11.1,12.5,17.2,22.4,24.1,24.8,18.7,15.5,10.4,6.5,13.6,23.8,14.9,7.0,14.7,14.675000
117,2018,8.4,6.0,9.1,14.5,17.6,21.1,24.1,24.3,21.1,16.7,12.1,7.9,13.8,23.1,16.7,7.0,15.3,15.241667


In [9]:
# better save these precious data
# The context manager closes the file even if pickling raises, so a failed dump
# no longer leaves an open handle behind.
with open('utils/data/temp.pkl', 'wb') as t:
    pickle.dump(countries_monthly, t)

## *old:* Data until 2016

Monthly temperature data for countries comes from the [World Bank](https://climateknowledgeportal.worldbank.org/download-data)

In [None]:
# Read the first four columns of the World Bank file as raw strings.
# The two-character separator ', ' is interpreted as a regex, which only the
# python engine supports; naming the engine explicitly avoids the ParserWarning
# pandas emits when it has to fall back from the C engine.
temp = pd.read_csv('utils/data/temp.csv', sep=', ', usecols=range(4), dtype=object, engine='python')
temp.tail()

In [None]:
# Distinct country names from the World Bank table, surrounding whitespace removed
c = temp.Country.unique()
countries = [name.strip() for name in c]

In [None]:
# Per country:
#   temp_dict  -- the country's rows without the 'Statistics'/'Country' columns
#   t_dict     -- mean of 'Temperature - (Celsius)' per 'Year', named 'Temperature'
#   t_dict_std -- the same series passed through sklearn's scale()
temp_dict = {}
t_dict = {}
t_dict_std = {}

# The names in `countries` were stripped, so compare against a stripped copy of
# the column; otherwise any padded value would select no rows at all.
country_col = temp.Country.str.strip()

for country in countries:
    # Chain the drop on the selection instead of mutating a slice in place,
    # which triggers SettingWithCopyWarning.
    temp_dict[country] = temp.loc[country_col == country].drop(columns=['Statistics', 'Country'])
    t_dict[country] = (
        temp_dict[country]
        .astype(float)  # values were read as strings (dtype=object)
        .groupby('Year')['Temperature - (Celsius)']
        .mean()
        .rename('Temperature')
    )
    t_dict_std[country] = pd.Series(
        scale(list(t_dict[country])),
        index=t_dict[country].index,
        name='Temperature',
    )

In [None]:
# check: display the scaled yearly temperature series stored for one country
t_dict_std['Albania']