# Basic example of data management and vectorization
Dataset: [climate-change-earth-surface-temperature-data](https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data?select=GlobalLandTemperaturesByMajorCity.csv)

In [1]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
from datetime import datetime

In [3]:
import geopandas

In [6]:
# Download the raw CSV as-is (no converters yet) and report its (rows, columns) size.
source_url = 'https://island.ricerca.di.unimi.it/~alfio/shared/GlobalLandTemperaturesByMajorCity.csv'
D = pd.read_csv(source_url)
D.shape

(239177, 7)

In [5]:
# Preview the first five rows of the freshly loaded table.
D.head(n=5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1849-01-01,26.704,1.435,Abidjan,Côte D'Ivoire,5.63N,3.23W
1,1849-02-01,27.434,1.362,Abidjan,Côte D'Ivoire,5.63N,3.23W
2,1849-03-01,28.101,1.612,Abidjan,Côte D'Ivoire,5.63N,3.23W
3,1849-04-01,26.14,1.387,Abidjan,Côte D'Ivoire,5.63N,3.23W
4,1849-05-01,25.427,1.2,Abidjan,Côte D'Ivoire,5.63N,3.23W


In [None]:
def _parse_coordinate(text, positive_suffix):
    """Convert a hemisphere-suffixed coordinate string into a signed float.

    Parameters
    ----------
    text : str
        Coordinate such as ``'5.63N'`` or ``'3.23W'``: a magnitude followed
        by a single hemisphere letter.
    positive_suffix : str
        Hemisphere letter that keeps the value positive (``'N'`` for
        latitude, ``'E'`` for longitude). Any other trailing letter negates
        the magnitude.

    Returns
    -------
    float
        Signed coordinate, or NaN when ``text`` is empty or only whitespace.

    Raises
    ------
    ValueError
        If the part before the last character is not a valid float.
    """
    text = text.strip()
    if not text:
        # read_csv hands converters an empty string for a missing field;
        # return NaN instead of failing on text[-1].
        return float('nan')
    magnitude = float(text[:-1])
    return magnitude if text[-1] == positive_suffix else -magnitude


def _parse_date(text):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime`` (ValueError if malformed)."""
    return datetime.strptime(text, '%Y-%m-%d')


def _parse_latitude(text):
    """Signed latitude: north is positive, anything else negative."""
    return _parse_coordinate(text, 'N')


def _parse_longitude(text):
    """Signed longitude: east is positive, anything else negative."""
    return _parse_coordinate(text, 'E')


# Column name -> converter callable. Despite the name, this mapping is meant
# for the `converters=` argument of pd.read_csv, not for `dtype=`.
dtypes = {
    'dt': _parse_date,
    'Latitude': _parse_latitude,
    'Longitude': _parse_longitude,
}

In [None]:
# Reload the same CSV, this time passing each column named in `dtypes`
# through its converter callable while the file is parsed.
csv_url = 'https://island.ricerca.di.unimi.it/~alfio/shared/GlobalLandTemperaturesByMajorCity.csv'
D = pd.read_csv(csv_url, converters=dtypes)
# Inspect the Python type of the first converted `dt` value.
type(D['dt'].values[0])

In [None]:
# Derive calendar components with vectorised pandas operations instead of
# looping over every row in Python.
# pd.to_datetime normalises the column first, so this works whether `dt`
# was stored as datetime64 or as plain datetime objects.
# NOTE: `dates` is now a datetime Series rather than a list of date objects;
# its elements still expose .year / .month / .day.
dates = pd.to_datetime(D['dt'])
D['year'] = dates.dt.year
D['month'] = dates.dt.month
D['day'] = dates.dt.day

In [None]:
# Preview the first five rows, now including the year / month / day columns.
D.head(n=5)

In [None]:
# Snapshot of every row dated exactly 1975-01-01, keeping only rows that
# have no missing values. dropna() returns a new frame, so D is untouched.
snapshot_day = datetime(1975, 1, 1)
E = D.loc[D['dt'] == snapshot_day].dropna()

In [None]:
# (rows, columns) of the 1975-01-01 snapshot after dropping incomplete rows.
E.shape

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Load a low-resolution world map to use as a backdrop for the scatter plot.
# `geopandas.datasets` was removed in GeoPandas 1.0, so try the bundled
# dataset first (older releases) and otherwise read the Natural Earth 110m
# countries archive directly. Only the geometry is used downstream, so the
# different attribute columns of the two sources do not matter here.
try:
    world_source = geopandas.datasets.get_path('naturalearth_lowres')
except AttributeError:
    world_source = 'https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip'
world = geopandas.read_file(world_source)

In [None]:
# Two stacked panels of the 1975-01-01 snapshot: city positions coloured by
# average temperature. Only the top panel gets the country outlines.
fig, ax = plt.subplots(nrows=2, figsize=(14, 16))
world.boundary.plot(ax=ax[0], color='#cccccc', linewidth=0.8)
for panel in ax:
    panel.scatter(E.Longitude, E.Latitude, c=E.AverageTemperature)
plt.tight_layout()
plt.show()

## Other internal relations: temperature over time for a single city

In [None]:
# Rows for Abidjan dated strictly after 1900-01-01, plus the three columns
# needed for plotting as plain arrays.
in_abidjan = D['City'] == 'Abidjan'
after_1900 = D['dt'] > datetime(1900, 1, 1)
T = D[in_abidjan & after_1900]
time = T['dt'].values
temp = T['AverageTemperature'].values
inc = T['AverageTemperatureUncertainty'].values

In [None]:
# Temperature series (dark red) with its +/- uncertainty envelope (grey),
# drawn identically on two panels; the lower panel uses a fixed y-range.
fig, ax = plt.subplots(nrows=2, figsize=(14, 6))
for panel in ax:
    panel.plot(time, temp, linewidth=0.8, c='#990000')
    for bound in (temp + inc, temp - inc):
        panel.plot(time, bound, linewidth=0.8, c='#cccccc')
ax[1].set_ylim(10, 35)
plt.tight_layout()
plt.show()

In [None]:
# Same selection as before (Abidjan, strictly after 1900-01-01), restricted
# to rows whose month is January.
january_abidjan = (
    (D['City'] == 'Abidjan')
    & (D['dt'] > datetime(1900, 1, 1))
    & (D['month'] == 1)
)
T = D.loc[january_abidjan]
time = T['dt'].values
temp = T['AverageTemperature'].values
inc = T['AverageTemperatureUncertainty'].values

In [None]:
# Plot the selected series with its uncertainty envelope on two panels.
# Each entry pairs the y-values with the line colour, in drawing order.
fig, ax = plt.subplots(nrows=2, figsize=(14, 6))
curves = [
    (temp, '#990000'),        # measured average temperature
    (temp + inc, '#cccccc'),  # upper uncertainty bound
    (temp - inc, '#cccccc'),  # lower uncertainty bound
]
for panel in ax:
    for values, colour in curves:
        panel.plot(time, values, linewidth=0.8, c=colour)
# Only the lower panel is clamped to a fixed temperature range.
ax[1].set_ylim(10, 35)
plt.tight_layout()
plt.show()