In [2]:
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
%matplotlib notebook

In [4]:
nomi_stazioni = pd.read_csv("datasets/ECA_all_stations.txt", skiprows=17)   # to make this work, the ECAD_data.zip file must be expanded first
nomi_stazioni.rename(columns=lambda x: x.strip(), inplace=True)
nomi_stazioni.columns

Index([u'STAID', u'STANAME', u'CN', u'LAT', u'LON', u'HGHT'], dtype='object')

In [5]:
dati_stazioni = pd.read_csv("datasets/data60stations.txt")
dati_stazioni.rename(columns=lambda x: x.strip(), inplace=True)
dati_stazioni = dati_stazioni[dati_stazioni.Q_TG == 0]
dati_stazioni

Unnamed: 0,STAID,SOUID,DATE,TG,Q_TG
30,300,6707,18761231,-131,0
31,300,6707,18770101,-114,0
32,300,6707,18770102,-110,0
36,300,6707,18770106,-144,0
37,300,6707,18770107,-132,0
38,300,6707,18770108,-100,0
39,300,6707,18770109,-94,0
40,300,6707,18770110,-64,0
41,300,6707,18770111,-151,0
42,300,6707,18770112,-184,0


In [6]:
dati_stazioni.describe()

Unnamed: 0,STAID,SOUID,DATE,TG,Q_TG
count,3153394.0,3153394.0,3153394.0,3153394.0,3153394.0
mean,3137.936,18246.45,19337200.0,86.81303,0.0
std,4104.755,60017.26,557648.9,80.47712,0.0
min,10.0,30.0,17560100.0,-407.0,0.0
25%,116.0,830.0,18960330.0,29.0,0.0
50%,1686.0,5249.0,19371230.0,87.0,0.0
75%,4559.0,25220.0,19801000.0,148.0,0.0
max,19001.0,926214.0,20191030.0,350.0,0.0


In [7]:
dati_stazioni.columns

Index([u'STAID', u'SOUID', u'DATE', u'TG', u'Q_TG'], dtype='object')

In [8]:
df_merge = dati_stazioni.merge(nomi_stazioni, on="STAID")
df_merge.columns

Index([u'STAID', u'SOUID', u'DATE', u'TG', u'Q_TG', u'STANAME', u'CN', u'LAT',
       u'LON', u'HGHT'],
      dtype='object')

In [9]:
df_merge["DATE"] = pd.to_datetime(df_merge["DATE"], format="%Y%m%d")
df_merge

Unnamed: 0,STAID,SOUID,DATE,TG,Q_TG,STANAME,CN,LAT,LON,HGHT
0,300,6707,1876-12-31 00:00:00,-131,0,ILULISSAT (JAKOBSHAVN),GL,+69:13:00,-051:03:00,39
1,300,6707,1877-01-01 00:00:00,-114,0,ILULISSAT (JAKOBSHAVN),GL,+69:13:00,-051:03:00,39
2,300,6707,1877-01-02 00:00:00,-110,0,ILULISSAT (JAKOBSHAVN),GL,+69:13:00,-051:03:00,39
3,300,6707,1877-01-06 00:00:00,-144,0,ILULISSAT (JAKOBSHAVN),GL,+69:13:00,-051:03:00,39
4,300,6707,1877-01-07 00:00:00,-132,0,ILULISSAT (JAKOBSHAVN),GL,+69:13:00,-051:03:00,39
5,300,6707,1877-01-08 00:00:00,-100,0,ILULISSAT (JAKOBSHAVN),GL,+69:13:00,-051:03:00,39
6,300,6707,1877-01-09 00:00:00,-94,0,ILULISSAT (JAKOBSHAVN),GL,+69:13:00,-051:03:00,39
7,300,6707,1877-01-10 00:00:00,-64,0,ILULISSAT (JAKOBSHAVN),GL,+69:13:00,-051:03:00,39
8,300,6707,1877-01-11 00:00:00,-151,0,ILULISSAT (JAKOBSHAVN),GL,+69:13:00,-051:03:00,39
9,300,6707,1877-01-12 00:00:00,-184,0,ILULISSAT (JAKOBSHAVN),GL,+69:13:00,-051:03:00,39


In [10]:
def to_lat_lon(v):
    g, m, s = v.split(':')
    return float(g) + int(m)/60.0 + int(s)/3600.0

df_merge["LAT"] = df_merge["LAT"].apply(to_lat_lon)
df_merge["LON"] = df_merge["LON"].apply(to_lat_lon)
df_merge

Unnamed: 0,STAID,SOUID,DATE,TG,Q_TG,STANAME,CN,LAT,LON,HGHT
0,300,6707,1876-12-31 00:00:00,-131,0,ILULISSAT (JAKOBSHAVN),GL,69.216667,-50.950000,39
1,300,6707,1877-01-01 00:00:00,-114,0,ILULISSAT (JAKOBSHAVN),GL,69.216667,-50.950000,39
2,300,6707,1877-01-02 00:00:00,-110,0,ILULISSAT (JAKOBSHAVN),GL,69.216667,-50.950000,39
3,300,6707,1877-01-06 00:00:00,-144,0,ILULISSAT (JAKOBSHAVN),GL,69.216667,-50.950000,39
4,300,6707,1877-01-07 00:00:00,-132,0,ILULISSAT (JAKOBSHAVN),GL,69.216667,-50.950000,39
5,300,6707,1877-01-08 00:00:00,-100,0,ILULISSAT (JAKOBSHAVN),GL,69.216667,-50.950000,39
6,300,6707,1877-01-09 00:00:00,-94,0,ILULISSAT (JAKOBSHAVN),GL,69.216667,-50.950000,39
7,300,6707,1877-01-10 00:00:00,-64,0,ILULISSAT (JAKOBSHAVN),GL,69.216667,-50.950000,39
8,300,6707,1877-01-11 00:00:00,-151,0,ILULISSAT (JAKOBSHAVN),GL,69.216667,-50.950000,39
9,300,6707,1877-01-12 00:00:00,-184,0,ILULISSAT (JAKOBSHAVN),GL,69.216667,-50.950000,39


In [11]:
df_it = df_merge[df_merge['CN'] == 'IT']

In [12]:
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [13]:
for citta in df_it.STANAME.unique():
    plt.plot(df_merge[df_merge.STANAME == citta].DATE, df_merge[df_merge.STANAME == citta].TG, label=citta)
#plt.legend()
plt.show()

<IPython.core.display.Javascript object>

## Analisi di Fourier dei dati di temperatura

Approfittando del package `scipy.fftpack`, possiamo provare ad eseguire un'analisi di Fourier dell'andamento delle temperature. Chiaramente questo va ben oltre un'attivita' scolastica...

Riferimento: https://ipython-books.github.io/101-analyzing-the-frequency-components-of-a-signal-with-a-fast-fourier-transform/

In [14]:
import numpy as np
import scipy as sp
import scipy.fftpack

In [15]:
fft = sp.fftpack.fft(df_merge["TG"])
print len(fft)
powerspectrum = np.abs(fft) ** 2
fftfreq = sp.fftpack.fftfreq(len(powerspectrum), 1. / 365)
powerspectrum

3153394


array([7.49421696e+16, 2.82452241e+13, 5.07876351e+14, ...,
       3.54891083e+14, 5.07876351e+14, 2.82452241e+13])

In [16]:
posfreq = fftfreq > 0
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
ax.plot(fftfreq[posfreq], 10 * np.log10(powerspectrum[posfreq]))   # x = freq positive, y = W in dB
ax.set_xlim(0, 2)
ax.set_xlabel('Frequency (1/year)')
ax.set_ylabel('Power Spectrum (dB)')

<IPython.core.display.Javascript object>

Text(0,0.5,'Power Spectrum (dB)')