@author: Valentin Larrieu

In [208]:
%matplotlib notebook
import math                         
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
sns.set(style='whitegrid')

# Flu dataset

In [209]:
df = pd.read_csv("Files/Openhealth_S-Grippal.csv", sep=';', encoding = "ISO-8859-1", decimal=",")

In [210]:
df.head()

Unnamed: 0,PERIODE,IAS_brut,IAS_lissé,Incidence_Sentinelles
0,2009-07-01,1.7,2.2,2.3
1,2009-07-02,3.4,2.1,2.3
2,2009-07-03,2.1,2.1,2.3
3,2009-07-04,1.3,2.0,2.3
4,2009-07-05,14.2,2.0,2.3


# Missing values

In [211]:
sum(df['IAS_brut']==0)

9

In [212]:
mask = df['IAS_brut']==0
list_null_index = df.loc[mask,'PERIODE'].values
df.loc[mask]

Unnamed: 0,PERIODE,IAS_brut,IAS_lissé,Incidence_Sentinelles
340,2010-06-06,0.0,0.8,1.4
378,2010-07-14,0.0,0.7,0.3
627,2011-03-20,0.0,2.4,3.3
732,2011-07-03,0.0,0.8,0.3
858,2011-11-06,0.0,2.6,3.1
1062,2012-05-28,0.0,0.9,1.3
1075,2012-06-10,0.0,1.6,0.6
1502,2013-08-11,0.0,0.5,1.0
3382,2018-10-04,0.0,3.0,1.6


In [213]:
df.loc['PERIODE' == list_null_index]

Unnamed: 0,PERIODE,IAS_brut,IAS_lissé,Incidence_Sentinelles


In [214]:
'PERIODE' == list_null_index
# to interpolate the missing values 
df.interpolate(method ='linear', limit_direction ='forward') 

#df['PERIODE']= df['PERIODE'].astype('datetime64', copy=False)
df['PERIODE'] = pd.to_datetime(df['PERIODE'],infer_datetime_format=True)

df.set_index('PERIODE',inplace=True)



In [215]:

df['IAS_brut'].replace(0,np.nan,inplace=True)


In [216]:

df['IAS_brut'].interpolate(method='time', inplace=True)
print(sum(df['IAS_brut'].isna()))

0


# Plot of Period

In [217]:
plt.figure(figsize=(7,5))
df['IAS_brut'].plot()
plt.title('IAS brut in fct of a period')
plt.ylabel('IAS brut')
plt.show()

<IPython.core.display.Javascript object>

# Histogram

In [218]:
plt.figure(figsize=(7,5))
mask_periode = df['IAS_brut']>1000
plt.hist(df.loc[mask_periode,'IAS_brut'],bins=50,ind)
plt.title('IAS brut in fct of a period')
plt.ylabel('IAS brut')
plt.show()

<IPython.core.display.Javascript object>

In [219]:
plt.figure(figsize=(7,5))
mask_periode = df['IAS_brut']>500
plt.hist(df['IAS_brut'],bins=100)
plt.title('IAS brut in fct of a period')
plt.ylabel('IAS brut')
plt.show()

<IPython.core.display.Javascript object>

In [220]:
#df['IAS_brut_log'] = np.log(df['IAS_brut'])
df['IAS_brut_log'] = df['IAS_brut'].apply(np.log)
plt.figure(figsize=(7,5))
#mask_periode = df['IAS_brut']>0
plt.hist(df['IAS_brut_log'],bins=50,color= sns.xkcd_rgb['deep blue'])
plt.title('IAS brut in fct of a period')
plt.ylabel('IAS brut')
plt.show()

<IPython.core.display.Javascript object>

In [221]:
plt.figure(figsize=(7,5))
df['IAS_brut_log'].plot()
plt.title('IAS brut in fct of a period')
plt.ylabel('IAS brut')
plt.show()

<IPython.core.display.Javascript object>

In [222]:
# Periogram of singal data

In [223]:
from scipy import signal
plt.figure(figsize=(7,5))
f, Pxx_den = signal.periodogram(df['IAS_brut_log'], fs=1,)
plt.semilogy(f[1:], Pxx_den[1:],color= sns.xkcd_rgb['cobalt blue'])
plt.xlabel('frequency [Hz]')
plt.ylabel('PSD [V**2/Hz]')
plt.show()

<IPython.core.display.Javascript object>

In [224]:
#df2 = df.copy()

In [225]:
df['IAS_brut_log_shift'] = df.IAS_brut_log.shift(365, axis=0)
df['IAS_brut_shift'] = df.IAS_brut.shift(365, axis=0)
#df2 = df2.IAS_brut_log.shift(365, axis=0)

In [226]:
#df['IAS_brut_log_shift'].dropna(inplace=True)
#df['IAS_brut_shift'].dropna(inplace=True)
df.dropna(inplace=True)

In [227]:
df

Unnamed: 0_level_0,IAS_brut,IAS_lissé,Incidence_Sentinelles,IAS_brut_log,IAS_brut_log_shift,IAS_brut_shift
PERIODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-07-01,1.00,0.6,1.3,0.000000,0.530628,1.7
2010-07-02,0.90,0.6,1.3,-0.105361,1.223775,3.4
2010-07-03,0.50,0.6,1.3,-0.693147,0.741937,2.1
2010-07-04,1.20,0.7,1.3,0.182322,0.262364,1.3
2010-07-05,0.60,0.7,0.6,-0.510826,2.653242,14.2
2010-07-06,0.70,0.7,0.6,-0.356675,0.336472,1.4
2010-07-07,0.90,0.7,0.6,-0.105361,0.788457,2.2
2010-07-08,0.60,0.7,0.6,-0.510826,0.530628,1.7
2010-07-09,0.60,0.8,0.6,-0.510826,0.832909,2.3
2010-07-10,1.00,0.8,0.6,0.000000,0.993252,2.7


In [237]:
plt.figure(figsize=(9,5))
f, Pxx_den = signal.periodogram(df['IAS_brut_log_shift'], fs=1,)
f2, Pxx_den2 = signal.periodogram(df['IAS_brut_log'], fs=1,)
plt.semilogy(f[1:], Pxx_den[1:],color= sns.xkcd_rgb['cobalt blue'],alpha=0.5)
plt.semilogy(f2[1:], Pxx_den2[1:],color= sns.xkcd_rgb['red'], alpha=0.5)
plt.xlabel('frequency [Hz]')
plt.ylabel('PSD [V**2/Hz]')
plt.show()

<IPython.core.display.Javascript object>

In [239]:
plt.figure(figsize=(9,5))
f, Pxx_den = signal.periodogram(df['IAS_brut_shift'], fs=1,)
f2, Pxx_den2 = signal.periodogram(df['IAS_brut'], fs=1,)
plt.semilogy(f[1:], Pxx_den[1:],color= sns.xkcd_rgb['cobalt blue'],alpha=0.5,label='shifted')
plt.semilogy(f2[1:], Pxx_den2[1:],color= sns.xkcd_rgb['red'], alpha=0.5,label='raw')
plt.xlabel('frequency [Hz]')
plt.ylabel('PSD [V**2/Hz]')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>