### Import des données

In [359]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [360]:
# Read the Cork weather-station CSV into a DataFrame (timestamps are in the `date` column).
df_corkstation = pd.read_csv(filepath_or_buffer="Corkstation_15072019_01022011.csv")
# Preview the first five rows to check that the file parsed as expected.
df_corkstation.head(n=5)

Unnamed: 0,date,ind,rain,ind.1,temp,ind.2,wetb,dewpt,vappr,rhum,...,ind.3,wdsp,ind.4,wddir,ww,w,sun,vis,clht,clamt
0,2009-07-15 00:00:00,3,0.0,0,11.6,0,11.4,11.3,13.3,98,...,2,6,2,310,2,81,0.0,25000,999,1
1,2009-07-15 01:00:00,3,0.0,0,11.4,0,11.1,10.8,13.0,96,...,2,6,2,310,2,11,0.0,30000,999,1
2,2009-07-15 02:00:00,3,0.1,0,11.2,0,10.9,10.6,12.8,96,...,2,7,2,290,2,11,0.0,30000,999,1
3,2009-07-15 03:00:00,3,0.0,0,11.1,0,10.8,10.5,12.7,96,...,2,8,2,300,2,11,0.0,30000,999,1
4,2009-07-15 04:00:00,3,0.0,0,11.4,0,11.1,10.9,13.0,96,...,2,7,2,310,2,11,0.0,30000,999,1


In [361]:
# Read the df_vst_group6 CSV into a DataFrame (timestamps are in the `time` column).
df_vst = pd.read_csv(filepath_or_buffer="df_vst_group6.csv")
# Preview the first five rows to check that the file parsed as expected.
df_vst.head(n=5)

Unnamed: 0,time,mean,temp
0,2009-07-15 00:00:00,0.341659,11.6
1,2009-07-15 00:30:00,0.274463,11.5
2,2009-07-15 01:00:00,0.231365,11.4
3,2009-07-15 01:30:00,0.202708,11.3
4,2009-07-15 02:00:00,0.182256,11.2


### Prétraitement


Dropping the trailing rows of `df_corkstation` so that it covers the same period as the consumption data

In [362]:
# Keep the first 12432 rows (positions 0..12431) and discard everything after them.
# `.copy()` yields an independent frame, so later column assignments do not
# touch `df_corkstation`.
df_corkstation_cropped = df_corkstation.iloc[:12432].copy()
df_corkstation_cropped

Unnamed: 0,date,ind,rain,ind.1,temp,ind.2,wetb,dewpt,vappr,rhum,...,ind.3,wdsp,ind.4,wddir,ww,w,sun,vis,clht,clamt
0,2009-07-15 00:00:00,3,0.0,0,11.6,0,11.4,11.3,13.3,98,...,2,6,2,310,2,81,0.0,25000,999,1
1,2009-07-15 01:00:00,3,0.0,0,11.4,0,11.1,10.8,13.0,96,...,2,6,2,310,2,11,0.0,30000,999,1
2,2009-07-15 02:00:00,3,0.1,0,11.2,0,10.9,10.6,12.8,96,...,2,7,2,290,2,11,0.0,30000,999,1
3,2009-07-15 03:00:00,3,0.0,0,11.1,0,10.8,10.5,12.7,96,...,2,8,2,300,2,11,0.0,30000,999,1
4,2009-07-15 04:00:00,3,0.0,0,11.4,0,11.1,10.9,13.0,96,...,2,7,2,310,2,11,0.0,30000,999,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12427,2010-12-14 19:00:00,3,0.0,0,2.0,0,1.1,-0.4,5.9,84,...,2,6,2,320,2,11,0.0,20000,999,1
12428,2010-12-14 20:00:00,3,0.0,0,1.0,0,0.4,-0.7,5.8,88,...,2,6,2,310,10,0,0.0,10000,999,1
12429,2010-12-14 21:00:00,3,0.0,0,0.1,1,-0.3,-1.1,5.7,92,...,2,6,2,310,10,0,0.0,8000,999,0
12430,2010-12-14 22:00:00,3,0.0,1,-0.1,1,-0.4,-1.0,5.7,94,...,2,4,2,330,10,0,0.0,8000,999,0


In [363]:
# Keep the first 24863 rows (positions 0..24862) and discard everything after them.
# `.copy()` yields an independent frame, so later column assignments do not
# touch `df_vst`.
df_vst_cropped = df_vst.iloc[:24863].copy()
df_vst_cropped

Unnamed: 0,time,mean,temp
0,2009-07-15 00:00:00,0.341659,11.60
1,2009-07-15 00:30:00,0.274463,11.50
2,2009-07-15 01:00:00,0.231365,11.40
3,2009-07-15 01:30:00,0.202708,11.30
4,2009-07-15 02:00:00,0.182256,11.20
...,...,...,...
24858,2010-12-14 21:00:00,0.956582,0.10
24859,2010-12-14 21:30:00,0.890195,0.00
24860,2010-12-14 22:00:00,0.849046,-0.10
24861,2010-12-14 22:30:00,0.779234,0.05


Handling incomplete data in `df_corkstation`: blank entries are converted to missing values and then filled by interpolation

In [364]:
# Clean the cropped weather frame:
#   * parse `date` into real timestamps (the index is NOT changed here);
#   * convert `wetb` and `vis` to numbers, turning blank/unparseable entries
#     into NaN (presumably these two columns load as text because missing
#     readings are stored as blanks - verify against the raw CSV);
#   * fill the resulting gaps by interpolation.
#
# `errors='coerce'` is used instead of a temporary -1 sentinel: -1 is a valid
# reading for temperature-like columns, so replacing every -1 with NaN would
# silently erase genuine measurements across the whole frame.
df_corkstation_cropped = df_corkstation_cropped.assign(
    date=pd.to_datetime(df_corkstation_cropped['date']),
    wetb=pd.to_numeric(df_corkstation_cropped['wetb'], errors='coerce'),
    vis=pd.to_numeric(df_corkstation_cropped['vis'], errors='coerce'),
)

# Interpolate over the missing values (pandas default: linear, row to row).
df_corkstation_cropped = df_corkstation_cropped.interpolate()

Resampling to 30-minute intervals and interpolating

In [365]:
# Build the 30-minute weather frame without mutating `df_corkstation_cropped`,
# so this cell can be re-run safely (an in-place `set_index` would remove the
# `date` column and make a second run fail with KeyError).
#
# Steps: make sure `date` is datetime -> use it as the index -> upsample to a
# 30-minute grid -> fill the new half-hour rows by linear interpolation.
# '30min' is the non-deprecated spelling of the '30T' frequency alias.
#
# NOTE(review): linear interpolation is applied to every column, including
# code-like ones (`ind*`, `ww`, `w`) and the wind direction `wddir`; the
# half-hour values for those columns may not be meaningful - confirm before
# using them as model features.
df_corkstation_resampled = (
    df_corkstation_cropped
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
    .resample('30min')
    .interpolate(method='linear')
)

df_corkstation_resampled

Unnamed: 0_level_0,ind,rain,ind.1,temp,ind.2,wetb,dewpt,vappr,rhum,msl,ind.3,wdsp,ind.4,wddir,ww,w,sun,vis,clht,clamt
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2009-07-15 00:00:00,3.0,0.00,0.0,11.60,0.0,11.40,11.30,13.30,98.0,1002.80,2.0,6.0,2.0,310.0,2.0,81.0,0.0,25000.0,999.0,1.0
2009-07-15 00:30:00,3.0,0.00,0.0,11.50,0.0,11.25,11.05,13.15,97.0,1003.15,2.0,6.0,2.0,310.0,2.0,46.0,0.0,27500.0,999.0,1.0
2009-07-15 01:00:00,3.0,0.00,0.0,11.40,0.0,11.10,10.80,13.00,96.0,1003.50,2.0,6.0,2.0,310.0,2.0,11.0,0.0,30000.0,999.0,1.0
2009-07-15 01:30:00,3.0,0.05,0.0,11.30,0.0,11.00,10.70,12.90,96.0,1003.95,2.0,6.5,2.0,300.0,2.0,11.0,0.0,30000.0,999.0,1.0
2009-07-15 02:00:00,3.0,0.10,0.0,11.20,0.0,10.90,10.60,12.80,96.0,1004.40,2.0,7.0,2.0,290.0,2.0,11.0,0.0,30000.0,999.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2010-12-14 21:00:00,3.0,0.00,0.0,0.10,1.0,-0.30,-1.10,5.70,92.0,1041.60,2.0,6.0,2.0,310.0,10.0,0.0,0.0,8000.0,999.0,0.0
2010-12-14 21:30:00,3.0,0.00,0.5,0.00,1.0,-0.35,-1.05,5.70,93.0,1041.80,2.0,5.0,2.0,320.0,10.0,0.0,0.0,8000.0,999.0,0.0
2010-12-14 22:00:00,3.0,0.00,1.0,-0.10,1.0,-0.40,-1.00,5.70,94.0,1042.00,2.0,4.0,2.0,330.0,10.0,0.0,0.0,8000.0,999.0,0.0
2010-12-14 22:30:00,3.0,0.00,0.5,0.05,1.0,-0.30,-0.95,5.70,93.0,1042.20,2.0,5.5,2.0,340.0,10.0,0.0,0.0,8000.0,999.0,0.0


Merging the two dataframes

In [366]:
# Combine weather and df_vst data into one frame, aligned on timestamp.
# Inputs are not mutated, so the cell can be re-run: an in-place `set_index`
# or re-dropping `temp` from the same variable would fail on a second run.

# Index the df_vst data by its parsed `time` column.
vst_by_time = (
    df_vst_cropped
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    .set_index('time')
)

# Drop the weather frame's `temp`; `df_vst` already carries a `temp` column,
# and keeping both would give duplicate column names after the concat.
weather_without_temp = df_corkstation_resampled.drop(columns=['temp'])

# Column-wise concat aligns rows on the datetime index (outer join by
# default: a timestamp present in only one frame yields NaN in the other's
# columns).
final_df = pd.concat([weather_without_temp, vst_by_time], axis=1)

final_df


Unnamed: 0,ind,rain,ind.1,ind.2,wetb,dewpt,vappr,rhum,msl,ind.3,...,ind.4,wddir,ww,w,sun,vis,clht,clamt,mean,temp
2009-07-15 00:00:00,3.0,0.00,0.0,0.0,11.40,11.30,13.30,98.0,1002.80,2.0,...,2.0,310.0,2.0,81.0,0.0,25000.0,999.0,1.0,0.341659,11.60
2009-07-15 00:30:00,3.0,0.00,0.0,0.0,11.25,11.05,13.15,97.0,1003.15,2.0,...,2.0,310.0,2.0,46.0,0.0,27500.0,999.0,1.0,0.274463,11.50
2009-07-15 01:00:00,3.0,0.00,0.0,0.0,11.10,10.80,13.00,96.0,1003.50,2.0,...,2.0,310.0,2.0,11.0,0.0,30000.0,999.0,1.0,0.231365,11.40
2009-07-15 01:30:00,3.0,0.05,0.0,0.0,11.00,10.70,12.90,96.0,1003.95,2.0,...,2.0,300.0,2.0,11.0,0.0,30000.0,999.0,1.0,0.202708,11.30
2009-07-15 02:00:00,3.0,0.10,0.0,0.0,10.90,10.60,12.80,96.0,1004.40,2.0,...,2.0,290.0,2.0,11.0,0.0,30000.0,999.0,1.0,0.182256,11.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2010-12-14 21:00:00,3.0,0.00,0.0,1.0,-0.30,-1.10,5.70,92.0,1041.60,2.0,...,2.0,310.0,10.0,0.0,0.0,8000.0,999.0,0.0,0.956582,0.10
2010-12-14 21:30:00,3.0,0.00,0.5,1.0,-0.35,-1.05,5.70,93.0,1041.80,2.0,...,2.0,320.0,10.0,0.0,0.0,8000.0,999.0,0.0,0.890195,0.00
2010-12-14 22:00:00,3.0,0.00,1.0,1.0,-0.40,-1.00,5.70,94.0,1042.00,2.0,...,2.0,330.0,10.0,0.0,0.0,8000.0,999.0,0.0,0.849046,-0.10
2010-12-14 22:30:00,3.0,0.00,0.5,1.0,-0.30,-0.95,5.70,93.0,1042.20,2.0,...,2.0,340.0,10.0,0.0,0.0,8000.0,999.0,0.0,0.779234,0.05


### Analyse exploratoire

### Modèle

### Validation

MAE, RMSE, MAPE ...