In [1]:
import numpy as np
np.set_printoptions(threshold=10000,suppress=True)
import pandas as pd
import warnings
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

## Traitement des données de base

In [50]:
def traitementCSVTemperature(fichier):
    """Build an hourly mean-temperature table from one ';'-separated CSV.

    Only the ``numer_sta``, ``date`` and ``t`` columns are used. ``date`` is
    parsed with the ``%Y%m%d%H%M%S`` format and split into a calendar date and
    an hour. ``t`` has 273.15 subtracted from it (Kelvin-to-Celsius offset).

    Parameters
    ----------
    fichier : str, path or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``heure`` and ``t``: one row per (date, hour), where
        ``t`` is the mean over stations of each station's mean temperature.
    """
    brut = pd.read_csv(fichier, sep=';', header=0)
    releves = brut[['numer_sta', 'date', 't']].copy()

    # Parse the timestamp once, then derive both the hour and the calendar day.
    horodatage = pd.to_datetime(releves['date'], format='%Y%m%d%H%M%S')
    releves['heure'] = horodatage.dt.hour
    releves['date'] = horodatage.dt.date

    # Drop stations whose ID is not between 7000 and 8000 (overseas stations).
    releves = releves[releves['numer_sta'].between(7000, 8000)]

    # Rows whose temperature is the literal 'mq' are discarded before the
    # numeric conversion.
    releves = releves[releves['t'] != 'mq'].copy()
    releves['t'] = releves['t'].astype(float) - 273.15

    # Average per station first, then across stations for each (date, hour).
    par_station = releves.groupby(['date', 'heure', 'numer_sta']).mean()
    moyenne_horaire = par_station.groupby(['date', 'heure']).mean()
    return moyenne_horaire.reset_index()

In [51]:
def traitementCSVConsomation(fichier):
    """Load a ';'-separated consumption CSV and keep only the full-hour rows.

    Rows are kept when their ``Heures`` value ends with ``'00'``. For the kept
    rows, ``Date`` is parsed day-first and reduced to a ``datetime.date``, and
    ``Heures`` is replaced by the integer made of its first two characters.

    Rows with a missing ``Heures`` value are dropped instead of making the
    boolean mask fail, and they are dropped *before* ``Date`` is parsed so
    that a non-date trailer line cannot break the date conversion.

    Parameters
    ----------
    fichier : str, path or file-like
        Anything accepted by ``pandas.read_csv``. The file must provide the
        ``Date`` and ``Heures`` columns; all other columns are passed through.

    Returns
    -------
    pandas.DataFrame
        The filtered rows, keeping their original index labels.
    """
    rte = pd.read_csv(fichier, sep=';', header=0)

    # na=False: a missing 'Heures' yields False instead of NaN, which boolean
    # indexing would otherwise reject.
    heures_pleines = rte['Heures'].str.endswith('00', na=False)
    rte = rte[heures_pleines].copy()

    rte['Date'] = pd.to_datetime(rte['Date'], dayfirst=True).dt.date
    rte['Heures'] = rte['Heures'].str.slice(0, 2).astype(int)
    return rte

In [52]:
def fusionData(rte,temperature):
    """Join consumption rows with temperature rows on (day, hour).

    Performs an inner join matching ``rte``'s ``Date``/``Heures`` against
    ``temperature``'s ``date``/``heure``, removes the duplicated right-hand key
    columns, and drops every row that still contains a missing value.

    Parameters
    ----------
    rte : pandas.DataFrame
        Left frame; must contain ``Date`` and ``Heures``.
    temperature : pandas.DataFrame
        Right frame; must contain ``date`` and ``heure``.

    Returns
    -------
    pandas.DataFrame
        The merged frame without the ``date``/``heure`` columns and without
        any row holding a NaN.
    """
    fusion = rte.merge(
        temperature,
        how='inner',
        left_on=['Date', 'Heures'],
        right_on=['date', 'heure'],
    )
    # The right-hand keys duplicate 'Date'/'Heures' after the join.
    return fusion.drop(columns=['date', 'heure']).dropna()

In [53]:
def traitementAllCSVTemperatures(fichiers):
    """Process several temperature CSV files into one hourly table.

    Each file is processed with ``traitementCSVTemperature``; the results are
    concatenated once and averaged again per (``date``, ``heure``), so an hour
    that appears in more than one file ends up as a single row.

    Parameters
    ----------
    fichiers : iterable
        Paths (or file-like objects) passed one by one to
        ``traitementCSVTemperature``.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``heure`` and ``t``. When ``fichiers`` is empty, an
        empty frame with those three columns is returned.
    """
    # Collect first and concatenate once: concatenating inside the loop
    # re-copies the accumulated frame for every file.
    releves = [traitementCSVTemperature(fichier) for fichier in fichiers]
    if not releves:
        return pd.DataFrame(columns=['date', 'heure', 't'])

    data = pd.concat(releves)
    return data.groupby(['date', 'heure']).mean().reset_index()

In [54]:
# One temperature file per month of 2018: temperatures/synop.201801.csv ... synop.201812.csv
fichiers_temperature = [f'temperatures/synop.2018{mois:02d}.csv' for mois in range(1, 13)]
temperatures_2018 = traitementAllCSVTemperatures(fichiers_temperature)

In [55]:
# Load the 2018 consumption file, keeping only the rows that fall on a full hour.
rte_2018 = traitementCSVConsomation(fichier='rte_2018.csv')

In [56]:
# Join consumption and temperature on (day, hour).
data_2018 = fusionData(rte=rte_2018, temperature=temperatures_2018)

In [58]:
# Persist the merged table as a ';'-separated CSV, without the index column.
data_2018.to_csv('data_2018.csv', sep=';', index=False)

## Traitement des données pour apprentissage

In [70]:
def ajouterJourSemaine(data):
    """Return a copy of ``data`` with a ``jour_semaine`` column added.

    ``Date`` is parsed with the ``%Y-%m-%d`` format and ``jour_semaine`` holds
    its ``dayofweek`` value (Monday = 0, Sunday = 6). The frame passed in is
    left untouched, so the cell that built it can be re-run or re-inspected
    without seeing columns added by a later step.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``Date`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``Date`` converted to datetime and ``jour_semaine``
        appended as the last column.
    """
    data = data.copy()
    data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')
    data['jour_semaine'] = data['Date'].dt.dayofweek
    return data

def enleverDate(data):
    """Return ``data`` without its ``Date`` column.

    The input frame is not modified; ``drop`` produces a new frame.
    """
    return data.drop(columns=['Date'])

def ajouterJourFerie(data, jours_feries=None):
    """Return a copy of ``data`` with a 0/1 ``jour_ferie`` column added.

    ``Date`` is parsed with the ``%Y-%m-%d`` format; ``jour_ferie`` is 1 when
    the date belongs to ``jours_feries`` and 0 otherwise. The frame passed in
    is left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``Date`` column.
    jours_feries : iterable of date-like, optional
        Dates to flag. Defaults to the French public holidays of 2018.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``Date`` converted to datetime and ``jour_ferie``
        appended as the last column.
    """
    if jours_feries is None:
        # French public holidays for 2018. Easter Monday is 2 April (1 April
        # is Easter Sunday), and 31 May is not a public holiday in France.
        jours_feries = [
            '2018-01-01',  # New Year's Day
            '2018-04-02',  # Easter Monday
            '2018-05-01',  # Labour Day
            '2018-05-08',  # Victory in Europe Day
            '2018-05-10',  # Ascension Day
            '2018-05-21',  # Whit Monday
            '2018-07-14',  # National Day
            '2018-08-15',  # Assumption
            '2018-11-01',  # All Saints' Day
            '2018-11-11',  # Armistice Day
            '2018-12-25',  # Christmas Day
        ]

    data = data.copy()
    data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')
    data['jour_ferie'] = data['Date'].isin(pd.to_datetime(list(jours_feries))).astype(int)
    return data

def ajouterJourVacances(data, jours_vacances=None):
    """Return a copy of ``data`` with a 0/1 ``vacances`` column added.

    ``Date`` is parsed with the ``%Y-%m-%d`` format; ``vacances`` is 1 when
    the date belongs to ``jours_vacances`` and 0 otherwise. The frame passed
    in is left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``Date`` column.
    jours_vacances : iterable of date-like, optional
        Dates to flag. Defaults to the 2018 calendar described below.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``Date`` converted to datetime and ``vacances``
        appended as the last column.
    """
    if jours_vacances is None:
        # Default 2018 calendar: every Monday-to-Friday day inside these
        # inclusive periods (46 dates in total).
        # NOTE(review): only two weeks of July are listed - confirm whether the
        # remainder of the summer break should be flagged as well.
        periodes_2018 = [
            ('2018-02-12', '2018-02-23'),
            ('2018-04-09', '2018-04-20'),
            ('2018-07-09', '2018-07-20'),
            ('2018-10-29', '2018-11-09'),
            ('2018-12-24', '2018-12-31'),
        ]
        jours_vacances = [
            jour
            for debut, fin in periodes_2018
            for jour in pd.bdate_range(debut, fin)
        ]

    data = data.copy()
    data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')
    data['vacances'] = data['Date'].isin(pd.to_datetime(list(jours_vacances))).astype(int)
    return data

# Build the training table: add the calendar features, then drop the raw date.
data_apprentissage = (
    data_2018
    .pipe(ajouterJourFerie)
    .pipe(ajouterJourVacances)
    .pipe(ajouterJourSemaine)
    .pipe(enleverDate)
)
data_apprentissage.to_csv('data_apprentissage_2018.csv', sep=';', index=False)
data_apprentissage.head(3)

Unnamed: 0,Heures,Consommation,t,jour_ferie,vacances,jour_semaine
0,0,61127.0,8.338095,1,0,0
1,3,54727.0,7.761905,1,0,0
2,6,50751.0,7.471429,1,0,0
