# Global Data Preprocessing

## Imports

In [1]:
############ Usual Import ############
import pandas as pd
import numpy as np
import seaborn as sns
import datetime

############ SKLearn ############
from sklearn.metrics import mean_squared_error,mean_absolute_percentage_error

# Scaling
from sklearn.preprocessing import RobustScaler

# Machine learning models
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression


############ Internal Imports ############
# Data loading helpers
from analyse.import_data import import_TEMPO, import_RTE_TR, import_temp_ENEDIS

# Internal preprocessing
from analyse.preproc import preprocessing_RTE_encours, preproc_temperature_ENEDIS, preproc_tempo
from analyse.feat_eng import add_schoolholidays, add_weekday, add_public_holidays

# Feature-engineering helpers (names are spelled as they are imported from the project module)
from analyse.feat_eng import set_time_columuns,sin_cos_colonne,shit_colonne


############ Others ############
from lineartree import LinearBoostRegressor
from vacances_scolaires_france import SchoolHolidayDates


############ Visualization ############
import matplotlib.pyplot as plt
# Global plot style and default font size for every figure
plt.style.use('fivethirtyeight')
plt.rcParams.update({'font.size': 16})


############ MagicLines ############
%load_ext autoreload
%autoreload 2

In [2]:
############ Data Import ############
# Raw RTE real-time export
EnCours_Temps_R = import_RTE_TR()

# Raw ENEDIS temperature export
Temp_Nationale = import_temp_ENEDIS()

# TEMPO calendars: the loader result is indexed by one key per season
TEMPOs = import_TEMPO(deb=21, fin=23)
Tempo_RTE_2122, Tempo_RTE_2223, Tempo_RTE_2324 = (
    TEMPOs[season_key]
    for season_key in (
        'eCO2mix_RTE_tempo_2021-2022',
        'eCO2mix_RTE_tempo_2022-2023',
        'eCO2mix_RTE_tempo_2023-2024',
    )
)

RTE Data Retrieval


  df = pd.read_csv('data_raw/eCO2mix_RTE_En-cours-TR.xls',encoding = "ISO-8859-1", delimiter='\t')


ENEDIS Data Retrieval
TEMPO data retrieval year 2021-2022
TEMPO data retrieval year 2022-2023
TEMPO data retrieval year 2023-2024


## Why did I choose to work with the "TR" data?

RTE publishes two datasets:
- TR (Real Time): updated continuously from forecasts and modelling of the data that can be retrieved.
    Example: 2022-06-01 -> 2024-01-20 (until X days before the download)
- Consolidated: data consolidated with the data collected from the various antennas.
    Covers the period before 2022-06-01

I wanted to use only the TR data for two reasons:
- the goal is to predict the next day, so it makes more sense to start from the previous day's values;
- to minimise the impact of COVID: going back too far would bring the COVID effect into the data.

## Initial Data

In [3]:
# Preview the first rows of the raw frame returned by import_RTE_TR().
EnCours_Temps_R.head()

Unnamed: 0,Périmètre,Nature,Date,Heures,Consommation,Prévision J-1,Prévision J,Fioul,Charbon,Gaz,...,Hydraulique - Fil de l?eau + éclusée,Hydraulique - Lacs,Hydraulique - STEP turbinage,Bioénergies - Déchets,Bioénergies - Biomasse,Bioénergies - Biogaz,Stockage batterie,Déstockage batterie,Eolien terrestre,Eolien offshore
France,Données temps réel,2022-06-01,00:00,44940.0,44800,45100.0,144.0,0.0,3596.0,30207.0,...,1054,1677.0,170,584.0,286.0,ND,ND,ND,ND,
France,Données temps réel,2022-06-01,00:15,43967.0,43700,43900.0,144.0,0.0,3716.0,30337.0,...,1419,581.0,171,560.0,276.0,ND,ND,ND,ND,
France,Données temps réel,2022-06-01,00:30,42514.0,42600,42700.0,142.0,0.0,2880.0,29746.0,...,1280,530.0,169,561.0,276.0,ND,ND,ND,ND,
France,Données temps réel,2022-06-01,00:45,41073.0,41450,41600.0,143.0,0.0,2699.0,29230.0,...,1120,361.0,170,563.0,276.0,ND,ND,ND,ND,
France,Données temps réel,2022-06-01,01:00,40359.0,40300,40500.0,144.0,3.0,2718.0,29019.0,...,1101,373.0,170,563.0,276.0,ND,ND,ND,ND,


In [4]:
# Inspect the end of the raw RTE frame: in the saved output the last rows carry no
# consumption values and the final line is an RTE disclaimer, not data.
EnCours_Temps_R.tail()

Unnamed: 0,Périmètre,Nature,Date,Heures,Consommation,Prévision J-1,Prévision J,Fioul,Charbon,Gaz,...,Hydraulique - Fil de l?eau + éclusée,Hydraulique - Lacs,Hydraulique - STEP turbinage,Bioénergies - Déchets,Bioénergies - Biomasse,Bioénergies - Biogaz,Stockage batterie,Déstockage batterie,Eolien terrestre,Eolien offshore
France,Données temps réel,2024-03-13,23:00,,ND,,,,,,...,,,,,,,,,,
France,Données temps réel,2024-03-13,23:15,,ND,,,,,,...,,,,,,,,,,
France,Données temps réel,2024-03-13,23:30,,ND,,,,,,...,,,,,,,,,,
France,Données temps réel,2024-03-13,23:45,,ND,,,,,,...,,,,,,,,,,
"RTE ne pourra être tenu responsable de l'usage qui pourrait être fait des données mises à disposition, ni en cas de prévisions qui se révèleraient imprécises.",,,,,,,,,,,...,,,,,,,,,,


In [5]:
# Preview the first rows of the raw frame returned by import_temp_ENEDIS().
Temp_Nationale.head()

Unnamed: 0,horodate,temperature_realisee_lissee_degc,temperature_normale_lissee_degc,temperature_realisee_lissee_temperature_normale_lissee_degc,pseudo_rayonnement,annee,mois,jour,annee_mois_jour
0,2023-01-18T03:30:00+00:00,4.4,4.7,-0.3,54.0,2023,1,18,2023-01-18
1,2023-01-17T19:00:00+00:00,5.3,5.0,0.3,33.0,2023,1,17,2023-01-17
2,2023-01-17T18:30:00+00:00,5.4,5.0,0.4,33.0,2023,1,17,2023-01-17
3,2023-01-17T15:30:00+00:00,6.1,6.0,0.1,17.0,2023,1,17,2023-01-17
4,2023-01-17T15:00:00+00:00,6.2,6.1,0.1,14.0,2023,1,17,2023-01-17


In [6]:
# Preview the raw TEMPO calendar loaded under the 2023-2024 key.
Tempo_RTE_2324.head()

Unnamed: 0,Date,Type de jour TEMPO
0,2023-09-01,BLEU
1,2023-09-02,BLEU
2,2023-09-03,BLEU
3,2023-09-04,BLEU
4,2023-09-05,BLEU


Tempo electricity supply contracts have different price levels, depending on the time of day and the day of the year.

- Red days correspond to periods of the year when consumption is high,
- White days are at an intermediate level,
- Blue days are when consumption is lowest.

## Data Creation

### Preprocessing RTE Consommation

In [7]:
# Run the project preprocessing on the raw RTE frame and preview the first five rows.
df = preprocessing_RTE_encours(EnCours_Temps_R)
df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['date_hour'] = pd.to_datetime(df_final['Date']+ " " + df_final['Heures'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['Date'] = pd.to_datetime(df_final['Date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['Heures'] = df_final['date_hour'].map(lambda x : x.time())
A 

Unnamed: 0,Date,Heures,date_hour,Consommation
0,2022-06-01,00:00:00,2022-06-01 00:00:00,44940.0
1,2022-06-01,00:15:00,2022-06-01 00:15:00,43967.0
2,2022-06-01,00:30:00,2022-06-01 00:30:00,42514.0
3,2022-06-01,00:45:00,2022-06-01 00:45:00,41073.0
4,2022-06-01,01:00:00,2022-06-01 01:00:00,40359.0


### National Temperature

In [8]:
# Run the project preprocessing on the raw ENEDIS temperature frame and display the result.
df_temp = preproc_temperature_ENEDIS(Temp_Nationale)
df_temp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reduced['horodate'] = df_reduced['horodate'].map(lambda x : x[:-6])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reduced['horodate'] = pd.to_datetime(df_reduced['horodate'],utc=False)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reduced.drop(columns='annee_mois_jour',inplace=True)
A value is trying to be set on a copy of a slice from a DataF

Unnamed: 0,date_hour,temperature_realisee_lissee_degc,annee,mois,jour
87695,2019-03-09 23:00:00,10.7,2019,3,9
65525,2019-03-09 23:30:00,10.7,2019,3,9
21804,2019-03-10 00:00:00,10.7,2019,3,10
76658,2019-03-10 00:30:00,10.7,2019,3,10
43689,2019-03-10 01:00:00,10.6,2019,3,10
...,...,...,...,...,...
68152,2024-03-09 20:30:00,8.4,2024,3,9
44966,2024-03-09 21:00:00,8.4,2024,3,9
44965,2024-03-09 21:30:00,8.4,2024,3,9
34015,2024-03-09 22:00:00,8.4,2024,3,9


### Add Columns (Weekday, School Holidays, Public Holidays) to df

In [9]:
# Apply the three calendar helpers in sequence (weekday, school holidays, public holidays),
# each one receiving the output of the previous step, then display the enriched frame.
df = (
    df
    .pipe(add_weekday)
    .pipe(add_schoolholidays)
    .pipe(add_public_holidays)
)
df

Unnamed: 0,Date,Heures,date_hour,Consommation,weekday,school_holiday,public_holiday
0,2022-06-01,00:00:00,2022-06-01 00:00:00,44940.0,2,0,0
1,2022-06-01,00:15:00,2022-06-01 00:15:00,43967.0,2,0,0
2,2022-06-01,00:30:00,2022-06-01 00:30:00,42514.0,2,0,0
3,2022-06-01,00:45:00,2022-06-01 00:45:00,41073.0,2,0,0
4,2022-06-01,01:00:00,2022-06-01 01:00:00,40359.0,2,0,0
...,...,...,...,...,...,...,...
62340,2024-03-11,09:00:00,2024-03-11 09:00:00,62577.0,0,0,0
62341,2024-03-11,09:15:00,2024-03-11 09:15:00,62649.0,0,0,0
62342,2024-03-11,09:30:00,2024-03-11 09:30:00,62440.0,0,0,0
62343,2024-03-11,09:45:00,2024-03-11 09:45:00,62242.0,0,0,0


### Processing TEMPO Dataframes

In [10]:
# Run the same TEMPO preprocessing on each season, in chronological order.
Tempo_RTE_2122_preproc, Tempo_RTE_2223_preproc, Tempo_RTE_2324_preproc = [
    preproc_tempo(season_frame)
    for season_frame in (Tempo_RTE_2122, Tempo_RTE_2223, Tempo_RTE_2324)
]

In [11]:
# Stack the three seasonal calendars row-wise into one TEMPO table.
# Each season keeps its own row labels, so index values repeat across seasons.
tempo_global = pd.concat(
    objs=(Tempo_RTE_2122_preproc, Tempo_RTE_2223_preproc, Tempo_RTE_2324_preproc),
    axis="index",
)
tempo_global

Unnamed: 0,Date,Type de jour TEMPO
0,2021-09-01,BLEU
1,2021-09-02,BLEU
2,2021-09-03,BLEU
3,2021-09-04,BLEU
4,2021-09-05,BLEU
...,...,...
189,2024-03-08,BLANC
190,2024-03-09,BLEU
191,2024-03-10,BLEU
192,2024-03-11,ROUGE


### Merge Global

In [12]:
# Inner join on the timestamp: only rows whose `date_hour` exists in both the consumption
# frame and the temperature frame are kept.
# NOTE(review): the raw ENEDIS timestamps are displayed with a "+00:00" offset while the RTE
# hours are presumably local time; confirm both `date_hour` columns use the same time reference.
df_merged_temperature = pd.merge(df, df_temp, how='inner', on="date_hour")
df_merged_temperature

Unnamed: 0,Date,Heures,date_hour,Consommation,weekday,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,mois,jour
0,2022-06-01,00:00:00,2022-06-01 00:00:00,44940.0,2,0,0,15.9,2022,6,1
1,2022-06-01,00:30:00,2022-06-01 00:30:00,42514.0,2,0,0,15.9,2022,6,1
2,2022-06-01,01:00:00,2022-06-01 01:00:00,40359.0,2,0,0,15.8,2022,6,1
3,2022-06-01,01:30:00,2022-06-01 01:30:00,40049.0,2,0,0,15.8,2022,6,1
4,2022-06-01,02:00:00,2022-06-01 02:00:00,39328.0,2,0,0,15.7,2022,6,1
...,...,...,...,...,...,...,...,...,...,...,...
31097,2024-03-09,20:30:00,2024-03-09 20:30:00,55220.0,5,1,0,8.4,2024,3,9
31098,2024-03-09,21:00:00,2024-03-09 21:00:00,54044.0,5,1,0,8.4,2024,3,9
31099,2024-03-09,21:30:00,2024-03-09 21:30:00,52928.0,5,1,0,8.4,2024,3,9
31100,2024-03-09,22:00:00,2024-03-09 22:00:00,52428.0,5,1,0,8.4,2024,3,9


In [13]:
# Attach the daily TEMPO colour to every consumption/temperature row.
# - how='left' keeps every row of the left frame; a date missing from the TEMPO table
#   yields NaN in 'Type de jour TEMPO'.
# - validate='many_to_one' raises pandas.errors.MergeError if a date appears more than once
#   in tempo_global, instead of silently duplicating rows (which would corrupt the lagged
#   features built afterwards).
df_merged_temperature_tempo = df_merged_temperature.merge(
    tempo_global,
    how='left',
    on='Date',
    validate='many_to_one',
)
df_merged_temperature_tempo

Unnamed: 0,Date,Heures,date_hour,Consommation,weekday,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,mois,jour,Type de jour TEMPO
0,2022-06-01,00:00:00,2022-06-01 00:00:00,44940.0,2,0,0,15.9,2022,6,1,BLANC
1,2022-06-01,00:30:00,2022-06-01 00:30:00,42514.0,2,0,0,15.9,2022,6,1,BLANC
2,2022-06-01,01:00:00,2022-06-01 01:00:00,40359.0,2,0,0,15.8,2022,6,1,BLANC
3,2022-06-01,01:30:00,2022-06-01 01:30:00,40049.0,2,0,0,15.8,2022,6,1,BLANC
4,2022-06-01,02:00:00,2022-06-01 02:00:00,39328.0,2,0,0,15.7,2022,6,1,BLANC
...,...,...,...,...,...,...,...,...,...,...,...,...
31097,2024-03-09,20:30:00,2024-03-09 20:30:00,55220.0,5,1,0,8.4,2024,3,9,BLEU
31098,2024-03-09,21:00:00,2024-03-09 21:00:00,54044.0,5,1,0,8.4,2024,3,9,BLEU
31099,2024-03-09,21:30:00,2024-03-09 21:30:00,52928.0,5,1,0,8.4,2024,3,9,BLEU
31100,2024-03-09,22:00:00,2024-03-09 22:00:00,52428.0,5,1,0,8.4,2024,3,9,BLEU


## Setting up Circular Variable + Lagged Variable + Encoding TEMPO

### Circular Variable

In [14]:
# Derive the time components with the project helper and display the result.
# In the saved output the frame gains `hour` and `minute` columns and no longer shows
# the `Date` and `Heures` columns.
df_final = set_time_columuns(df_merged_temperature_tempo)
df_final

Unnamed: 0,date_hour,Consommation,weekday,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,mois,jour,Type de jour TEMPO,hour,minute
0,2022-06-01 00:00:00,44940.0,2,0,0,15.9,2022,6,1,BLANC,0,0
1,2022-06-01 00:30:00,42514.0,2,0,0,15.9,2022,6,1,BLANC,0,30
2,2022-06-01 01:00:00,40359.0,2,0,0,15.8,2022,6,1,BLANC,1,0
3,2022-06-01 01:30:00,40049.0,2,0,0,15.8,2022,6,1,BLANC,1,30
4,2022-06-01 02:00:00,39328.0,2,0,0,15.7,2022,6,1,BLANC,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...
31097,2024-03-09 20:30:00,55220.0,5,1,0,8.4,2024,3,9,BLEU,20,30
31098,2024-03-09 21:00:00,54044.0,5,1,0,8.4,2024,3,9,BLEU,21,0
31099,2024-03-09 21:30:00,52928.0,5,1,0,8.4,2024,3,9,BLEU,21,30
31100,2024-03-09 22:00:00,52428.0,5,1,0,8.4,2024,3,9,BLEU,22,0


In [15]:
# Periodic columns handed to the project's sin/cos helper.
# The saved outputs of the following cells show sin_*/cos_* columns (e.g. sin_mois, cos_mois)
# in place of these raw columns.
col = ['mois','jour','hour','minute','weekday']
df_final = sin_cos_colonne(df_final,col)

### Lagged Variable

In [16]:
# Columns to lag, and one lag setting per column (same order as shift_col).
shift_col = ["Consommation","temperature_realisee_lissee_degc"]
# NOTE(review): presumably the number of lagged steps created per column; the saved output
# shows temperature lags up to `_shift25` and the first remaining row at index 49.
# The merged data is half-hourly, so 48 steps would be 24 h; confirm why 49 and 25 were chosen.
shift_lagged = [49,25]

df_final = shit_colonne(df_final,shift_col,shift_lagged)
df_final

Unnamed: 0,date_hour,Consommation,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,Type de jour TEMPO,sin_mois,cos_mois,sin_jour,...,temperature_realisee_lissee_degc_shift16,temperature_realisee_lissee_degc_shift17,temperature_realisee_lissee_degc_shift18,temperature_realisee_lissee_degc_shift19,temperature_realisee_lissee_degc_shift20,temperature_realisee_lissee_degc_shift21,temperature_realisee_lissee_degc_shift22,temperature_realisee_lissee_degc_shift23,temperature_realisee_lissee_degc_shift24,temperature_realisee_lissee_degc_shift25
49,2022-06-02 00:30:00,42420.0,0,0,16.9,2022,BLEU,1.224647e-16,-1.000000e+00,0.394356,...,19.6,19.7,19.8,19.9,19.7,19.5,19.3,19.1,18.9,18.7
50,2022-06-02 01:00:00,40356.0,0,0,16.8,2022,BLEU,1.224647e-16,-1.000000e+00,0.394356,...,19.4,19.6,19.7,19.8,19.9,19.7,19.5,19.3,19.1,18.9
51,2022-06-02 01:30:00,40299.0,0,0,16.8,2022,BLEU,1.224647e-16,-1.000000e+00,0.394356,...,19.2,19.4,19.6,19.7,19.8,19.9,19.7,19.5,19.3,19.1
52,2022-06-02 02:00:00,39840.0,0,0,16.8,2022,BLEU,1.224647e-16,-1.000000e+00,0.394356,...,19.0,19.2,19.4,19.6,19.7,19.8,19.9,19.7,19.5,19.3
53,2022-06-02 02:30:00,38895.0,0,0,16.8,2022,BLEU,1.224647e-16,-1.000000e+00,0.394356,...,18.6,19.0,19.2,19.4,19.6,19.7,19.8,19.9,19.7,19.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31097,2024-03-09 20:30:00,55220.0,1,0,8.4,2024,BLEU,1.000000e+00,6.123234e-17,0.968077,...,9.9,9.9,9.6,9.4,9.2,8.9,8.7,8.5,8.3,8.1
31098,2024-03-09 21:00:00,54044.0,1,0,8.4,2024,BLEU,1.000000e+00,6.123234e-17,0.968077,...,9.8,9.9,9.9,9.6,9.4,9.2,8.9,8.7,8.5,8.3
31099,2024-03-09 21:30:00,52928.0,1,0,8.4,2024,BLEU,1.000000e+00,6.123234e-17,0.968077,...,9.8,9.8,9.9,9.9,9.6,9.4,9.2,8.9,8.7,8.5
31100,2024-03-09 22:00:00,52428.0,1,0,8.4,2024,BLEU,1.000000e+00,6.123234e-17,0.968077,...,9.8,9.8,9.8,9.9,9.9,9.6,9.4,9.2,8.9,8.7


### OneHotEncoder TEMPO

In [17]:
from sklearn.preprocessing import OneHotEncoder

# Column holding the TEMPO day colour (categories found in this data: BLANC / BLEU / ROUGE)
tempo_column = ['Type de jour TEMPO']

# Instantiate the OneHotEncoder.
# The `sparse=False` argument was deprecated in scikit-learn 1.2 and removed in 1.4, so the
# encoder keeps its default sparse output here and the matrix is densified explicitly below;
# this runs unchanged on old and new scikit-learn versions.
ohe = OneHotEncoder()

# Fit encoder
ohe.fit(df_final[tempo_column])

# Display the detected categories
print(f"The categories detected by the OneHotEncoder are {ohe.categories_}")

# Encode the TEMPO column: one 0.0/1.0 column per detected category
tempo_encoded = ohe.transform(df_final[tempo_column]).toarray()
df_final[ohe.get_feature_names_out()] = tempo_encoded

# Drop the original TEMPO column now that it has been encoded
df_final = df_final.drop(columns=tempo_column)

# Show the dataset
df_final.head(3)

The categories detected by the OneHotEncoder are [array(['BLANC', 'BLEU', 'ROUGE'], dtype=object)]




Unnamed: 0,date_hour,Consommation,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,sin_mois,cos_mois,sin_jour,cos_jour,...,temperature_realisee_lissee_degc_shift19,temperature_realisee_lissee_degc_shift20,temperature_realisee_lissee_degc_shift21,temperature_realisee_lissee_degc_shift22,temperature_realisee_lissee_degc_shift23,temperature_realisee_lissee_degc_shift24,temperature_realisee_lissee_degc_shift25,Type de jour TEMPO_BLANC,Type de jour TEMPO_BLEU,Type de jour TEMPO_ROUGE
49,2022-06-02 00:30:00,42420.0,0,0,16.9,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.9,19.7,19.5,19.3,19.1,18.9,18.7,0.0,1.0,0.0
50,2022-06-02 01:00:00,40356.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.8,19.9,19.7,19.5,19.3,19.1,18.9,0.0,1.0,0.0
51,2022-06-02 01:30:00,40299.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.7,19.8,19.9,19.7,19.5,19.3,19.1,0.0,1.0,0.0


# Final Dataframe presentation

In [18]:
# First rows of the final feature table (after lagging and TEMPO encoding).
df_final.head()

Unnamed: 0,date_hour,Consommation,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,sin_mois,cos_mois,sin_jour,cos_jour,...,temperature_realisee_lissee_degc_shift19,temperature_realisee_lissee_degc_shift20,temperature_realisee_lissee_degc_shift21,temperature_realisee_lissee_degc_shift22,temperature_realisee_lissee_degc_shift23,temperature_realisee_lissee_degc_shift24,temperature_realisee_lissee_degc_shift25,Type de jour TEMPO_BLANC,Type de jour TEMPO_BLEU,Type de jour TEMPO_ROUGE
49,2022-06-02 00:30:00,42420.0,0,0,16.9,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.9,19.7,19.5,19.3,19.1,18.9,18.7,0.0,1.0,0.0
50,2022-06-02 01:00:00,40356.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.8,19.9,19.7,19.5,19.3,19.1,18.9,0.0,1.0,0.0
51,2022-06-02 01:30:00,40299.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.7,19.8,19.9,19.7,19.5,19.3,19.1,0.0,1.0,0.0
52,2022-06-02 02:00:00,39840.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.6,19.7,19.8,19.9,19.7,19.5,19.3,0.0,1.0,0.0
53,2022-06-02 02:30:00,38895.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.4,19.6,19.7,19.8,19.9,19.7,19.5,0.0,1.0,0.0


In [19]:
# NOTE(review): same call as the previous cell; one of the two cells can be removed.
df_final.head()

Unnamed: 0,date_hour,Consommation,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,sin_mois,cos_mois,sin_jour,cos_jour,...,temperature_realisee_lissee_degc_shift19,temperature_realisee_lissee_degc_shift20,temperature_realisee_lissee_degc_shift21,temperature_realisee_lissee_degc_shift22,temperature_realisee_lissee_degc_shift23,temperature_realisee_lissee_degc_shift24,temperature_realisee_lissee_degc_shift25,Type de jour TEMPO_BLANC,Type de jour TEMPO_BLEU,Type de jour TEMPO_ROUGE
49,2022-06-02 00:30:00,42420.0,0,0,16.9,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.9,19.7,19.5,19.3,19.1,18.9,18.7,0.0,1.0,0.0
50,2022-06-02 01:00:00,40356.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.8,19.9,19.7,19.5,19.3,19.1,18.9,0.0,1.0,0.0
51,2022-06-02 01:30:00,40299.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.7,19.8,19.9,19.7,19.5,19.3,19.1,0.0,1.0,0.0
52,2022-06-02 02:00:00,39840.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.6,19.7,19.8,19.9,19.7,19.5,19.3,0.0,1.0,0.0
53,2022-06-02 02:30:00,38895.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.4,19.6,19.7,19.8,19.9,19.7,19.5,0.0,1.0,0.0


In [22]:
# Last rows of the final feature table, to check the end of the covered period.
df_final.tail()

Unnamed: 0,date_hour,Consommation,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,sin_mois,cos_mois,sin_jour,cos_jour,...,temperature_realisee_lissee_degc_shift19,temperature_realisee_lissee_degc_shift20,temperature_realisee_lissee_degc_shift21,temperature_realisee_lissee_degc_shift22,temperature_realisee_lissee_degc_shift23,temperature_realisee_lissee_degc_shift24,temperature_realisee_lissee_degc_shift25,Type de jour TEMPO_BLANC,Type de jour TEMPO_BLEU,Type de jour TEMPO_ROUGE
31097,2024-03-09 20:30:00,55220.0,1,0,8.4,2024,1.0,6.123234000000001e-17,0.968077,-0.250653,...,9.4,9.2,8.9,8.7,8.5,8.3,8.1,0.0,1.0,0.0
31098,2024-03-09 21:00:00,54044.0,1,0,8.4,2024,1.0,6.123234000000001e-17,0.968077,-0.250653,...,9.6,9.4,9.2,8.9,8.7,8.5,8.3,0.0,1.0,0.0
31099,2024-03-09 21:30:00,52928.0,1,0,8.4,2024,1.0,6.123234000000001e-17,0.968077,-0.250653,...,9.9,9.6,9.4,9.2,8.9,8.7,8.5,0.0,1.0,0.0
31100,2024-03-09 22:00:00,52428.0,1,0,8.4,2024,1.0,6.123234000000001e-17,0.968077,-0.250653,...,9.9,9.9,9.6,9.4,9.2,8.9,8.7,0.0,1.0,0.0
31101,2024-03-09 22:30:00,53859.0,1,0,8.5,2024,1.0,6.123234000000001e-17,0.968077,-0.250653,...,9.8,9.9,9.9,9.6,9.4,9.2,8.9,0.0,1.0,0.0


# Saving the Final Data

In [21]:
from pathlib import Path

# Write the final feature table to CSV without the row index.
# The output directory is created first: to_csv does not create missing folders and would
# otherwise fail on a fresh checkout where data_final/ does not exist yet.
output_path = Path('data_final/data_final.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df_final.to_csv(output_path, index=False)