# Global Preprocessing Data

## Importation

In [1]:
############ Usual Import ############
import pandas as pd
import numpy as np
import seaborn as sns
import datetime

############ SKLearn ############
from sklearn.metrics import mean_squared_error,mean_absolute_percentage_error

#Sclaing
from sklearn.preprocessing import RobustScaler

#Machine Learning
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression


############ Internal Importation ############
#Importation Données
from analyse.import_data import import_TEMPO, import_RTE_TR, import_temp_ENEDIS

#Preprocessing Interne
from analyse.preproc import preprocessing_RTE_encours, preproc_temperature_ENEDIS, preproc_tempo
from analyse.feat_eng import add_schoolholidays, add_weekday, add_public_holidays

from analyse.feat_eng import set_time_columuns,sin_cos_colonne,shit_colonne


############ Others ############
from lineartree import LinearBoostRegressor
from vacances_scolaires_france import SchoolHolidayDates


############ Vizualisation ############
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
plt.rcParams.update({'font.size': 16})


############ MagicLines ############
%load_ext autoreload
%autoreload 2

In [2]:
############ Data Import ############
EnCours_Temps_R = import_RTE_TR()

Temp_Nationale = import_temp_ENEDIS()

#Importation Données Tempo
TEMPOs = import_TEMPO(deb=20,fin=23)
Tempo_RTE_2122 = TEMPOs['eCO2mix_RTE_tempo_2021-2022']
Tempo_RTE_2223 = TEMPOs['eCO2mix_RTE_tempo_2022-2023']
Tempo_RTE_2324 = TEMPOs['eCO2mix_RTE_tempo_2023-2024']

RTE Data Retrieval online
[34m
Load data from local CSV...[0m
[34m
Load data 2020-2021 from  local CSV...[0m
[34m
Load data 2021-2022 from  local CSV...[0m
[34m
Load data 2022-2023 from  local CSV...[0m
[34m
Load data 2023-2024 from  local CSV...[0m


  df = pd.read_csv('data_raw/eCO2mix_RTE_En-cours-TR.xls',encoding = "ISO-8859-1", delimiter='\t')


## Why did I choose to process the data "TR"?

2 databases in RTE 
- TR : Real Time, i.e. updated with forecasts and modelling of what can be recovered 
    Exemple : 2022-06-01 -> 2024-01-20 (J-X before the downloading)
- Consolidated : Data consolidated with snips from the various antennas
    Before -> 2022-06-01

I wanted to use only the TR for two reasons:
- you want to predict for the next day, so it makes more sense to use the previous day's values when starting out.
- to minimise the impact of COVID, as there's no point in going back too far because the COVID effect will have an impact.

## Initial Data

In [3]:
EnCours_Temps_R.head()

Unnamed: 0,Périmètre,Nature,Date,Heures,Consommation,Prévision J-1,Prévision J,Fioul,Charbon,Gaz,...,Hydraulique - Fil de l?eau + éclusée,Hydraulique - Lacs,Hydraulique - STEP turbinage,Bioénergies - Déchets,Bioénergies - Biomasse,Bioénergies - Biogaz,Stockage batterie,Déstockage batterie,Eolien terrestre,Eolien offshore
France,Données temps réel,2022-06-01,00:00,44940.0,44800,45100.0,144.0,0.0,3596.0,30207.0,...,1054,1677.0,170,584.0,286.0,ND,ND,ND,ND,
France,Données temps réel,2022-06-01,00:15,43967.0,43700,43900.0,144.0,0.0,3716.0,30337.0,...,1419,581.0,171,560.0,276.0,ND,ND,ND,ND,
France,Données temps réel,2022-06-01,00:30,42514.0,42600,42700.0,142.0,0.0,2880.0,29746.0,...,1280,530.0,169,561.0,276.0,ND,ND,ND,ND,
France,Données temps réel,2022-06-01,00:45,41073.0,41450,41600.0,143.0,0.0,2699.0,29230.0,...,1120,361.0,170,563.0,276.0,ND,ND,ND,ND,
France,Données temps réel,2022-06-01,01:00,40359.0,40300,40500.0,144.0,3.0,2718.0,29019.0,...,1101,373.0,170,563.0,276.0,ND,ND,ND,ND,


In [4]:
EnCours_Temps_R.tail()

Unnamed: 0,Périmètre,Nature,Date,Heures,Consommation,Prévision J-1,Prévision J,Fioul,Charbon,Gaz,...,Hydraulique - Fil de l?eau + éclusée,Hydraulique - Lacs,Hydraulique - STEP turbinage,Bioénergies - Déchets,Bioénergies - Biomasse,Bioénergies - Biogaz,Stockage batterie,Déstockage batterie,Eolien terrestre,Eolien offshore
France,Données temps réel,2024-03-20,23:00,,ND,,,,,,...,,,,,,,,,,
France,Données temps réel,2024-03-20,23:15,,ND,,,,,,...,,,,,,,,,,
France,Données temps réel,2024-03-20,23:30,,ND,,,,,,...,,,,,,,,,,
France,Données temps réel,2024-03-20,23:45,,ND,,,,,,...,,,,,,,,,,
"RTE ne pourra être tenu responsable de l'usage qui pourrait être fait des données mises à disposition, ni en cas de prévisions qui se révèleraient imprécises.",,,,,,,,,,,...,,,,,,,,,,


In [5]:
Temp_Nationale.head()

Unnamed: 0,horodate,temperature_realisee_lissee_degc,temperature_normale_lissee_degc,temperature_realisee_lissee_temperature_normale_lissee_degc,pseudo_rayonnement,annee,mois,jour,annee_mois_jour
0,2021-12-05T14:00:00+00:00,6.1,7.3,-1.2,37.0,2021,12,5,2021-12-05
1,2021-12-05T11:30:00+00:00,6.0,7.0,-1.0,32.0,2021,12,5,2021-12-05
2,2021-12-05T08:30:00+00:00,5.5,6.0,-0.5,46.0,2021,12,5,2021-12-05
3,2021-12-05T08:00:00+00:00,5.5,6.0,-0.5,47.0,2021,12,5,2021-12-05
4,2021-12-05T06:00:00+00:00,5.8,6.0,-0.2,50.0,2021,12,5,2021-12-05


In [6]:
Tempo_RTE_2324.head()

Unnamed: 0,Date,Type de jour TEMPO
0,2023-09-01,BLEU
1,2023-09-02,BLEU
2,2023-09-03,BLEU
3,2023-09-04,BLEU
4,2023-09-05,BLEU


Tempo electricity supply contracts have different price levels, depending on the time of day and the day of the year.

- Red days correspond to periods of the year when consumption is high,
- White days are at an intermediate level,
- Blue days are when consumption is lowest.

## Création Data

### Preprocessing RTE Consommation

In [7]:
df = preprocessing_RTE_encours(EnCours_Temps_R);
df.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['date_hour'] = pd.to_datetime(df_final['Date']+ " " + df_final['Heures'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['Date'] = pd.to_datetime(df_final['Date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['Heures'] = df_final['date_hour'].map(lambda x : x.time())
A 

Unnamed: 0,Date,Heures,date_hour,Consommation
0,2022-06-01,00:00:00,2022-06-01 00:00:00,44940.0
1,2022-06-01,00:15:00,2022-06-01 00:15:00,43967.0
2,2022-06-01,00:30:00,2022-06-01 00:30:00,42514.0
3,2022-06-01,00:45:00,2022-06-01 00:45:00,41073.0
4,2022-06-01,01:00:00,2022-06-01 01:00:00,40359.0


### Température Nationale

In [8]:
df_temp = preproc_temperature_ENEDIS(Temp_Nationale)
df_temp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reduced['horodate'] = df_reduced['horodate'].map(lambda x : x[:-6])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reduced['horodate'] = pd.to_datetime(df_reduced['horodate'],utc=False)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reduced.drop(columns='annee_mois_jour',inplace=True)
A value is trying to be set on a copy of a slice from a DataF

Unnamed: 0,date_hour,temperature_realisee_lissee_degc,annee,mois,jour
43688,2019-03-17 23:00:00,9.0,2019,3,17
32762,2019-03-17 23:30:00,8.9,2019,3,17
21809,2019-03-18 00:00:00,8.9,2019,3,18
65529,2019-03-18 00:30:00,8.9,2019,3,18
76651,2019-03-18 01:00:00,8.9,2019,3,18
...,...,...,...,...,...
3771,2024-03-17 20:30:00,12.7,2024,3,17
55831,2024-03-17 21:00:00,12.6,2024,3,17
68121,2024-03-17 21:30:00,12.7,2024,3,17
68120,2024-03-17 22:00:00,12.6,2024,3,17


### Add Column (Weekday, Holidays) to global_df

In [9]:
df = add_weekday(df)
df = add_schoolholidays(df)
df = add_public_holidays(df)
df

Unnamed: 0,Date,Heures,date_hour,Consommation,weekday,school_holiday,public_holiday
0,2022-06-01,00:00:00,2022-06-01 00:00:00,44940.0,2,0,0
1,2022-06-01,00:15:00,2022-06-01 00:15:00,43967.0,2,0,0
2,2022-06-01,00:30:00,2022-06-01 00:30:00,42514.0,2,0,0
3,2022-06-01,00:45:00,2022-06-01 00:45:00,41073.0,2,0,0
4,2022-06-01,01:00:00,2022-06-01 01:00:00,40359.0,2,0,0
...,...,...,...,...,...,...,...
63036,2024-03-18,15:00:00,2024-03-18 15:00:00,47943.0,0,0,0
63037,2024-03-18,15:15:00,2024-03-18 15:15:00,47768.0,0,0,0
63038,2024-03-18,15:30:00,2024-03-18 15:30:00,47326.0,0,0,0
63039,2024-03-18,15:45:00,2024-03-18 15:45:00,47430.0,0,0,0


### Processing TEMPO Dataframes

In [10]:
Tempo_RTE_2122_preproc = preproc_tempo(Tempo_RTE_2122)
Tempo_RTE_2223_preproc = preproc_tempo(Tempo_RTE_2223)
Tempo_RTE_2324_preproc = preproc_tempo(Tempo_RTE_2324)

In [11]:
tempo_global = pd.concat([Tempo_RTE_2122_preproc, Tempo_RTE_2223_preproc, Tempo_RTE_2324_preproc], axis=0)
tempo_global

Unnamed: 0,Date,Type de jour TEMPO
0,2021-09-01,BLEU
1,2021-09-02,BLEU
2,2021-09-03,BLEU
3,2021-09-04,BLEU
4,2021-09-05,BLEU
...,...,...
196,2024-03-15,BLEU
197,2024-03-16,BLEU
198,2024-03-17,BLEU
199,2024-03-18,BLEU


### Merge Global

In [12]:
df_merged_temperature = df.merge(df_temp,how='inner',on="date_hour")
df_merged_temperature

Unnamed: 0,Date,Heures,date_hour,Consommation,weekday,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,mois,jour
0,2022-06-01,00:00:00,2022-06-01 00:00:00,44940.0,2,0,0,15.9,2022,6,1
1,2022-06-01,00:30:00,2022-06-01 00:30:00,42514.0,2,0,0,15.9,2022,6,1
2,2022-06-01,01:00:00,2022-06-01 01:00:00,40359.0,2,0,0,15.8,2022,6,1
3,2022-06-01,01:30:00,2022-06-01 01:30:00,40049.0,2,0,0,15.8,2022,6,1
4,2022-06-01,02:00:00,2022-06-01 02:00:00,39328.0,2,0,0,15.7,2022,6,1
...,...,...,...,...,...,...,...,...,...,...,...
31481,2024-03-17,20:30:00,2024-03-17 20:30:00,47805.0,6,0,0,12.7,2024,3,17
31482,2024-03-17,21:00:00,2024-03-17 21:00:00,46502.0,6,0,0,12.6,2024,3,17
31483,2024-03-17,21:30:00,2024-03-17 21:30:00,45401.0,6,0,0,12.7,2024,3,17
31484,2024-03-17,22:00:00,2024-03-17 22:00:00,44710.0,6,0,0,12.6,2024,3,17


In [13]:
df_merged_temperature_tempo = df_merged_temperature.merge(tempo_global,how='left',on='Date')
df_merged_temperature_tempo

Unnamed: 0,Date,Heures,date_hour,Consommation,weekday,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,mois,jour,Type de jour TEMPO
0,2022-06-01,00:00:00,2022-06-01 00:00:00,44940.0,2,0,0,15.9,2022,6,1,BLANC
1,2022-06-01,00:30:00,2022-06-01 00:30:00,42514.0,2,0,0,15.9,2022,6,1,BLANC
2,2022-06-01,01:00:00,2022-06-01 01:00:00,40359.0,2,0,0,15.8,2022,6,1,BLANC
3,2022-06-01,01:30:00,2022-06-01 01:30:00,40049.0,2,0,0,15.8,2022,6,1,BLANC
4,2022-06-01,02:00:00,2022-06-01 02:00:00,39328.0,2,0,0,15.7,2022,6,1,BLANC
...,...,...,...,...,...,...,...,...,...,...,...,...
31481,2024-03-17,20:30:00,2024-03-17 20:30:00,47805.0,6,0,0,12.7,2024,3,17,BLEU
31482,2024-03-17,21:00:00,2024-03-17 21:00:00,46502.0,6,0,0,12.6,2024,3,17,BLEU
31483,2024-03-17,21:30:00,2024-03-17 21:30:00,45401.0,6,0,0,12.7,2024,3,17,BLEU
31484,2024-03-17,22:00:00,2024-03-17 22:00:00,44710.0,6,0,0,12.6,2024,3,17,BLEU


## Setting up Circular Variable + Lagged Variable + Encoding TEMPO

### Circular Variable

In [14]:
df_final = set_time_columuns(df_merged_temperature_tempo)
df_final

Unnamed: 0,date_hour,Consommation,weekday,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,mois,jour,Type de jour TEMPO,hour,minute
0,2022-06-01 00:00:00,44940.0,2,0,0,15.9,2022,6,1,BLANC,0,0
1,2022-06-01 00:30:00,42514.0,2,0,0,15.9,2022,6,1,BLANC,0,30
2,2022-06-01 01:00:00,40359.0,2,0,0,15.8,2022,6,1,BLANC,1,0
3,2022-06-01 01:30:00,40049.0,2,0,0,15.8,2022,6,1,BLANC,1,30
4,2022-06-01 02:00:00,39328.0,2,0,0,15.7,2022,6,1,BLANC,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...
31481,2024-03-17 20:30:00,47805.0,6,0,0,12.7,2024,3,17,BLEU,20,30
31482,2024-03-17 21:00:00,46502.0,6,0,0,12.6,2024,3,17,BLEU,21,0
31483,2024-03-17 21:30:00,45401.0,6,0,0,12.7,2024,3,17,BLEU,21,30
31484,2024-03-17 22:00:00,44710.0,6,0,0,12.6,2024,3,17,BLEU,22,0


In [15]:
col = ['mois','jour','hour','minute','weekday']
df_final = sin_cos_colonne(df_final,col)

### Lagged Variable

In [16]:
shift_col = ["Consommation","temperature_realisee_lissee_degc"]
shift_lagged = [49,25]

df_final = shit_colonne(df_final,shift_col,shift_lagged)
df_final

Unnamed: 0,date_hour,Consommation,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,Type de jour TEMPO,sin_mois,cos_mois,sin_jour,...,temperature_realisee_lissee_degc_shift16,temperature_realisee_lissee_degc_shift17,temperature_realisee_lissee_degc_shift18,temperature_realisee_lissee_degc_shift19,temperature_realisee_lissee_degc_shift20,temperature_realisee_lissee_degc_shift21,temperature_realisee_lissee_degc_shift22,temperature_realisee_lissee_degc_shift23,temperature_realisee_lissee_degc_shift24,temperature_realisee_lissee_degc_shift25
49,2022-06-02 00:30:00,42420.0,0,0,16.9,2022,BLEU,1.224647e-16,-1.000000e+00,0.394356,...,19.6,19.7,19.8,19.9,19.7,19.5,19.3,19.1,18.9,18.7
50,2022-06-02 01:00:00,40356.0,0,0,16.8,2022,BLEU,1.224647e-16,-1.000000e+00,0.394356,...,19.4,19.6,19.7,19.8,19.9,19.7,19.5,19.3,19.1,18.9
51,2022-06-02 01:30:00,40299.0,0,0,16.8,2022,BLEU,1.224647e-16,-1.000000e+00,0.394356,...,19.2,19.4,19.6,19.7,19.8,19.9,19.7,19.5,19.3,19.1
52,2022-06-02 02:00:00,39840.0,0,0,16.8,2022,BLEU,1.224647e-16,-1.000000e+00,0.394356,...,19.0,19.2,19.4,19.6,19.7,19.8,19.9,19.7,19.5,19.3
53,2022-06-02 02:30:00,38895.0,0,0,16.8,2022,BLEU,1.224647e-16,-1.000000e+00,0.394356,...,18.6,19.0,19.2,19.4,19.6,19.7,19.8,19.9,19.7,19.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31481,2024-03-17 20:30:00,47805.0,0,0,12.7,2024,BLEU,1.000000e+00,6.123234e-17,-0.299363,...,13.1,13.0,12.8,12.6,12.4,12.2,12.0,11.8,11.7,11.7
31482,2024-03-17 21:00:00,46502.0,0,0,12.6,2024,BLEU,1.000000e+00,6.123234e-17,-0.299363,...,13.2,13.1,13.0,12.8,12.6,12.4,12.2,12.0,11.8,11.7
31483,2024-03-17 21:30:00,45401.0,0,0,12.7,2024,BLEU,1.000000e+00,6.123234e-17,-0.299363,...,13.3,13.2,13.1,13.0,12.8,12.6,12.4,12.2,12.0,11.8
31484,2024-03-17 22:00:00,44710.0,0,0,12.6,2024,BLEU,1.000000e+00,6.123234e-17,-0.299363,...,13.3,13.3,13.2,13.1,13.0,12.8,12.6,12.4,12.2,12.0


### OneHotEncoder TEMPO

In [17]:
from sklearn.preprocessing import OneHotEncoder
# Instantiate the OneHotEncoder
ohe = OneHotEncoder(sparse =False)

# Fit encoder
ohe.fit(df_final[['Type de jour TEMPO']])

# Display the detected categories
print(f"The categories detected by the OneHotEncoder are {ohe.categories_}")

# Transform the current "Street" column
df_final[ohe.get_feature_names_out()] = ohe.transform(df_final[['Type de jour TEMPO']])

# Drop the column "Street" which has been encoded
df_final.drop(columns = ["Type de jour TEMPO"], inplace = True)

# Show the dataset
df_final.head(3)

The categories detected by the OneHotEncoder are [array(['BLANC', 'BLEU', 'ROUGE'], dtype=object)]




Unnamed: 0,date_hour,Consommation,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,sin_mois,cos_mois,sin_jour,cos_jour,...,temperature_realisee_lissee_degc_shift19,temperature_realisee_lissee_degc_shift20,temperature_realisee_lissee_degc_shift21,temperature_realisee_lissee_degc_shift22,temperature_realisee_lissee_degc_shift23,temperature_realisee_lissee_degc_shift24,temperature_realisee_lissee_degc_shift25,Type de jour TEMPO_BLANC,Type de jour TEMPO_BLEU,Type de jour TEMPO_ROUGE
49,2022-06-02 00:30:00,42420.0,0,0,16.9,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.9,19.7,19.5,19.3,19.1,18.9,18.7,0.0,1.0,0.0
50,2022-06-02 01:00:00,40356.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.8,19.9,19.7,19.5,19.3,19.1,18.9,0.0,1.0,0.0
51,2022-06-02 01:30:00,40299.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.7,19.8,19.9,19.7,19.5,19.3,19.1,0.0,1.0,0.0


# Final Dataframe presentation

In [18]:
df_final.head()

Unnamed: 0,date_hour,Consommation,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,sin_mois,cos_mois,sin_jour,cos_jour,...,temperature_realisee_lissee_degc_shift19,temperature_realisee_lissee_degc_shift20,temperature_realisee_lissee_degc_shift21,temperature_realisee_lissee_degc_shift22,temperature_realisee_lissee_degc_shift23,temperature_realisee_lissee_degc_shift24,temperature_realisee_lissee_degc_shift25,Type de jour TEMPO_BLANC,Type de jour TEMPO_BLEU,Type de jour TEMPO_ROUGE
49,2022-06-02 00:30:00,42420.0,0,0,16.9,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.9,19.7,19.5,19.3,19.1,18.9,18.7,0.0,1.0,0.0
50,2022-06-02 01:00:00,40356.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.8,19.9,19.7,19.5,19.3,19.1,18.9,0.0,1.0,0.0
51,2022-06-02 01:30:00,40299.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.7,19.8,19.9,19.7,19.5,19.3,19.1,0.0,1.0,0.0
52,2022-06-02 02:00:00,39840.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.6,19.7,19.8,19.9,19.7,19.5,19.3,0.0,1.0,0.0
53,2022-06-02 02:30:00,38895.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.4,19.6,19.7,19.8,19.9,19.7,19.5,0.0,1.0,0.0


In [19]:
df_final.head()

Unnamed: 0,date_hour,Consommation,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,sin_mois,cos_mois,sin_jour,cos_jour,...,temperature_realisee_lissee_degc_shift19,temperature_realisee_lissee_degc_shift20,temperature_realisee_lissee_degc_shift21,temperature_realisee_lissee_degc_shift22,temperature_realisee_lissee_degc_shift23,temperature_realisee_lissee_degc_shift24,temperature_realisee_lissee_degc_shift25,Type de jour TEMPO_BLANC,Type de jour TEMPO_BLEU,Type de jour TEMPO_ROUGE
49,2022-06-02 00:30:00,42420.0,0,0,16.9,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.9,19.7,19.5,19.3,19.1,18.9,18.7,0.0,1.0,0.0
50,2022-06-02 01:00:00,40356.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.8,19.9,19.7,19.5,19.3,19.1,18.9,0.0,1.0,0.0
51,2022-06-02 01:30:00,40299.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.7,19.8,19.9,19.7,19.5,19.3,19.1,0.0,1.0,0.0
52,2022-06-02 02:00:00,39840.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.6,19.7,19.8,19.9,19.7,19.5,19.3,0.0,1.0,0.0
53,2022-06-02 02:30:00,38895.0,0,0,16.8,2022,1.224647e-16,-1.0,0.394356,0.918958,...,19.4,19.6,19.7,19.8,19.9,19.7,19.5,0.0,1.0,0.0


In [20]:
df_final.tail()

Unnamed: 0,date_hour,Consommation,school_holiday,public_holiday,temperature_realisee_lissee_degc,annee,sin_mois,cos_mois,sin_jour,cos_jour,...,temperature_realisee_lissee_degc_shift19,temperature_realisee_lissee_degc_shift20,temperature_realisee_lissee_degc_shift21,temperature_realisee_lissee_degc_shift22,temperature_realisee_lissee_degc_shift23,temperature_realisee_lissee_degc_shift24,temperature_realisee_lissee_degc_shift25,Type de jour TEMPO_BLANC,Type de jour TEMPO_BLEU,Type de jour TEMPO_ROUGE
31481,2024-03-17 20:30:00,47805.0,0,0,12.7,2024,1.0,6.123234000000001e-17,-0.299363,-0.954139,...,12.6,12.4,12.2,12.0,11.8,11.7,11.7,0.0,1.0,0.0
31482,2024-03-17 21:00:00,46502.0,0,0,12.6,2024,1.0,6.123234000000001e-17,-0.299363,-0.954139,...,12.8,12.6,12.4,12.2,12.0,11.8,11.7,0.0,1.0,0.0
31483,2024-03-17 21:30:00,45401.0,0,0,12.7,2024,1.0,6.123234000000001e-17,-0.299363,-0.954139,...,13.0,12.8,12.6,12.4,12.2,12.0,11.8,0.0,1.0,0.0
31484,2024-03-17 22:00:00,44710.0,0,0,12.6,2024,1.0,6.123234000000001e-17,-0.299363,-0.954139,...,13.1,13.0,12.8,12.6,12.4,12.2,12.0,0.0,1.0,0.0
31485,2024-03-17 22:30:00,45630.0,0,0,12.7,2024,1.0,6.123234000000001e-17,-0.299363,-0.954139,...,13.2,13.1,13.0,12.8,12.6,12.4,12.2,0.0,1.0,0.0


# Recording Final Data

In [21]:
df_final.to_csv('data_final/data_final.csv', index=False)