In [42]:
import requests
import pandas as pd

# URL de base pour accéder à l'API
base_url = "https://odre.opendatasoft.com/api/explore/v2.1"

# Chemin pour accéder aux enregistrements du dataset
dataset_path = "/catalog/datasets/consommation-quotidienne-brute-regionale/exports/json?lang=fr&timezone=Europe%2FBerlin"

# Construction de l'URL complète
url = f"{base_url}{dataset_path}"

# Exécution de la requête GET
response = requests.get(url)

# Vérification si la requête a réussi
if response.status_code == 200:
    # Extraction des données
    data = response.json()

    # Conversion des résultats en DataFrame
    df = pd.DataFrame(data)
    df.head()
else:
    print(f"Erreur lors de la requête: {response.status_code}")


In [43]:
df.to_csv('data_consumption.csv', index=False)

In [44]:
df.tail(10)

Unnamed: 0,date_heure,date,heure,code_insee_region,region,consommation_brute_gaz_grtgaz,statut_grtgaz,consommation_brute_gaz_terega,statut_terega,consommation_brute_gaz_totale,consommation_brute_electricite_rte,statut_rte,consommation_brute_totale
2243498,2023-05-21T18:30:00+02:00,2023-05-21,18:30,76,Occitanie,,,,,,3338,Consolidé,
2243499,2023-05-21T19:00:00+02:00,2023-05-21,19:00,24,Centre-Val de Loire,510.0,Définitif,,,510.0,1765,Consolidé,2275.0
2243500,2023-05-21T19:30:00+02:00,2023-05-21,19:30,28,Normandie,,,,,,2226,Consolidé,
2243501,2023-05-21T20:00:00+02:00,2023-05-21,20:00,93,Provence-Alpes-Côte d'Azur,2105.0,Définitif,,,2105.0,3906,Consolidé,6011.0
2243502,2023-05-21T20:30:00+02:00,2023-05-21,20:30,28,Normandie,,,,,,2276,Consolidé,
2243503,2023-06-01T07:00:00+02:00,2023-06-01,07:00,24,Centre-Val de Loire,772.0,Définitif,,,772.0,1710,Consolidé,2482.0
2243504,2023-06-06T16:30:00+02:00,2023-06-06,16:30,27,Bourgogne-Franche-Comté,,,,,,2023,Consolidé,
2243505,2023-06-06T17:00:00+02:00,2023-06-06,17:00,75,Nouvelle-Aquitaine,527.0,Définitif,633.0,Définitif,1160.0,4259,Consolidé,5419.0
2243506,2023-06-06T17:30:00+02:00,2023-06-06,17:30,32,Hauts-de-France,,,,,,4723,Consolidé,
2243507,2023-06-06T17:30:00+02:00,2023-06-06,17:30,53,Bretagne,,,,,,2200,Consolidé,


In [45]:
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)

In [46]:
df.columns

Index(['date_heure', 'heure', 'code_insee_region', 'region',
       'consommation_brute_gaz_grtgaz', 'statut_grtgaz',
       'consommation_brute_gaz_terega', 'statut_terega',
       'consommation_brute_gaz_totale', 'consommation_brute_electricite_rte',
       'statut_rte', 'consommation_brute_totale'],
      dtype='object')

In [56]:
col_del = ['date_heure', 'consommation_brute_gaz_grtgaz', 'statut_grtgaz', 'consommation_brute_gaz_terega', 'statut_terega','consommation_brute_gaz_totale','statut_rte', 'consommation_brute_totale']
df = df.drop(col_del, axis=1)

KeyError: "['date_heure', 'consommation_brute_gaz_grtgaz', 'statut_grtgaz', 'consommation_brute_gaz_terega', 'statut_terega', 'consommation_brute_gaz_totale', 'statut_rte'] not found in axis"

In [48]:
len(df)

2243508

In [58]:
df.head()

Unnamed: 0_level_0,heure,code_insee_region,region,consommation_brute_electricite_rte
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-01-01,01:30,44,Grand Est,4840
2013-01-01,11:30,28,Normandie,3183
2013-01-01,11:00,27,Bourgogne-Franche-Comté,2280
2013-01-01,10:30,76,Occitanie,4392
2013-01-01,10:30,75,Nouvelle-Aquitaine,4636


In [76]:
grouped_df = df.groupby(['code_insee_region','date'])
grouped_df_hour = df.groupby(['code_insee_region','date','heure'])

In [77]:
result_df = grouped_df['consommation_brute_electricite_rte'].mean()
result_df_hour = grouped_df_hour['consommation_brute_electricite_rte'].mean()

In [78]:
result_df = result_df.to_frame(name = 'consommation_brute_electricite_rte')
result_df_hour = result_df_hour.to_frame(name = 'consommation_brute_electricite_rte')

In [81]:
result_df.columns

Index(['consommation_brute_electricite_rte'], dtype='object')

In [80]:
result_df_hour

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,consommation_brute_electricite_rte
code_insee_region,date,heure,Unnamed: 3_level_1
11,2013-01-01,00:30,9134.0
11,2013-01-01,01:00,8822.0
11,2013-01-01,01:30,8499.0
11,2013-01-01,02:00,8229.0
11,2013-01-01,02:30,8150.0
...,...,...,...
93,2023-08-31,21:30,4120.0
93,2023-08-31,22:00,3978.0
93,2023-08-31,22:30,4188.0
93,2023-08-31,23:00,4244.0


In [82]:
result_df

Unnamed: 0_level_0,Unnamed: 1_level_0,consommation_brute_electricite_rte
code_insee_region,date,Unnamed: 2_level_1
11,2013-01-01,8497.702128
11,2013-01-02,10253.270833
11,2013-01-03,10148.145833
11,2013-01-04,9792.770833
11,2013-01-05,9036.083333
...,...,...
93,2023-08-27,3913.729167
93,2023-08-28,3947.125000
93,2023-08-29,4002.145833
93,2023-08-30,4075.666667


In [75]:
len(result_df)

46740

In [50]:
df = df.sort_index()
df_aggregated = df.groupby(df.index.date)['consommation_brute_electricite_rte'].mean()
df_aggregated = df_aggregated.to_frame(name='consommation_brute_electricite_rte')

In [51]:
df_aggregated.head()

Unnamed: 0,consommation_brute_electricite_rte
2013-01-01,4532.969858
2013-01-02,5374.295139
2013-01-03,5553.409722
2013-01-04,5436.456597
2013-01-05,5011.84375


In [52]:
len(df_aggregated)


3895

In [55]:
df_resampled = df.resample('D')['consommation_brute_electricite_rte'].mean()
df_resampled.head()

date
2013-01-01    4532.969858
2013-01-02    5374.295139
2013-01-03    5553.409722
2013-01-04    5436.456597
2013-01-05    5011.843750
Freq: D, Name: consommation_brute_electricite_rte, dtype: float64