![Enedis](https://www.fournisseurs-electricite.com/sites/fournisseurs-electricite.com/files/2019-12/enedis-presentation.png)

### Import packages

In [5]:
import csv
import datetime
import json
import os
import time

import pandas as pd
import requests

### Create an empty CSV (header row only)

In [18]:
import csv
def create_csv(path="Puissances_Electriques_au_pas_demi_heure_from_29_4_2023.csv"):
    """Create (or overwrite) a CSV file that contains only the header row.

    Parameters
    ----------
    path : str or path-like, optional
        Destination file. The default is the file name used by the rest of
        this notebook, so ``create_csv()`` without arguments behaves exactly
        as it did before the parameter was introduced.

    Returns
    -------
    None

    Notes
    -----
    The file is opened in ``"w"`` mode: any existing content at ``path`` is
    discarded.
    """
    header = [
        "horodate",
        "mois",
        "injection_rte",
        "soutirage_rte",
        "pertes",
        "consommation_totale",
        "consommation_telerelevee",
        "consommation_telerelevee_hta",
        "consommation_profilee",
        "consommation_profilee_ent_hta",
        "consommation_profilee_ent_bt",
        "consommation_profilee_pro",
        "consommation_profilee_res",
        "production_totale",
        "production_telerelevee",
        "production_profilee",
        "production_profilee_photovoltaique",
        "production_profilee_aut",
        "temperature_reelle_lissee",
        "temperature_normale_lissee",
        "production_eolien",
        "production_photovoltaique",
        "pseudo_rayonnement",
        "consommation_hta",
        "soutirage_vers_autres_grd",
    ]

    # newline="" lets the csv module control line endings itself.
    with open(path, "w", newline="") as csvfile:
        csv.writer(csvfile).writerow(header)

### Function to get data from the API; it appends the results to the CSV created earlier by `create_csv`

In [2]:
# Field names read from each API record, in the order they are written as CSV
# columns. The order matches the header row written by create_csv.
json_columns = [
    "horodate", "mois",
    "injection_rte", "soutirage_rte", "pertes",
    "consommation_totale",
    "consommation_telerelevee", "consommation_telerelevee_hta",
    "consommation_profilee",
    "consommation_profilee_ent_hta", "consommation_profilee_ent_bt",
    "consommation_profilee_pro", "consommation_profilee_res",
    "production_totale", "production_telerelevee",
    "production_profilee",
    "production_profilee_photovoltaique", "production_profilee_aut",
    "temperature_reelle_lissee", "temperature_normale_lissee",
    "production_eolien", "production_photovoltaique",
    "pseudo_rayonnement",
    "consommation_hta",
    "soutirage_vers_autres_grd",
]


In [19]:
def request_get(url, timeout=30):
    """Call the API at ``url`` and append the returned records to the CSV.

    Parameters
    ----------
    url : str
        Fully formatted request URL.
    timeout : float, optional
        Seconds ``requests`` waits on the server before raising a timeout
        error. Without a timeout a stalled connection would block the
        notebook indefinitely.

    Returns
    -------
    None

    Notes
    -----
    - On a non-200 response a message is printed and nothing is written.
    - Rows are *appended* to the CSV, so the file is expected to already
      contain the header row.
    - For each record, the values are taken from ``record['fields']`` in the
      order given by the module-level ``json_columns`` list; a field absent
      from a record is written as an empty cell.
    - Network errors raised by ``requests`` are not caught here.
    """
    response = requests.get(url, timeout=timeout)

    # Guard clause: report the failure and leave the CSV untouched.
    if response.status_code != 200:
        print('API request failed with status code', response.status_code)
        return

    data = response.json()

    # Append mode keeps the header and the rows written by earlier calls.
    with open('Puissances_Electriques_au_pas_demi_heure_from_29_4_2023.csv', mode='a', newline='') as csv_file:
        writer = csv.writer(csv_file)

        for item in data['records']:
            # .get() yields None for a missing field without a bare
            # `except:` that would also swallow KeyboardInterrupt.
            fields = item.get('fields', {})
            writer.writerow([fields.get(col_name) for col_name in json_columns])


## Main function

- Call function to create csv
- Loop over date and hour
- Call API
- Save results to the same csv

In [21]:

# Start from a fresh CSV that contains only the header row.
create_csv()

# URL template: "{}" receives the URL-encoded `q` filter on horodate.
base_url = "https://data.enedis.fr/api/records/1.0/search/?dataset=bilan-electrique-demi-heure&q={}&rows=-1&facet=horodate&facet=region&facet=profil&facet=plage_de_puissance_souscrite&facet=indice_representativite_courbe_ndeg1&facet=indice_representativite_courbe_ndeg2&facet=indice_representativite_courbe_ndeg1_ndeg2&facet=jour_max_du_mois_0_1&facet=semaine_max_du_mois_0_1"

start_date = datetime.date(2023, 4, 29)
end_date = datetime.date(2023, 5, 12)  # inclusive: the loop below uses <=
delta = datetime.timedelta(days=1)

# One request per calendar day. The former inner "hour" loop advanced by a
# full day and never used the hour in the query, so it ran exactly once per
# date; it has been removed without changing the requests that are sent.
current_date = start_date
while current_date <= end_date:
    next_date = current_date + delta

    # Decoded, the filter reads: horodate:[<day>T00:00:00Z TO <next day>T00:00:00Z]
    # NOTE(review): if both bounds are inclusive, the record at next-day
    # midnight is fetched twice (once per adjacent day) — confirm against the
    # API documentation. Exact duplicates are dropped later in this notebook.
    query_string = "horodate%3A%5B{}Z+TO+{}Z%5D".format(
        current_date.isoformat() + "T00:00:00",
        next_date.isoformat() + "T00:00:00",
    )
    url = base_url.format(query_string)

    request_get(url)

    current_hour = datetime.datetime.combine(current_date, datetime.time.min)
    print(f"Done: {current_hour}")

    # Short pause between consecutive requests (presumably to go easy on the API).
    time.sleep(0.5)
    current_date = next_date

Done: 2023-04-29 00:00:00
Done: 2023-04-30 00:00:00
Done: 2023-05-01 00:00:00
Done: 2023-05-02 00:00:00
Done: 2023-05-03 00:00:00
Done: 2023-05-04 00:00:00
Done: 2023-05-05 00:00:00
Done: 2023-05-06 00:00:00
Done: 2023-05-07 00:00:00
Done: 2023-05-08 00:00:00
Done: 2023-05-09 00:00:00
Done: 2023-05-10 00:00:00
Done: 2023-05-11 00:00:00
Done: 2023-05-12 00:00:00


### Read data

In [22]:
# Existing data set. No cell visible in this notebook writes this file, so it
# is expected to already be present in the working directory.
df_all = pd.read_csv("Puissances_Electriques_au_pas_demi_heure.csv")
# Rows appended by request_get during the download loop.
# NOTE(review): the variable name says 23_4 while the file name says 29_4.
df_from23_4_2023 = pd.read_csv("Puissances_Electriques_au_pas_demi_heure_from_29_4_2023.csv")

### Concatenate data

In [23]:
# Stack the existing rows and the newly downloaded rows into one frame.
# ignore_index=True builds a fresh 0..n-1 index; without it the result keeps
# each input's own index, so the same labels would appear twice.
df = pd.concat([df_all, df_from23_4_2023], ignore_index=True)

In [24]:
# (rows, columns) of the combined frame, before duplicate rows are removed.
df.shape

(87770, 25)

In [25]:
# Remove rows that are exact duplicates across all columns. Reassigning
# (instead of inplace=True) keeps the cell safe to re-run and chainable.
df = df.drop_duplicates()
print("Number of rows in concatenated_df without duplicated:", len(df))

# Write the de-duplicated DataFrame to a CSV file. to_csv does not create
# missing directories, so make sure the target folder exists first.
# The file name (including its spelling) is kept unchanged so that anything
# already reading this path keeps working.
output_path = './data/Puissances_Electriques_au_pas_demi_heure/Puissances_Electriques_au_pas_demi_heure_without_dublicates.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)

Number of rows in concatenated_df without duplicated: 87648


### Split data into multiple CSV files by day

In [None]:
# `data` was never defined at notebook level, so this cell raised NameError.
# NOTE(review): it is assumed here that the frame to split is the combined
# frame `df` — confirm. A copy is used so `df` keeps its original horodate values.
data = df.copy()

# Convert the horodate column to a datetime object
data['horodate'] = pd.to_datetime(data['horodate'])

# Group the data by calendar day of the horodate column
groups = data.groupby(pd.Grouper(key='horodate', freq='D'))

# to_csv does not create missing directories, so create the target folder first.
output_dir = 'data/consommation_electrique/'
os.makedirs(output_dir, exist_ok=True)

# Loop through the groups and write each group to a separate CSV file
for name, group in groups:
    # A daily Grouper also yields the days that have no rows; skip them
    # rather than writing header-only files.
    if group.empty:
        continue
    filename = output_dir + name.strftime('%Y-%m-%d') + '.csv'
    group.to_csv(filename, index=False)
