# Importación de funciones del script "funciones.ipynb"

In [110]:
import import_ipynb
import pandas as pd
import os
from funciones import get_data, build_table, save_to_csv, date_fromTo, agg_code

# Conexión a la API

 Se establece la conexión con la API mediante solicitudes GET para obtener datos en formato JSON, que posteriormente se convierten en DataFrames.

### Endpoint factors

In [111]:
# Fetch the emission factors from the `intensity/factors` endpoint.
base_url = 'http://api.carbonintensity.org.uk'
endpoint = 'intensity/factors'
factors = get_data(base_url, endpoint)

# Fail fast: previously `df_factors` was only bound inside `if factors:`, so an
# empty response ended in a NameError (or silently reused a stale
# `df_factors` left in the kernel by an earlier run).
if not factors:
    raise RuntimeError(f"No data returned by endpoint '{endpoint}'")

# Reshape from wide (one column per fuel) to long (one row per fuel) and give
# the melted 'variable' column a meaningful name.
df_factors = (
    build_table(factors)
    .melt()
    .rename(columns={'variable': 'Fuels'})
)

In [112]:
# Display the reshaped factors table ('Fuels' / 'value' columns).
df_factors

Unnamed: 0,Fuels,value
0,Biomass,120
1,Coal,937
2,Dutch Imports,474
3,French Imports,53
4,Gas (Combined Cycle),394
5,Gas (Open Cycle),651
6,Hydro,0
7,Irish Imports,458
8,Nuclear,0
9,Oil,935


### Endpoint intensity

In [113]:
# Fetch the carbon-intensity records from the `intensity` endpoint.
base_url = 'http://api.carbonintensity.org.uk'
endpoint = 'intensity'
params = date_fromTo()  # date range built with the helper's default argument

intensity = get_data(base_url, endpoint, params)

# Fail fast: previously `df_intensity` was only bound inside `if intensity:`,
# so an empty response surfaced later as a NameError (or as stale data from a
# previous kernel run).
if not intensity:
    raise RuntimeError(f"No data returned by endpoint '{endpoint}'")

df_intensity = build_table(intensity)

In [114]:
# Display the intensity table built from the API response.
df_intensity

Unnamed: 0,from,to,intensity.forecast,intensity.actual,intensity.index
0,2023-09-06T15:30Z,2023-09-06T16:00Z,208,236,high
1,2023-09-06T16:00Z,2023-09-06T16:30Z,215,240,high
2,2023-09-06T16:30Z,2023-09-06T17:00Z,240,249,high
3,2023-09-06T17:00Z,2023-09-06T17:30Z,247,255,high
4,2023-09-06T17:30Z,2023-09-06T18:00Z,255,264,high
...,...,...,...,...,...
1436,2023-10-06T13:30Z,2023-10-06T14:00Z,52,63,low
1437,2023-10-06T14:00Z,2023-10-06T14:30Z,51,78,low
1438,2023-10-06T14:30Z,2023-10-06T15:00Z,63,97,low
1439,2023-10-06T15:00Z,2023-10-06T15:30Z,80,110,low


## Endpoint regional

 A partir de este endpoint se obtienen 4 DataFrames distintos, por lo que es necesario realizar transformaciones.


*   **df_date**: Almacena fechas.
*   **df_regions**: Almacena regiones.
*   **df_regional**: Su función es almacenar los registros obtenidos mediante el uso de índices.
*   **df_generation**: Almacena el porcentaje (`perc`) que aporta cada fuente de energía al mix de generación, por región y por fecha.


In [115]:
# Fetch the per-region intensity records from the `regional/intensity` endpoint.
base_url = 'http://api.carbonintensity.org.uk'
endpoint = 'regional/intensity'
# NOTE(review): the original comment said "7 days" while the argument is 8 —
# confirm how date_fromTo interprets its argument.
params = date_fromTo(8)

regional = get_data(base_url, endpoint, params)

# Fail fast: previously `df_regional` was only bound inside `if regional:`, so
# an empty response surfaced in the next cell as a NameError (or as stale data
# from a previous kernel run).
if not regional:
    raise RuntimeError(f"No data returned by endpoint '{endpoint}'")

df_regional = build_table(regional)

In [116]:
# Parse the 'from' column once; every derived column below comes from it.
timestamps = pd.to_datetime(df_regional['from'])

# Build the transformed frame in a single chain (no in-place mutation, and no
# column assignment onto a boolean-filtered slice):
#   1. drop the 'to' column,
#   2. add the time/date parts and the date code,
#   3. keep only the records that fall exactly on the hour,
#   4. rename 'from' to 'date' and renumber the rows from 0.
# 'cod_date' identifies a date + hour with the format COD<year><month><day><hour>.
df_regional = (
    df_regional
    .drop(columns=['to'])
    .assign(**{
        'from': timestamps,
        'hour': timestamps.dt.time,
        'month': timestamps.dt.month,
        'day': timestamps.dt.day,
        'year': timestamps.dt.year,
        'cod_date': 'COD' + timestamps.dt.strftime('%Y%m%d%H'),
    })
    .loc[timestamps.dt.minute == 0]
    .rename(columns={'from': 'date'})
    .reset_index(drop=True)
)


### df_date

In [117]:
# Date dimension: the timestamp, its components and the date code taken from
# the transformed regional frame.
date_columns = ['date', 'year', 'month', 'day', 'hour', 'cod_date']
df_date = df_regional[date_columns]
df_date

Unnamed: 0,date,year,month,day,hour,cod_date
0,2023-09-29 16:00:00+00:00,2023,9,29,16:00:00,COD2023092916
1,2023-09-29 17:00:00+00:00,2023,9,29,17:00:00,COD2023092917
2,2023-09-29 18:00:00+00:00,2023,9,29,18:00:00,COD2023092918
3,2023-09-29 19:00:00+00:00,2023,9,29,19:00:00,COD2023092919
4,2023-09-29 20:00:00+00:00,2023,9,29,20:00:00,COD2023092920
...,...,...,...,...,...,...
163,2023-10-06 11:00:00+00:00,2023,10,6,11:00:00,COD2023100611
164,2023-10-06 12:00:00+00:00,2023,10,6,12:00:00,COD2023100612
165,2023-10-06 13:00:00+00:00,2023,10,6,13:00:00,COD2023100613
166,2023-10-06 14:00:00+00:00,2023,10,6,14:00:00,COD2023100614


### df_regions

In [118]:
# A single record is enough to build this DataFrame because the list of
# regions is treated as static (the same in every record).
# Positional access (.iloc) so this does not depend on the index labels.
regions = df_regional['regions'].iloc[0]

dict_regions = {
    'region_id': [],
    'shortname': [],
}

for region in regions:
    region_id = region['regionid']  # avoid shadowing the builtin `id`
    # Check against the ids collected so far. The original tested membership
    # in `dict_regions` itself, i.e. against the keys 'region_id'/'shortname',
    # so the duplicate guard never triggered.
    if region_id not in dict_regions['region_id']:
        dict_regions['region_id'].append(region_id)
        dict_regions['shortname'].append(region['shortname'])

df_regions = pd.DataFrame(dict_regions)
df_regions

Unnamed: 0,region_id,shortname
0,1,North Scotland
1,2,South Scotland
2,3,North West England
3,4,North East England
4,5,Yorkshire
5,6,North Wales & Merseyside
6,7,South Wales
7,8,West Midlands
8,9,East Midlands
9,10,East England


### df_regional

In [119]:
# Work on the 'regions' column of the transformed df_regional: each entry is
# the list of per-region records for one timestamp.
serie_reg = df_regional['regions']

# Tag every region record with a code for its date/hour.
# NOTE: this mutates the nested dicts in place. In particular a trailing
# {'cod_date', 'regionid'} entry is appended to each 'generationmix' list; the
# df_generation cell relies on that last entry to fill in both columns.
for regions_at_date, cod_date in zip(serie_reg, df_regional['cod_date']):
    for region in regions_at_date:
        code = agg_code(region, cod_date)
        region['cod_date'] = code
        region['generationmix'].append({
            'cod_date': code,
            'regionid': region['regionid'],
        })

# Wide intermediate frame: one row per timestamp, one column per region
# position, each cell holding that region's record.
regions_wide = pd.json_normalize(serie_reg)

# One DataFrame per region column. Iterating over the actual columns replaces
# the previously hard-coded 18.
list_DF = [pd.json_normalize(regions_wide[col]) for col in regions_wide.columns]

# Concatenate once instead of growing a DataFrame inside a loop.
df_regional = (
    pd.concat(list_DF, ignore_index=True)
    .rename(columns={'regionid': 'region_id'})
)
df_regional

Unnamed: 0,region_id,dnoregion,shortname,generationmix,cod_date,intensity.forecast,intensity.index
0,1,Scottish Hydro Electric Power Distribution,North Scotland,"[{'fuel': 'biomass', 'perc': 0}, {'fuel': 'coa...",COD2023092916,0,very low
1,1,Scottish Hydro Electric Power Distribution,North Scotland,"[{'fuel': 'biomass', 'perc': 0}, {'fuel': 'coa...",COD2023092917,0,very low
2,1,Scottish Hydro Electric Power Distribution,North Scotland,"[{'fuel': 'biomass', 'perc': 0}, {'fuel': 'coa...",COD2023092918,0,very low
3,1,Scottish Hydro Electric Power Distribution,North Scotland,"[{'fuel': 'biomass', 'perc': 0}, {'fuel': 'coa...",COD2023092919,0,very low
4,1,Scottish Hydro Electric Power Distribution,North Scotland,"[{'fuel': 'biomass', 'perc': 0}, {'fuel': 'coa...",COD2023092920,0,very low
...,...,...,...,...,...,...,...
3019,18,GB,GB,"[{'fuel': 'biomass', 'perc': 3.4}, {'fuel': 'c...",COD2023100611,46,low
3020,18,GB,GB,"[{'fuel': 'biomass', 'perc': 3.6}, {'fuel': 'c...",COD2023100612,51,low
3021,18,GB,GB,"[{'fuel': 'biomass', 'perc': 3.3}, {'fuel': 'c...",COD2023100613,53,low
3022,18,GB,GB,"[{'fuel': 'biomass', 'perc': 4.5}, {'fuel': 'c...",COD2023100614,74,low


### df_generation

In [120]:
# Auxiliary series: one 'generationmix' list (fuel / perc entries) per row of
# df_regional.
df_aux = df_regional['generationmix']

# Collect one small frame per record and concatenate once at the end, instead
# of re-copying the growing DataFrame on every iteration.
generation_frames = []
for raw in df_aux:
    df_raw = pd.json_normalize(raw)
    # The last entry of each list carries 'cod_date' and 'regionid': use it to
    # fill those two columns on the fuel rows, then drop that last row.
    # NOTE(review): 'regionid' ends up as float (1.0, 2.0, ...) because the
    # column holds NaN before the fill — cast if an integer key is needed.
    df_raw = df_raw.fillna(df_raw.iloc[-1])[:-1]
    generation_frames.append(df_raw)

# pd.concat raises on an empty list; keep the original "empty DataFrame" result.
if generation_frames:
    df_generation = pd.concat(generation_frames, ignore_index=True)
else:
    df_generation = pd.DataFrame()

df_generation

Unnamed: 0,fuel,perc,cod_date,regionid
0,biomass,0.0,COD2023092916,1.0
1,coal,0.0,COD2023092916,1.0
2,imports,0.0,COD2023092916,1.0
3,gas,0.0,COD2023092916,1.0
4,nuclear,0.0,COD2023092916,1.0
...,...,...,...,...
27211,nuclear,12.0,COD2023100615,18.0
27212,other,0.0,COD2023100615,18.0
27213,hydro,5.5,COD2023100615,18.0
27214,solar,6.5,COD2023100615,18.0


# Almacenamiento

 Los datos obtenidos se almacenan en formato .csv en la carpeta `archivos`.

In [121]:
# Folder that will hold the extracted data as .csv files: the "archivos"
# directory next to (i.e. under the parent of) the current working directory.
parent_dir = os.path.dirname(os.getcwd())
dir_path = parent_dir + "/archivos"

In [122]:
# Target file name -> DataFrame to store. Insertion order is the save order.
frames_by_file = {
    'factors.csv': df_factors,
    'intensity.csv': df_intensity,
    'regional.csv': df_regional,
    'date.csv': df_date,
    'regions.csv': df_regions,
    'generation.csv': df_generation,
}
df_list = list(frames_by_file.values())
file_name_list = list(frames_by_file.keys())

# Write every DataFrame into the output folder with the project helper.
for csv_name, frame in frames_by_file.items():
    save_to_csv(frame, f'{dir_path}/{csv_name}')

DataFrame guardado exitosamente en formato CSV.
DataFrame guardado exitosamente en formato CSV.
DataFrame guardado exitosamente en formato CSV.
DataFrame guardado exitosamente en formato CSV.
DataFrame guardado exitosamente en formato CSV.
DataFrame guardado exitosamente en formato CSV.
