In [8]:
# Importaciones esenciales para el manejo de datos y servicios de XM
from pydataxm import *  # Incluye todas las funcionalidades de pydataxm
import datetime as dt  # Módulo para operaciones de fecha y hora
from pydataxm.pydataxm import ReadDB as apiXM  # Clase para interactuar con la base de datos de XM
import pandas as pd  # Biblioteca de análisis de datos con estructuras de datos optimizadas

In [9]:
# Create the XM API client used for the queries below.
# `apiXM` is the ReadDB class imported explicitly at the top of the notebook.
objetoAPI = apiXM()

In [10]:
# Fetch the catalogue of available data collections from the API client
df = objetoAPI.get_collections()

In [11]:
# Export the collections catalogue to an Excel file for external analysis.
# NOTE(review): the recorded run of this cell ended in PermissionError (Errno 13) on
# 'Collections.xlsx' — presumably the file was open in another program; confirm before re-running.
df.to_excel('Collections.xlsx')


PermissionError: [Errno 13] Permission denied: 'Collections.xlsx'

In [None]:
# Date range shared by every query in this notebook
FechaIni = dt.date(year=2016, month=1, day=1)    # start: 1 January 2016
FechaFin = dt.date(year=2024, month=3, day=31)   # end: 31 March 2024

In [None]:
# Query real energy demand and store the result in a DataFrame.
# Metric description: demand of regulated and non-regulated users that belong to the
# National Interconnected System (public lighting not included).
# Unit: kWh
# The query goes through the `objetoAPI` client created earlier, so no throw-away
# ReadDB() instance is built just for this call.
df_DemandaReal = objetoAPI.request_data(
    "DemaReal",  # metric identifier for real demand
    "Sistema",   # query entity (Sistema, Agente, Recurso, Comercializador)
    FechaIni,    # start date
    FechaFin,    # end date
)


  data[col] = pd.to_numeric(data[col],errors='ignore')
  data['Date'] = pd.to_datetime(data['Date'],errors='ignore', format= '%Y-%m-%d')


In [None]:
# Inspect the DataFrame to verify its structure and column dtypes
df_DemandaReal.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3013 entries, 0 to 29
Data columns (total 27 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Id             3013 non-null   object        
 1   Values_code    3013 non-null   object        
 2   Values_Hour01  3013 non-null   float64       
 3   Values_Hour02  3013 non-null   float64       
 4   Values_Hour03  3013 non-null   float64       
 5   Values_Hour04  3013 non-null   float64       
 6   Values_Hour05  3013 non-null   float64       
 7   Values_Hour06  3013 non-null   float64       
 8   Values_Hour07  3013 non-null   float64       
 9   Values_Hour08  3013 non-null   float64       
 10  Values_Hour09  3013 non-null   float64       
 11  Values_Hour10  3013 non-null   float64       
 12  Values_Hour11  3013 non-null   float64       
 13  Values_Hour12  3013 non-null   float64       
 14  Values_Hour13  3013 non-null   float64       
 15  Values_Hour14  3013 non-null

In [None]:
# Show the first 5 rows to confirm the data loaded correctly
df_DemandaReal.head(5)

Unnamed: 0,Id,Values_code,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,...,Values_Hour16,Values_Hour17,Values_Hour18,Values_Hour19,Values_Hour20,Values_Hour21,Values_Hour22,Values_Hour23,Values_Hour24,Date
0,Sistema,Sistema,9022910.99,8717002.5,8455331.7,8321012.21,8490561.35,8972793.43,9086139.28,9488891.47,...,11175118.2,10979429.87,10646908.54,11071413.28,11266601.17,11131735.51,10723914.57,9960508.14,9244263.73,2024-03-19
1,Sistema,Sistema,8826137.11,8473844.37,8253345.12,8149395.38,8292560.3,8772027.43,8897405.85,9296201.92,...,11169129.77,10948888.41,10688970.92,11068439.78,11265759.48,11066405.4,10534842.05,9901810.77,9388418.39,2024-03-20
2,Sistema,Sistema,8750266.1,8466633.57,8220863.34,8092842.26,8288369.5,8754509.08,8886286.61,9291850.65,...,11096512.85,10937710.13,10518017.49,11028799.87,11172188.6,11018741.25,10531203.46,9938341.27,9340223.2,2024-03-21
3,Sistema,Sistema,8818982.07,8487870.88,8277646.19,8182367.85,8343691.19,8738461.72,8841526.46,9257423.57,...,11124231.56,10979160.29,10585816.71,11096328.39,11475255.64,11312600.1,10733207.25,10123725.42,9500902.7,2024-03-22
4,Sistema,Sistema,9074095.51,8773310.81,8573595.87,8424851.44,8404944.85,8308649.76,8376542.15,8879669.15,...,10054593.59,9916164.16,9905067.53,10569881.56,10717042.58,10426598.6,10119694.42,9672544.43,9227762.0,2024-03-23


In [None]:
# Add 'Year', 'Month' and 'Day' columns, each taken from the matching
# datetime component of the 'Date' column.
for _campo in ('Year', 'Month', 'Day'):
    df_DemandaReal[_campo] = getattr(df_DemandaReal['Date'].dt, _campo.lower())

In [None]:
# Show the first 5 rows again, after the Year/Month/Day columns were added
df_DemandaReal.head(5)

Unnamed: 0,Id,Values_code,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,...,Values_Hour19,Values_Hour20,Values_Hour21,Values_Hour22,Values_Hour23,Values_Hour24,Date,Year,Month,Day
0,Sistema,Sistema,9022910.99,8717002.5,8455331.7,8321012.21,8490561.35,8972793.43,9086139.28,9488891.47,...,11071413.28,11266601.17,11131735.51,10723914.57,9960508.14,9244263.73,2024-03-19,2024,3,19
1,Sistema,Sistema,8826137.11,8473844.37,8253345.12,8149395.38,8292560.3,8772027.43,8897405.85,9296201.92,...,11068439.78,11265759.48,11066405.4,10534842.05,9901810.77,9388418.39,2024-03-20,2024,3,20
2,Sistema,Sistema,8750266.1,8466633.57,8220863.34,8092842.26,8288369.5,8754509.08,8886286.61,9291850.65,...,11028799.87,11172188.6,11018741.25,10531203.46,9938341.27,9340223.2,2024-03-21,2024,3,21


In [None]:
# Column-name lists used by the reshaping steps below (selected by position):
#   columns_name -> columns at positions 2..25 (the 24 hourly value columns)
#   index_name   -> every column from position 26 onwards
columns_name = df_DemandaReal.columns[2:26].tolist()
index_name = df_DemandaReal.columns[26:].tolist()


In [None]:
# Rename the hourly columns to the integers 1..24 so they are easier to handle.
# A single rename with a complete mapping replaces the previous one-column-at-a-time
# loop (24 separate in-place renames). The frame is still modified in place.
df_DemandaReal.rename(
    columns={nombre: hora for hora, nombre in enumerate(columns_name, start=1)},
    inplace=True,
)

In [None]:
# Show the first 3 rows after the hourly columns were renamed
df_DemandaReal.head(3)

Unnamed: 0,Id,Values_code,1,2,3,4,5,6,7,8,...,19,20,21,22,23,24,Date,Year,Month,Day
0,Sistema,Sistema,9022910.99,8717002.5,8455331.7,8321012.21,8490561.35,8972793.43,9086139.28,9488891.47,...,11071413.28,11266601.17,11131735.51,10723914.57,9960508.14,9244263.73,2024-03-19,2024,3,19
1,Sistema,Sistema,8826137.11,8473844.37,8253345.12,8149395.38,8292560.3,8772027.43,8897405.85,9296201.92,...,11068439.78,11265759.48,11066405.4,10534842.05,9901810.77,9388418.39,2024-03-20,2024,3,20
2,Sistema,Sistema,8750266.1,8466633.57,8220863.34,8092842.26,8288369.5,8754509.08,8886286.61,9291850.65,...,11028799.87,11172188.6,11018741.25,10531203.46,9938341.27,9340223.2,2024-03-21,2024,3,21


In [None]:
# Reshape demand from wide (one column per hour) to long format:
# one row per (id columns, Hour) pair with the value in 'Demanda'.
df_unpivot_dem = df_DemandaReal.melt(
    id_vars=index_name,       # identifier columns kept on every row
    value_vars=columns_name,  # hourly columns to unpivot
    var_name='Hour',          # receives the former column label
    value_name='Demanda',     # receives the cell value
)
df_unpivot_dem.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72312 entries, 0 to 72311
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     72312 non-null  datetime64[ns]
 1   Year     72312 non-null  int32         
 2   Month    72312 non-null  int32         
 3   Day      72312 non-null  int32         
 4   Hour     72312 non-null  object        
 5   Demanda  72312 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int32(3), object(1)
memory usage: 2.5+ MB


In [None]:
# Show the first 30 rows of the long-format demand frame
df_unpivot_dem.head(30)

Unnamed: 0,Date,Year,Month,Day,Hour,Demanda
0,2024-03-19,2024,3,19,1,9022910.99
1,2024-03-20,2024,3,20,1,8826137.11
2,2024-03-21,2024,3,21,1,8750266.1
3,2024-03-22,2024,3,22,1,8818982.07
4,2024-03-23,2024,3,23,1,9074095.51
5,2024-03-24,2024,3,24,1,8745515.37
6,2024-03-25,2024,3,25,1,8368009.87
7,2024-03-26,2024,3,26,1,8539653.0
8,2024-03-27,2024,3,27,1,8489793.54
9,2024-03-28,2024,3,28,1,8381845.24


In [None]:
# Export the long-format (hourly) demand to an Excel file
df_unpivot_dem.to_excel('DemandaReal_hora.xlsx')

In [None]:
# Group the long-format demand by date and take the mean of every column per day.
# NOTE(review): Year, Month, Day and Hour are averaged as well; only 'Demanda' is
# selected by the later merge into df_DataModel.
group_dem_day = df_unpivot_dem.groupby('Date').mean()


In [None]:
# Summarise the daily-mean demand frame (index, columns, dtypes)
group_dem_day.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3013 entries, 2016-01-01 to 2024-03-31
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Year     3013 non-null   float64
 1   Month    3013 non-null   float64
 2   Day      3013 non-null   float64
 3   Hour     3013 non-null   object 
 4   Demanda  3013 non-null   float64
dtypes: float64(4), object(1)
memory usage: 141.2+ KB


In [None]:
# Show the first 30 rows of the daily-mean demand frame
group_dem_day.head(30)

Unnamed: 0_level_0,Year,Month,Day,Hour,Demanda
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-01,2016.0,1.0,1.0,12.5,5885745.0
2016-01-02,2016.0,1.0,2.0,12.5,6352403.0
2016-01-03,2016.0,1.0,3.0,12.5,6285871.0
2016-01-04,2016.0,1.0,4.0,12.5,7226923.0
2016-01-05,2016.0,1.0,5.0,12.5,7485122.0
2016-01-06,2016.0,1.0,6.0,12.5,7541161.0
2016-01-07,2016.0,1.0,7.0,12.5,7585950.0
2016-01-08,2016.0,1.0,8.0,12.5,7665034.0
2016-01-09,2016.0,1.0,9.0,12.5,7319285.0
2016-01-10,2016.0,1.0,10.0,12.5,6542415.0


In [None]:
# Query the daily useful volume in energy and store the result in a DataFrame.
# Metric description: volume stored above the Minimum Technical Level, reported daily
# by the agents. In % it is the ratio between the Daily Useful Volume and the Useful
# Capacity of the reservoir.
# Unit: kWh
# The query goes through the `objetoAPI` client created earlier, so no throw-away
# ReadDB() instance is built just for this call.
df_VolumenU = objetoAPI.request_data(
    "VoluUtilDiarEner",  # metric identifier for daily useful volume (energy)
    "Sistema",           # query entity
    FechaIni,            # start date
    FechaFin,            # end date
)
# Rename 'Value' to 'VolUtil' so the column states that it holds the useful volume
df_VolumenU.rename(columns={'Value': 'VolUtil'}, inplace=True)

  data[col] = pd.to_numeric(data[col],errors='ignore')
  data['Date'] = pd.to_datetime(data['Date'],errors='ignore', format= '%Y-%m-%d')


In [None]:
# Show the first 30 rows of the useful-volume frame
df_VolumenU.head(30)

Unnamed: 0,Id,VolUtil,Date
0,Sistema,6133242000.0,2024-03-19
1,Sistema,6046994000.0,2024-03-20
2,Sistema,5978091000.0,2024-03-21
3,Sistema,5900957000.0,2024-03-22
4,Sistema,5823071000.0,2024-03-23
5,Sistema,5756010000.0,2024-03-24
6,Sistema,5700606000.0,2024-03-25
7,Sistema,5633694000.0,2024-03-26
8,Sistema,5585317000.0,2024-03-27
9,Sistema,5555902000.0,2024-03-28


In [None]:
# Summarise the useful-volume frame (index, columns, dtypes)
df_VolumenU.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3013 entries, 0 to 29
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Id       3013 non-null   object        
 1   VolUtil  3013 non-null   float64       
 2   Date     3013 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 94.2+ KB


In [None]:
# Start the model table: inner-join daily mean demand with daily useful volume
# on 'Date', then keep only the key and the two measures.
df_DataModel = pd.merge(
    group_dem_day,
    df_VolumenU,
    how='inner',  # only dates present in both frames survive
    on='Date',    # join key
).loc[:, ['Date', 'Demanda', 'VolUtil']]

In [None]:
# Summarise the merged model frame (index, columns, non-null counts, dtypes).
# Note: the first positional parameter of DataFrame.info() is `verbose`, not a row
# count, so no argument is passed here.
df_DataModel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3013 entries, 0 to 3012
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     3013 non-null   datetime64[ns]
 1   Demanda  3013 non-null   float64       
 2   VolUtil  3013 non-null   float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 70.7 KB


In [None]:
# Show the first 24 rows of the model frame
df_DataModel.head(24)

Unnamed: 0,Date,Demanda,VolUtil
0,2016-01-01,5885745.0,10555410000.0
1,2016-01-02,6352403.0,10537370000.0
2,2016-01-03,6285871.0,10528410000.0
3,2016-01-04,7226923.0,10507660000.0
4,2016-01-05,7485122.0,10474680000.0
5,2016-01-06,7541161.0,10443030000.0
6,2016-01-07,7585950.0,10412610000.0
7,2016-01-08,7665034.0,10358770000.0
8,2016-01-09,7319285.0,10315240000.0
9,2016-01-10,6542415.0,10281740000.0


In [None]:
# Query energy inflows to the system and store the result in a DataFrame.
# Metric description: river flows, expressed as energy, that feed water into a
# reservoir of the SIN.
# Unit: kWh
# The query goes through the `objetoAPI` client created earlier, so no throw-away
# ReadDB() instance is built just for this call.
df_AporEner = objetoAPI.request_data(
    "AporEner",  # metric identifier for energy inflows
    "Sistema",   # query entity (Sistema, Agente, Recurso, Comercializador)
    FechaIni,    # start date of the query range
    FechaFin,    # end date of the query range
)

  data[col] = pd.to_numeric(data[col],errors='ignore')
  data['Date'] = pd.to_datetime(data['Date'],errors='ignore', format= '%Y-%m-%d')


In [None]:
# Rename column 'Value' to 'Aportes' (in place) so the column name is self-explanatory
df_AporEner.rename(columns={'Value': 'Aportes'}, inplace=True)

In [None]:
# Summarise the inflows frame (index, columns, dtypes)
df_AporEner.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3013 entries, 0 to 29
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Id       3013 non-null   object        
 1   Aportes  3013 non-null   float64       
 2   Date     3013 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 94.2+ KB


In [None]:
# Show the first 40 rows of the inflows frame
df_AporEner.head(40)

Unnamed: 0,Id,Aportes,Date
0,Sistema,75342800.0,2024-03-19
1,Sistema,68679500.0,2024-03-20
2,Sistema,77489500.0,2024-03-21
3,Sistema,77256700.0,2024-03-22
4,Sistema,63960200.0,2024-03-23
5,Sistema,56055600.0,2024-03-24
6,Sistema,70352000.0,2024-03-25
7,Sistema,77876900.0,2024-03-26
8,Sistema,72693500.0,2024-03-27
9,Sistema,65809800.0,2024-03-28


In [None]:
# Add the energy inflows to the model table: inner-join on 'Date' and keep
# the key plus the three measures collected so far.
df_DataModel = pd.merge(
    df_DataModel,
    df_AporEner,  # frame holding the 'Aportes' column
    how='inner',  # only dates present in both frames survive
    on='Date',    # join key
).loc[:, ['Date', 'Demanda', 'VolUtil', 'Aportes']]


In [None]:
# Print the columns of df_DataModel to verify the merge result
print("Columns in df_DataModel: ", df_DataModel.columns)

# Print the columns of df_AporEner; the label names the frame that is actually printed
print("Columns in df_AporEner: ", df_AporEner.columns)

Columns in df_DataModel:  Index(['Date', 'Demanda', 'VolUtil', 'Aportes'], dtype='object')
Columns in group_PO_day:  Index(['Id', 'Aportes', 'Date'], dtype='object')


In [None]:
# Summarise the model frame after adding 'Aportes'
df_DataModel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3013 entries, 0 to 3012
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     3013 non-null   datetime64[ns]
 1   Demanda  3013 non-null   float64       
 2   VolUtil  3013 non-null   float64       
 3   Aportes  3013 non-null   float64       
dtypes: datetime64[ns](1), float64(3)
memory usage: 94.3 KB


In [None]:
# Show the first 20 rows of the model frame
df_DataModel.head(20)

Unnamed: 0,Date,Demanda,VolUtil,Aportes
0,2016-01-01,5885745.0,10555410000.0,63496200.0
1,2016-01-02,6352403.0,10537370000.0,55776100.0
2,2016-01-03,6285871.0,10528410000.0,61744800.0
3,2016-01-04,7226923.0,10507660000.0,70137500.0
4,2016-01-05,7485122.0,10474680000.0,64121400.0
5,2016-01-06,7541161.0,10443030000.0,66516300.0
6,2016-01-07,7585950.0,10412610000.0,66291100.0
7,2016-01-08,7665034.0,10358770000.0,56916800.0
8,2016-01-09,7319285.0,10315240000.0,58531700.0
9,2016-01-10,6542415.0,10281740000.0,55702100.0


In [None]:
# Query the national spot (bolsa) price and store the result in a DataFrame.
# Metric description: offer price of the last flexible plant needed to meet national
# commercial demand, plus an increment delta that remunerates the costs of thermal
# plants not covered in the ideal dispatch.
# Unit: COP/kWh
# The query goes through the `objetoAPI` client created earlier, so no throw-away
# ReadDB() instance is built just for this call.
df_PrecioBolsa = objetoAPI.request_data(
    "PrecBolsNaci",  # metric identifier for the national spot price
    "Sistema",       # query entity (Sistema, Agente, Recurso, Comercializador)
    FechaIni,        # start date of the query range
    FechaFin,        # end date of the query range
)


  data[col] = pd.to_numeric(data[col],errors='ignore')
  data['Date'] = pd.to_datetime(data['Date'],errors='ignore', format= '%Y-%m-%d')


In [None]:
# Show the first 5 rows of the spot-price frame
df_PrecioBolsa.head(5)

Unnamed: 0,Id,Values_code,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,...,Values_Hour16,Values_Hour17,Values_Hour18,Values_Hour19,Values_Hour20,Values_Hour21,Values_Hour22,Values_Hour23,Values_Hour24,Date
0,Sistema,Sistema,595.35896,574.35896,574.35896,574.35896,574.35896,595.35896,598.35896,598.35896,...,649.35896,649.35896,630.35896,654.35896,654.35896,654.35896,649.35896,625.35896,598.35896,2024-03-19
1,Sistema,Sistema,641.9728,630.7728,571.7728,550.7728,550.7728,628.7728,628.7728,628.7728,...,641.9728,645.7728,641.9728,641.9728,645.7728,641.9728,641.9728,628.7728,628.7728,2024-03-20
2,Sistema,Sistema,658.67004,658.67004,641.79304,641.79304,641.79304,658.67004,658.67004,641.79304,...,658.67004,658.67004,658.67004,665.67004,668.67004,668.67004,658.67004,658.67004,641.79304,2024-03-21
3,Sistema,Sistema,713.51441,708.51441,696.51441,696.51441,708.51441,671.51441,671.51441,671.51441,...,718.51441,728.63741,728.63741,728.63741,728.63741,728.63741,713.51441,713.51441,708.51441,2024-03-22
4,Sistema,Sistema,773.68371,718.86971,718.86971,718.86971,718.86971,718.86971,464.95071,638.68371,...,718.86971,718.86971,718.86971,799.68371,798.68371,773.68371,773.68371,718.86971,718.86971,2024-03-23


In [None]:
# Add 'Year', 'Month' and 'Day' columns to df_PrecioBolsa, each taken from the
# matching datetime component of the 'Date' column.
for _campo in ('Year', 'Month', 'Day'):
    df_PrecioBolsa[_campo] = getattr(df_PrecioBolsa['Date'].dt, _campo.lower())


In [None]:
# Column-name lists used to reshape df_PrecioBolsa (selected by position):
#   columns_name -> columns at positions 2..25 (the 24 hourly value columns)
#   index_name   -> every column from position 26 onwards
columns_name = df_PrecioBolsa.columns[2:26].tolist()
index_name = df_PrecioBolsa.columns[26:].tolist()

In [None]:
# Rename the hourly columns of df_PrecioBolsa to the integers 1..24.
# A single rename with a complete mapping replaces the previous one-column-at-a-time
# loop (24 separate in-place renames). The frame is still modified in place.
df_PrecioBolsa.rename(
    columns={nombre: hora for hora, nombre in enumerate(columns_name, start=1)},
    inplace=True,
)


In [None]:
# Show the first 10 rows after the hourly columns were renamed
df_PrecioBolsa.head(10)

Unnamed: 0,Id,Values_code,1,2,3,4,5,6,7,8,...,19,20,21,22,23,24,Date,Year,Month,Day
0,Sistema,Sistema,595.35896,574.35896,574.35896,574.35896,574.35896,595.35896,598.35896,598.35896,...,654.35896,654.35896,654.35896,649.35896,625.35896,598.35896,2024-03-19,2024,3,19
1,Sistema,Sistema,641.9728,630.7728,571.7728,550.7728,550.7728,628.7728,628.7728,628.7728,...,641.9728,645.7728,641.9728,641.9728,628.7728,628.7728,2024-03-20,2024,3,20
2,Sistema,Sistema,658.67004,658.67004,641.79304,641.79304,641.79304,658.67004,658.67004,641.79304,...,665.67004,668.67004,668.67004,658.67004,658.67004,641.79304,2024-03-21,2024,3,21
3,Sistema,Sistema,713.51441,708.51441,696.51441,696.51441,708.51441,671.51441,671.51441,671.51441,...,728.63741,728.63741,728.63741,713.51441,713.51441,708.51441,2024-03-22,2024,3,22
4,Sistema,Sistema,773.68371,718.86971,718.86971,718.86971,718.86971,718.86971,464.95071,638.68371,...,799.68371,798.68371,773.68371,773.68371,718.86971,718.86971,2024-03-23,2024,3,23
5,Sistema,Sistema,729.0539,729.0539,717.1579,716.9579,700.9579,558.8909,517.2169,528.2259,...,717.1579,729.0539,729.0539,717.1579,717.1579,716.9579,2024-03-24,2024,3,24
6,Sistema,Sistema,787.5415,758.5415,719.5415,719.5415,658.5415,658.5415,658.5415,658.5415,...,838.5415,848.5415,848.5415,838.5415,787.5415,758.5415,2024-03-25,2024,3,25
7,Sistema,Sistema,848.48844,838.48844,838.48844,838.48844,802.85844,787.48844,798.48844,798.48844,...,853.48844,853.48844,853.48844,848.48844,838.48844,823.48844,2024-03-26,2024,3,26
8,Sistema,Sistema,993.43951,988.43951,858.43951,858.43951,858.43951,858.43951,858.43951,858.43951,...,1013.43951,1013.43951,1013.43951,1008.43951,858.43951,858.43951,2024-03-27,2024,3,27
9,Sistema,Sistema,1014.95783,981.55283,863.83483,863.83483,863.83483,863.83483,858.83483,598.83483,...,863.83483,1014.95783,1014.95783,1014.95783,1014.95783,858.83483,2024-03-28,2024,3,28


In [None]:
# Reshape the spot price from wide (one column per hour) to long format:
# one row per (id columns, Hour) pair with the value in 'PrecioB'.
df_unpivotPB = df_PrecioBolsa.melt(
    id_vars=index_name,       # identifier columns kept on every row
    value_vars=columns_name,  # hourly columns to unpivot
    var_name='Hour',          # receives the former column label
    value_name='PrecioB',     # receives the cell value
)


In [None]:
# Summarise the long-format spot-price frame (index, columns, dtypes)
df_unpivotPB.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72312 entries, 0 to 72311
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     72312 non-null  datetime64[ns]
 1   Year     72312 non-null  int32         
 2   Month    72312 non-null  int32         
 3   Day      72312 non-null  int32         
 4   Hour     72312 non-null  object        
 5   PrecioB  72312 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int32(3), object(1)
memory usage: 2.5+ MB


In [None]:
# Show the first 30 rows of the long-format spot-price frame
df_unpivotPB.head(30)

Unnamed: 0,Date,Year,Month,Day,Hour,PrecioB
0,2024-03-19,2024,3,19,1,595.35896
1,2024-03-20,2024,3,20,1,641.9728
2,2024-03-21,2024,3,21,1,658.67004
3,2024-03-22,2024,3,22,1,713.51441
4,2024-03-23,2024,3,23,1,773.68371
5,2024-03-24,2024,3,24,1,729.0539
6,2024-03-25,2024,3,25,1,787.5415
7,2024-03-26,2024,3,26,1,848.48844
8,2024-03-27,2024,3,27,1,993.43951
9,2024-03-28,2024,3,28,1,1014.95783


In [None]:
# Group the long-format spot price by 'Date' and take the mean of every column per day.
# NOTE(review): Year, Month, Day and Hour are averaged as well; only 'PrecioB' is
# selected by the later merge into df_DataModel.
group_PB_day = df_unpivotPB.groupby('Date').mean()


In [None]:
# Show the first 10 rows of the daily-mean spot-price frame
group_PB_day.head(10)

Unnamed: 0_level_0,Year,Month,Day,Hour,PrecioB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-01,2016.0,1.0,1.0,12.5,407.565972
2016-01-02,2016.0,1.0,2.0,12.5,407.627913
2016-01-03,2016.0,1.0,3.0,12.5,499.329203
2016-01-04,2016.0,1.0,4.0,12.5,535.140967
2016-01-05,2016.0,1.0,5.0,12.5,573.129733
2016-01-06,2016.0,1.0,6.0,12.5,633.454963
2016-01-07,2016.0,1.0,7.0,12.5,629.537537
2016-01-08,2016.0,1.0,8.0,12.5,587.248523
2016-01-09,2016.0,1.0,9.0,12.5,496.745422
2016-01-10,2016.0,1.0,10.0,12.5,533.0209


In [None]:
# Add the daily mean spot price to the model table: inner-join on 'Date' and keep
# the key plus the four measures collected so far.
df_DataModel = pd.merge(
    df_DataModel,
    group_PB_day,  # daily means of the spot price
    how='inner',   # only dates present in both frames survive
    on='Date',     # join key
).loc[:, ['Date', 'Demanda', 'VolUtil', 'Aportes', 'PrecioB']]


In [None]:
# Print the column labels of the merged model and of the daily spot-price frame;
# the second label names the frame that is actually printed (group_PB_day).
print("Columnas en df_DataModel: ", df_DataModel.columns)
print("Columnas en group_PB_day: ", group_PB_day.columns)

Columns in df_DataModel:  Index(['Date', 'Demanda', 'VolUtil', 'Aportes', 'PrecioB'], dtype='object')
Columns in group_PO_day:  Index(['Year', 'Month', 'Day', 'Hour', 'PrecioB'], dtype='object')


In [None]:
# Show the first 10 rows of the model frame after adding 'PrecioB'
df_DataModel.head(10)

Unnamed: 0,Date,Demanda,VolUtil,Aportes,PrecioB
0,2016-01-01,5885745.0,10555410000.0,63496200.0,407.565972
1,2016-01-02,6352403.0,10537370000.0,55776100.0,407.627913
2,2016-01-03,6285871.0,10528410000.0,61744800.0,499.329203
3,2016-01-04,7226923.0,10507660000.0,70137500.0,535.140967
4,2016-01-05,7485122.0,10474680000.0,64121400.0,573.129733
5,2016-01-06,7541161.0,10443030000.0,66516300.0,633.454963
6,2016-01-07,7585950.0,10412610000.0,66291100.0,629.537537
7,2016-01-08,7665034.0,10358770000.0,56916800.0,587.248523
8,2016-01-09,7319285.0,10315240000.0,58531700.0,496.745422
9,2016-01-10,6542415.0,10281740000.0,55702100.0,533.0209


In [None]:
# Summarise the model frame after adding 'PrecioB'
df_DataModel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3013 entries, 0 to 3012
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     3013 non-null   datetime64[ns]
 1   Demanda  3013 non-null   float64       
 2   VolUtil  3013 non-null   float64       
 3   Aportes  3013 non-null   float64       
 4   PrecioB  3013 non-null   float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 117.8 KB


In [None]:
# Query the ideal offer price per resource and store the result in a DataFrame.
# Metric description: the energy price of a generation resource for each of the 24
# hours of a day. It differs from the dispatch offer price in that it includes the
# CERE instead of the CEE.
# Unit: COP/kWh
# The query goes through the `objetoAPI` client created earlier, so no throw-away
# ReadDB() instance is built just for this call.
df_PrecioOfe = objetoAPI.request_data(
    "PrecOferIdeal",  # metric name exactly as it appears in the metricID field
    "Recurso",        # query entity (Sistema, Agente, Recurso, Comercializador)
    FechaIni,         # start date of the query
    FechaFin,         # end date of the query
)

  data[col] = pd.to_numeric(data[col],errors='ignore')
  data['Date'] = pd.to_datetime(data['Date'],errors='ignore', format= '%Y-%m-%d')


In [None]:
# Show the first 30 rows of the offer-price frame
df_PrecioOfe.head(30)

Unnamed: 0,Id,Code,Value,Date
0,Recurso,2QEK,91.23404,2024-03-19
1,Recurso,3ENA,225.58204,2024-03-19
2,Recurso,3IRX,406.43404,2024-03-19
3,Recurso,ALBG,607.43404,2024-03-19
4,Recurso,CHBG,597.43404,2024-03-19
5,Recurso,CHVR,833.63404,2024-03-19
6,Recurso,CLL1,79.43404,2024-03-19
7,Recurso,CLMG,1018.43404,2024-03-19
8,Recurso,CTG1,1320.55004,2024-03-19
9,Recurso,CTG2,1385.35804,2024-03-19


In [None]:
# Group df_PrecioOfe by 'Date' and take the mean of 'Value' per day.
# 'Value' is selected with a list ([['Value']]) so the result is a one-column
# DataFrame rather than a Series: the following cells call
# rename(columns=...) and .columns on group_PO_day, which need a DataFrame.
group_PO_day = df_PrecioOfe.groupby('Date')[['Value']].mean()

In [None]:
# Summarise the daily-mean offer-price result
group_PO_day.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3013 entries, 2016-01-01 to 2024-03-31
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Value   3013 non-null   float64
dtypes: float64(1)
memory usage: 47.1 KB


In [None]:
# Rename column 'Value' to 'PrecioO' in place.
# NOTE(review): rename(columns=...) is the DataFrame form, so group_PO_day must be a
# DataFrame here, not a Series — confirm against the cell that builds it.
group_PO_day.rename(columns={'Value':'PrecioO'}, inplace=True)

In [None]:
# Show the first rows of the daily-mean offer-price result
group_PO_day.head()

Unnamed: 0_level_0,PrecioO
Date,Unnamed: 1_level_1
2016-01-01,414.206983
2016-01-02,418.642247
2016-01-03,426.29004
2016-01-04,414.717455
2016-01-05,427.959078


In [None]:
# Add the daily mean offer price to the model table: inner-join on 'Date' and keep
# the key plus the five measures collected so far.
# (A stray bare `s` that followed this statement was removed; it raised NameError.)
df_DataModel = df_DataModel.merge(
    group_PO_day,  # daily means of the offer price
    on='Date',     # join key
    how='inner'    # only dates present in both frames survive
)[['Date', 'Demanda', 'VolUtil', 'Aportes', 'PrecioB', 'PrecioO']]

In [None]:
# Print the column labels of both frames to verify the merge result
print("Columnas en df_DataModel: ", df_DataModel.columns)
print("Columnas en group_PO_day: ", group_PO_day.columns)


Columns in df_DataModel:  Index(['Date', 'Demanda', 'VolUtil', 'Aportes', 'PrecioB', 'PrecioO'], dtype='object')
Columns in group_PO_day:  Index(['PrecioO'], dtype='object')


In [None]:
# Show the first 30 rows of the model frame after adding 'PrecioO'
df_DataModel.head(30)

Unnamed: 0,Date,Demanda,VolUtil,Aportes,PrecioB,PrecioO
0,2016-01-01,5885745.0,10555410000.0,63496200.0,407.565972,414.206983
1,2016-01-02,6352403.0,10537370000.0,55776100.0,407.627913,418.642247
2,2016-01-03,6285871.0,10528410000.0,61744800.0,499.329203,426.29004
3,2016-01-04,7226923.0,10507660000.0,70137500.0,535.140967,414.717455
4,2016-01-05,7485122.0,10474680000.0,64121400.0,573.129733,427.959078
5,2016-01-06,7541161.0,10443030000.0,66516300.0,633.454963,415.004776
6,2016-01-07,7585950.0,10412610000.0,66291100.0,629.537537,415.149078
7,2016-01-08,7665034.0,10358770000.0,56916800.0,587.248523,430.419115
8,2016-01-09,7319285.0,10315240000.0,58531700.0,496.745422,453.829644
9,2016-01-10,6542415.0,10281740000.0,55702100.0,533.0209,449.891662


In [None]:
# Summarise the model frame after adding 'PrecioO'
df_DataModel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3013 entries, 0 to 3012
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     3013 non-null   datetime64[ns]
 1   Demanda  3013 non-null   float64       
 2   VolUtil  3013 non-null   float64       
 3   Aportes  3013 non-null   float64       
 4   PrecioB  3013 non-null   float64       
 5   PrecioO  3013 non-null   float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 141.4 KB


In [None]:
# Query real availability per resource and store the result in a DataFrame.
# Metric description: average availability computed from the dates of the events that
# modify the availability of the generators' units, as well as the availability
# reported to the dispatch centre (the original description is cut off at "Centr";
# presumably the Centro Nacional de Despacho — confirm).
# Unit: kWh (as stated in the original notebook)
# The query goes through the `objetoAPI` client created earlier, so no throw-away
# ReadDB() instance is built just for this call.
df_Disp = objetoAPI.request_data(
    "DispoReal",  # metric name exactly as it appears in the metricID field
    "Recurso",    # query entity (Sistema, Agente, Recurso, Comercializador)
    FechaIni,     # start date of the query
    FechaFin,     # end date of the query
)

  data[col] = pd.to_numeric(data[col],errors='ignore')
  data['Date'] = pd.to_datetime(data['Date'],errors='ignore', format= '%Y-%m-%d')


In [None]:
# Show the first 10 rows of the availability frame
df_Disp.head(10)

Unnamed: 0,Id,Values_code,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,...,Values_Hour16,Values_Hour17,Values_Hour18,Values_Hour19,Values_Hour20,Values_Hour21,Values_Hour22,Values_Hour23,Values_Hour24,Date
0,Recurso,2QBW,,,,,,,,,...,,,,,,,,,,2024-03-19
1,Recurso,2QEK,,,,,,,,,...,,,,,,,,,,2024-03-19
2,Recurso,2QRL,,,,,,,,,...,,,,,,,,,,2024-03-19
3,Recurso,2QV2,,,,,,,,,...,,,,,,,,,,2024-03-19
4,Recurso,2R22,,,,,,,,,...,,,,,,,,,,2024-03-19
5,Recurso,2S6Q,,,,,,,,,...,,,,,,,,,,2024-03-19
6,Recurso,2S6S,,,,,,,,,...,,,,,,,,,,2024-03-19
7,Recurso,2S6U,,,,,,,,,...,,,,,,,,,,2024-03-19
8,Recurso,2S78,,,,,,,,,...,,,,,,,,,,2024-03-19
9,Recurso,2S8I,,,,,,,,,...,,,,,,,,,,2024-03-19


In [None]:
# Summarise 'df_Disp': index range, column dtypes and non-null counts per column.
df_Disp.info()

<class 'pandas.core.frame.DataFrame'>
Index: 445352 entries, 0 to 2138
Data columns (total 27 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   Id             445352 non-null  object        
 1   Values_code    445352 non-null  object        
 2   Values_Hour01  176396 non-null  float64       
 3   Values_Hour02  175967 non-null  float64       
 4   Values_Hour03  175932 non-null  float64       
 5   Values_Hour04  175933 non-null  float64       
 6   Values_Hour05  176035 non-null  float64       
 7   Values_Hour06  176986 non-null  float64       
 8   Values_Hour07  179627 non-null  float64       
 9   Values_Hour08  180407 non-null  float64       
 10  Values_Hour09  180952 non-null  float64       
 11  Values_Hour10  181012 non-null  float64       
 12  Values_Hour11  181031 non-null  float64       
 13  Values_Hour12  181112 non-null  float64       
 14  Values_Hour13  180918 non-null  float64       
 15  Values_

In [None]:
# Fill every missing (NaN) entry of 'df_Disp' with 0, modifying the frame in place.
df_Disp.fillna(value=0, inplace=True)

# Print the column summary of 'df_Disp' (dtypes and non-null counts).
df_Disp.info()

<class 'pandas.core.frame.DataFrame'>
Index: 445352 entries, 0 to 2138
Data columns (total 27 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   Id             445352 non-null  object        
 1   Values_code    445352 non-null  object        
 2   Values_Hour01  445352 non-null  float64       
 3   Values_Hour02  445352 non-null  float64       
 4   Values_Hour03  445352 non-null  float64       
 5   Values_Hour04  445352 non-null  float64       
 6   Values_Hour05  445352 non-null  float64       
 7   Values_Hour06  445352 non-null  float64       
 8   Values_Hour07  445352 non-null  float64       
 9   Values_Hour08  445352 non-null  float64       
 10  Values_Hour09  445352 non-null  float64       
 11  Values_Hour10  445352 non-null  float64       
 12  Values_Hour11  445352 non-null  float64       
 13  Values_Hour12  445352 non-null  float64       
 14  Values_Hour13  445352 non-null  float64       
 15  Values_

In [None]:
# Preview the first 10 rows of 'df_Disp' again, now that NaN values were replaced with 0.
df_Disp.head(10)

Unnamed: 0,Id,Values_code,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,...,Values_Hour16,Values_Hour17,Values_Hour18,Values_Hour19,Values_Hour20,Values_Hour21,Values_Hour22,Values_Hour23,Values_Hour24,Date
0,Recurso,2QBW,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-03-19
1,Recurso,2QEK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-03-19
2,Recurso,2QRL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-03-19
3,Recurso,2QV2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-03-19
4,Recurso,2R22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-03-19
5,Recurso,2S6Q,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-03-19
6,Recurso,2S6S,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-03-19
7,Recurso,2S6U,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-03-19
8,Recurso,2S78,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-03-19
9,Recurso,2S8I,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-03-19


In [None]:
# Query the resource listing and store the response in a DataFrame.
# Metric description: list of resources, with their attributes, that are in commercial
# operation or in testing and connected to the National Interconnected System (SIN).
# Unit: not applicable
df_Recurso = apiXM.request_data(
    pydataxm.ReadDB(),   # object that provides the service
    "ListadoRecursos",   # metric name, exactly as it appears in the metricID field
    "Sistema",           # entity level: Sistema, Agente, Recurso or Comercializador
    FechaIni,            # start date of the query
    FechaFin,            # end date of the query
)

  data[col] = pd.to_numeric(data[col],errors='ignore')
  data['Date'] = pd.to_datetime(data['Date'],errors='ignore', format= '%Y-%m-%d')


In [None]:
# Summarise 'df_Recurso': index range, column dtypes and non-null counts per column.
df_Recurso.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 668 entries, 0 to 667
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Id                    668 non-null    object        
 1   Values_Code           668 non-null    object        
 2   Values_Name           668 non-null    object        
 3   Values_Type           668 non-null    object        
 4   Values_Disp           668 non-null    object        
 5   Values_RecType        668 non-null    object        
 6   Values_CompanyCode    668 non-null    object        
 7   Values_EnerSource     668 non-null    object        
 8   Values_OperStartdate  668 non-null    object        
 9   Values_State          668 non-null    object        
 10  Date                  668 non-null    datetime64[ns]
dtypes: datetime64[ns](1), object(10)
memory usage: 57.5+ KB


In [None]:
# Preview the first 10 rows of the resource listing.
df_Recurso.head(10)

Unnamed: 0,Id,Values_Code,Values_Name,Values_Type,Values_Disp,Values_RecType,Values_CompanyCode,Values_EnerSource,Values_OperStartdate,Values_State,Date
0,Sistema,2QBW,EL POPAL,HIDRAULICA,NO DESPACHADO CENTRALMENTE,FILO DE AGUA,ISGG,AGUA,2014-03-31,OPERACION,2024-04-26
1,Sistema,2QEK,SALTO II,HIDRAULICA,DESPACHADO CENTRALMENTE,FILO DE AGUA,ENDG,AGUA,2014-06-25,OPERACION,2024-04-26
2,Sistema,2QRL,LA REBUSCA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,GEN. DISTRIBUIDA,HZEG,AGUA,2014-07-24,OPERACION,2024-04-26
3,Sistema,2QV2,BAJO TULUA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,NORMAL,EPSG,AGUA,2015-01-30,OPERACION,2024-04-26
4,Sistema,2R22,LAGUNETA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,NORMAL,ENDG,AGUA,2014-12-17,OPERACION,2024-04-26
5,Sistema,2S6Q,AUTOG YAGUARITO,TERMICA,NO DESPACHADO CENTRALMENTE,AUTOGENERADOR,EMSG,BIOGAS,2016-03-22,OPERACION,2024-04-26
6,Sistema,2S6S,AUTOG ARGOS YUMBO,TERMICA,NO DESPACHADO CENTRALMENTE,AUTOGENERADOR,EPSG,CARBON,2016-03-20,OPERACION,2024-04-26
7,Sistema,2S6U,AUTOG ARGOS EL CAIRO,HIDRAULICA,NO DESPACHADO CENTRALMENTE,AUTOGENERADOR,EPMG,AGUA,2017-09-29,OPERACION,2024-04-26
8,Sistema,2S78,LA FRISOLERA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,GEN. DISTRIBUIDA,GPYG,AGUA,2016-04-29,OPERACION,2024-04-26
9,Sistema,2S8G,AUTOG ARGOS TOLUVIEJO,TERMICA,NO DESPACHADO CENTRALMENTE,AUTOGENERADOR,EPSG,CARBON,2016-03-03,PRUEBAS,2024-04-26


In [None]:
# Left-join the hourly availability ('df_Disp') with the resource listing ('df_Recurso') so
# that every availability row carries the attributes of its resource. All rows of 'df_Disp'
# are kept; resource attributes are filled in where the code matches.
# The key column is spelled differently in each frame ('Values_code' vs 'Values_Code').
# validate='many_to_one' makes pandas raise a MergeError if a resource code is repeated in
# 'df_Recurso'; without it a repeated code would silently duplicate availability rows and
# inflate any later aggregation.
df_Disp_Type = df_Disp.merge(
    df_Recurso,                 # right frame with the resource attributes
    left_on=['Values_code'],    # join key in 'df_Disp'
    right_on=['Values_Code'],   # join key in 'df_Recurso'
    how='left',                 # keep every row of 'df_Disp'
    validate='many_to_one',     # each code may appear at most once in 'df_Recurso'
)

In [None]:
# Summarise the merged frame: index range, column dtypes and non-null counts per column.
df_Disp_Type.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 445352 entries, 0 to 445351
Data columns (total 38 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   Id_x                  445352 non-null  object        
 1   Values_code           445352 non-null  object        
 2   Values_Hour01         445352 non-null  float64       
 3   Values_Hour02         445352 non-null  float64       
 4   Values_Hour03         445352 non-null  float64       
 5   Values_Hour04         445352 non-null  float64       
 6   Values_Hour05         445352 non-null  float64       
 7   Values_Hour06         445352 non-null  float64       
 8   Values_Hour07         445352 non-null  float64       
 9   Values_Hour08         445352 non-null  float64       
 10  Values_Hour09         445352 non-null  float64       
 11  Values_Hour10         445352 non-null  float64       
 12  Values_Hour11         445352 non-null  float64       
 13 

In [None]:
# Preview the first 10 rows of the merged availability/resource frame.
df_Disp_Type.head(10)

Unnamed: 0,Id_x,Values_code,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,...,Values_Code,Values_Name,Values_Type,Values_Disp,Values_RecType,Values_CompanyCode,Values_EnerSource,Values_OperStartdate,Values_State,Date_y
0,Recurso,2QBW,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2QBW,EL POPAL,HIDRAULICA,NO DESPACHADO CENTRALMENTE,FILO DE AGUA,ISGG,AGUA,2014-03-31,OPERACION,2024-04-26
1,Recurso,2QEK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2QEK,SALTO II,HIDRAULICA,DESPACHADO CENTRALMENTE,FILO DE AGUA,ENDG,AGUA,2014-06-25,OPERACION,2024-04-26
2,Recurso,2QRL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2QRL,LA REBUSCA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,GEN. DISTRIBUIDA,HZEG,AGUA,2014-07-24,OPERACION,2024-04-26
3,Recurso,2QV2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2QV2,BAJO TULUA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,NORMAL,EPSG,AGUA,2015-01-30,OPERACION,2024-04-26
4,Recurso,2R22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2R22,LAGUNETA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,NORMAL,ENDG,AGUA,2014-12-17,OPERACION,2024-04-26
5,Recurso,2S6Q,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2S6Q,AUTOG YAGUARITO,TERMICA,NO DESPACHADO CENTRALMENTE,AUTOGENERADOR,EMSG,BIOGAS,2016-03-22,OPERACION,2024-04-26
6,Recurso,2S6S,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2S6S,AUTOG ARGOS YUMBO,TERMICA,NO DESPACHADO CENTRALMENTE,AUTOGENERADOR,EPSG,CARBON,2016-03-20,OPERACION,2024-04-26
7,Recurso,2S6U,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2S6U,AUTOG ARGOS EL CAIRO,HIDRAULICA,NO DESPACHADO CENTRALMENTE,AUTOGENERADOR,EPMG,AGUA,2017-09-29,OPERACION,2024-04-26
8,Recurso,2S78,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2S78,LA FRISOLERA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,GEN. DISTRIBUIDA,GPYG,AGUA,2016-04-29,OPERACION,2024-04-26
9,Recurso,2S8I,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2S8I,AUTOG REFICAR,TERMICA,NO DESPACHADO CENTRALMENTE,AUTOGENERADOR,GECG,GAS,2016-04-26,OPERACION,2024-04-26


In [None]:
# Keep only the rows of 'df_Disp_Type' whose 'Values_Type' equals 'TERMICA'.
df_Termica = df_Disp_Type.loc[df_Disp_Type['Values_Type'].eq('TERMICA')]

In [None]:
# Summarise the 'TERMICA' subset: index range, column dtypes and non-null counts per column.
df_Termica.info()

<class 'pandas.core.frame.DataFrame'>
Index: 132299 entries, 5 to 445351
Data columns (total 38 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   Id_x                  132299 non-null  object        
 1   Values_code           132299 non-null  object        
 2   Values_Hour01         132299 non-null  float64       
 3   Values_Hour02         132299 non-null  float64       
 4   Values_Hour03         132299 non-null  float64       
 5   Values_Hour04         132299 non-null  float64       
 6   Values_Hour05         132299 non-null  float64       
 7   Values_Hour06         132299 non-null  float64       
 8   Values_Hour07         132299 non-null  float64       
 9   Values_Hour08         132299 non-null  float64       
 10  Values_Hour09         132299 non-null  float64       
 11  Values_Hour10         132299 non-null  float64       
 12  Values_Hour11         132299 non-null  float64       
 13  Valu

In [None]:
# Build the names of the 24 hourly columns ('Values_Hour01' ... 'Values_Hour24'), then
# group the 'TERMICA' rows by 'Date_x' and add up each hourly column within every date.
columns_to_sum = [f'Values_Hour{hour:02d}' for hour in range(1, 25)]
group_Dis_Ter_hour = df_Termica.groupby('Date_x')[columns_to_sum].sum()


In [None]:
# Move the grouping key out of the index so that it becomes a regular column.
# The guard keeps the cell safe to re-run: after a reset the index is unnamed, and a second
# reset would otherwise add a spurious 'index' column in front of the data.
if group_Dis_Ter_hour.index.name is not None:
    group_Dis_Ter_hour.reset_index(inplace=True)


In [None]:
# Preview the first 10 rows of the per-date hourly sums.
group_Dis_Ter_hour.head(10)

Unnamed: 0,Date_x,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,Values_Hour09,...,Values_Hour15,Values_Hour16,Values_Hour17,Values_Hour18,Values_Hour19,Values_Hour20,Values_Hour21,Values_Hour22,Values_Hour23,Values_Hour24
0,2016-01-01,4012000.0,4051000.0,4102000.0,4102000.0,4102000.0,4102000.0,4102000.0,4087500.0,3982650.0,...,3985000.0,3985000.0,3724000.0,3724000.0,3990000.0,3934200.0,3860000.0,3879500.0,3976400.0,4013000.0
1,2016-01-02,4013000.0,4013000.0,4013000.0,3981500.0,3923000.0,3923000.0,3955250.0,4012000.0,4012000.0,...,3985000.0,3923133.0,3942333.0,3985000.0,3992833.0,4021000.0,4023000.0,4023000.0,4023000.0,4023000.0
2,2016-01-03,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4022000.0,4022000.0,...,3865350.0,3862000.0,3876516.0,3862000.0,3867000.0,3888000.0,3890000.0,3890000.0,3890000.0,3890000.0
3,2016-01-04,3881833.0,3873000.0,3873000.0,3873000.0,3873000.0,3873000.0,3872000.0,3870000.0,3869000.0,...,3835000.0,3835000.0,3905333.0,3911000.0,3916000.0,3937000.0,3939000.0,3939000.0,3940000.0,3940000.0
4,2016-01-05,3917000.0,3917000.0,3917000.0,3917000.0,3917000.0,3917000.0,3916000.0,3894000.0,3893000.0,...,3867000.0,3867000.0,3867000.0,3868000.0,3873000.0,3894000.0,3896000.0,3896000.0,3897000.0,3897000.0
5,2016-01-06,3892000.0,3892000.0,3892000.0,3892000.0,3892000.0,3631000.0,3630000.0,3628000.0,3627000.0,...,4007000.0,4007000.0,4007000.0,4008000.0,4013000.0,4034000.0,4036000.0,4036000.0,4037000.0,4037000.0
6,2016-01-07,4108000.0,4108000.0,4108000.0,3967000.0,4006950.0,4108000.0,4107000.0,4105000.0,4104000.0,...,3816000.0,3817000.0,3817000.0,3800266.0,3830000.0,4166000.0,4204000.0,3943000.0,3944000.0,4205000.0
7,2016-01-08,4197000.0,4120933.0,4034000.0,4034000.0,3972716.0,3970000.0,3970000.0,4022000.0,3954000.0,...,3570000.0,3570000.0,3570000.0,3570000.0,3571000.0,3778000.0,3780000.0,3519000.0,3519000.0,3780000.0
8,2016-01-09,3683166.0,3652000.0,3652000.0,3652000.0,3652000.0,3667000.0,3637000.0,3636000.0,3636000.0,...,3572000.0,3592000.0,3556000.0,3472000.0,3734000.0,3755000.0,3757000.0,3496000.0,3496000.0,3757000.0
9,2016-01-10,3757000.0,3757000.0,3757000.0,3757000.0,3757000.0,3757000.0,3496000.0,3756000.0,3756000.0,...,3471000.0,3472000.0,3472000.0,3472000.0,3734000.0,3755000.0,3920000.0,3639450.0,3608000.0,3869000.0


In [None]:
# Rename the 'Date_x' column of 'group_Dis_Ter_hour' to 'Date', in place.
group_Dis_Ter_hour.rename(mapper={'Date_x': 'Date'}, axis='columns', inplace=True)

In [None]:
# Preview the first 10 rows to check the renamed 'Date' column.
group_Dis_Ter_hour.head(10)

Unnamed: 0,Date,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,Values_Hour09,...,Values_Hour15,Values_Hour16,Values_Hour17,Values_Hour18,Values_Hour19,Values_Hour20,Values_Hour21,Values_Hour22,Values_Hour23,Values_Hour24
0,2016-01-01,4012000.0,4051000.0,4102000.0,4102000.0,4102000.0,4102000.0,4102000.0,4087500.0,3982650.0,...,3985000.0,3985000.0,3724000.0,3724000.0,3990000.0,3934200.0,3860000.0,3879500.0,3976400.0,4013000.0
1,2016-01-02,4013000.0,4013000.0,4013000.0,3981500.0,3923000.0,3923000.0,3955250.0,4012000.0,4012000.0,...,3985000.0,3923133.0,3942333.0,3985000.0,3992833.0,4021000.0,4023000.0,4023000.0,4023000.0,4023000.0
2,2016-01-03,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4022000.0,4022000.0,...,3865350.0,3862000.0,3876516.0,3862000.0,3867000.0,3888000.0,3890000.0,3890000.0,3890000.0,3890000.0
3,2016-01-04,3881833.0,3873000.0,3873000.0,3873000.0,3873000.0,3873000.0,3872000.0,3870000.0,3869000.0,...,3835000.0,3835000.0,3905333.0,3911000.0,3916000.0,3937000.0,3939000.0,3939000.0,3940000.0,3940000.0
4,2016-01-05,3917000.0,3917000.0,3917000.0,3917000.0,3917000.0,3917000.0,3916000.0,3894000.0,3893000.0,...,3867000.0,3867000.0,3867000.0,3868000.0,3873000.0,3894000.0,3896000.0,3896000.0,3897000.0,3897000.0
5,2016-01-06,3892000.0,3892000.0,3892000.0,3892000.0,3892000.0,3631000.0,3630000.0,3628000.0,3627000.0,...,4007000.0,4007000.0,4007000.0,4008000.0,4013000.0,4034000.0,4036000.0,4036000.0,4037000.0,4037000.0
6,2016-01-07,4108000.0,4108000.0,4108000.0,3967000.0,4006950.0,4108000.0,4107000.0,4105000.0,4104000.0,...,3816000.0,3817000.0,3817000.0,3800266.0,3830000.0,4166000.0,4204000.0,3943000.0,3944000.0,4205000.0
7,2016-01-08,4197000.0,4120933.0,4034000.0,4034000.0,3972716.0,3970000.0,3970000.0,4022000.0,3954000.0,...,3570000.0,3570000.0,3570000.0,3570000.0,3571000.0,3778000.0,3780000.0,3519000.0,3519000.0,3780000.0
8,2016-01-09,3683166.0,3652000.0,3652000.0,3652000.0,3652000.0,3667000.0,3637000.0,3636000.0,3636000.0,...,3572000.0,3592000.0,3556000.0,3472000.0,3734000.0,3755000.0,3757000.0,3496000.0,3496000.0,3757000.0
9,2016-01-10,3757000.0,3757000.0,3757000.0,3757000.0,3757000.0,3757000.0,3496000.0,3756000.0,3756000.0,...,3471000.0,3472000.0,3472000.0,3472000.0,3734000.0,3755000.0,3920000.0,3639450.0,3608000.0,3869000.0


In [None]:
# Derive the calendar components of 'Date' as separate columns: 'Year', 'Month' and 'Day'.
# Each column is read from the datetime accessor attribute with the same lower-case name.
for _part in ('Year', 'Month', 'Day'):
    group_Dis_Ter_hour[_part] = getattr(group_Dis_Ter_hour['Date'].dt, _part.lower())

In [None]:
# Preview the first 10 rows, including the new 'Year', 'Month' and 'Day' columns.
group_Dis_Ter_hour.head(10)

Unnamed: 0,Date,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,Values_Hour09,...,Values_Hour18,Values_Hour19,Values_Hour20,Values_Hour21,Values_Hour22,Values_Hour23,Values_Hour24,Year,Month,Day
0,2016-01-01,4012000.0,4051000.0,4102000.0,4102000.0,4102000.0,4102000.0,4102000.0,4087500.0,3982650.0,...,3724000.0,3990000.0,3934200.0,3860000.0,3879500.0,3976400.0,4013000.0,2016,1,1
1,2016-01-02,4013000.0,4013000.0,4013000.0,3981500.0,3923000.0,3923000.0,3955250.0,4012000.0,4012000.0,...,3985000.0,3992833.0,4021000.0,4023000.0,4023000.0,4023000.0,4023000.0,2016,1,2
2,2016-01-03,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4022000.0,4022000.0,...,3862000.0,3867000.0,3888000.0,3890000.0,3890000.0,3890000.0,3890000.0,2016,1,3
3,2016-01-04,3881833.0,3873000.0,3873000.0,3873000.0,3873000.0,3873000.0,3872000.0,3870000.0,3869000.0,...,3911000.0,3916000.0,3937000.0,3939000.0,3939000.0,3940000.0,3940000.0,2016,1,4
4,2016-01-05,3917000.0,3917000.0,3917000.0,3917000.0,3917000.0,3917000.0,3916000.0,3894000.0,3893000.0,...,3868000.0,3873000.0,3894000.0,3896000.0,3896000.0,3897000.0,3897000.0,2016,1,5
5,2016-01-06,3892000.0,3892000.0,3892000.0,3892000.0,3892000.0,3631000.0,3630000.0,3628000.0,3627000.0,...,4008000.0,4013000.0,4034000.0,4036000.0,4036000.0,4037000.0,4037000.0,2016,1,6
6,2016-01-07,4108000.0,4108000.0,4108000.0,3967000.0,4006950.0,4108000.0,4107000.0,4105000.0,4104000.0,...,3800266.0,3830000.0,4166000.0,4204000.0,3943000.0,3944000.0,4205000.0,2016,1,7
7,2016-01-08,4197000.0,4120933.0,4034000.0,4034000.0,3972716.0,3970000.0,3970000.0,4022000.0,3954000.0,...,3570000.0,3571000.0,3778000.0,3780000.0,3519000.0,3519000.0,3780000.0,2016,1,8
8,2016-01-09,3683166.0,3652000.0,3652000.0,3652000.0,3652000.0,3667000.0,3637000.0,3636000.0,3636000.0,...,3472000.0,3734000.0,3755000.0,3757000.0,3496000.0,3496000.0,3757000.0,2016,1,9
9,2016-01-10,3757000.0,3757000.0,3757000.0,3757000.0,3757000.0,3757000.0,3496000.0,3756000.0,3756000.0,...,3472000.0,3734000.0,3755000.0,3920000.0,3639450.0,3608000.0,3869000.0,2016,1,10


In [None]:
# Split the column labels by position: labels 1..24 go to 'columns_name' and every label
# from position 25 onwards goes to 'index_name' (position 0 is left out of both lists).
columns_name = group_Dis_Ter_hour.columns[1:25].tolist()
index_name = group_Dis_Ter_hour.columns[25:].tolist()

In [None]:
# Preview the first 10 rows of 'group_Dis_Ter_hour'.
group_Dis_Ter_hour.head(10)

Unnamed: 0,Date,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,Values_Hour09,...,Values_Hour18,Values_Hour19,Values_Hour20,Values_Hour21,Values_Hour22,Values_Hour23,Values_Hour24,Year,Month,Day
0,2016-01-01,4012000.0,4051000.0,4102000.0,4102000.0,4102000.0,4102000.0,4102000.0,4087500.0,3982650.0,...,3724000.0,3990000.0,3934200.0,3860000.0,3879500.0,3976400.0,4013000.0,2016,1,1
1,2016-01-02,4013000.0,4013000.0,4013000.0,3981500.0,3923000.0,3923000.0,3955250.0,4012000.0,4012000.0,...,3985000.0,3992833.0,4021000.0,4023000.0,4023000.0,4023000.0,4023000.0,2016,1,2
2,2016-01-03,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4022000.0,4022000.0,...,3862000.0,3867000.0,3888000.0,3890000.0,3890000.0,3890000.0,3890000.0,2016,1,3
3,2016-01-04,3881833.0,3873000.0,3873000.0,3873000.0,3873000.0,3873000.0,3872000.0,3870000.0,3869000.0,...,3911000.0,3916000.0,3937000.0,3939000.0,3939000.0,3940000.0,3940000.0,2016,1,4
4,2016-01-05,3917000.0,3917000.0,3917000.0,3917000.0,3917000.0,3917000.0,3916000.0,3894000.0,3893000.0,...,3868000.0,3873000.0,3894000.0,3896000.0,3896000.0,3897000.0,3897000.0,2016,1,5
5,2016-01-06,3892000.0,3892000.0,3892000.0,3892000.0,3892000.0,3631000.0,3630000.0,3628000.0,3627000.0,...,4008000.0,4013000.0,4034000.0,4036000.0,4036000.0,4037000.0,4037000.0,2016,1,6
6,2016-01-07,4108000.0,4108000.0,4108000.0,3967000.0,4006950.0,4108000.0,4107000.0,4105000.0,4104000.0,...,3800266.0,3830000.0,4166000.0,4204000.0,3943000.0,3944000.0,4205000.0,2016,1,7
7,2016-01-08,4197000.0,4120933.0,4034000.0,4034000.0,3972716.0,3970000.0,3970000.0,4022000.0,3954000.0,...,3570000.0,3571000.0,3778000.0,3780000.0,3519000.0,3519000.0,3780000.0,2016,1,8
8,2016-01-09,3683166.0,3652000.0,3652000.0,3652000.0,3652000.0,3667000.0,3637000.0,3636000.0,3636000.0,...,3472000.0,3734000.0,3755000.0,3757000.0,3496000.0,3496000.0,3757000.0,2016,1,9
9,2016-01-10,3757000.0,3757000.0,3757000.0,3757000.0,3757000.0,3757000.0,3496000.0,3756000.0,3756000.0,...,3472000.0,3734000.0,3755000.0,3920000.0,3639450.0,3608000.0,3869000.0,2016,1,10


In [None]:
# Rename the columns listed in 'columns_name' to consecutive integers starting at 1,
# following their order in the list. A single rename with a complete mapping replaces the
# manual counter and the one-rename-per-column loop.
group_Dis_Ter_hour.rename(
    columns={iname: icont for icont, iname in enumerate(columns_name, start=1)},
    inplace=True,
)

In [None]:
# Preview the first 10 rows to check the renamed (integer) hourly columns.
group_Dis_Ter_hour.head(10)

Unnamed: 0,Date,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,Year,Month,Day
0,2016-01-01,4012000.0,4051000.0,4102000.0,4102000.0,4102000.0,4102000.0,4102000.0,4087500.0,3982650.0,...,3724000.0,3990000.0,3934200.0,3860000.0,3879500.0,3976400.0,4013000.0,2016,1,1
1,2016-01-02,4013000.0,4013000.0,4013000.0,3981500.0,3923000.0,3923000.0,3955250.0,4012000.0,4012000.0,...,3985000.0,3992833.0,4021000.0,4023000.0,4023000.0,4023000.0,4023000.0,2016,1,2
2,2016-01-03,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4023000.0,4022000.0,4022000.0,...,3862000.0,3867000.0,3888000.0,3890000.0,3890000.0,3890000.0,3890000.0,2016,1,3
3,2016-01-04,3881833.0,3873000.0,3873000.0,3873000.0,3873000.0,3873000.0,3872000.0,3870000.0,3869000.0,...,3911000.0,3916000.0,3937000.0,3939000.0,3939000.0,3940000.0,3940000.0,2016,1,4
4,2016-01-05,3917000.0,3917000.0,3917000.0,3917000.0,3917000.0,3917000.0,3916000.0,3894000.0,3893000.0,...,3868000.0,3873000.0,3894000.0,3896000.0,3896000.0,3897000.0,3897000.0,2016,1,5
5,2016-01-06,3892000.0,3892000.0,3892000.0,3892000.0,3892000.0,3631000.0,3630000.0,3628000.0,3627000.0,...,4008000.0,4013000.0,4034000.0,4036000.0,4036000.0,4037000.0,4037000.0,2016,1,6
6,2016-01-07,4108000.0,4108000.0,4108000.0,3967000.0,4006950.0,4108000.0,4107000.0,4105000.0,4104000.0,...,3800266.0,3830000.0,4166000.0,4204000.0,3943000.0,3944000.0,4205000.0,2016,1,7
7,2016-01-08,4197000.0,4120933.0,4034000.0,4034000.0,3972716.0,3970000.0,3970000.0,4022000.0,3954000.0,...,3570000.0,3571000.0,3778000.0,3780000.0,3519000.0,3519000.0,3780000.0,2016,1,8
8,2016-01-09,3683166.0,3652000.0,3652000.0,3652000.0,3652000.0,3667000.0,3637000.0,3636000.0,3636000.0,...,3472000.0,3734000.0,3755000.0,3757000.0,3496000.0,3496000.0,3757000.0,2016,1,9
9,2016-01-10,3757000.0,3757000.0,3757000.0,3757000.0,3757000.0,3757000.0,3496000.0,3756000.0,3756000.0,...,3472000.0,3734000.0,3755000.0,3920000.0,3639450.0,3608000.0,3869000.0,2016,1,10


In [None]:
# Put the name of the first column of 'group_Dis_Ter_hour' at the front of 'index_name'.
# The membership check keeps the cell idempotent: without it, every re-run would insert the
# same name again and leave duplicated entries in the list.
if group_Dis_Ter_hour.columns[0] not in index_name:
    index_name.insert(0, group_Dis_Ter_hour.columns[0])


In [None]:
# Utiliza 'pd.melt' para convertir el DataFrame 'group_Dis_Ter_hour' de un formato ancho a uno largo.
df_unpivot_Disp_Ter = group_Dis_Ter_hour.melt(
    id_vars=index_name,       # columns kept as identifiers on every output row
    value_vars=columns_name,  # columns that are stacked into a single value column
    var_name='Hour',          # new column holding the original column label
    value_name='DispTer',     # new column holding the stacked values
)

In [None]:
# Group 'df_unpivot_Disp_Ter' by 'Date' and take the mean of every remaining column per date.
# NOTE(review): this also averages every non-key column, not only 'DispTer'; confirm whether
# anything other than the 'DispTer' mean is needed from the result.
group_disp_Ter_day = df_unpivot_Disp_Ter.groupby(by='Date').mean(numeric_only=False)

In [None]:
# Display the per-date means.
group_disp_Ter_day

Unnamed: 0_level_0,Year,Month,Day,Hour,DispTer
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-01,2016.0,1.0,1.0,12.5,3.978219e+06
2016-01-02,2016.0,1.0,2.0,12.5,3.987585e+06
2016-01-03,2016.0,1.0,3.0,12.5,3.930220e+06
2016-01-04,2016.0,1.0,4.0,12.5,3.882154e+06
2016-01-05,2016.0,1.0,5.0,12.5,3.891292e+06
...,...,...,...,...,...
2024-03-27,2024.0,3.0,27.0,12.5,4.932176e+06
2024-03-28,2024.0,3.0,28.0,12.5,5.252463e+06
2024-03-29,2024.0,3.0,29.0,12.5,4.967830e+06
2024-03-30,2024.0,3.0,30.0,12.5,5.024550e+06


In [None]:
# Summarise 'group_disp_Ter_day': index range, column dtypes and non-null counts per column.
group_disp_Ter_day.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3013 entries, 2016-01-01 to 2024-03-31
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Year     3013 non-null   float64
 1   Month    3013 non-null   float64
 2   Day      3013 non-null   float64
 3   Hour     3013 non-null   object 
 4   DispTer  3013 non-null   float64
dtypes: float64(4), object(1)
memory usage: 141.2+ KB


In [None]:
# Attach the daily mean thermal availability ('DispTer') to 'df_DataModel', matching on 'Date'
# with an inner join (only dates present on both sides are kept).
# Only the 'DispTer' column is taken from 'group_disp_Ter_day' (its 'Date' index level is the
# join key), and any 'DispTer' column left in 'df_DataModel' by a previous run is dropped
# first. This keeps the cell re-runnable: merging twice would otherwise produce
# 'DispTer_x'/'DispTer_y' and the final column selection would fail with a KeyError.
df_DataModel = (
    df_DataModel
    .drop(columns='DispTer', errors='ignore')
    .merge(group_disp_Ter_day[['DispTer']], on='Date', how='inner')
)[['Date', 'Demanda', 'VolUtil', 'Aportes', 'PrecioB', 'PrecioO', 'DispTer']]  # final column order
df_DataModel

Unnamed: 0,Date,Demanda,VolUtil,Aportes,PrecioB,PrecioO,DispTer
0,2016-01-01,5.885745e+06,1.055541e+10,63496200.0,407.565972,414.206983,3.978219e+06
1,2016-01-02,6.352403e+06,1.053737e+10,55776100.0,407.627913,418.642247,3.987585e+06
2,2016-01-03,6.285871e+06,1.052841e+10,61744800.0,499.329203,426.290040,3.930220e+06
3,2016-01-04,7.226923e+06,1.050766e+10,70137500.0,535.140967,414.717455,3.882154e+06
4,2016-01-05,7.485122e+06,1.047468e+10,64121400.0,573.129733,427.959078,3.891292e+06
...,...,...,...,...,...,...,...
3008,2024-03-27,9.263611e+06,5.585317e+09,72693500.0,927.814510,607.823857,4.932176e+06
3009,2024-03-28,8.391533e+06,5.555902e+09,65809800.0,851.265372,659.285783,5.252463e+06
3010,2024-03-29,8.072650e+06,5.530486e+09,67556600.0,812.674193,658.555177,4.967830e+06
3011,2024-03-30,8.690871e+06,5.488068e+09,66343100.0,754.784805,635.040607,5.024550e+06


In [None]:
# Keep every row of 'df_Disp_Type' whose 'Values_Type' differs from 'TERMICA'.
# Note: rows with a missing 'Values_Type' also pass this filter, because NaN compares as
# "not equal" to any string.
df_NoTermica = df_Disp_Type.loc[df_Disp_Type['Values_Type'].ne('TERMICA')]

In [None]:
# Preview the first 10 rows of the non-'TERMICA' subset.
df_NoTermica.head(10)

Unnamed: 0,Id_x,Values_code,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,...,Values_Code,Values_Name,Values_Type,Values_Disp,Values_RecType,Values_CompanyCode,Values_EnerSource,Values_OperStartdate,Values_State,Date_y
0,Recurso,2QBW,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2QBW,EL POPAL,HIDRAULICA,NO DESPACHADO CENTRALMENTE,FILO DE AGUA,ISGG,AGUA,2014-03-31,OPERACION,2024-04-26
1,Recurso,2QEK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2QEK,SALTO II,HIDRAULICA,DESPACHADO CENTRALMENTE,FILO DE AGUA,ENDG,AGUA,2014-06-25,OPERACION,2024-04-26
2,Recurso,2QRL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2QRL,LA REBUSCA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,GEN. DISTRIBUIDA,HZEG,AGUA,2014-07-24,OPERACION,2024-04-26
3,Recurso,2QV2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2QV2,BAJO TULUA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,NORMAL,EPSG,AGUA,2015-01-30,OPERACION,2024-04-26
4,Recurso,2R22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2R22,LAGUNETA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,NORMAL,ENDG,AGUA,2014-12-17,OPERACION,2024-04-26
7,Recurso,2S6U,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2S6U,AUTOG ARGOS EL CAIRO,HIDRAULICA,NO DESPACHADO CENTRALMENTE,AUTOGENERADOR,EPMG,AGUA,2017-09-29,OPERACION,2024-04-26
8,Recurso,2S78,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2S78,LA FRISOLERA,HIDRAULICA,NO DESPACHADO CENTRALMENTE,GEN. DISTRIBUIDA,GPYG,AGUA,2016-04-29,OPERACION,2024-04-26
10,Recurso,2S8N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2S8N,GUAVIO MENOR,HIDRAULICA,NO DESPACHADO CENTRALMENTE,NORMAL,ENDG,AGUA,2016-04-27,OPERACION,2024-04-26
12,Recurso,2S8U,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2S8U,PORCE III MENOR,HIDRAULICA,NO DESPACHADO CENTRALMENTE,NORMAL,EPMG,AGUA,2016-04-25,OPERACION,2024-04-26
13,Recurso,2S9L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2S9L,EL COCUYO,HIDRAULICA,NO DESPACHADO CENTRALMENTE,GEN. DISTRIBUIDA,PCYG,AGUA,2016-05-20,OPERACION,2024-04-26


In [None]:
# Summarise the non-'TERMICA' subset: index range, column dtypes and non-null counts.
df_NoTermica.info()

<class 'pandas.core.frame.DataFrame'>
Index: 313053 entries, 0 to 445347
Data columns (total 38 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   Id_x                  313053 non-null  object        
 1   Values_code           313053 non-null  object        
 2   Values_Hour01         313053 non-null  float64       
 3   Values_Hour02         313053 non-null  float64       
 4   Values_Hour03         313053 non-null  float64       
 5   Values_Hour04         313053 non-null  float64       
 6   Values_Hour05         313053 non-null  float64       
 7   Values_Hour06         313053 non-null  float64       
 8   Values_Hour07         313053 non-null  float64       
 9   Values_Hour08         313053 non-null  float64       
 10  Values_Hour09         313053 non-null  float64       
 11  Values_Hour10         313053 non-null  float64       
 12  Values_Hour11         313053 non-null  float64       
 13  Valu

In [None]:
# Build the names of the 24 hourly columns ('Values_Hour01' ... 'Values_Hour24'), then
# group the non-'TERMICA' rows by 'Date_x' and add up each hourly column within every date.
columns_to_sum = [f'Values_Hour{hour:02d}' for hour in range(1, 25)]
group_Dis_NoTer_hour = df_NoTermica.groupby('Date_x')[columns_to_sum].sum()


In [None]:
# Summarise the per-date hourly sums: index range, column dtypes and non-null counts.
group_Dis_NoTer_hour.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3013 entries, 2016-01-01 to 2024-03-31
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Values_Hour01  3013 non-null   float64
 1   Values_Hour02  3013 non-null   float64
 2   Values_Hour03  3013 non-null   float64
 3   Values_Hour04  3013 non-null   float64
 4   Values_Hour05  3013 non-null   float64
 5   Values_Hour06  3013 non-null   float64
 6   Values_Hour07  3013 non-null   float64
 7   Values_Hour08  3013 non-null   float64
 8   Values_Hour09  3013 non-null   float64
 9   Values_Hour10  3013 non-null   float64
 10  Values_Hour11  3013 non-null   float64
 11  Values_Hour12  3013 non-null   float64
 12  Values_Hour13  3013 non-null   float64
 13  Values_Hour14  3013 non-null   float64
 14  Values_Hour15  3013 non-null   float64
 15  Values_Hour16  3013 non-null   float64
 16  Values_Hour17  3013 non-null   float64
 17  Values_Hour18  3013 non-null   flo

In [None]:
# Preview the first 10 rows of the per-date hourly sums.
group_Dis_NoTer_hour.head(10)

Unnamed: 0_level_0,Values_Hour01,Values_Hour02,Values_Hour03,Values_Hour04,Values_Hour05,Values_Hour06,Values_Hour07,Values_Hour08,Values_Hour09,Values_Hour10,...,Values_Hour15,Values_Hour16,Values_Hour17,Values_Hour18,Values_Hour19,Values_Hour20,Values_Hour21,Values_Hour22,Values_Hour23,Values_Hour24
Date_x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-01,8749000.0,8737333.0,8681000.0,8681000.0,8724000.0,8724000.0,8724000.0,8717000.0,8661500.0,8678916.0,...,8698333.0,8537750.0,8548000.0,8783999.0,9079716.0,9128000.0,9108000.0,8993366.0,8861000.0,8861000.0
2016-01-02,8886450.0,8904000.0,8861000.0,8862433.0,8904000.0,8904000.0,8904000.0,8904000.0,8911500.0,8949000.0,...,8903000.0,8903000.0,8903000.0,9050000.0,9103583.0,9133000.0,9142666.0,9128000.0,8916750.0,8907000.0
2016-01-03,8899000.0,8899000.0,8856716.0,8846366.0,8882000.0,8882000.0,8882000.0,8882000.0,8891000.0,8927000.0,...,8851000.0,8857000.0,8857000.0,9001000.0,9081500.0,9129000.0,9116500.0,9001000.0,8858000.0,8858000.0
2016-01-04,8989000.0,8989000.0,8946000.0,8955999.0,9031833.0,9034000.0,9034000.0,9025500.0,9019000.0,9058750.0,...,9018000.0,8761000.0,8761000.0,8812550.0,9121500.0,9159000.0,9118166.0,8999000.0,9040000.0,9017832.0
2016-01-05,8834683.0,8785600.0,8730833.0,8693866.0,8766083.0,8769000.0,8769000.0,8769000.0,8774250.0,8814000.0,...,8756000.0,8756000.0,8796000.0,9003750.0,9073500.0,9132833.0,9117833.0,9045750.0,9000750.0,8997000.0
2016-01-06,8788000.0,8788000.0,8745000.0,8760366.0,8813000.0,8813000.0,8780199.0,8791000.0,8808500.0,8885000.0,...,8808250.0,8794000.0,8794000.0,9044750.0,9125000.0,9175000.0,9170000.0,9048500.0,9030000.0,9006000.0
2016-01-07,8891416.0,8933666.0,8866000.0,8905150.0,8921000.0,8916000.0,8916000.0,8904333.0,8900750.0,9118666.0,...,9057000.0,9057000.0,9057000.0,9071500.0,9170500.0,9198000.0,9180500.0,9070000.0,9058000.0,9058000.0
2016-01-08,9114783.0,9113950.0,9023950.0,9026950.0,9113950.0,9113950.0,9113950.0,9113950.0,9121450.0,9158950.0,...,9112950.0,9112583.0,9109466.0,9130950.0,9208950.0,9256333.0,9249950.0,9139950.0,9159616.0,9164950.0
2016-01-09,9108232.0,9104900.0,9104650.0,9069566.0,9064900.0,9064900.0,9064900.0,9064900.0,9064900.0,9058400.0,...,9042500.0,9050900.0,9050900.0,9070650.0,9141400.0,9304133.0,9365566.0,9248900.0,9233900.0,9233400.0
2016-01-10,9227900.0,9227900.0,9227900.0,9227900.0,9227900.0,9227900.0,9227900.0,9227900.0,9227900.0,9237650.0,...,9226150.0,9226900.0,9218849.0,9289400.0,9317900.0,9317900.0,9317900.0,9317900.0,9188300.0,9045900.0


In [None]:
# Turn the 'Date_x' grouping index back into a regular column.
group_Dis_NoTer_hour = group_Dis_NoTer_hour.reset_index()

In [None]:
# Use the plain name 'Date' for the date column instead of 'Date_x'.
group_Dis_NoTer_hour = group_Dis_NoTer_hour.rename(columns={'Date_x': 'Date'})

In [None]:
# Derive the calendar components from this frame's own 'Date' column.
# All three must be read from group_Dis_NoTer_hour itself: reading 'Month' from a
# different frame (previously group_Dis_Ter_hour) only gives correct values when that
# frame exists and happens to share the same index and dates.
group_Dis_NoTer_hour['Year'] = group_Dis_NoTer_hour['Date'].dt.year
group_Dis_NoTer_hour['Month'] = group_Dis_NoTer_hour['Date'].dt.month
group_Dis_NoTer_hour['Day'] = group_Dis_NoTer_hour['Date'].dt.day

In [None]:
# Column labels at positions 1..24 (the 24 hourly columns that follow the first column).
columns_name = group_Dis_NoTer_hour.columns[1:25].tolist()

In [None]:
# Relabel each column listed in columns_name with its 1-based position (1, 2, 3, ...),
# so the hourly columns are identified by an integer hour instead of a text label.
# A single rename with a full mapping replaces the manual counter and the per-column
# in-place renames.
hour_labels = {iname: icont for icont, iname in enumerate(columns_name, start=1)}
group_Dis_NoTer_hour.rename(columns=hour_labels, inplace=True)

In [None]:
# Split the current column labels into two lists for the reshape:
#   columns_name -> labels at positions 1..24 (the value columns)
#   index_name   -> the first label followed by every label from position 25 onward
columns_name = group_Dis_NoTer_hour.columns[1:25].tolist()
index_name = [group_Dis_NoTer_hour.columns[0], *group_Dis_NoTer_hour.columns[25:]]

In [None]:
# Reshape group_Dis_NoTer_hour from wide to long format:
#   - id_vars    : columns kept as identifiers on every output row (index_name)
#   - value_vars : columns whose values are stacked into one column (columns_name)
#   - 'Hour'     : new column holding the label of the original column
#   - 'DispNoTer': new column holding the value taken from that original column
df_unpivot_Disp_NoTer = group_Dis_NoTer_hour.melt(
    id_vars=index_name,
    value_vars=columns_name,
    var_name='Hour',
    value_name='DispNoTer',
)

In [None]:
# Inspect the long-format frame (row count, columns and dtypes).
df_unpivot_Disp_NoTer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72312 entries, 0 to 72311
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       72312 non-null  datetime64[ns]
 1   Year       72312 non-null  int32         
 2   Month      72312 non-null  int32         
 3   Day        72312 non-null  int32         
 4   Hour       72312 non-null  object        
 5   DispNoTer  72312 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int32(3), object(1)
memory usage: 2.5+ MB


In [None]:
# Preview the first 10 rows of the long-format frame.
df_unpivot_Disp_NoTer.head(10)

Unnamed: 0,Date,Year,Month,Day,Hour,DispNoTer
0,2016-01-01,2016,1,1,1,8749000.0
1,2016-01-02,2016,1,2,1,8886450.0
2,2016-01-03,2016,1,3,1,8899000.0
3,2016-01-04,2016,1,4,1,8989000.0
4,2016-01-05,2016,1,5,1,8834683.0
5,2016-01-06,2016,1,6,1,8788000.0
6,2016-01-07,2016,1,7,1,8891416.0
7,2016-01-08,2016,1,8,1,9114783.0
8,2016-01-09,2016,1,9,1,9108232.0
9,2016-01-10,2016,1,10,1,9227900.0


In [None]:
# Average the long-format frame per 'Date'; for DispNoTer this is the mean over the
# rows (hours) of each date. No column subset is selected, so the other columns
# (Year, Month, Day, Hour) are averaged as well.
group_disp_NoTer_day=df_unpivot_Disp_NoTer.groupby('Date').mean()
# Display the resulting daily frame.
group_disp_NoTer_day

Unnamed: 0_level_0,Year,Month,Day,Hour,DispNoTer
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-01,2016.0,1.0,1.0,12.5,8.762403e+06
2016-01-02,2016.0,1.0,2.0,12.5,8.949453e+06
2016-01-03,2016.0,1.0,3.0,12.5,8.916347e+06
2016-01-04,2016.0,1.0,4.0,12.5,9.002401e+06
2016-01-05,2016.0,1.0,5.0,12.5,8.855396e+06
...,...,...,...,...,...
2024-03-27,2024.0,3.0,27.0,12.5,8.782151e+06
2024-03-28,2024.0,3.0,28.0,12.5,8.796540e+06
2024-03-29,2024.0,3.0,29.0,12.5,8.674165e+06
2024-03-30,2024.0,3.0,30.0,12.5,9.167520e+06


In [None]:
# Inner-join the daily non-thermal averages onto the model table by 'Date' and keep
# only the model columns, now including 'DispNoTer'.
df_DataModel = pd.merge(
    df_DataModel,
    group_disp_NoTer_day,
    how='inner',
    on='Date',
).loc[:, ['Date', 'Demanda', 'VolUtil', 'Aportes', 'PrecioB', 'PrecioO', 'DispTer', 'DispNoTer']]
df_DataModel

Unnamed: 0,Date,Demanda,VolUtil,Aportes,PrecioB,PrecioO,DispTer,DispNoTer
0,2016-01-01,5.885745e+06,1.055541e+10,63496200.0,407.565972,414.206983,3.978219e+06,8.762403e+06
1,2016-01-02,6.352403e+06,1.053737e+10,55776100.0,407.627913,418.642247,3.987585e+06,8.949453e+06
2,2016-01-03,6.285871e+06,1.052841e+10,61744800.0,499.329203,426.290040,3.930220e+06,8.916347e+06
3,2016-01-04,7.226923e+06,1.050766e+10,70137500.0,535.140967,414.717455,3.882154e+06,9.002401e+06
4,2016-01-05,7.485122e+06,1.047468e+10,64121400.0,573.129733,427.959078,3.891292e+06,8.855396e+06
...,...,...,...,...,...,...,...,...
3008,2024-03-27,9.263611e+06,5.585317e+09,72693500.0,927.814510,607.823857,4.932176e+06,8.782151e+06
3009,2024-03-28,8.391533e+06,5.555902e+09,65809800.0,851.265372,659.285783,5.252463e+06,8.796540e+06
3010,2024-03-29,8.072650e+06,5.530486e+09,67556600.0,812.674193,658.555177,4.967830e+06,8.674165e+06
3011,2024-03-30,8.690871e+06,5.488068e+09,66343100.0,754.784805,635.040607,5.024550e+06,9.167520e+06


In [None]:
# Write the DataFrame to a ".csv" file.
# NOTE(review): no `index=` argument is passed, so pandas' default applies (the row
# index is written as the first column) - confirm that is intended.
df_DataModel.to_csv('Data_XM.csv')