In [5]:
import numpy as np
import pandas as pd

# --- Simulation configuration ---
# Fixed seed so that every run of the cell generates the same dataset.
np.random.seed(42)
n_years = 3
# Lowercase "h" is the hourly offset alias. The uppercase "H" spelling is
# deprecated in recent pandas releases and emits a FutureWarning.
freq = "h"
# A year is approximated as 365 days, so the index holds 24 * 365 * n_years
# hourly steps starting at 2020-01-01 00:00.
fechas = pd.date_range("2020-01-01", periods=int(24 * 365 * n_years), freq=freq)
subregiones = ["Norte", "Centro", "Sur", "Occidente", "Oriente"]

# --- Funciones auxiliares ---
def gen_temp(series_index, base=18, amp_anual=12, ruido=2):
    """Simulate a temperature series with annual and daily cycles.

    The result is the sum of a constant level, a sinusoid with a period of
    24 * 365 samples, a sinusoid with a period of 24 samples (amplitude 5,
    phase-shifted by pi / 3) and Gaussian noise drawn from NumPy's global
    random state.

    Parameters
    ----------
    series_index : sized
        Only its length is used; one value is produced per element.
    base : float
        Constant level added to every sample.
    amp_anual : float
        Amplitude of the long (annual) sinusoid.
    ruido : float
        Standard deviation of the Gaussian noise.

    Returns
    -------
    numpy.ndarray
        One simulated value per element of ``series_index``.
    """
    n_muestras = len(series_index)
    pasos = np.arange(n_muestras)
    ciclo_anual = amp_anual * np.sin(2 * np.pi * pasos / (24 * 365))
    ciclo_diario = 5 * np.sin(2 * np.pi * pasos / 24 - np.pi / 3)
    perturbacion = np.random.normal(0, ruido, n_muestras)
    return base + ciclo_anual + ciclo_diario + perturbacion

def gen_humidity(temp):
    """Generate humidity that is inversely related to temperature.

    Humidity starts at 90 and drops by 2.5 for every unit of temperature
    above 15. Gaussian noise with standard deviation 5 (from NumPy's global
    random state) is added, and the result is clipped to the range [20, 100].

    Parameters
    ----------
    temp : array-like
        Temperature values; must support arithmetic and ``len``.

    Returns
    -------
    array-like
        Clipped humidity values, one per temperature sample.
    """
    perturbacion = np.random.normal(0, 5, len(temp))
    sin_acotar = 90 - 2.5 * (temp - 15) + perturbacion
    return np.clip(sin_acotar, 20, 100)

def gen_demanda(temp, humidity, nivel=200, sensibilidad=-2.0, trend=0.0005, ruido=8,
                timestamps=None):
    """Generate a demand series that depends on temperature and humidity.

    The signal is built from a linearly growing base level, a quadratic
    temperature term centred on 19, a linear humidity term centred on 60 and
    a sinusoid with a period of 24 samples. The sum is scaled by 0.93 on
    Saturdays and Sundays, Gaussian noise from NumPy's global random state is
    added, and the result is floored at 50.

    Parameters
    ----------
    temp : numpy.ndarray
        Temperature values, one per time step.
    humidity : numpy.ndarray
        Humidity values, same length as ``temp``.
    nivel : float
        Base demand level at the first time step.
    sensibilidad : float
        Coefficient of the quadratic temperature term.
    trend : float
        Per-step linear growth, added on top of a fixed 0.02 per-step slope.
    ruido : float
        Standard deviation of the Gaussian noise.
    timestamps : array-like of datetimes, optional
        Timestamps of the samples, used only to derive the day of week.
        Defaults to an hourly range starting at 2020-01-01 00:00.

    Returns
    -------
    numpy.ndarray
        Demand values, one per time step, never below 50.

    Raises
    ------
    ValueError
        If ``timestamps`` is given and its length differs from ``temp``.
    """
    n = len(temp)
    t = np.arange(n)
    if timestamps is None:
        # Lowercase "h" is the hourly alias; the uppercase "H" spelling is
        # deprecated in recent pandas releases and emits a FutureWarning.
        timestamps = pd.date_range("2020-01-01", periods=n, freq="h")
    elif len(timestamps) != n:
        raise ValueError(
            f"timestamps has {len(timestamps)} entries but temp has {n}"
        )
    dow = np.asarray(pd.DatetimeIndex(timestamps).dayofweek)
    # Monday-Friday keep the full load; weekends are scaled down.
    semanal = np.where(dow < 5, 1.0, 0.93)
    carga_temp = sensibilidad * (temp - 19) ** 2
    carga_hum = 0.5 * (humidity - 60)  # slight increase with high humidity
    # NOTE(review): the fixed 0.02 * t slope is 40x the default `trend`, so the
    # `trend` argument barely affects the result -- confirm this is intended.
    base = nivel + 0.02 * t + trend * t
    y = (base + carga_temp + carga_hum + 10 * np.sin(2 * np.pi * t / 24)) * semanal \
        + np.random.normal(0, ruido, n)
    return np.clip(y, 50, None)

# --- Dataset generation ---
# Build one frame per subregion. Each subregion gets its own randomly shifted
# temperature base and demand level; the draws are made in the same order on
# every run so the seeded output stays reproducible.
registros = []
for sr in subregiones:
    temp_base = 18 + np.random.uniform(-1, 1)
    temp = gen_temp(fechas, base=temp_base)
    humidity = gen_humidity(temp)
    nivel_sr = 200 + np.random.uniform(-20, 20)
    demanda = gen_demanda(temp, humidity, nivel=nivel_sr)

    columnas = {
        "timestamp": fechas,
        "subregion": sr,
        "temp_c": temp,
        "humidity_pct": humidity,
        "demanda_mwh": demanda,
    }
    df_sr = pd.DataFrame(columnas)
    registros.append(df_sr)

data = pd.concat(registros, ignore_index=True)

# --- Calendar features and holidays ---
# Convert the timestamp column once and reuse it for every derived feature.
ts = pd.to_datetime(data["timestamp"])
data["hour"] = ts.dt.hour
data["dow"] = ts.dt.dayofweek
data["month"] = ts.dt.month

# Example: flag synthetic holidays (January 1st and December 25th).
dias_festivos = ["01-01", "12-25"]
data["holiday"] = ts.dt.strftime("%m-%d").isin(dias_festivos).astype(int)

# --- Save ---
output_path = "synthetic_energy_demand_with_covariates.csv"
data.to_csv(output_path, index=False)
print(f"✅ Archivo guardado: {output_path}")
print(data.head())


  fechas = pd.date_range("2020-01-01", periods=int(24 * 365 * n_years), freq=freq)
  dow = pd.Series(pd.date_range("2020-01-01", periods=len(temp), freq="H")).dt.dayofweek.values


✅ Archivo guardado: synthetic_energy_demand_with_covariates.csv
            timestamp subregion     temp_c  humidity_pct  demanda_mwh  hour  \
0 2020-01-01 00:00:00     Norte  11.195193     96.912983    92.021472     0   
1 2020-01-01 01:00:00     Norte  14.859958     86.646666   166.795309     1   
2 2020-01-01 02:00:00     Norte  15.824377     86.669321   191.872985     2   
3 2020-01-01 03:00:00     Norte  18.501837     76.085419   212.770663     3   
4 2020-01-01 04:00:00     Norte  16.621752     77.909265   218.678552     4   

   dow  month  holiday  
0    2      1        1  
1    2      1        1  
2    2      1        1  
3    2      1        1  
4    2      1        1  


In [6]:
# Display the combined dataset; the notebook renders a truncated preview.
data

Unnamed: 0,timestamp,subregion,temp_c,humidity_pct,demanda_mwh,hour,dow,month,holiday
0,2020-01-01 00:00:00,Norte,11.195193,96.912983,92.021472,0,2,1,1
1,2020-01-01 01:00:00,Norte,14.859958,86.646666,166.795309,1,2,1,1
2,2020-01-01 02:00:00,Norte,15.824377,86.669321,191.872985,2,2,1,1
3,2020-01-01 03:00:00,Norte,18.501837,76.085419,212.770663,3,2,1,1
4,2020-01-01 04:00:00,Norte,16.621752,77.909265,218.678552,4,2,1,1
...,...,...,...,...,...,...,...,...,...
131395,2022-12-30 19:00:00,Oriente,15.009672,84.223676,702.818795,19,4,12,0
131396,2022-12-30 20:00:00,Oriente,15.587074,87.029141,726.683144,20,4,12,0
131397,2022-12-30 21:00:00,Oriente,10.466471,98.498559,601.817990,21,4,12,0
131398,2022-12-30 22:00:00,Oriente,15.042848,92.660106,706.109341,22,4,12,0
