In [5]:
import pandas as pd
import numpy as np
df = pd.read_csv("/Users/cgp/Portfolio/WeatherDemand/wede/data/silver/weather_features.csv", parse_dates=['datetime', 'sunrise', 'sunset'])
df.shape

(26520, 21)

In [6]:
def inject_anomalies(series, pct=0.05):
    """Inject anomalies into demand series"""
    n_anomalies = int(len(series) * pct)
    idx = np.random.choice(len(series), n_anomalies, replace=False)
    anomaly_flag = np.zeros(len(series), dtype=int)
    anomaly_flag[idx] = 1
    # Spike or drop
    series = series.copy()
    for i in idx:
        if np.random.random() > 0.5:
            series.iloc[i] *= np.random.uniform(1.5, 2.5)  # Spike
        else:
            series.iloc[i] *= np.random.uniform(0.2, 0.5)  # Drop
    return series.astype(int), anomaly_flag

In [7]:
def generate_delivery_demand(df):
    np.random.seed(42)
    base = 120
    precip_boost = df['precipitation_flag'] * 15
    cold_boost = np.where(df['temperature'] < 5, 25, 0)
    hot_boost = np.where(df['temperature'] > 25, 20, 0)
    lunch_boost = np.where(df['hour'].isin([11, 12, 13]), 30, 0)
    dinner_boost = np.where(df['hour'].isin([17, 18, 19, 20]), 45, 0)
    weekend_boost = df['is_weekend'] * 25
    bad_weather_boost = df['bad_weather_combo'] * 35  # heavy rain + low temp == increased deliveries
    demand = (base + precip_boost + cold_boost + hot_boost + lunch_boost 
              + dinner_boost + weekend_boost + bad_weather_boost
              + np.random.normal(0, 15, len(df))).clip(30, 400).astype(int)
    return demand

In [8]:
def generate_energy_demand(df):
    np.random.seed(43)
    base = 450
    is_wknd = df['is_weekend'] == 1
    heating = np.where(df['temperature'] < 10, (10 - df['temperature']) * 18, 0)
    cooling = np.where(df['temperature'] > 22, (df['temperature'] - 22) * 25, 0)
    dark_boost = np.where(df['daylight_duration'] < 10, 40, 0)
    business_hours = np.where(
        (df['hour'].isin([9,10,11,12,13,14,15,16,17])) & (~is_wknd),
        100, 0
    )
    weekend_pattern = np.where(
        (is_wknd) & (df['hour'].isin([8,9,10,18,19,20,21])),
        40,
        np.where(is_wknd, -30, 0)
    )
    winter_boost = np.where(df['season'] == 0, 50, 0)
    demand = (base + heating + cooling + dark_boost + business_hours 
              + weekend_pattern + winter_boost 
              + np.random.normal(0, 30, len(df))).clip(200, 1200).astype(int)
    return demand

In [9]:
def generate_retail_demand(df):
    np.random.seed(44)
    base = 200
    is_wknd = df['is_weekend'] == 1
    precip_penalty = df['precipitation_flag'] * -20
    wind_penalty = np.where(df['wind_speed'] > 25, -30, 0)
    bad_weather_penalty = df['bad_weather_combo'] * -50 
    nice_weather = np.where(
        (df['temperature'] > 15) & (df['temperature'] < 25) & (df['precipitation_flag'] == 0),
        40, 0
    )
    store_hours = np.where(df['hour'].isin([10,11,12,13,14,15,16,17,18,19]), 60, -80)
    weekend_boost = is_wknd.astype(int) * 50
    lunch_rush = np.where(df['hour'].isin([12, 13]), 25, 0)
    summer_boost = np.where(df['season'] == 2, 30, 0)
    demand = (base + precip_penalty + wind_penalty + bad_weather_penalty + nice_weather 
              + store_hours + weekend_boost + lunch_rush + summer_boost 
              + np.random.normal(0, 20, len(df))).clip(0, 500).astype(int)
    return demand


In [10]:
def generate_ecommerce_demand(df):
    np.random.seed(45)
    base = 150
    is_wknd = df['is_weekend'] == 1
    precip_boost = df['precipitation_flag'] * 12
    cold_boost = np.where(df['temperature'] < 5, 30, 0)
    bad_weather_boost = df['bad_weather_combo'] * 40  # bad weather = more online shopping
    evening_boost = np.where(df['hour'].isin([19, 20, 21, 22, 23]), 35, 0)
    weekend_boost = is_wknd.astype(int) * 20
    lunch_boost = np.where(df['hour'].isin([12, 13]), 15, 0)
    winter_boost = np.where(df['season'] == 0, 40, 0)
    demand = (base + precip_boost + cold_boost + bad_weather_boost + evening_boost 
              + weekend_boost + lunch_boost + winter_boost 
              + np.random.normal(0, 18, len(df))).clip(50, 400).astype(int)
    return demand

In [11]:
df['delivery_demand'] = generate_delivery_demand(df)
df['energy_demand'] = generate_energy_demand(df)
df['retail_demand'] = generate_retail_demand(df)
df['ecommerce_demand'] = generate_ecommerce_demand(df)

# Inject anomalies (5% each)
df['delivery_demand'], df['delivery_anomaly'] = inject_anomalies(df['delivery_demand'])
df['energy_demand'], df['energy_anomaly'] = inject_anomalies(df['energy_demand'])
df['retail_demand'], df['retail_anomaly'] = inject_anomalies(df['retail_demand'])
df['ecommerce_demand'], df['ecommerce_anomaly'] = inject_anomalies(df['ecommerce_demand'])


  series.iloc[i] *= np.random.uniform(1.5, 2.5)  # Spike
  series.iloc[i] *= np.random.uniform(1.5, 2.5)  # Spike
  series.iloc[i] *= np.random.uniform(0.2, 0.5)  # Drop
  series.iloc[i] *= np.random.uniform(1.5, 2.5)  # Spike


In [15]:
print(f"Rows: {len(df)} | Columns: {len(df.columns)}\nAnomalies: Delivery={df['delivery_anomaly'].sum()}, Energy={df['energy_anomaly'].sum()}, Retail={df['retail_anomaly'].sum()}, Ecommerce={df['ecommerce_anomaly'].sum()}")

Rows: 26520 | Columns: 29
Anomalies: Delivery=1326, Energy=1326, Retail=1326, Ecommerce=1326


In [14]:
df.to_csv("/Users/cgp/Portfolio/WeatherDemand/wede/data/gold/demand_forecast_data.csv", index=False)