In [5]:
import numpy as np
import pandas as pd

# --- Configuration ---
n_sensors = 10
duration_sec = 60  # 1 minute of high-res data
base_freq = 60.0   # 60 Hz Grid

# Reproducibility: every random draw below comes from this seeded generator, so
# re-running the cell (or Restart Kernel -> Run All) rebuilds the same dataset.
SEED = 42
rng = np.random.default_rng(SEED)

# Sampling model: nominal rate plus per-sample timing jitter.
SAMPLE_RATE_HZ = 50
MEAN_INTERVAL_MS = 1000 / SAMPLE_RATE_HZ  # 20 ms between readings on average
INTERVAL_JITTER_MS = 5                    # std-dev of the gap between readings
MIN_INTERVAL_MS = 1                       # floor so timestamps strictly increase

sensors_data = {}

print("Generating high-frequency sensor streams...")

for i in range(n_sensors):
    sensor_id = f"Substation_{i+1:02d}"

    # 1. Randomized Start Times (Desync): each sensor starts 0-499 ms late.
    start_offset = pd.Timedelta(milliseconds=int(rng.integers(0, 500)))
    start_time = pd.Timestamp("2025-01-01 10:00:00") + start_offset

    # 2. Randomized Sampling Rates (Jitter): normally distributed gaps between
    #    consecutive readings, clipped from below so no gap is zero or negative.
    n_samples = int(duration_sec * SAMPLE_RATE_HZ)
    intervals = rng.normal(MEAN_INTERVAL_MS, INTERVAL_JITTER_MS, n_samples)
    intervals = np.maximum(MIN_INTERVAL_MS, intervals)

    # Accumulate the gaps as plain floats first, then convert the running total
    # to timedeltas and shift by the sensor's own start time.
    time_deltas = pd.to_timedelta(intervals.cumsum(), unit="ms")
    timestamps = start_time + time_deltas

    # 3. Simulate Physics
    voltage = 230 + rng.normal(0, 0.5, n_samples)

    # The load follows a sine over an evenly spaced 0..10 axis (indexed by sample
    # number, not by the jittered timestamps) plus unit-variance noise.
    t = np.linspace(0, 10, n_samples)
    current = 100 + (10 * np.sin(t)) + rng.normal(0, 1, n_samples)

    frequency = rng.normal(base_freq, 0.01, n_samples)

    # 4. Inject the "Cascading Failure"
    #    The first five sensors fail 100 samples apart starting at sample 2000;
    #    the rest are scheduled 300 samples apart.
    if i < 5:
        fault_start = 2000 + (i * 100)
    else:
        fault_start = 2000 + (i * 300)

    # NOTE(review): with the current configuration (3000 samples per sensor) the
    # sensors with i >= 5 get fault_start >= 3500, so this guard skips them and
    # they stay healthy for the whole window - confirm that this is intended.
    if fault_start < n_samples:
        n_faulty = n_samples - fault_start
        # Frequency sags linearly by up to 0.5 Hz; voltage gets extra noise.
        frequency[fault_start:] -= np.linspace(0, 0.5, n_faulty)
        voltage[fault_start:] += rng.normal(0, 5, n_faulty)

    # 5. Pack into DataFrame
    df = pd.DataFrame({
        "Timestamp": timestamps,
        "Sensor_ID": sensor_id,
        "Voltage": voltage,
        "Current": current,
        "Frequency": frequency
    })

    sensors_data[sensor_id] = df

# Concatenate all sensors into one stream ordered by time.
raw_grid_data = (
    pd.concat(list(sensors_data.values()))
    .sort_values("Timestamp")
    .reset_index(drop=True)
)

print("--- Raw Grid Data Generated ---")
print(f"Total Readings: {len(raw_grid_data)}")
print(raw_grid_data.head())

Generating high-frequency sensor streams...
--- Raw Grid Data Generated ---
Total Readings: 30000
                      Timestamp      Sensor_ID     Voltage     Current  \
0 2025-01-01 10:00:00.074285036  Substation_02  229.262926  100.906494   
1 2025-01-01 10:00:00.099889661  Substation_02  230.120468   97.994723   
2 2025-01-01 10:00:00.125917159  Substation_02  230.481674  100.993603   
3 2025-01-01 10:00:00.143841282  Substation_02  230.186525  101.561265   
4 2025-01-01 10:00:00.145594988  Substation_04  229.929164  100.257909   

   Frequency  
0  59.992546  
1  60.008972  
2  60.002927  
3  60.002979  
4  60.003860  


In [7]:
# Persist the merged readings to disk for later processing steps; the
# positional row index is left out of the file.
raw_grid_data.to_csv(
    "raw_grid_data.csv",
    index=False,
)