In [1]:
import pandas as pd
import numpy as np

# Setting a seed for reproducibility
np.random.seed(42)

# Generate a date range for one year
dates = pd.date_range(start="2021-01-01", end="2023-12-31", freq="D")

# Simulating data
data = {
    "Date": dates,
    "Energy_3d": np.random.normal(1000, 200, len(dates)),  # Simulated energy readings after 3 days
    "Energy_10d": np.random.normal(1200, 200, len(dates)),  # Updated readings after 10 days
    "Energy_50d": np.random.normal(1500, 200, len(dates)),  # Final true readings after 50 days
    "Temp": np.random.normal(20, 5, len(dates)),  # Daily temperature
    "Precip": np.random.uniform(0, 20, len(dates)),  # Daily precipitation
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Calculating additional metrics
df["Temp_7d_avg"] = df["Temp"].rolling(window=7).mean()
df["Precip_7d_avg"] = df["Precip"].rolling(window=7).mean()
df["Last_Year_Usage"] = np.random.normal(1500, 200, len(dates))  # Assuming last year's usage pattern was similar
df["30d_MA"] = df["Energy_3d"].rolling(window=30).mean()
df["8w_DayOfWeek_Avg"] = df.groupby(df["Date"].dt.dayofweek)["Energy_3d"].transform(lambda x: x.rolling(window=8, min_periods=1).mean())

# Statistical calculations for anomaly detection (placeholders for now)
df["Mean_Diff"] = np.random.normal(0, 10, len(dates))  # Difference from some expected value
df["Std_Dev_Diff"] = np.random.normal(5, 2, len(dates))  # Standard deviation of the difference
df["Anomaly_Flag"] = np.where(np.random.uniform(0, 1, len(dates)) < 0.05, 1, 0)  # Random anomalies

# Displaying the first few rows of the dataset to verify
df.head()


Unnamed: 0,Date,Energy_3d,Energy_10d,Energy_50d,Temp,Precip,Temp_7d_avg,Precip_7d_avg,Last_Year_Usage,30d_MA,8w_DayOfWeek_Avg,Mean_Diff,Std_Dev_Diff,Anomaly_Flag
0,2021-01-01,1099.342831,1183.856684,1180.37513,27.517099,17.359992,,,1340.734671,,1099.342831,18.075903,3.206667,0
1,2021-01-02,972.34714,1215.727038,1592.434533,18.3494,16.563717,,,1533.562649,,972.34714,2.995325,-1.062388,0
2,2021-01-03,1129.537708,800.359863,1904.861925,18.941665,12.521094,,,1816.292745,,1129.537708,-5.762619,6.02727,0
3,2021-01-04,1304.605971,1383.265535,1227.365199,16.86133,1.505574,,,1438.068038,,1304.605971,9.73366,1.466803,0
4,2021-01-05,953.169325,1269.297695,1537.941234,18.559806,2.247941,,,1494.649751,,953.169325,6.238959,3.269981,0


In [2]:
df.tail()

Unnamed: 0,Date,Energy_3d,Energy_10d,Energy_50d,Temp,Precip,Temp_7d_avg,Precip_7d_avg,Last_Year_Usage,30d_MA,8w_DayOfWeek_Avg,Mean_Diff,Std_Dev_Diff,Anomaly_Flag
1090,2023-12-27,985.679748,1506.68674,1327.659386,23.230031,13.315305,19.013341,11.349398,1444.015092,983.515671,1057.246678,-17.055516,7.558168,0
1091,2023-12-28,992.555553,915.20857,1506.006296,8.141703,10.493446,17.15982,10.540371,1401.79406,1002.257807,1014.079545,6.330367,3.818992,0
1092,2023-12-29,1145.525909,1146.669534,1069.523291,30.210348,12.787496,19.862885,10.387504,1419.605806,999.450232,1082.042218,0.692699,8.429108,0
1093,2023-12-30,1010.389177,1114.151116,1675.291129,21.4764,6.759976,20.463829,9.765012,1685.948045,1011.39463,1041.409487,-7.707874,7.278543,0
1094,2023-12-31,1146.528015,1317.710654,1187.701356,16.117504,18.137332,20.689274,10.357409,1413.591237,1018.69517,976.30448,14.34408,4.051106,0


In [3]:
df.to_csv('energy.csv', index=False) 