In [None]:
import pandas as pd

# Load the uploaded CSV file to inspect its contents
file_path = 'synthetic_power_load_data_realistic.csv'
data = pd.read_csv(file_path)

# Parse timestamps up front: read_csv is called without date parsing, so
# 'Datetime' would otherwise stay a string column, and the EDA cell below
# relies on the `.dt` accessor and a real time axis.
data['Datetime'] = pd.to_datetime(data['Datetime'])

# Display basic information about the data and the first few rows.
# DataFrame.info() prints its summary and returns None, so it is called for
# its printed output only; the cell's displayed value is the head of the frame.
data.info()
data_head = data.head()
data_head


In [None]:
import matplotlib.pyplot as plt

# Step 2: Exploratory Data Analysis (EDA)

# The `.dt` accessor and a proper time axis both require real datetimes.
# pd.to_datetime leaves an already-parsed column unchanged, so this cell works
# whether or not the load cell parsed 'Datetime'.
timestamps = pd.to_datetime(data['Datetime'])

# Plot Load (kW) over time
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(timestamps, data['Load (kW)'], label='Load (kW)', color='blue', linewidth=0.5)
ax.set_title('Power Load Over Time')
ax.set_xlabel('Datetime')
ax.set_ylabel('Load (kW)')
ax.legend()
ax.grid(True)
plt.show()

# Analyze weekly load patterns
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
data['Day'] = timestamps.dt.day_name()
# groupby sorts day names alphabetically; reindex restores calendar order.
weekly_load = data.groupby('Day')['Load (kW)'].mean().reindex(day_order)

# Plot average load by day of the week
fig, ax = plt.subplots(figsize=(8, 4))
weekly_load.plot(kind='bar', color='orange', ax=ax)
ax.set_title('Average Load by Day of the Week')
ax.set_xlabel('Day of the Week')
ax.set_ylabel('Average Load (kW)')
ax.grid(axis='y')
plt.show()

# Hourly load patterns (uses the 'Hour' column as provided in the CSV)
hourly_load = data.groupby('Hour')['Load (kW)'].mean()

# Plot average load by hour of the day
fig, ax = plt.subplots(figsize=(8, 4))
hourly_load.plot(kind='bar', color='green', ax=ax)
ax.set_title('Average Load by Hour of the Day')
ax.set_xlabel('Hour of the Day')
ax.set_ylabel('Average Load (kW)')
ax.grid(axis='y')
plt.show()


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load the dataset
data = pd.read_csv("synthetic_power_load_data_realistic.csv")

# Convert the Datetime column to datetime format
data['Datetime'] = pd.to_datetime(data['Datetime'])

# shift(24) below is only a 24-hour lag when rows are in chronological order,
# so sort explicitly (stable, to keep the file order of any tied timestamps).
# NOTE(review): this still assumes one row per hour with no gaps - confirm
# against the data source.
data = data.sort_values('Datetime', kind='stable').reset_index(drop=True)

features_to_scale = ['Load (kW)', 'Temperature (°C)', 'Humidity (%)', 'Wind Speed (m/s)', 'Day of Week', 'Hour']
features = ['Lag_24', 'Temperature (°C)', 'Humidity (%)', 'Wind Speed (m/s)', 'Day of Week', 'Hour']
target = 'Load (kW)'
test_fraction = 0.2

# Locate the chronological train/test boundary BEFORE scaling. The rows that
# survive lagging + dropna are the ones that will be split further down, so
# splitting a lagged copy of the raw data yields the same boundary.
lagged_raw = data.assign(Lag_24=data['Load (kW)'].shift(24)).dropna()
train_rows, _ = train_test_split(lagged_raw, test_size=test_fraction, shuffle=False)
train_end = train_rows.index[-1]

# Normalize the relevant features.
# The scaler is fit only on rows up to the end of the training period so that
# test-period minima/maxima do not leak into the training data; as a
# consequence, scaled test values may fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(data.loc[:train_end, features_to_scale])  # .loc slice is end-inclusive
data[features_to_scale] = scaler.transform(data[features_to_scale])

# Create lag features for day-ahead forecasting (24-hour lag)
data['Lag_24'] = data['Load (kW)'].shift(24)

# Drop rows with NaN values caused by lagging
data = data.dropna()

# Select features and target
X = data[features]
y = data[target]

# Split the data into training and testing sets (80% train, 20% test).
# shuffle=False keeps the split chronological: the test set is the final 20%.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_fraction, random_state=42, shuffle=False
)

# Print shapes to confirm the split
print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)
