In [2]:
import os
import zipfile
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import urllib.request

In [3]:
# Step 1: Download the Dataset
# Local paths: the folder the archive is unpacked into and the name the
# downloaded archive is stored under.
extract_folder = './extracted_data'
zip_file_path = 'LD2011_2014.txt.zip'
# Remote location of the zipped dataset.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip'

In [4]:
# Download the dataset if it hasn't been downloaded already.
# The archive is fetched under a temporary name and only renamed to its final
# name once the transfer has finished, so an interrupted download cannot leave
# a truncated file behind that would pass the existence check on the next run.
if not os.path.exists(zip_file_path):
    print("Downloading dataset...")
    partial_path = zip_file_path + '.part'
    try:
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, zip_file_path)
    finally:
        # Only present if the download or the rename failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download complete!")

# Step 2: Extract the ZIP File
os.makedirs(extract_folder, exist_ok=True)
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_folder)

# Step 3: Load the Specific File from the Extracted Folder
extracted_file_path = os.path.join(extract_folder, 'LD2011_2014.txt')
# Fail here with a clear message rather than later inside the CSV reader.
if not os.path.isfile(extracted_file_path):
    raise FileNotFoundError(
        f"Expected {extracted_file_path} after extracting {zip_file_path}"
    )

In [5]:
# Read the semicolon-separated file (comma as decimal mark) with its first
# column parsed as a datetime index, then average the readings within each hour.
df = (
    pd.read_csv(
        extracted_file_path,
        sep=';',
        decimal=',',
        index_col=0,
        parse_dates=True,
    )
    .resample('h')
    .mean()
)

In [6]:
# Append calendar columns derived from the datetime index: hour of day,
# day of week and month (added in that order).
df = df.assign(
    hour=df.index.hour,
    day_of_week=df.index.dayofweek,
    month=df.index.month,
)

In [7]:
# Display the first 5 rows
print(df.head().to_markdown(index=True, numalign="left", stralign="left"))

# Print the column names and their data types.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
df.info()

|                     | MT_001   | MT_002   | MT_003   | MT_004   | MT_005   | MT_006   | MT_007   | MT_008   | MT_009   | MT_010   | MT_011   | MT_012   | MT_013   | MT_014   | MT_015   | MT_016   | MT_017   | MT_018   | MT_019   | MT_020   | MT_021   | MT_022   | MT_023   | MT_024   | MT_025   | MT_026   | MT_027   | MT_028   | MT_029   | MT_030   | MT_031   | MT_032   | MT_033   | MT_034   | MT_035   | MT_036   | MT_037   | MT_038   | MT_039   | MT_040   | MT_041   | MT_042   | MT_043   | MT_044   | MT_045   | MT_046   | MT_047   | MT_048   | MT_049   | MT_050   | MT_051   | MT_052   | MT_053   | MT_054   | MT_055   | MT_056   | MT_057   | MT_058   | MT_059   | MT_060   | MT_061   | MT_062   | MT_063   | MT_064   | MT_065   | MT_066   | MT_067   | MT_068   | MT_069   | MT_070   | MT_071   | MT_072   | MT_073   | MT_074   | MT_075   | MT_076   | MT_077   | MT_078   | MT_079   | MT_080   | MT_081   | MT_082   | MT_083   | MT_084   | MT_085   | MT_086   | MT_087   | MT_088   | MT_089  

In [8]:
# (rows, columns) of the frame; in the recorded run this is the hourly data
# including the three calendar columns.
df.shape

(35065, 373)

In [15]:
# Normalize the data
# Each column is rescaled to the [0, 1] range. The scaler is kept so that
# predictions can be mapped back to the original units later.
# NOTE: `df` is rebound to the scaler's output here (by default a NumPy array,
# no longer a DataFrame), so this cell cannot be re-run without reloading `df`.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df)
df = scaler.transform(df)

# Note: the original approach of calling create_sequences on individual columns
# would treat each client's electricity consumption as independent of the
# others, so the model would only learn patterns within each client's own data.
# Here the windows are built over all columns jointly instead.

# Create sequences
def create_sequences(data, seq_length):
    """Build sliding-window samples for next-step prediction.

    Window ``i`` consists of rows ``i .. i + seq_length - 1`` of ``data`` and
    its target is row ``i + seq_length``.

    Args:
        data: Array-like whose first axis is time; any trailing axes
            (e.g. features) are carried through unchanged.
        seq_length: Number of consecutive time steps per input window.
            Must be at least 1.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays with shapes
        ``(n, seq_length, *data.shape[1:])`` and ``(n, *data.shape[1:])``,
        where ``n = max(len(data) - seq_length, 0)``. When ``data`` is too
        short for a single window, both arrays are empty but keep their
        trailing dimensions.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)

    # Row i of `window_idx` holds the time positions i, i+1, ..., i+seq_length-1,
    # so a single integer-array lookup gathers every window at once.
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    xs = data[window_idx]

    # Targets are the rows immediately following each window; copy so the
    # result never aliases the caller's array.
    ys = data[seq_length:seq_length + n_windows].copy()
    return xs, ys

seq_length = 24

# Window the scaled data and convert both results (inputs, then targets) to
# float32 PyTorch tensors.
X, y = (
    torch.tensor(windowed, dtype=torch.float32)
    for windowed in create_sequences(df, seq_length)
)

In [16]:
# Shapes of the scaled data (rows, columns), the input windows
# (samples, seq_length, columns) and the targets (samples, columns).
df.shape, X.shape, y.shape

((35065, 373), torch.Size([35041, 24, 373]), torch.Size([35041, 373]))

In [None]:
# Step 6: Define the RNN Model
class ElectricityRNN(nn.Module):
    """Single-layer RNN followed by a linear read-out of the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units in the recurrent layer.
        output_size: Number of values predicted per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()  # Initialize nn.Module
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return predictions of shape (batch, output_size).

        Args:
            x: Batched input of shape (batch, seq_len, input_size); the
                recurrent layer is built with ``batch_first=True``.
        """
        # Initial hidden state of shape (1, batch, hidden_size). It is created
        # with the input's dtype and device so the model keeps working after
        # being moved to another device or cast to another precision.
        h0 = torch.zeros(
            1, x.size(0), self.hidden_size, dtype=x.dtype, device=x.device
        )
        out, _ = self.rnn(x, h0)  # out: (batch, seq_len, hidden_size)
        out = self.fc(out[:, -1, :])  # Read out from the last time step only
        return out

# Seed PyTorch's RNG so the weight initialisation, and with it the training
# run, is repeatable after a kernel restart.
torch.manual_seed(42)

input_size = X.shape[2]  # Features per time step: every column of the scaled data (373 in the recorded run)
hidden_size = 50  # Number of hidden units in the RNN
output_size = y.shape[1]  # One next-step prediction per column (also 373 in the recorded run)
model = ElectricityRNN(input_size, hidden_size, output_size)

# Step 7: Train the RNN Model
# Mean-squared error on the scaled targets, optimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 50
model.train()  # Training mode; nothing inside the loop changes it
for epoch in range(num_epochs):
    # One full-batch gradient step: the whole of X is passed through at once.
    optimizer.zero_grad()
    output = model(X)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()

    # Report the loss after every 10th epoch.
    if epoch % 10 == 9:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Step 8: Make Predictions and Evaluate the Model
# Predictions are computed on the same windows X that were used for training.
model.eval()
with torch.no_grad():
    # No graph is recorded under no_grad, so the result converts to NumPy directly.
    predictions = model(X).numpy()

# Inverse transform to get actual values
y_actual = scaler.inverse_transform(y.numpy())
predictions = scaler.inverse_transform(predictions)

# Plot actual vs. predicted values for the first 5 columns, one panel each
fig, axes = plt.subplots(5, 1, figsize=(14, 10))
for i, ax in enumerate(axes):
    ax.plot(y_actual[:, i], label=f'Actual Client {i+1}')
    ax.plot(predictions[:, i], label=f'Predicted Client {i+1}')
    ax.legend()

plt.show()