In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the two CSV files and stack them row-wise into one frame with a fresh
# 0..n-1 index.
df_1 = pd.read_csv('Residential_2.csv')
df_2 = pd.read_csv('Residential_4.csv')

data_frame = pd.concat([df_1, df_2], axis=0, ignore_index=True)
print(data_frame)

# Impute missing energy readings with the column mean. The filled column is
# assigned back rather than calling fillna(inplace=True) on the column
# selection: that chained in-place form is deprecated and leaves the frame
# unchanged when pandas Copy-on-Write is active.
mean_energy = data_frame['energy_kWh'].mean()
data_frame['energy_kWh'] = data_frame['energy_kWh'].fillna(mean_energy)

# Feature: hour of the day; target: energy used. The double brackets keep
# both as 2-D arrays of shape (n_rows, 1).
X = data_frame[['hour']].values
y = data_frame[['energy_kWh']].values

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the NumPy arrays to float32 tensors. The targets are already column
# vectors, so view(-1, 1) only makes that shape explicit.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# Define the LSTM model
class LSTMModel(nn.Module):
    """Single-layer LSTM followed by a linear head.

    The network consumes batch-first sequences and produces one prediction
    per sequence, computed from the LSTM output at the final time step.

    Args:
        input_size: Number of features at each time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values produced by the linear head.
    """

    def __init__(self, input_size: int, hidden_size: int, output_size: int) -> None:
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: inputs and outputs are (batch, seq_len, features).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the head's prediction for each sequence in ``x``.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # The final (h_n, c_n) state is not needed; only the per-step outputs.
        per_step_out, _final_state = self.lstm(x)
        # Keep only the last time step as the summary of the whole sequence.
        last_step = per_step_out[:, -1, :]
        return self.fc(last_step)

# Model dimensions: one input feature per column of X_train, a 64-unit hidden
# state, and a single regression output.
input_size = X_train.shape[1]
hidden_size = 64
output_size = 1

model = LSTMModel(input_size, hidden_size, output_size)

# Mean-squared-error loss optimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training configuration
num_epochs = 100
batch_size = 32
num_train = X_train.size(0)  # loop-invariant sample count

model.train()
for epoch in range(num_epochs):
    # Visit the training samples in a fresh random order every epoch.
    permutation = torch.randperm(num_train)
    running_loss = 0.0

    for i in range(0, num_train, batch_size):
        indices = permutation[i:i + batch_size]
        batch_x, batch_y = X_train[indices], y_train[indices]

        # Forward pass; unsqueeze(1) inserts a sequence dimension of length 1
        # so the input is (batch, 1, features) as the batch-first LSTM expects.
        outputs = model(batch_x.unsqueeze(1))

        # Compute the loss
        loss = criterion(outputs, batch_y)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight each batch's mean loss by its size so that the final, possibly
        # smaller, batch does not skew the epoch average.
        running_loss += loss.item() * batch_x.size(0)

    # Report the mean loss over the whole epoch rather than the loss of
    # whichever mini-batch happened to come last.
    epoch_loss = running_loss / num_train
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}')

# Evaluation on the test set: switch to eval mode and disable gradient tracking.
model.eval()
with torch.no_grad():
    test_outputs = model(X_test.unsqueeze(1))
    test_loss = criterion(test_outputs, y_test)
    print(f'Test Loss: {test_loss.item():.4f}')

             date  hour  energy_kWh  kmeans_3  kmeans_4  kmeans_5  kmeans_6
0      2016-06-09     0        0.12         1         1         4         3
1      2016-06-09     1        0.11         1         1         4         3
2      2016-06-09     2        0.11         1         1         4         3
3      2016-06-09     3        0.12         1         1         4         3
4      2016-06-09     4        0.12         1         1         4         3
...           ...   ...         ...       ...       ...       ...       ...
56539  2018-01-29    19        2.20         1         1         4         3
56540  2018-01-29    20        2.28         1         1         4         3
56541  2018-01-29    21        2.04         1         1         4         3
56542  2018-01-29    22        1.66         1         1         4         3
56543  2018-01-29    23        1.26         1         1         4         3

[56544 rows x 7 columns]
Epoch [1/100], Loss: 0.4590
Epoch [2/100], Loss: 0.6345
Epoch 