In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [11]:
#base for the autoencoder

class Autoencoder(nn.Module):
    def __init__(self, input_size = 4, output_size = 1):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 6),
            nn.ReLU(),
            nn.Linear(6, 3)
        )

        self.decoder = nn.Sequential(
            nn.Linear(3, 6),
            nn.ReLU(),
            nn.Linear(6, 8),
            nn.ReLU(),
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, output_size)
        )

    def forward(self, input):
        encoded = self.encoder(input)
        decoded = self.decoder(encoded)
        return decoded

In [12]:
# reading data

myDF = pd.read_excel("Copy of Folds5x2_pp.xlsx")
cleanDF = myDF.fillna(0)

In [13]:
#assigning columns

output_column = cleanDF.columns[-1]
input_columns = [col for col in cleanDF.columns if col != output_column]

print("Input columns:", input_columns)
print("Output column:", output_column)

Input columns: ['AT', 'V', 'AP', 'RH']
Output column: PE


In [14]:
#extracting data from columns

input_data = cleanDF[input_columns].values
output_data = cleanDF[output_column].values

print(f"Input shape: {input_data.shape}, Output shape: {output_data.shape}")
print(f"\nFirst 5 rows of real data:")
print(cleanDF[input_columns + [output_column]].head())

Input shape: (9568, 4), Output shape: (9568,)

First 5 rows of real data:
      AT      V       AP     RH      PE
0  14.96  41.76  1024.07  73.17  463.26
1  25.18  62.96  1020.04  59.08  444.37
2   5.11  39.40  1012.16  92.14  488.56
3  20.86  57.32  1010.24  76.64  446.48
4  10.82  37.50  1009.23  96.62  473.90


In [15]:
#normalizing data

scalar_input = StandardScaler().fit_transform(input_data)
scalar_output = StandardScaler().fit_transform(output_data.reshape(-1, 1)).flatten()

print(f"Original data:")
for i, col in enumerate(input_columns):
    print(f"{col}: {input_data[:, i].min():.2f} to {input_data[:, i].max():.2f}")
print(f"{output_column}: {output_data.min():.2f} to {output_data.max():.2f}")

print(f"\nNormalized data:")
for i, col in enumerate(input_columns):
    print(f"{col}: {scalar_input[:, i].min():.2f} to {scalar_input[:, i].max():.2f}")
print(f"{output_column}: {scalar_output.min():.2f} to {scalar_output.max():.2f}")

print(f"\nnormalized input shape: {scalar_input.shape}, normalized output shape: {scalar_output.shape}")

Original data:
AT: 1.81 to 37.11
V: 25.36 to 81.56
AP: 992.89 to 1033.30
RH: 25.56 to 100.16
PE: 420.26 to 495.76

Normalized data:
AT: -2.39 to 2.34
V: -2.28 to 2.14
AP: -3.43 to 3.37
RH: -3.27 to 1.84
PE: -2.00 to 2.43

normalized input shape: (9568, 4), normalized output shape: (9568,)


In [16]:
# splitting data into training and testing sets

input_train, input_test, output_train, output_test = train_test_split(
    scalar_input, scalar_output, test_size=0.2, random_state=42
)

input_train_tensor = torch.FloatTensor(input_train)
input_test_tensor = torch.FloatTensor(input_test)
output_train_tensor = torch.FloatTensor(output_train).unsqueeze(1)
output_test_tensor = torch.FloatTensor(output_test).unsqueeze(1)

model = Autoencoder(input_size=4, output_size=1)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [17]:
# training

no_of_epochs = 500
train_losses = []

for epoch in range(no_of_epochs):
    model.train()
    optimizer.zero_grad()
    
    predictions = model(input_train_tensor)
    
    loss = criterion(predictions, output_train_tensor)
    
    loss.backward()
    optimizer.step()
    
    train_losses.append(loss.item())
    
    if ((epoch + 1) % (no_of_epochs / 10)) == 0:
        print(f'Epoch [{epoch + 1}/{no_of_epochs}], Loss: {loss.item():.4f}')

print(f"\nTraining completed!")
print(f"Final training loss: {train_losses[-1]:.4f}")

Epoch [50/500], Loss: 0.9332
Epoch [100/500], Loss: 0.4106
Epoch [150/500], Loss: 0.2045
Epoch [200/500], Loss: 0.0790
Epoch [250/500], Loss: 0.0646
Epoch [300/500], Loss: 0.0627
Epoch [350/500], Loss: 0.0620
Epoch [400/500], Loss: 0.0615
Epoch [450/500], Loss: 0.0611
Epoch [500/500], Loss: 0.0608

Training completed!
Final training loss: 0.0608


In [18]:
# testing

model.eval()
with torch.no_grad():
    test_predictions = model(input_test_tensor)
    test_loss = criterion(test_predictions, output_test_tensor)

print(f"Test Loss on Real Data: {test_loss.item():.4f}")

test_predictions_numpy = test_predictions.numpy().flatten()
output_test_numpy = output_test_tensor.numpy().flatten()

mse = np.mean((test_predictions_numpy - output_test_numpy) ** 2)
mae = np.mean(np.abs(test_predictions_numpy - output_test_numpy))
rmse = np.sqrt(mse)

print(f"\nTest Metrics on Real Data:")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

print(f"\nSample Predictions vs Actual on Real Data:")
for i in range(10):
    print(f"Sample {i+1}: Predicted={test_predictions_numpy[i]:.3f}, Actual={output_test_numpy[i]:.3f}")

Test Loss on Real Data: 0.0582

Test Metrics on Real Data:
Mean Squared Error (MSE): 0.0582
Mean Absolute Error (MAE): 0.1888
Root Mean Squared Error (RMSE): 0.2413

Sample Predictions vs Actual on Real Data:
Sample 1: Predicted=-0.008, Actual=0.053
Sample 2: Predicted=-0.920, Actual=-1.058
Sample 3: Predicted=-1.170, Actual=-0.802
Sample 4: Predicted=-0.907, Actual=-1.170
Sample 5: Predicted=1.495, Actual=1.623
Sample 6: Predicted=-0.960, Actual=-1.072
Sample 7: Predicted=-0.361, Actual=-0.110
Sample 8: Predicted=-1.165, Actual=-1.122
Sample 9: Predicted=-1.176, Actual=-1.256
Sample 10: Predicted=1.118, Actual=0.709
