# Simple Regression NN

In [15]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import  DataLoader

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

from TestTrainData import TrainData, TestData

import sys

In [16]:
# Model imports
from MultiLabelRegression import MultiLabelRegression
from MultiLayerMultiLabelRegression import MultiLayerMultiLabelRegression

In [17]:
# Import modules from parent directory
sys.path.insert(0,'..')

In [108]:
from ipynb.fs.full.training_preprocessing import GetDataset

In [109]:
df = GetDataset()
#df

In [110]:
# set x equal to all columns except for the transaction_count and workforce_type_1
x = df.drop(['transaction_count', 'workforce_type_1','workforce_type_2', 'workforce_type_3','workforce_type_4'], axis=1)
# convert x to a 2d array
x = x.values.tolist()


# set y equal to the transaction_count and workforce_type_1 columns
y = df[['transaction_count', 'workforce_type_1', 'workforce_type_2', 'workforce_type_3','workforce_type_4']]
# convert y to an array
y = y.values.tolist()

In [111]:
TEST_SIZE = 0.33

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=TEST_SIZE)

In [112]:
train_data = TrainData(torch.IntTensor(X_train), torch.IntTensor(y_train))

test_data = TestData(torch.IntTensor(X_test))

  train_data = TrainData(torch.IntTensor(X_train), torch.IntTensor(y_train))
  test_data = TestData(torch.IntTensor(X_test))


In [113]:
#def train(model, optimizer, criterion, num_epochs, train_loader):
def train(model, scheduler, criterion, num_epochs, train_loader):
    for epoch in range(num_epochs):
        for inputs, targets in train_loader:
            # Forward pass
            outputs = model(inputs)
            # Compute loss
            loss = criterion(outputs, targets)
            # Backward pass and optimization
            #optimizer.zero_grad()
            # If loss is chattering, reduce learning rate TODO
            scheduler.optimizer.zero_grad()
            loss.backward()
            scheduler.optimizer.step()
        scheduler.step(loss)
        print(scheduler.optimizer.param_groups[0]['lr'])
            #optimizer.step()

            
        # Print progress
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))


In [114]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [115]:
# Hyperparameters
LEARNING_RATE = 0.001
EPOCHS = 1000

In [116]:
# Models
#model = MultiLayerMultiLabelRegression(11, 5)
model = MultiLabelRegression(11, 5)
model.to(device)

MultiLabelRegression(
  (linear): Linear(in_features=11, out_features=5, bias=True)
)

In [117]:
# Define the loss function and the optimizer
#criterion = nn.BCEWithLogitsLoss()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
#scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1) # step size = how many epochs to update lr after, gamma = how much to update lr by
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)
#optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)


In [99]:
train(model, scheduler, criterion, EPOCHS, train_data)
#train(model, optimizer, criterion, EPOCHS, train_data)

0.001
Epoch [1/1000], Loss: 28.4051
0.001
Epoch [2/1000], Loss: 16.2821
0.001
Epoch [3/1000], Loss: 12.6094
0.001
Epoch [4/1000], Loss: 4.0243
0.001
Epoch [5/1000], Loss: 2.5313
0.001
Epoch [6/1000], Loss: 2.2027
0.001
Epoch [7/1000], Loss: 2.2848
0.001
Epoch [8/1000], Loss: 2.3536
0.001
Epoch [9/1000], Loss: 2.4564
0.001
Epoch [10/1000], Loss: 2.5729
0.001
Epoch [11/1000], Loss: 2.7121
Epoch 00012: reducing learning rate of group 0 to 1.0000e-04.
0.0001
Epoch [12/1000], Loss: 2.8196
0.0001
Epoch [13/1000], Loss: 3.0449
0.0001
Epoch [14/1000], Loss: 3.1839
0.0001
Epoch [15/1000], Loss: 3.1347
0.0001
Epoch [16/1000], Loss: 3.0759
0.0001
Epoch [17/1000], Loss: 3.0193
Epoch 00018: reducing learning rate of group 0 to 1.0000e-05.
1e-05
Epoch [18/1000], Loss: 2.9670
1e-05
Epoch [19/1000], Loss: 2.0920
1e-05
Epoch [20/1000], Loss: 1.8328
1e-05
Epoch [21/1000], Loss: 1.7730
1e-05
Epoch [22/1000], Loss: 1.7715
1e-05
Epoch [23/1000], Loss: 1.7833
1e-05
Epoch [24/1000], Loss: 1.7949
1e-05
Epoch 

KeyboardInterrupt: 

## Evaluating model performance

In [None]:
Xtest = torch.FloatTensor(X_test)
test_loader = DataLoader(dataset=Xtest, batch_size=1)

# Set the model to eval mode and generate a list of predictions for the test data
model.eval()
y_pred_list = []
with torch.no_grad():
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        y_pred_list.append(y_test_pred.cpu().numpy())

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_pred_list

In [None]:
# create an array of only the first object of each array inside y_pred_list
transaction_count_predictions = [a[0] for a in y_pred_list]
transaction_count_actuals = [a[0] for a in y_test]

In [None]:
# Calculate the MSE and MAE between the predictions and the actual values
mse = mean_squared_error(y_test, y_pred_list)
mae = mean_absolute_error(y_test, y_pred_list)

print(f'MSE: {mse:.2f}, MAE: {mae:.2f}')


In [None]:
# Calculate the MSE and MAE between only the transaction_count prediction and the actual values
mse = mean_squared_error(transaction_count_actuals, transaction_count_predictions)
mae = mean_absolute_error(transaction_count_actuals, transaction_count_predictions)

print(f'MSE: {mse:.2f}, MAE: {mae:.2f}')

In [None]:
# Calculate SMAPE between actual and predicted values.
def smape(actual, predicted):
    actual = np.array(actual)
    predicted = np.array(predicted)
    smape_val = (100.0 / actual.size) * np.sum(2.0 * np.abs(predicted - actual) / (np.abs(actual) + np.abs(predicted)))
    return smape_val

In [None]:
smape = smape(y_pred_list, y_test)
#print(f'SMAPE: {smape:.2f}')
print(smape)

In [None]:
# Graph the predictions vs the actual values
import matplotlib.pyplot as plt

#plt.scatter(transaction_count_predictions, transaction_count_actuals, alpha=0.5)
plt.plot(transaction_count_actuals[400:600], label='Actual')
plt.plot(transaction_count_predictions[400:600], label='Predicted')
plt.xlabel('Data Point Index')
plt.ylabel('Value')
plt.title('Predictions vs Actual Values')
plt.legend()
plt.show()