In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split

import sys


In [2]:
# Import modules from parent directory
sys.path.insert(0,'..')

In [4]:
from ipynb.fs.full.training_preprocessing import GetDataset

In [5]:
df = GetDataset()
df

Unnamed: 0,subtotal,transaction_count,rain,temperature,holiday,oil_price,workforce_type_1,workforce_type_2,workforce_type_3,workforce_type_4,time_idx,constant_group,year,month,day,hour
0,428.03,11.0,0.0,12.7,0,69.62,1.0,2.0,3.0,0.0,0,0,2021,5,17,10
1,324.78,15.0,0.0,13.4,0,69.62,1.0,2.0,3.0,1.0,1,0,2021,5,17,11
2,279.75,12.0,0.0,13.6,0,69.62,1.0,2.0,3.0,1.0,2,0,2021,5,17,12
3,1630.59,29.0,0.0,14.1,0,69.62,1.0,2.0,3.0,1.0,3,0,2021,5,17,13
4,1166.10,18.0,0.0,15.9,0,69.62,1.0,2.0,3.0,1.0,4,0,2021,5,17,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5062,4779.11,82.0,0.0,0.0,0,83.45,1.0,4.0,5.0,0.0,5062,0,2022,11,30,14
5063,2460.84,54.0,0.0,0.0,0,83.45,1.0,4.0,5.0,0.0,5063,0,2022,11,30,15
5064,2026.30,43.0,0.0,0.0,0,83.45,1.0,4.0,5.0,0.0,5064,0,2022,11,30,16
5065,2346.13,44.0,0.0,0.0,0,83.45,1.0,4.0,5.0,0.0,5065,0,2022,11,30,17


In [6]:
# set x equal to all columns except for the transaction_count and workforce_type_1
x = df.drop(['transaction_count', 'workforce_type_1','workforce_type_2', 'workforce_type_3','workforce_type_4'], axis=1)
# convert x to a 2d array
x = x.values.tolist()


# set y equal to the transaction_count and workforce_type_1 columns
y = df[['transaction_count', 'workforce_type_1', 'workforce_type_2', 'workforce_type_3','workforce_type_4']]
# convert y to an array
y = y.values.tolist()

In [7]:
TEST_SIZE = 0.33

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=TEST_SIZE)

In [8]:
X_train

[[1107.74, 0.0, 20.5, 0.0, 114.4, 3769.0, 0.0, 2022.0, 9.0, 7.0, 17.0],
 [2839.36, 0.0, 11.0, 0.0, 82.42, 1621.0, 0.0, 2021.0, 11.0, 13.0, 11.0],
 [1685.11, 0.0, 13.0, 0.0, 76.45, 357.0, 0.0, 2021.0, 6.0, 25.0, 16.0],
 [1591.38, 0.1, 9.5, 1.0, 108.16, 3029.0, 0.0, 2022.0, 4.0, 18.0, 15.0],
 [1382.78, 0.2, 7.7, 0.0, 99.29, 2546.0, 0.0, 2022.0, 2.0, 23.0, 18.0],
 [2052.54, 0.0, 14.1, 0.0, 91.04, 4668.0, 0.0, 2022.0, 10.0, 17.0, 16.0],
 [2000.83, 0.0, 18.9, 0.0, 108.54, 3742.0, 0.0, 2022.0, 6.0, 7.0, 17.0],
 [1623.97, 0.0, 13.9, 0.0, 127.94, 3520.0, 0.0, 2022.0, 12.0, 6.0, 11.0],
 [2917.62, 0.0, 10.3, 0.0, 82.85, 1649.0, 0.0, 2021.0, 11.0, 16.0, 12.0],
 [1480.01, 0.0, 23.6, 0.0, 95.06, 4162.0, 0.0, 2022.0, 8.0, 22.0, 14.0],
 [452.92, 0.0, 12.7, 0.0, 133.18, 2660.0, 0.0, 2022.0, 8.0, 3.0, 15.0],
 [355.45, 0.0, 17.2, 0.0, 86.83, 4310.0, 0.0, 2022.0, 7.0, 9.0, 18.0],
 [1823.67, 0.0, 17.7, 0.0, 97.99, 3997.0, 0.0, 2022.0, 4.0, 8.0, 11.0],
 [948.58, 0.0, 18.1, 0.0, 72.92, 282.0, 0.0, 2021.0, 6

In [9]:
# Defining custom dataloaders
## train data
class TrainData(Dataset):
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data
        
    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]

    def __getData__(self):
        return self.X_data, self.y_data
        
    def __len__ (self):
        return len(self.X_data)


train_data = TrainData(torch.FloatTensor(X_train), 
                       torch.FloatTensor(y_train))
## test data    
class TestData(Dataset):
    
    def __init__(self, X_data):
        self.X_data = X_data
        
    def __getitem__(self, index):
        return self.X_data[index]
        
    def __len__ (self):
        return len(self.X_data)
    

test_data = TestData(torch.FloatTensor(X_test))

In [10]:
class MultiLabelRegression(nn.Module):
    def __init__(self, input_size, output_size):
        super(MultiLabelRegression, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    
    def forward(self, x):
        out = self.linear(x)
        return out


In [11]:
def train(model, optimizer, criterion, num_epochs, train_loader):
    for epoch in range(num_epochs):
        for inputs, targets in train_loader:
            # Forward pass
            outputs = model(inputs)
            # Compute loss
            loss = criterion(outputs, targets)
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Print progress
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))


In [12]:
# Different type of model to train with test_loader
def train(model, optimizer, criterion, num_epochs, train_loader, test_loader=None):
    for epoch in range(num_epochs):
        train_loss = 0.0
        test_loss = 0.0
        for inputs, targets in train_loader:
            # Forward pass
            outputs = model(inputs)
            # Compute loss
            loss = criterion(outputs, targets)
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        if test_loader:
            with torch.no_grad():
                for inputs, targets in test_loader:
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)
                    test_loss += loss.item()
        # Print progress
        train_loss /= len(train_loader)
        test_loss /= len(test_loader) if test_loader else 1
        print('Epoch [{}/{}], Train Loss: {:.4f}, Test Loss: {:.4f}'.format(epoch+1, num_epochs, train_loss, test_loss))

In [15]:

# Define the model
model = MultiLabelRegression(9, 4)

# Define the loss function and the optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters())

# Create the data loader
#train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
#train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=10, shuffle=True)

#test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
#test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=10, shuffle=True)



In [16]:
train(model, optimizer, criterion, 100, train_data, test_data)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x11 and 9x4)