# This is the script for the linear regression model
## The imports

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from torch.utils.tensorboard import SummaryWriter
import FinalBorealis as dg
import datetime
import json

I decided to create the module as a class so I could give it an input size and output size, and for future reusability

In [5]:
class linearRegression(torch.nn.Module):
    def __init__(self, inputSize, outputSize):
        super(linearRegression, self).__init__()
        self.linear = torch.nn.Linear(inputSize, outputSize)
    def forward(self, x):
        return self.linear(x)

This is how the training of the model is done. For this function I used inspiration from the example model given by Borealis, this function receives:
    - Model: In this case a linear regression model.
    - Opt: Optimization function.
    - loss_fn: Loss Function.
    - train_ld: the data.
    - n_epochs (default = 3): Epochs of training.
    - save_to (default = './trainedModel.pth'): The file generated for the model

In [6]:
def train(model, opt, loss_fn, train_ld, n_epoch=3, save_to='./trainedModel.pth'):
    ##Iterate through a number of epochs
    for epoch in range(n_epoch):
        ##Training with batches of data
        running_loss = 0.0
        num_iters = len(train_ld)
        for i, data in enumerate(train_ld,0):
            inputs, labels = data[:,:16], data[:,16:]
            #First step: Generate predictions
           
            pred = model(inputs)
           
            #2nd step: Calculate loss
            loss = loss_fn(pred, labels)
            loss.backward()
            opt.step()

            opt.zero_grad()
            running_loss += loss.item()
            if i % 1014 == 1013:
                print(f"Loss at epoch [{epoch}/{n_epoch}], iteration [{i + 1}/{num_iters}]:{running_loss /2000}")
                running_loss = 0.0
    print(f"Training done after {n_epoch} epochs!")

    torch.save(model.state_dict(), save_to)
    print(f"Trained model is saved to {save_to}.")


Converting the pandas dataframe to a torch tensor

In [7]:
def pdToTensor(dataframe, file=True):
    if not file:
        dataRefined = dg.filteredByHour(dataframe)
        ingredients = { 'tuna':'TunaWeight', 'meatball':'MeatballWeight', 'chicken':'ChickenWeight',
                'steak':'SteakWeight', 'chickenTeriyaki':'ChcknTkiWeight', 'cheese':'Cheese', 'tomato':'Tomato', 'olives':'Olives',
                'avocado':'Avocado'}
        finalDF = pd.DataFrame(columns=['Weather','Day','Hour','TypeOfFood','output'] )
        for row in dataRefined.iterrows():
            sample = {'Weather': row[1].Weather,'Day':row[0].dayofyear,'Hour':row[0].hour}
            for ing in ingredients:
                if row[1][ingredients[ing]] != 0:
                    sample['TypeOfFood'] = ing
                    sample['output'] = row[1][ingredients[ing]]
                    finalDF = finalDF.append(sample, ignore_index=True)
        finalDF = pd.concat([pd.get_dummies(finalDF['Weather'], prefix='', prefix_sep=''), finalDF], axis=1)
        finalDF = pd.concat([pd.get_dummies(finalDF['TypeOfFood'], prefix='', prefix_sep=''), finalDF], axis=1)
        del finalDF['Weather']
        del finalDF['TypeOfFood']
        finalDF.to_json('./data3.json')
        return torch.from_numpy(finalDF.values)
    newTensor = torch.from_numpy(dataframe.values)
    return newTensor

In [32]:
def main():
    ## TO DO: implement a tensorBoard to visualize the loss
    #writer = SummaryWriter()
    data1 = pd.read_json('./data1.json')
    data2 = pd.read_json('./data2.json')
    data3 = pd.read_json('./data3.json')

    data1 = pdToTensor(data1)
    data2 = pdToTensor(data2)
    data3 = pdToTensor(data3)
    ## Converting the dataframe to a tensor 
    #data4 = torch.cat([data1, torch.cat([data2, data3], dim=1)], dim=1)
    data = torch.cat((data1, torch.cat((data2, data3), 0)), 0)
    data = data.float()
    #print(data)
    #inputs (date/hour, type of ingredient)
    testSize = int(0.1 * len(data))
    trainSize = len(data)-testSize

    trainSet, testSet = torch.utils.data.random_split(data, [trainSize, testSize])

    trainLoader = torch.utils.data.DataLoader(trainSet, batch_size=64, num_workers=0)
    testLoader = torch.utils.data.DataLoader(testSet, batch_size=64, num_workers=0)
    
    print(f"Training set consist of {len(trainSet)}, and the test set consists of {len(testSet)}")


    model = linearRegression(16,1)
    loss_fn = nn.MSELoss()
    opt = torch.optim.SGD(model.parameters(), lr=1e-5)
    #print(model.parameters)
    train(model, opt, loss_fn, trainLoader, n_epoch=100)

In [33]:
if __name__ == '__main__':
    main()

Training set consist of 73187, and the test set consists of 8131
Loss at epoch [0/100], iteration [1014/1144]:15638.813864746095
Loss at epoch [1/100], iteration [1014/1144]:14957.1813125
Loss at epoch [2/100], iteration [1014/1144]:14890.87196484375
Loss at epoch [3/100], iteration [1014/1144]:14857.631077148437
Loss at epoch [4/100], iteration [1014/1144]:14826.092557128906
Loss at epoch [5/100], iteration [1014/1144]:14794.764883789063
Loss at epoch [6/100], iteration [1014/1144]:14763.640761230468
Loss at epoch [7/100], iteration [1014/1144]:14732.73723828125
Loss at epoch [8/100], iteration [1014/1144]:14702.058151855468
Loss at epoch [9/100], iteration [1014/1144]:14671.602355957031
Loss at epoch [10/100], iteration [1014/1144]:14641.367877441406
Loss at epoch [11/100], iteration [1014/1144]:14611.352901367187
Loss at epoch [12/100], iteration [1014/1144]:14581.555279296876
Loss at epoch [13/100], iteration [1014/1144]:14551.972520019532
Loss at epoch [14/100], iteration [1014/11