<a href="https://colab.research.google.com/github/Valent0296/allvent.github.io/blob/master/Food_Delivery_Time_Prediction_RNN_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Food Delivery Time Prediction is a regression problem, since we want to find a black box law linking input features, and the predicted time taken to deliver food.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from google.colab import drive
drive.mount('/content/drive')
import sys
sys.path.append("/MyDrive/dataset/")
import torch
from torch import Tensor
import torch.nn as nn
from torchvision import datasets, transforms, models
from torchvision.transforms import ToTensor, Lambda
from torch.utils.data import DataLoader, Dataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

%cd /content/drive/MyDrive/dataset/
deliverytime = pd.read_csv("deliverytime.txt", sep=",", index_col='ID')


Mounted at /content/drive
/content/drive/MyDrive/dataset


In [None]:
print(deliverytime.head())
deliverytime.info()

     Delivery_person_ID  Delivery_person_Age  Delivery_person_Ratings  \
ID                                                                      
4607     INDORES13DEL02                   37                      4.9   
B379     BANGRES18DEL02                   34                      4.5   
5D6D     BANGRES19DEL01                   23                      4.4   
7A6A    COIMBRES13DEL02                   38                      4.7   
70A2     CHENRES12DEL01                   32                      4.6   

      Restaurant_latitude  Restaurant_longitude  Delivery_location_latitude  \
ID                                                                            
4607            22.745049             75.892471                   22.765049   
B379            12.913041             77.683237                   13.043041   
5D6D            12.914264             77.678400                   12.924264   
7A6A            11.003669             76.976494                   11.053669   
70A2          

In [None]:

stats_person = deliverytime[ ['Delivery_person_ID', 'Time_taken(min)'] ].groupby( ['Delivery_person_ID'] ).mean()
stats_person.head()
stats_person = deliverytime

In [None]:


#For categorical data, I'm gonna switch them with their average time taken per vehicle / order
mean_time_deliverer = deliverytime.groupby('Delivery_person_ID')['Time_taken(min)'].mean()
mean_time_veich = deliverytime.groupby('Type_of_vehicle')['Time_taken(min)'].mean()
mean_time_order = deliverytime.groupby('Type_of_order')['Time_taken(min)'].mean()

stats_person = stats_person.join(mean_time_deliverer, on='Delivery_person_ID', rsuffix='Mean_time_deliverer')
stats_person = stats_person.join(mean_time_veich, on='Type_of_vehicle', rsuffix='Mean_time_vehic')
stats_person = stats_person.join(mean_time_order, on='Type_of_order', rsuffix='Mean_time_order')


#Another option was to calulate the relative frequency of each class of vehicle/order


In [None]:
stats_person['Type_of_vehicle'], un_veich = pd.factorize(deliverytime['Type_of_vehicle'])
stats_person['Type_of_order'], un_order = pd.factorize(deliverytime['Type_of_order'])
stats_person['Delivery_person_Age'] = deliverytime['Delivery_person_Age'] * 365 * 24 * 3600
stats_person['Time_taken(min)']  = deliverytime['Time_taken(min)'] * 60
stats_person['Time_taken(min)Mean_time_deliverer'] = stats_person['Time_taken(min)Mean_time_deliverer'] * 60
stats_person['Time_taken(min)Mean_time_order'] = stats_person['Time_taken(min)Mean_time_order'] * 60
stats_person['Time_taken(min)Mean_time_vehic'] = stats_person['Time_taken(min)Mean_time_vehic'] * 60

stats_person['Distance'] = ((stats_person['Delivery_location_latitude'] - stats_person['Restaurant_latitude'])**2 + (stats_person['Delivery_location_longitude'] - stats_person['Restaurant_longitude'])**2)**(1/2)


means = {}
var = {}
for item in stats_person.columns:
  if item != 'Delivery_person_ID' and item != 'Type_of_order' and  item != 'Type_of_vehicle' and item != 'Delivery_person_ratings': #For intervallar or ratio variables I use normalization
    means.update( {item: stats_person[item].mean()})
    var.update({item: stats_person[item].std()})
    stats_person[item] -= stats_person[item].mean()
    stats_person[item] /= stats_person[item].std()
  elif item == 'Delivery_person_ratings': #For ordinal data I use min max scaling
    stats_person[item] -= deliverytime['Delivery_person_ratings'].min()
    stats_person[item] /= (deliverytime['Delivery_person_ratings'].max() - deliverytime['Delivery_person_ratings'].min())

print(var)

{'Delivery_person_Age': 179654062.06206816, 'Delivery_person_Ratings': 0.3277075288383162, 'Restaurant_latitude': 8.185108965214447, 'Restaurant_longitude': 22.883647223093075, 'Delivery_location_latitude': 7.335121994514343, 'Delivery_location_longitude': 21.118811879085786, 'Time_taken(min)': 563.0283687097862, 'Time_taken(min)Mean_time_deliverer': 91.82522109855185, 'Time_taken(min)Mean_time_vehic': 92.51496963020327, 'Time_taken(min)Mean_time_order': 4.939862086479378, 'Distance': 10.087992982248682}


Target Encoding (Mean Encoding)

How it works: This method involves replacing each category with the mean of the target variable for that category. It's typically used in supervised learning.


Example:
Suppose "Vehicle Type" is being used to predict "Fuel Efficiency." Each type would be replaced by the average fuel efficiency of that type.


Pros: Can capture the relationship between the category and the target variable.


Cons: Prone to overfitting, especially with small datasets.

In [None]:
stats_person.info()

#stats_person['Delivery_person_ID'], un_del_id = pd.factorize(deliverytime['Delivery_person_ID']) #For Delivery person Id, I'll use word embeddings, so the first thing to do is to factorize
#print(stats_person.head())
stats_person_1 = stats_person.iloc[:,[ 1, 2, 8, 9, 10, 11, 7]]

stats_person_1.info()

corr_matr = stats_person_1.corr()
corr_matr

<class 'pandas.core.frame.DataFrame'>
Index: 45593 entries, 4607 to 5FB2
Data columns (total 14 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Delivery_person_ID                  45593 non-null  object 
 1   Delivery_person_Age                 45593 non-null  float64
 2   Delivery_person_Ratings             45593 non-null  float64
 3   Restaurant_latitude                 45593 non-null  float64
 4   Restaurant_longitude                45593 non-null  float64
 5   Delivery_location_latitude          45593 non-null  float64
 6   Delivery_location_longitude         45593 non-null  float64
 7   Type_of_order                       45593 non-null  int64  
 8   Type_of_vehicle                     45593 non-null  int64  
 9   Time_taken(min)                     45593 non-null  float64
 10  Time_taken(min)Mean_time_deliverer  45593 non-null  float64
 11  Time_taken(min)Mean_time_vehic      45593 no

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Type_of_vehicle,Time_taken(min),Time_taken(min)Mean_time_deliverer,Time_taken(min)Mean_time_vehic,Type_of_order
Delivery_person_Age,1.0,-0.067449,0.003353,0.292708,0.050358,-0.000364,0.007789
Delivery_person_Ratings,-0.067449,1.0,0.015928,-0.331103,-0.051289,-0.02373,-0.003754
Type_of_vehicle,0.003353,0.015928,1.0,-0.148935,-0.029188,-0.90639,-0.003664
Time_taken(min),0.292708,-0.331103,-0.148935,1.0,0.163092,0.164317,0.005891
Time_taken(min)Mean_time_deliverer,0.050358,-0.051289,-0.029188,0.163092,1.0,0.033636,-0.000206
Time_taken(min)Mean_time_vehic,-0.000364,-0.02373,-0.90639,0.164317,0.033636,1.0,0.005002
Type_of_order,0.007789,-0.003754,-0.003664,0.005891,-0.000206,0.005002,1.0


In [None]:
###########
# Dataset #
###########
class FDTDataset( Dataset ):

  def __init__(self, data):
    self.data_tensor = data

  def __len__(self):
    return len(self.data_tensor)

  def __getitem__(self, idx):
    data = self.data_tensor[:,:-1]
    label = self.data_tensor[:,-1]
    return data[idx,:], label[idx]


dev = 0.01
test = 0.01
train = 1 - dev - test

data = stats_person_1.sample(frac=1)
train_len = np.arange(int(train*len(data)))
train_data = data.iloc[train_len].to_numpy()
dev_test_data = data.iloc[~train_len]

dev_len = np.arange(int(dev*len(dev_test_data)))
dev_data = dev_test_data.iloc[dev_len].to_numpy()
test_data = dev_test_data.iloc[~dev_len].to_numpy()

batch_size = 64



dev_loader = DataLoader(FDTDataset(dev_data), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(FDTDataset(test_data), batch_size=batch_size, shuffle=True)
train_loader = DataLoader(FDTDataset(train_data), batch_size=batch_size, shuffle=True)



In [None]:
class FDTModel(nn.Module):

  def __init__(self, i1_size, i2_size, i3_size, p_size, h_size, o1_size, o2_size,  o3_size, out_size):
    super(FDTModel, self).__init__()
    self.i1_size = i1_size
    self.i2_size = i2_size
    self.i3_size = i3_size
    self.h1_size = h_size
    self.o1_size = o1_size
    self.o2_size = o2_size
    self.o3_size = o3_size
    self.out_size = out_size

    self.dropout = nn.Dropout1d()
    self.norm = nn.BatchNorm1d(h_size)
    self.proj1 = nn.Sequential( nn.Linear(i1_size, p_size), nn.ReLU(), nn.Linear(p_size, p_size), nn.Softmax(-1) )
    self.proj2 = nn.Sequential( nn.Linear(i2_size, p_size), nn.ReLU(), nn.Linear(p_size, p_size), nn.Softmax(-1) )
    self.proj3 = nn.Sequential( nn.Linear(i3_size, p_size), nn.ReLU(), nn.Linear(p_size, p_size), nn.Softmax(-1) )

    #Input ID Delivery person, Deliver person age, Delivery person age
    self.rnncellh11 = nn.GRUCell(p_size, h_size) #No output
    self.rnncellh12 = nn.GRUCell(h_size, h_size)
    self.rnncellh13 = nn.GRUCell(h_size, h_size)
    self.rnncellh14 = nn.GRUCell(h_size, h_size)

    self.rnncellh15 = nn.GRUCell(h_size, h_size)
    self.rnncellh16 = nn.GRUCell(h_size, h_size)
    self.rnncellh17 = nn.GRUCell(h_size, h_size)

    self.rnncello18 = nn.Sequential( nn.Linear(h_size, o1_size), nn.Softmax(-1))

    #Input previous hidden state, Restaurant, Delivery location
    self.rnncellh21 = nn.GRUCell(p_size+o1_size, h_size)
    self.rnncellh22 = nn.GRUCell(h_size, h_size)
    self.rnncellh23 = nn.GRUCell(h_size, h_size)
    self.rnncellh24 = nn.GRUCell(h_size, h_size)

    self.rnncellh25 = nn.GRUCell(h_size, h_size)
    self.rnncellh26 = nn.GRUCell(h_size, h_size)
    self.rnncellh27 = nn.GRUCell(h_size, h_size)

    self.rnncello28 = nn.Sequential( nn.Linear(h_size, o2_size), nn.Softmax(-1) )

    #Input previous hidden state, previous output, Vehicle time taken avg, Order time taken avg
    self.rnncellh31 = nn.GRUCell(p_size+o2_size, h_size)
    self.rnncellh32 = nn.GRUCell(h_size, h_size)
    self.rnncellh33 = nn.GRUCell(h_size, h_size)
    self.rnncellh34 = nn.GRUCell(h_size, h_size)

    self.rnncellh35 = nn.GRUCell(h_size, h_size)
    self.rnncellh36 = nn.GRUCell(h_size, h_size)
    self.rnncellh37 = nn.GRUCell(h_size, h_size)

    self.rnncello38 = nn.Sequential( nn.Linear(h_size, o3_size), nn.Softmax(-1) )
    self.linearout = nn.Sequential(
        nn.Linear(o3_size, h_size * 4),
        nn.ReLU(),
        nn.Linear(h_size* 4, h_size* 4),
        nn.ReLU(),
        nn.Linear(h_size* 4, h_size* 4),
        nn.ReLU(),
        nn.Linear(h_size* 4, h_size* 4),
        nn.ReLU(),
        nn.BatchNorm1d(h_size* 4),
        nn.Dropout1d(),
        nn.Linear(h_size* 4, h_size* 2),
        nn.ReLU(),
        nn.Linear(h_size* 2, h_size* 2),
        nn.ReLU(),
        nn.Linear(h_size* 2, h_size* 2),
        nn.ReLU(),
        nn.BatchNorm1d(h_size* 2),
        nn.Dropout1d(),
        nn.Linear(h_size * 2, h_size),
        nn.ReLU(),
        nn.Linear(h_size, h_size),
        nn.ReLU(),
        nn.Linear(h_size, h_size),
        nn.ReLU(),
        nn.BatchNorm1d(h_size),
        nn.Dropout1d(),
        nn.Linear(h_size, out_size),
        nn.Tanh()
                                   )

  def forward(self,x1, x2, x3):
    #id_emb = self.embed(id)
    #x1 = torch.concat([id_emb, x1], axis=-1)
    i1 = self.proj1(x1)
    h11 = self.rnncellh11(i1)
    h12 = self.rnncellh12(h11)
    h13 = self.rnncellh13(h12)
    h14 = self.rnncellh14(h13)
    h14 = self.norm(h14) + h11
    h14 = self.dropout(h14)
    h15 = self.rnncellh15(h14)
    h16 = self.rnncellh16(h15)
    h17 = self.rnncellh17(h16)
    h17 = self.norm(h17) + h15
    h17 = self.dropout(h17)
    o14 = self.rnncello18( h17 )

    i2 = self.proj2(x2)
    i2 = torch.concat([i2, o14], axis=-1)
    h21 = self.rnncellh21( i2, h11 )
    h22 = self.rnncellh22( h21, h12 )
    h23 = self.rnncellh23( h22, h13 )
    h24 = self.rnncellh24( h23, h14 )
    h24 = self.norm(h24) + h21
    h24 = self.dropout(h24)
    h25 = self.rnncellh25( h24, h15 )
    h26 = self.rnncellh26( h25, h16 )
    h27 = self.rnncellh27( h26, h17 )
    h27 = self.norm(h27) + h25
    h27 = self.dropout(h27)
    o24 = self.rnncello28( h27 )

    i3 = self.proj3(x3)
    i3 = torch.concat([i3, o24], axis=-1)
    h31 = self.rnncellh31( i3, h21 )
    h32 = self.rnncellh32( h31, h22 )
    h33 = self.rnncellh33( h32, h23 )
    h34 = self.rnncellh34( h33, h24 )
    h34 = self.norm(h34) + h31
    h34 = self.dropout(h34)
    h35 = self.rnncellh35( h34, h25 )
    h36 = self.rnncellh36( h35, h26 )
    h37 = self.rnncellh37( h36, h27 )
    h37 = self.norm(h37) + h35
    h37 = self.dropout(h37)
    o34 = self.rnncello38( h37 )

    out = self.linearout( o34 )
    return out




In [None]:
loss_list_dev =  []
loss_list_train = []

#Define dev loop
def dev_loop(dataloader, model, loss_fn, optimizer):
  model.train()
  size = len(dataloader.dataset)
  dev_loss, correct = 0.0, 0.0
  num_batches = len(dataloader)
  y_dev = []
  pred_dev = []
  mae = 0.0
  R_squared = 0.0
  for batch, (X, y) in enumerate(dataloader):
    batch_size, batch_correct = 0, 0
    X, y = X.to(device).to(torch.float), y.to(device).to(torch.float)
    #Compute prediction and loss
    pred = model(X[:,[0, 1]], X[:, [2, 3, 4]], X[:,[5]]) #La prediction andrebbe denormalizzata essendo l'output
    RSS = torch.pow((y.detach() - pred.detach()), 2).sum()
    TSS = torch.pow((y.detach() - y.mean()), 2).sum()
    r_square = 1 - RSS/TSS
    r_square_a = 1 - (1-r_square) * (X.shape[0]-1)/(X.shape[0]-X.shape[1]-1)
    R_squared += r_square_a
    orig_y = torch.add(torch.multiply(y, var.get('Time_taken(min)')) , means.get('Time_taken(min)'))/60
    orig_pred = torch.add(torch.multiply(pred, var.get('Time_taken(min)')) , means.get('Time_taken(min)'))/60

    y_dev.append(orig_y)
    pred_dev.append(orig_pred)
    loss = loss_fn(pred.squeeze(), y)
    dev_loss += loss
    batch_size = X.size(0)
    mae += torch.abs(orig_y.detach() - orig_pred.detach()).sum()
    #Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    loss_list_dev.append(loss.item()) #


  mae /= size
  R_squared /= num_batches
  print(f"Errore dev:  Media loss (Scarto quadratico della stima): {dev_loss**(1/2):>8f} \nErrore assoluto medio: {mae:>8f}\nCoefficiente di determinazione: {R_squared}")
  return dev_loss**(1/2)

#Define train loop
def train_loop(dataloader, model, loss_fn, optimizer):
  model.train()
  size = len(dataloader.dataset)
  train_loss, correct = 0.0, 0.0
  num_batches = len(dataloader)
  y_train = []
  pred_train = []
  mae = 0.0
  R_squared = 0.0
  for batch, (X, y) in enumerate(dataloader):
    batch_size, batch_correct = 0, 0
    X, y = X.to(device).to(torch.float), y.to(device).to(torch.float)
    #Compute prediction and loss
    pred = model(X[:,[0, 1]], X[:, [2, 3, 4]], X[:,[5]]) #La prediction andrebbe denormalizzata essendo l'output
    RSS = torch.pow((y.detach() - pred.detach()), 2).sum()
    TSS = torch.pow((y.detach() - y.mean()), 2).sum()
    r_square = 1 - RSS/TSS
    r_square_a = 1 - (1-r_square) * (X.size[0]-1)/(X.size[0]-X.size[1]-1)
    R_squared += r_square_a
    orig_y = torch.add(torch.multiply(y, var.get('Time_taken(min)')) , means.get('Time_taken(min)'))/60
    orig_pred = torch.add(torch.multiply(pred, var.get('Time_taken(min)')) , means.get('Time_taken(min)'))/60

    y_train.append(orig_y)
    pred_train.append(orig_pred.squeeze())
    loss = loss_fn(pred.squeeze(), y)
    train_loss += loss
    batch_size = X.size(0)
    mae += torch.abs(orig_y.detach() - orig_pred.detach()).sum()
    #Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    loss_list_train.append(loss.item()) #

  mae /= size
  R_squared /= num_batches
  #dev_loss /= num_batches
  print(f"Errore train:  Media loss (Scarto quadratico della stima): {train_loss**(1/2):>8f} \nErrore assoluto medio: {mae:>8f}\nCoefficiente di determinazione: {R_squared}")

  return train_loss**(1/2), y_train, pred_train, mae

#Define test loop
def test_loop(dataloader, model, loss_fn):
  model.eval()
  size = len(dataloader.dataset)
  test_loss, correct = 0.0, 0.0
  num_batches = len(dataloader)
  y_test = []
  pred_test = []
  mae = 0.0
  R_squared = 0.0
  for batch, (X, y) in enumerate(dataloader):
    batch_size, batch_correct = 0, 0
    X, y = X.to(device).to(torch.float), y.to(device).to(torch.float)
    #Compute prediction and loss
    pred = model(X[:,[0, 1]], X[:, [2, 3, 4]], X[:,[5]]) #La prediction andrebbe denormalizzata essendo l'output
    RSS = torch.pow((y.detach() - pred.detach()), 2).sum()
    TSS = torch.pow((y.detach() - y.mean()), 2).sum()
    r_square = 1 - RSS/TSS
    r_square_a = 1 - (1-r_square) * (X.size[0]-1)/(X.size[0]-X.size[1]-1)
    R_squared += r_square_a

    orig_y = torch.add(torch.multiply(y, var.get('Time_taken(min)')) , means.get('Time_taken(min)'))/60
    orig_pred = torch.add(torch.multiply(pred, var.get('Time_taken(min)')) , means.get('Time_taken(min)'))/60

    y_test.append(orig_y)
    pred_test.append(orig_pred.squeeze())
    loss = loss_fn(pred.squeeze(), y)
    test_loss += loss
    batch_size = X.size(0)
    mae += torch.abs(orig_y.detach() - orig_pred.detach()).sum()

  mae /= size
  R_squared /= num_batches
  #dev_loss /= num_batches
  print(f"Errore test:  Media loss (Scarto quadratico della stima): {test_loss**(1/2):>8f} \nErrore assoluto medio: {mae:>8f}\nCoefficiente di determinazione: {R_squared}")

  return test_loss**(1/2), y_test, pred_test, mae

In [None]:
#Definiamo la loss function, metodo di minimizzazione e iperparametri
learning_rate = [i*10**(-i) for i in range(1,5)]
momentum = [i/10 for i in range(10)]
lr_mom_grid = [(l,m, 0) for l in learning_rate for m in momentum]

loss_fn = nn.MSELoss()

model_RNN = FDTModel(2, 3, 1, 32, 64, 32, 32, 32, 1)

def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight).to(device)
        m.bias.data.fill_(0.01).to(device)

model_RNN.apply(init_weights)
model_RNN.to(device)

for i, (l, m, mse) in enumerate(lr_mom_grid):
  print(f"Learning rate: {l}, Momentum: {m}")
  optimizer = torch.optim.SGD(model_RNN.parameters(), lr=l, momentum = m)
  epochs = 1
  for t in range(epochs):
      print(f"Epoch {t+1}\n-------------------------------")
      mse += dev_loop( dev_loader, model_RNN, loss_fn, optimizer)

  mse /= epochs
  lr_mom_grid[i] = (l,m, mse.item())


Learning rate: 0.1, Momentum: 0.0
Epoch 1
-------------------------------


InvalidIndexError: (slice(None, None, None), slice(None, -1, None))

In [None]:
hyper = np.array(lr_mom_grid)
min_err = np.argmin(hyper[:,-1])
dev_min = (np.min(dev_data[-1]).item() * var.get('Time_taken(min)') + means.get('Time_taken(min)')) /  60
dev_max = (np.max(dev_data[-1]).item() * var.get('Time_taken(min)') + means.get('Time_taken(min)')) /  60
print(hyper[min_err], '\n', hyper[min_err, -1].item()/dev_max, '\n', hyper[min_err, -1].item()/dev_min)

KeyError: -1

In [None]:
optimizer = torch.optim.SGD(model_RNN.parameters(), lr=hyper[min_err, 0], momentum = hyper[min_err, 1])
epochs = 10
train_rse = 0
mae = 0
size = len(train_loader.dataset)
for t in range(epochs):
  print(f"Epoch {t+1}\n-------------------------------")
  train_rse_i, y_train, pred_train, mae_t = train_loop( train_loader, model_RNN, loss_fn, optimizer)
  train_rse += train_rse_i
  y_train_tens = torch.hstack(y_train)
  pred_train_tens = torch.hstack(pred_train)
train_rse = train_rse.item()
train_rse /= epochs
mae = mae_t / epochs
fig = plt.figure()
ax1 = fig.add_subplot(111)
train_min = (np.min(train_data[-1]).item() * var.get('Time_taken(min)') + means.get('Time_taken(min)')) /  60
train_max = (np.max(train_data[-1]).item() * var.get('Time_taken(min)') + means.get('Time_taken(min)')) /  60
print(train_rse, '\n', mae.detach().numpy().item(), '\n', train_rse/train_max,'\n',  train_rse/train_min)
ax1.plot(range(50), y_train_tens.detach().numpy()[3000:3050], 'o-g', pred_train_tens.detach().numpy()[3000:3050], 'o-r' )

There's an high bias

In [None]:
train_rse = 0
size = len(train_loader.dataset)

test_rse, y_test, pred_test, mae = test_loop( train_loader, model_RNN, loss_fn)

y_test_tens = torch.hstack(y_test)
pred_test_tens = torch.hstack(pred_test)

test_rse = test_rse.item()
test_rse /= epochs
test_min = (np.min(test_data[-1]).item() * var.get('Time_taken(min)') + means.get('Time_taken(min)')) /  60
test_max = (np.max(test_data[-1]).item() * var.get('Time_taken(min)') + means.get('Time_taken(min)')) /  60
print(test_rse, '\n', test_rse/test_max,'\n',  test_rse/test_min)
plt.plot(range(50), y_test_tens.detach().numpy()[0:50], 'o-g', pred_test_tens.detach().numpy()[0:50], 'o-r' )

Despite the model complexity, it seems to be affected by an high bias.