In [3]:
import math
import numpy as np
import random
from time import time

import pandas as pd
from downcast import reduce

import matplotlib.pyplot as plt

from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

import lightgbm as lgb

# IMPORT DATA

In [4]:
IS_IOWA_DATASET = True  # iowa dataset : true, simulation : false
IOWA_PATH = '../../datasets/train_data_iowa.csv'
SIMULATION_PATH = '../../datasets/datensatz_emre.csv'
CSV_PATH = IOWA_PATH if IS_IOWA_DATASET else SIMULATION_PATH

CF_PATH = "../../datasets/crafted_features.csv"

In [5]:
features = [
    "location_x", "location_y",
    "restaurant_location_x", "restaurant_location_y", 
    "order_time",
    "atd",
    "etd",
    "restaurant_queue",
    "max_pre_shift",
    "max_post_shift",
    "restaurants_before_customer",
    "customers_before_customer",
    "len_vehicle_route_to_customer",
]

for i in range(23):
    features.append(f"vehicle_route_to_customer_pos_x_{i}")
    features.append(f"vehicle_route_to_customer_pos_y_{i}")
    features.append(f"vehicle_route_to_customer_action_{i}")
    features.append(f"vehicle_route_to_customer_time_action_{i}") 

In [6]:
# Import data
start_time = time()
meta = pd.read_csv(CSV_PATH, header=0, sep=";", usecols=features, nrows=1)
meta = reduce(meta)

data = pd.read_csv(CSV_PATH, header=0, sep=";", usecols=features, dtype=meta.dtypes.to_dict())
pd.set_option("display.max_columns", len(data.columns))

print(f"Elapsed time: {time() - start_time} seconds")
data

Elapsed time: 100.74628639221191 seconds


Unnamed: 0,order_time,atd,etd,restaurant_queue,max_pre_shift,max_post_shift,location_x,location_y,restaurant_location_x,restaurant_location_y,restaurants_before_customer,customers_before_customer,len_vehicle_route_to_customer,vehicle_route_to_customer_pos_x_0,vehicle_route_to_customer_pos_y_0,vehicle_route_to_customer_action_0,vehicle_route_to_customer_time_action_0,vehicle_route_to_customer_pos_x_1,vehicle_route_to_customer_pos_y_1,vehicle_route_to_customer_action_1,vehicle_route_to_customer_time_action_1,vehicle_route_to_customer_pos_x_2,vehicle_route_to_customer_pos_y_2,vehicle_route_to_customer_action_2,vehicle_route_to_customer_time_action_2,vehicle_route_to_customer_pos_x_3,vehicle_route_to_customer_pos_y_3,vehicle_route_to_customer_action_3,vehicle_route_to_customer_time_action_3,vehicle_route_to_customer_pos_x_4,vehicle_route_to_customer_pos_y_4,vehicle_route_to_customer_action_4,vehicle_route_to_customer_time_action_4,vehicle_route_to_customer_pos_x_5,vehicle_route_to_customer_pos_y_5,vehicle_route_to_customer_action_5,vehicle_route_to_customer_time_action_5,vehicle_route_to_customer_pos_x_6,vehicle_route_to_customer_pos_y_6,vehicle_route_to_customer_action_6,vehicle_route_to_customer_time_action_6,vehicle_route_to_customer_pos_x_7,vehicle_route_to_customer_pos_y_7,vehicle_route_to_customer_action_7,vehicle_route_to_customer_time_action_7,vehicle_route_to_customer_pos_x_8,vehicle_route_to_customer_pos_y_8,vehicle_route_to_customer_action_8,vehicle_route_to_customer_time_action_8,vehicle_route_to_customer_pos_x_9,vehicle_route_to_customer_pos_y_9,vehicle_route_to_customer_action_9,vehicle_route_to_customer_time_action_9,vehicle_route_to_customer_pos_x_10,vehicle_route_to_customer_pos_y_10,vehicle_route_to_customer_action_10,vehicle_route_to_customer_time_action_10,vehicle_route_to_customer_pos_x_11,vehicle_route_to_customer_pos_y_11,vehicle_route_to_customer_action_11,vehicle_route_to_customer_time_action_11,vehicle_route_to_customer_pos_x_12,vehicle_route_to_customer_pos_y_12,vehicle_route_to_customer_action_12,vehicle_route_to_customer_time_action_12,vehicle_route_to_customer_pos_x_13,vehicle_route_to_customer_pos_y_13,vehicle_route_to_customer_action_13,vehicle_route_to_customer_time_action_13,vehicle_route_to_customer_pos_x_14,vehicle_route_to_customer_pos_y_14,vehicle_route_to_customer_action_14,vehicle_route_to_customer_time_action_14,vehicle_route_to_customer_pos_x_15,vehicle_route_to_customer_pos_y_15,vehicle_route_to_customer_action_15,vehicle_route_to_customer_time_action_15,vehicle_route_to_customer_pos_x_16,vehicle_route_to_customer_pos_y_16,vehicle_route_to_customer_action_16,vehicle_route_to_customer_time_action_16,vehicle_route_to_customer_pos_x_17,vehicle_route_to_customer_pos_y_17,vehicle_route_to_customer_action_17,vehicle_route_to_customer_time_action_17,vehicle_route_to_customer_pos_x_18,vehicle_route_to_customer_pos_y_18,vehicle_route_to_customer_action_18,vehicle_route_to_customer_time_action_18,vehicle_route_to_customer_pos_x_19,vehicle_route_to_customer_pos_y_19,vehicle_route_to_customer_action_19,vehicle_route_to_customer_time_action_19,vehicle_route_to_customer_pos_x_20,vehicle_route_to_customer_pos_y_20,vehicle_route_to_customer_action_20,vehicle_route_to_customer_time_action_20,vehicle_route_to_customer_pos_x_21,vehicle_route_to_customer_pos_y_21,vehicle_route_to_customer_action_21,vehicle_route_to_customer_time_action_21,vehicle_route_to_customer_pos_x_22,vehicle_route_to_customer_pos_y_22,vehicle_route_to_customer_action_22,vehicle_route_to_customer_time_action_22
0,567,583,584,8,0,0,4.562500,8.312500,4.996094,10.343750,1,1,4,4.996094,10.343750,1,4,4.996094,10.343750,3,5,4.562500,8.312500,2,5,4.562500,8.312500,4,3,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0
1,587,613,610,8,0,0,13.234375,3.943359,7.906250,3.837891,1,1,4,7.906250,3.837891,1,6,7.906250,3.837891,3,3,13.234375,3.943359,2,11,13.234375,3.943359,4,3,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0
2,602,623,623,8,0,0,7.632812,7.269531,4.996094,10.343750,1,1,4,4.996094,10.343750,1,5,4.996094,10.343750,3,4,7.632812,7.269531,2,9,7.632812,7.269531,4,3,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0
3,617,641,639,8,0,0,7.664062,7.433594,3.521484,9.429688,1,1,4,3.521484,9.429688,1,4,3.521484,9.429688,3,5,7.664062,7.433594,2,10,7.664062,7.433594,4,3,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0
4,618,646,645,8,0,0,12.929688,6.019531,6.765625,6.617188,1,1,4,6.765625,6.617188,1,8,6.765625,6.617188,3,3,12.929688,6.019531,2,13,12.929688,6.019531,4,3,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
850464,1191,1224,1225,8,11,0,4.097656,11.203125,3.521484,9.429688,1,3,8,6.128906,7.078125,4,3,5.808594,6.886719,2,1,5.808594,6.886719,4,3,5.585938,11.531250,2,10,5.585938,11.531250,4,3,3.521484,9.429688,1,6,3.521484,9.429688,3,3,4.097656,11.203125,2,4,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0
850465,1200,1246,1246,8,9,0,13.109375,4.996094,7.050781,10.210938,1,2,7,5.757812,4.250000,2,8,5.757812,4.250000,4,3,7.886719,10.664062,2,14,7.886719,10.664062,4,3,7.050781,10.210938,1,2,7.050781,10.210938,3,3,13.109375,4.996094,2,16,0.000000,0.000000,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0
850466,1202,1244,1242,8,12,0,12.507812,2.494141,9.734375,5.386719,1,2,7,13.382812,6.144531,2,5,13.382812,6.144531,4,3,9.734375,5.386719,1,8,9.734375,5.386719,3,3,13.484375,5.949219,2,8,13.484375,5.949219,4,3,12.507812,2.494141,2,8,0.000000,0.000000,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0
850467,1207,1246,1247,11,14,0,0.902832,10.476562,9.734375,5.386719,1,1,4,4.867188,3.845703,4,3,9.734375,5.386719,1,11,9.734375,5.386719,3,3,0.902832,10.476562,2,21,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0


# Feature Engineering / Selection

Features used in Hildebrandt et al. (2020):
<ul>
    <li>n_stops: sum(vehicle_route_to_customer_action_i = 1 or 2)</li>
    <li>n_pickup_stops: sum(vehicle_route_to_customer_action_i = 1)</li>
    <li>n_delivery_stops: sum(vehicle_route_to_customer_action_i = 2)</li>
    <li>max_pre_shift: already given</li>    
    <li>max_post_shift: already given</li>
    <li>prep_time: sum(v_r_t_c_time_action_*) where v_r_t_c_action_i = 3 and v_r_t_c_pos_j == restaurant_location</li>
    <li>order_time: already given</li>
    <li>eta_pom: already given</li>
    <li>customer_location: already given</li>
    <li>restaurant_location: already given</li>
</ul>

In [None]:
#Define strings to identify needed columns for each feature we want to craft
query_strings = {
    "n_stops" : ["vehicle_route_to_customer_action"],
        
    "n_pickup_stops" : ["vehicle_route_to_customer_action"],
    
    "n_delivery_stops" : ["vehicle_route_to_customer_action"],
    
    "prep_time" : ["vehicle_route_to_customer_time_action", 
                   "vehicle_route_to_customer_action",
                   "order_time", "restaurant_location", "vehicle_route_to_customer_pos"]
}

raw_feats = [
    "location_x", "location_y",
    "restaurant_location_x", "restaurant_location_y",
    "etd", 
    "atd", 
    "order_time", 
    "max_pre_shift", 
    "max_post_shift", 
    "restaurant_queue"]

mask = pd.DataFrame()
feats = pd.DataFrame()

# First, add used raw features to feats
for feat in raw_feats:
    feats[feat] = data[feat]

# Craft features and add to feats
for key,value in query_strings.items():
    
    needed_columns = [col for col in data.columns if any(x in col for x in value)]
    inp = data[needed_columns]
    
    if key == "n_stops":
        for col in inp:
            mask[col] = (inp[col] > 0) & (inp[col] < 3)
            feats[key] = mask.sum(axis=1)
    
    if key == "n_pickup_stops": 
        for col in inp:
            mask[col] = inp[col] == 1
            feats[key] = mask.sum(axis=1)
    
    if key == "n_delivery_stops": 
        for col in inp:
            mask[col] = inp[col] == 2
            feats[key] = mask.sum(axis=1)

In [None]:
feats.to_csv(CF_PATH, sep=";")

In [7]:
crafted_features = pd.read_csv(CF_PATH, sep=";", index_col=[0])
crafted_features

Unnamed: 0,location_x,location_y,restaurant_location_x,restaurant_location_y,etd,atd,order_time,max_pre_shift,max_post_shift,restaurant_queue,n_stops,n_pickup_stops,n_delivery_stops
0,4.562,8.310,4.996,10.340,584,583,567,0,0,8,2,1,1
1,13.234,3.943,7.906,3.838,610,613,587,0,0,8,2,1,1
2,7.633,7.270,4.996,10.340,623,623,602,0,0,8,2,1,1
3,7.664,7.434,3.521,9.430,639,641,617,0,0,8,2,1,1
4,12.930,6.020,6.766,6.617,645,646,618,0,0,8,2,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
850464,4.098,11.200,3.521,9.430,1225,1224,1191,11,0,8,4,1,3
850465,13.110,4.996,7.050,10.210,1246,1246,1200,9,0,8,4,1,3
850466,12.510,2.494,9.734,5.387,1242,1244,1202,12,0,8,4,1,3
850467,0.903,10.480,9.734,5.387,1247,1246,1207,14,0,11,2,1,1


In [8]:
X = data.loc[:, data.columns != 'atd']
y = data['atd'] - data['etd']

#Variation in rows

X_train, X_test, y_train , y_test = train_test_split(X,y, train_size=0.8)

X_crafted = crafted_features.loc[:, crafted_features.columns != 'atd']
y_crafted = crafted_features['atd'] - crafted_features['etd']

X_train_c, X_test_c, y_train_c , y_test_c = train_test_split(X_crafted,y_crafted, train_size=0.8)

In [None]:
# Encoding categorical variables ("insertion index")
#--> Later, when Florentins features are crafted

In [None]:
# Data description
def plot_histogram(x):
    plt.hist(x, color='gray', alpha=0.5)
    plt.title(f"Histogram of {x.name}")
    plt.xlabel("Value")
    plt.ylabel("Frequency")
    plt.show()

# Ensemble learning

### Set parameters for each model

In [None]:
params = {
    "Gradient Boosting Trees" : {
        "boosting_type" : "gbdt",
        "objective" : "regression",
        "learning_rate" : 0.02,
        "random_state" : 42,
    },
}

In [None]:
train_set = lgb.Dataset(X_train,y_train)

In [None]:
evals_result = {}
trained_models = []

for model in params:
    bst = lgb.cv(
        params[model],
        train_set,
        num_boost_round = 1000,
        verbose_eval = 1,
        seed = 42,
        return_cvbooster = True,
        stratified=False,
    )
    trained_models.append(bst)

## Test 1: How many samples are enough?

In [None]:
def best_iteration(evals_result):
    iterations = dict(evals_result["valid_0"])["l2"]
    small = iterations[0]
    for i in iterations:
        if small > i:
            small = i

    return small

In [None]:
sample_sizes = np.arange(start=1000, stop=101000, step=1000)
results = []
plots = []

evals_result = {}

for rows in sample_sizes:
    print(rows)
    train_set = lgb.Dataset(X_train[:rows],y_train[:rows])
    val_set = lgb.Dataset(X_test[:rows], y_test[:rows], reference=train_set)

    bst = lgb.train(
        params[model],
        train_set=train_set,
        num_boost_round = 1000,
        valid_sets=[val_set, train_set],
        evals_result = evals_result,
        early_stopping_rounds = 10,
        verbose_eval = 5,
    )
    results.append(best_iteration(evals_result=evals_result))
plt.xlabel("Sample size")
plt.ylabel("MSE")
plt.plot(sample_sizes, results)
plt.savefig(f"Plots/{model}.png")

In [47]:
# Always scale the input. The most convenient way is to use a pipeline.
reg = make_pipeline(StandardScaler(),
                    SGDRegressor(
                        max_iter=1000,
                        validation_fraction=0.2,
                        learning_rate="adaptive",
                        verbose = 1)
                   )
reg.fit(X_train, y_train)
mean_squared_error(y_test, reg.predict(X_test))

-- Epoch 1
Norm: 4326842169894.05, NNZs: 100, Bias: -82825836039.884872, T: 680375, Avg. loss: 28441814539279055642951680.000000
Total training time: 1.56 seconds.
-- Epoch 2
Norm: 4054256958398.07, NNZs: 100, Bias: -122436914096.405624, T: 1360750, Avg. loss: 22155589562228262517604352.000000
Total training time: 3.12 seconds.
-- Epoch 3
Norm: 5329718983567.07, NNZs: 100, Bias: -78427853706.773514, T: 2041125, Avg. loss: 19702355011430288581459968.000000
Total training time: 4.65 seconds.
-- Epoch 4
Norm: 7350266750915.65, NNZs: 100, Bias: 7022786797.266507, T: 2721500, Avg. loss: 21556510630089362394578944.000000
Total training time: 6.18 seconds.
-- Epoch 5
Norm: 5273174973916.81, NNZs: 100, Bias: 79780818794.753128, T: 3401875, Avg. loss: 28492644082738067201327104.000000
Total training time: 7.79 seconds.
-- Epoch 6
Norm: 3938492942944.25, NNZs: 100, Bias: -37944529179.303841, T: 4082250, Avg. loss: 37022641980404752080437248.000000
Total training time: 9.65 seconds.
-- Epoch 7
No

KeyboardInterrupt: 

In [None]:
crafted_set = lgb.Dataset(X_crafted,y_crafted)

trained_models = []
for model in params:
    bst = lgb.cv(
        params[model],
        train_set,
        num_boost_round = 500,
        early_stopping_rounds = 10,
        verbose_eval = 5,
        seed = 42,
        return_cvbooster = True,
    )
    trained_models.append(bst)

# NEURAL NETWORK (Pytorch or Tensorflow)

### TODO:
- Experiment with different architectures and techniques (i.e. MLPs, RNNs (?) , ...) 

# 1. Model definition

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CancelOut(nn.Module):
    '''
    CancelOut Layer
    
    x - an input data (vector, matrix, tensor)
    '''

    def __init__(self, inp, *kargs, **kwargs):
        super(CancelOut, self).__init__()
        self.weights = nn.Parameter(torch.zeros(inp, requires_grad=True) + 4)

    def forward(self, x):
        return x * torch.sigmoid(self.weights.float())

class Autoencoder(torch.nn.Module):
    def __init__(self, n_features, n_hidden, n_encode):
        super(Autoencoder, self).__init__()
        self.hidden_enc = nn.Linear(n_features, n_hidden)
        self.encode = nn.Linear(n_hidden, n_encode)
        self.hidden_dec = nn.Linear(n_encode, n_hidden)
        self.decode = nn.Linear(n_hidden, n_features)

    def forward(self, x):
        x = F.leaky_relu(self.hidden_enc(x))
        x = F.leaky_relu(self.encode(x))
        x = F.leaky_relu(self.hidden_dec(x))
        x = torch.sigmoid(self.decode(x))

        return x
    
class Model(torch.nn.Module):
    def __init__(self, n_features, n_hidden, n_output):
        super(Model, self).__init__()
        self.dropout = nn.Dropout(p=0.2)
        self.hidden = nn.Linear(n_features, n_hidden)
        self.dropout = nn.Dropout(p=0.2)
        self.predict = nn.Linear(n_hidden, n_output)
    
    def forward(self, x):
        x = F.leaky_relu(self.dropout(x))
        x = F.leaky_relu(self.hidden(x))
        x = F.leaky_relu(self.dropout(x))
        x = self.predict(x)

        return x

# 2. Training loop

In [None]:
from torch import nn, optim
from utils import *

#Hyperparameter values DL
LR = 0.001
EARLY_STOPPING_PATIENCE = 10

#reproducibility
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Importing data.")
feature_list = [f for f in list(data.columns.values) if f != 'atd']

etd_dataset = ETDData(data=data, feature_list=feature_list)
split = DataSplit(etd_dataset, shuffle=True)
trainloader, _, testloader = split.get_split(batch_size=50, num_workers=8)

print("Data imported.")
print("Instanciating model.")
n_features = len(feature_list)
n_hidden = math.ceil(n_features * (1 / 2))
n_hidden_2 = math.ceil(n_hidden * (1 / 2))
n_encode = math.ceil(n_hidden_2 * (1 / 2))

ae = Autoencoder(
        n_features=n_features,
        n_hidden=n_hidden,
        n_encode=n_encode,
    )

ae.to(device)
criterion = nn.MSELoss()  # define your loss function and optimizer
optimizer = optim.AdamW(ae.parameters(), lr=LR)

print("Start training.")
train_losses = []
test_losses = []
early_stopping = EarlyStopping(patience=EARLY_STOPPING_PATIENCE,
                                verbose=True)  # TODO: Define your early stopping

epochs = 100  # How many epochs do you want to train?
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in trainloader:
        # get the inputs; data is a list of [inputs, labels]
        inputs = inputs.float().to(device)
        #labels = labels.float().view(-1, 1).to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = ae(inputs)
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
    test_loss = 0
    ae.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.float().to(device)
            #labels = labels.float().view(-1, 1).to(device)
            logps = ae.forward(inputs)
            batch_loss = criterion(logps, inputs)
            test_loss += batch_loss.item()
    train_losses.append(running_loss / len(trainloader))
    test_losses.append(test_loss / len(testloader))
    print(f"Epoch {epoch}/{epochs}.. "
            f"Train loss: {running_loss / len(trainloader):.3f}.. "
            f"Test loss: {test_loss / len(testloader):.3f}.. ")
    early_stopping(test_loss / len(testloader), ae)
    if early_stopping.early_stop:
        print("Early stopping")
        break
    ae.train()

print('Finished Training')

In [None]:
ae.load_state_dict(torch.load('checkpoint.pt'))
torch.save(ae, 'perceptron.pth')