In [None]:
## Import required libraries ##
import sys, os, pickle
sys.path.append(os.path.abspath('../tools'))
# Third party imports
import pandas as pd
from sklearn.svm import SVR
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
# Local imports
import preprocess as pre

In [None]:
## Choose the route whose model will be trained ##
# Placeholder: fill in the route name before running the cells below;
# it is substituted into both the input CSV path and the model output path.
route = ""

In [None]:
## Derive the input and output locations from the selected route ##
# Where the trained model gets written (kept relative to the working directory).
outputPath = f'../models/{route}/supportVector'
# The route's CSV, resolved to an absolute path.
relativeInputPath = f'../../data/{route}.csv'
inputPath = os.path.abspath(relativeInputPath)

In [None]:
## Load the route CSV and run the preprocessing pipeline ##
# Each step receives the frame produced by the previous one; the helpers live
# in the local `preprocess` module.
df = (
    pd.read_csv(inputPath)
    .pipe(pre.convertTime)
    .pipe(pre.calculateETA)
    .pipe(pre.encodeTime, 5)
    .drop(columns='Heading')
)
# Report the number of missing values per column.
print(df.isna().sum())

In [None]:
## Perform one hot encoding of categorical variables ##
categorical_cols = ['Vehicle_id', 'Next_stop']
encoder = OneHotEncoder(handle_unknown='ignore')
# Build the encoded frame on df's own index. A fresh 0..n-1 index only lines up
# with df when df still has a pristine RangeIndex; otherwise the index-based
# combine pairs rows wrongly or fills the encoded columns with NaN.
encoder_df = pd.DataFrame(
    encoder.fit_transform(df[categorical_cols]).toarray(),
    index=df.index,
)
# Column-wise concat on the shared index keeps one output row per input row.
df = pd.concat([df, encoder_df], axis=1)
# The raw categorical columns are now represented by the encoded ones.
df = df.drop(columns=categorical_cols)
# Optional: df = df.dropna()

In [None]:
## Separate the ETA target from the feature columns ##
is_target = df.columns == 'ETA'
# Every column except ETA becomes a feature.
X = df.loc[:, ~is_target].values
# Boolean column selection yields a frame, so y is a 2-D array.
y = df.loc[:, is_target].values

In [None]:
## Hold out a test set ##
# shuffle=False keeps row order, so the last 20% of the data becomes the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    shuffle=False,
)

In [None]:
## Standardize the features ##
sc_X = StandardScaler()
sc_y = StandardScaler()
# Learn the scaling statistics from the training features only, then apply the
# same transformation to both splits.
sc_X.fit(X_train)
X_train, X_test = sc_X.transform(X_train), sc_X.transform(X_test)
# Target scaling is currently disabled, so sc_y is created but not fitted here:
# y_train = sc_y.fit_transform(y_train)
# y_test = sc_y.transform(y_test)

In [None]:
## Train SVR model on the training set ##
# RBF-kernel support vector regressor with fixed hyperparameters.
regressor = SVR(kernel='rbf', C=10, gamma=0.1, epsilon=0.1)
# y_train is a single-column 2-D array; SVR.fit expects a 1-D target and emits a
# DataConversionWarning for column vectors, so flatten it first.
regressor.fit(X_train, y_train.ravel())

In [None]:
## Predict ETA values for the held-out test features ##
y_pred = regressor.predict(X=X_test)
# Show the raw predictions.
print(y_pred)

In [None]:
## Evaluate the predictions against the test targets ##
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# Print each metric on its own line, in the order computed above.
for label, value in (
    ('R-squared score:', r2),
    ('Mean absolute error:', mae),
    ('Mean squared error:', mse),
):
    print(label, value)

In [None]:
## Save trained model ##
# Create the route's model directory if it does not exist yet; otherwise
# open() fails with FileNotFoundError for a route that has no folder.
outputDir = os.path.dirname(outputPath)
if outputDir:
    os.makedirs(outputDir, exist_ok=True)
# The context manager guarantees the file is flushed and closed, even if
# pickling raises part-way through.
with open(outputPath, 'wb') as modelFile:
    pickle.dump(regressor, modelFile)