In [None]:
import pathlib
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn import svm
from sklearn import model_selection
from statsmodels.tools.eval_measures import mse
from sklearn.metrics import mean_absolute_error
import keras.layers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.options.display.float_format = '{:.5f}'.format
from datetime import datetime
from google.colab import files
import re
from sklearn import preprocessing
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import KFold
from sklearn.utils import shuffle



In [None]:
# Load the base dataset from /content/sample_data into a DataFrame.
datasetbig = pd.read_csv("/content/sample_data/base_dataset.csv")
# Set this to True when working with the extra dataset; the helper functions
# then also handle average_collision_amount and timeout_collision_rate.
bol_extra_dataset=False

In [None]:
class PrintDot(keras.callbacks.Callback):
  """Keras callback that prints one dot per finished epoch.

  Whenever the epoch index is a multiple of 100 a line break is emitted
  before the dot, so the dots are grouped in rows of 100.
  """

  def on_epoch_end(self,epoch,logs):
    starts_new_row = epoch % 100 == 0
    if starts_new_row:
      print("")
    print(".",end="")

def popAndGetPredictionLabels(bol_extra_dataset,train_dataset):
  """Remove the label columns from ``train_dataset`` and return them.

  The columns are popped, so ``train_dataset`` is modified in place and only
  keeps its remaining columns afterwards.

  Args:
    bol_extra_dataset: if it compares equal to ``False`` the five base labels
      are extracted, otherwise the seven labels of the extra dataset.
    train_dataset: DataFrame that contains the label columns.

  Returns:
    A list of ``[column_name, popped_series]`` pairs, in a fixed order.
  """
  if bol_extra_dataset == False:
    label_columns = (
      "success_rate",
      "collision_rate",
      "timeout_rate",
      "average_path_length",
      "average_time_diff",
    )
  else:
    label_columns = (
      "success_rate",
      "collision_rate",
      "average_collision_amount",
      "timeout_rate",
      "timeout_collision_rate",
      "average_path_length",
      "average_time_diff",
    )
  # Pop in list order so the returned pairs keep the documented ordering.
  return [[column, train_dataset.pop(column)] for column in label_columns]

def get_group(dataset, performanceMetric):
  """Return the rows of ``dataset`` whose "Label" column equals ``performanceMetric``."""
  label_matches = dataset["Label"] == performanceMetric
  return dataset.loc[label_matches]

def get_best_of(dataset,performanceMetric,measure):
  """Return the row(s) of one label group with the smallest ``measure``.

  Args:
    dataset: results frame with a "Label" column and a ``measure`` column.
    performanceMetric: value of "Label" that selects the group.
    measure: name of the column to minimise.

  Returns:
    Every row of the group whose ``measure`` equals the group minimum
    (several rows when the minimum is tied).
  """
  candidates = get_group(dataset,performanceMetric)
  lowest_value = candidates[measure].min()
  is_lowest = candidates[measure] == lowest_value
  return candidates[is_lowest]

def norm(dataset, reference=None):
  """Min-max scale the columns of ``dataset``.

  Each column is transformed as ``(value - min) / (max - min)``, where min and
  max are taken from the ``describe()`` summary of ``reference``.

  Args:
    dataset: DataFrame to scale.
    reference: optional DataFrame that supplies the per-column min and max,
      e.g. the training split when a test split has to be scaled with the
      training statistics. Defaults to ``dataset`` itself.

  Returns:
    A new DataFrame with the scaled values. A column whose min equals its max
    is divided by zero and therefore ends up as NaN/inf.
  """
  if reference is None:
    reference = dataset
  stats = reference.describe().transpose()
  column_min = stats["min"]
  column_range = stats["max"] - column_min
  return (dataset - column_min) / column_range

def is_unique(s):
    """Return True when every element of ``s`` equals its first element.

    ``s`` must offer ``to_numpy()`` and hold at least one element, because the
    first element is used as the comparison value.
    """
    values = s.to_numpy()
    first_value = values[0]
    matches_first = first_value == values
    return matches_first.all()

def checkForConstants(dataset):
  """Drop every column that holds one repeated value.

  Each dropped column name is reported with a "Dropping" message.

  Args:
    dataset: DataFrame to inspect.

  Returns:
    The DataFrame without its constant columns; the input object itself when
    nothing had to be dropped.
  """
  # The iterator is bound to the frame passed in, so rebinding ``dataset``
  # inside the loop does not disturb the iteration.
  for column in dataset:
    column_is_constant = is_unique(dataset[column]) == True
    if not column_is_constant:
      continue
    print("Dropping", column)
    dataset = dataset.drop(columns=column)
  return dataset

def getMeans(bol_extra_dataset, dataset):
  """Compute the mean of every label column.

  Args:
    bol_extra_dataset: if it compares equal to ``False`` the five base labels
      are used, otherwise the seven labels of the extra dataset.
    dataset: DataFrame that contains the label columns.

  Returns:
    A list of ``["mean_<column>", mean_value]`` pairs, in a fixed order.
  """
  if bol_extra_dataset == False:
    label_columns = (
      "success_rate",
      "collision_rate",
      "timeout_rate",
      "average_path_length",
      "average_time_diff",
    )
  else:
    label_columns = (
      "success_rate",
      "collision_rate",
      "average_collision_amount",
      "timeout_rate",
      "timeout_collision_rate",
      "average_path_length",
      "average_time_diff",
    )
  return [["mean_" + column, dataset[column].mean()] for column in label_columns]

def get_numpy_labels(bol_extra_dataset,dataset):
  """Slice the label columns out of a 2-D array by position.

  The labels are read from consecutive columns starting at index 2: five
  columns when ``bol_extra_dataset`` compares equal to ``False`` (success_rate,
  collision_rate, timeout_rate, average_path_length, average_time_diff), seven
  otherwise (average_collision_amount and timeout_collision_rate are added at
  positions 3 and 5 of the list).
  NOTE(review): this relies on the column order of the array passed in —
  confirm the labels really sit at these positions.

  Args:
    bol_extra_dataset: selects the five- or seven-label layout.
    dataset: 2-D NumPy array with the label columns.

  Returns:
    A list of arrays, one per label, each a single-column 2-D copy of its
    column (shape ``(rows, 1)``).
  """
  first_label_column = 2
  if bol_extra_dataset == False:
    label_count = 5
  else:
    label_count = 7
  return [
    np.hstack([dataset[:, column:column + 1]])
    for column in range(first_label_column, first_label_column + label_count)
  ]

In [None]:
# Shuffle the rows with a fixed random_state so the resulting order is reproducible.
datasetbig=shuffle(datasetbig,random_state=0)

In [None]:
# Show the shuffled frame as the cell output.
datasetbig

In [None]:
# Remove the column limit so displayed DataFrames show every column.
pd.set_option('display.max_columns', None)

In [None]:
# Row subsets: one per indicator column, keeping the rows where it equals 1.
rlca = datasetbig.loc[datasetbig["rlca"] == 1]
crowdnav = datasetbig.loc[datasetbig["crowdnav"] == 1]
dwa = datasetbig.loc[datasetbig["dwa"] == 1]

# Same selection for the two map-type indicator columns.
indoor = datasetbig.loc[datasetbig["indoor_map_type"] == 1]
outdoor = datasetbig.loc[datasetbig["outdoor_map_type"] == 1]

In [None]:
# Choose the dataset to work with; here the rows with outdoor_map_type == 1.
dataset = outdoor

In [None]:
# Remove the "teb" column. The cell rebinds `dataset`, so running it a second
# time without re-running the cell above raises a KeyError.
dataset = dataset.drop(columns=["teb"])

In [None]:
# Mean of every label column over the whole selected dataset
# (computed before the train/test split).
means = getMeans(bol_extra_dataset,dataset)

In [None]:
# Drop the columns that hold a single repeated value.
dataset = checkForConstants(dataset)

In [None]:
# NOTE(review): `nump` is not referenced anywhere else in the visible notebook.
nump=dataset["robot_max_speed"].to_numpy()

In [None]:
# Random 80 % sample (fixed seed) as training split ...
train_dataset = dataset.sample(frac=0.8, random_state = 0)
# ... and every row that was not sampled as test split.
test_dataset = dataset.drop(train_dataset.index)

In [None]:
# NumPy versions of both splits, taken while the label columns are still part
# of the frames.
train_dataset_array = np.array(train_dataset)
test_dataset_array = np.array(test_dataset)

# Label columns sliced out of those arrays by position; each entry is a
# single-column 2-D array.
output_train_array=get_numpy_labels(bol_extra_dataset,train_dataset_array)
output_test_array=get_numpy_labels(bol_extra_dataset,test_dataset_array)

In [None]:
# Pop the label columns by name. Both frames are modified in place and only
# keep their non-label columns, so this cell cannot be run twice in a row.
train_labels = popAndGetPredictionLabels(bol_extra_dataset,train_dataset)
test_labels = popAndGetPredictionLabels(bol_extra_dataset,test_dataset)

Scaling data via standard scaler

In [None]:
# Fit the scaler on the training features only and reuse its mean/variance for
# the test features. Fitting a second scaler on the test split would map the
# two splits with different transformations, so the model would be evaluated
# on inputs scaled differently from the ones it was trained on.
feature_scaler_std = preprocessing.StandardScaler().fit(train_dataset)
normed_train_data_std = feature_scaler_std.transform(train_dataset)
normed_test_data_std = feature_scaler_std.transform(test_dataset)

Scaling via formula (min-max normalization)

In [None]:
normed_train_data_form=norm(train_dataset)

# Scale the test features with the min/max of the TRAINING features (the same
# describe()-based statistics norm() uses), so both splits share one mapping.
# Test values outside the training range therefore fall outside [0, 1].
train_feature_stats = train_dataset.describe().transpose()
train_feature_min = train_feature_stats["min"]
train_feature_range = train_feature_stats["max"] - train_feature_min
normed_test_data_form=(test_dataset-train_feature_min)/train_feature_range

# Support Vector Machine

In [None]:
# Select which scaled feature set the following cells use.
# Exactly one of the two flags must be True.
formular = True
standard_scaler = False

if formular and not standard_scaler:
  # min-max scaled features
  normed_train_data = normed_train_data_form
  normed_test_data = normed_test_data_form
elif standard_scaler and not formular:
  # StandardScaler features
  normed_train_data = normed_train_data_std
  normed_test_data = normed_test_data_std
else:
  # Without this branch the variables would silently stay undefined, or keep
  # the value of an earlier run of this cell.
  raise ValueError("Set exactly one of `formular` / `standard_scaler` to True.")



In [None]:
# Result table for the SVR runs; the grid-search cell appends one row per
# fitted model. Re-run this cell to start from an empty table again.
SupportVectorPervormance = pd.DataFrame()

In [None]:
# Grid search: fit one SVR per (kernel, epsilon, label) combination and record
# its metrics on the test split.
# SVR's `shrinking` option stays at its default (True) to save training time.

kernels = ["rbf"]
epsilons = [0.1,0.2,0.3,0.4]

# Loop-invariant sizes for the adjusted R2. np.shape works for the DataFrame
# produced by norm() and for the ndarray produced by StandardScaler, whereas
# `.columns` only exists on a DataFrame.
# NOTE(review): n is the number of training rows although R2 is measured on
# the test split — confirm that this is the intended sample size.
n, x = np.shape(normed_train_data)

# One dict per fitted model; turned into a DataFrame once after the loops
# instead of growing the result frame with concat on every iteration.
performance_rows = []

for kernel in kernels:
  for epsilon in epsilons:
    for label_idx in range(len(train_labels)):

      if kernel == "poly2":
        h1 = svm.SVR(kernel = "poly",degree = 2, epsilon = epsilon)
      elif kernel == "poly3":
        h1 = svm.SVR(kernel = "poly",degree = 3, epsilon = epsilon)
      else:
        h1 = svm.SVR(kernel = kernel, epsilon = epsilon)

      h1.fit(normed_train_data, train_labels[label_idx][1])
      prediction = h1.predict(normed_test_data)

      # Ground truth as a 1-D array, selected by column name exactly like the
      # training target. The positional slices in output_test_array have shape
      # (rows, 1); subtracting the 1-D prediction from them broadcasts to a
      # rows x rows matrix and inflates MSE, RMSE and SSE.
      y_true = np.asarray(test_labels[label_idx][1]).ravel()

      # Constant "always predict the dataset mean" baseline, one value per test row.
      mean_prediction = np.full(len(y_true), means[label_idx][1])

      # calculating performance measures
      prediction_R2  = r2_score(y_true,prediction)
      prediction_MSE = mean_squared_error(y_true,prediction)
      prediction_MAE = mean_absolute_error(y_true,prediction)
      # NOTE(review): this is the distance between the mean baseline and the
      # model prediction, not the error of the baseline against the ground
      # truth — confirm that this is the intended quantity.
      MAE_mean = mean_absolute_error(mean_prediction,prediction)

      Adjusted_R2=1-((1-prediction_R2)*(n-1))/(n-x-1)
      SSE = np.sum((y_true - prediction) ** 2)
      RMSE = prediction_MSE**0.5
      MAPE = mean_absolute_percentage_error(y_true,prediction)

      performance_rows.append({
          "Datapoints": len(train_dataset.index),
          "Kernel" : kernel,
          "Label" : train_labels[label_idx][0],
          "epsilon" : epsilon,
          "base_mean": means[label_idx][1],
          "R2": prediction_R2,
          "adj.R2": Adjusted_R2,
          "MSE": prediction_MSE,
          "MAE": prediction_MAE,
          "SSE": SSE,
          "RMSE": RMSE,
          "MAPE": MAPE,
          "MAE_mean": MAE_mean
      })

# Append this run to the rows already collected; ignore_index gives every row
# its own index label instead of repeating 0 for each of them.
SupportVectorPervormance = pd.concat(
    [SupportVectorPervormance, pd.DataFrame(performance_rows)],
    ignore_index=True)

dt = datetime.now()
SupportVectorPervormance.to_csv("/content/sample_data/SupportVectorPervormance "+dt.isoformat()+".csv")


In [None]:
# Available performance metrics (values of the "Label" column):
#   success_rate
#   collision_rate
#   timeout_rate
#   average_path_length
#   average_time_diff
# Only present when bol_extra_dataset is True:
#   average_collision_amount
#   timeout_collision_rate

# Filter for one performance metric and keep the run(s) with the lowest MAE.
result=get_best_of(SupportVectorPervormance,"success_rate","MAE")
result[["base_mean","Kernel", "R2","adj.R2","MAE","MSE","SSE","RMSE","MAPE","MAE_mean"]]

Unnamed: 0,base_mean,Kernel,R2,adj.R2,MAE,MSE,SSE,RMSE,MAPE,MAE_mean
0,120.62382,rbf,-0.08013,-0.11891,75.05352,18499.39763,383603509.21896,136.01249,1.21167,36.14629
