In [None]:
import pathlib
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn import svm
from sklearn import model_selection
from statsmodels.tools.eval_measures import mse
from sklearn.metrics import mean_absolute_error
import keras.layers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.options.display.float_format = '{:.5f}'.format
from datetime import datetime
from google.colab import files
import re
from sklearn import preprocessing
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
from sklearn.utils import shuffle


In [None]:
# Selects which label set the helper functions below work with; set this to
# True when using the extra dataset.
bol_extra_dataset = False

# Load the base dataset from /content/sample_data.
datasetbig = pd.read_csv("/content/sample_data/base_dataset.csv")

In [None]:
class PrintDot(keras.callbacks.Callback):
  """Keras callback that prints one dot per finished epoch as a progress indicator."""

  def on_epoch_end(self, epoch, logs):
    # Start a new output line at epoch 0 and again every 100 epochs.
    starts_new_row = epoch % 100 == 0
    if starts_new_row:
      print("")
    print(".", end="")

def popAndGetPredictionLabels(bol_extra_dataset, train_dataset):
    """Pop the performance-metric columns out of ``train_dataset`` and return them.

    The columns are removed from ``train_dataset`` in place, so afterwards the
    frame only holds the remaining columns.

    Args:
        bol_extra_dataset: ``False`` selects the five base metrics; any other
            value additionally pops ``average_collision_amount`` and
            ``timeout_collision_rate``.
        train_dataset: DataFrame holding the metric columns (mutated).

    Returns:
        A list of ``[column_name, column_series]`` pairs in pop order.
    """
    if bol_extra_dataset == False:
        label_columns = (
            "success_rate",
            "collision_rate",
            "timeout_rate",
            "average_path_length",
            "average_time_diff",
        )
    else:
        label_columns = (
            "success_rate",
            "collision_rate",
            "average_collision_amount",
            "timeout_rate",
            "timeout_collision_rate",
            "average_path_length",
            "average_time_diff",
        )
    return [[column, train_dataset.pop(column)] for column in label_columns]

def get_group(dataset, performanceMetric):
    """Return the rows of ``dataset`` whose ``"Label"`` column equals ``performanceMetric``."""
    matches_metric = dataset["Label"] == performanceMetric
    return dataset.loc[matches_metric]

def get_best_of(dataset, performanceMetric, measure):
    """Return the rows for ``performanceMetric`` with the smallest ``measure`` value.

    Rows are first restricted via ``get_group``; every row tied for the
    minimum of the ``measure`` column is returned.
    """
    group = get_group(dataset, performanceMetric)
    lowest = group[measure].min()
    return group[group[measure] == lowest]

def norm(dataset):
    """Min-max scale every column of ``dataset`` using its own min and max.

    Each column is mapped with ``(value - min) / (max - min)``. A constant
    column (``max == min``) is mapped to 0.0 instead of NaN, because 0/0 would
    otherwise put NaNs into the model input.

    Args:
        dataset: DataFrame with numeric columns.

    Returns:
        A new DataFrame with the same index and columns as ``dataset``.
    """
    col_min = dataset.min()
    col_range = dataset.max() - col_min
    # The numerator of a constant column is all zeros, so dividing by 1
    # instead of 0 yields 0.0 rather than NaN.
    safe_range = col_range.where(col_range != 0, 1)
    return (dataset - col_min) / safe_range

def is_unique(s):
    """Return whether every value in ``s`` equals its first value.

    Despite the name, this tests for a *constant* column, not for distinct
    values. ``s`` must contain at least one element.
    """
    values = s.to_numpy()
    first_value = values[0]
    matches_first = first_value == values
    return matches_first.all()

def checkForConstants(dataset):
    """Drop every column of ``dataset`` that ``is_unique`` reports as constant.

    Prints ``Dropping <column>`` for each removed column. When nothing is
    constant the input frame itself is returned; otherwise a new frame
    without the constant columns is returned.
    """
    constant_columns = []
    for column in dataset:
        if is_unique(dataset[column]) == True:
            print("Dropping", column)
            constant_columns.append(column)
    if not constant_columns:
        return dataset
    return dataset.drop(columns=constant_columns)

def getMeans(bol_extra_dataset, dataset):
    """Compute the mean of every performance-metric column in ``dataset``.

    Args:
        bol_extra_dataset: ``False`` selects the five base metrics; any other
            value additionally includes ``average_collision_amount`` and
            ``timeout_collision_rate``.
        dataset: DataFrame containing the metric columns (not modified).

    Returns:
        A list of ``["mean_<column>", mean_value]`` pairs, one per metric.
    """
    if bol_extra_dataset == False:
        metric_columns = (
            "success_rate",
            "collision_rate",
            "timeout_rate",
            "average_path_length",
            "average_time_diff",
        )
    else:
        metric_columns = (
            "success_rate",
            "collision_rate",
            "average_collision_amount",
            "timeout_rate",
            "timeout_collision_rate",
            "average_path_length",
            "average_time_diff",
        )
    return [["mean_" + column, dataset[column].mean()] for column in metric_columns]

def get_numpy_labels(bol_extra_dataset, dataset):
    """Slice the label columns out of a 2-D array as separate column vectors.

    The labels are taken by position, starting at column index 2: five
    consecutive columns when ``bol_extra_dataset`` is ``False``, seven
    otherwise.
    NOTE(review): this assumes the label columns sit at these positions in
    the frame the array was built from - confirm against the CSV layout.

    Args:
        bol_extra_dataset: ``False`` for the base label set, anything else for
            the extended one.
        dataset: 2-D NumPy array (rows are samples).

    Returns:
        A list of ``(n_rows, 1)`` arrays, one per label, in column order.
    """
    first_label_column = 2
    label_count = 5 if bol_extra_dataset == False else 7
    label_array = []
    for column in range(first_label_column, first_label_column + label_count):
        # Slicing with column:column + 1 keeps the result two-dimensional.
        label_array.append(np.hstack([dataset[:, column:column + 1]]))
    return label_array

In [None]:
# Shuffle the rows reproducibly (fixed seed).
datasetbig = shuffle(datasetbig, random_state=0)

In [None]:
# Show the shuffled dataset as the notebook cell output.
datasetbig

In [None]:
# Lift pandas' column display limit so wide frames are printed in full.
pd.set_option('display.max_columns', None)

In [None]:
# Row subsets where the corresponding indicator column equals 1.
rlca = datasetbig.loc[datasetbig["rlca"] == 1]
crowdnav = datasetbig.loc[datasetbig["crowdnav"] == 1]

indoor = datasetbig.loc[datasetbig["indoor_map_type"] == 1]
outdoor = datasetbig.loc[datasetbig["outdoor_map_type"] == 1]

In [None]:
# choose dataset to work with
# (the complete shuffled dataset; the subsets built above can be assigned
# here instead)
dataset = datasetbig

In [None]:
# Remove the "teb" column. drop() returns a new frame, so datasetbig itself
# keeps the column.
dataset = dataset.drop(columns=["teb"])

In [None]:
# Mean of each label column over the whole dataset (computed before the
# train/test split); reported later as "base_mean".
means = getMeans(bol_extra_dataset,dataset)

In [None]:
# Drop columns that hold a single repeated value.
dataset = checkForConstants(dataset)

In [None]:
# NumPy copy of the "robot_max_speed" column.
# NOTE(review): nump is not referenced again in the visible part of the
# notebook - confirm whether it is still needed.
nump=dataset["robot_max_speed"].to_numpy()

In [None]:
# Derived feature: number of dynamic obstacles times the average obstacle size.
# NOTE(review): "average_obstalce_size" is presumably the spelling used in
# the CSV header - confirm before renaming.
dataset["dyn_obstacle_occupation"] = dataset["number_dynamic_obstacles"]*dataset["average_obstalce_size"]

In [None]:
# Reproducible 80/20 split: sample 80 % of the rows for training and keep
# every row that was not sampled for testing.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

In [None]:
# NumPy copies of both splits, taken while the label columns are still part
# of the frames (they are popped in a later cell).
train_dataset_array, test_dataset_array = (
    np.array(frame) for frame in (train_dataset, test_dataset)
)

# Label columns as separate (n_rows, 1) arrays, selected by column position.
output_train_array, output_test_array = (
    get_numpy_labels(bol_extra_dataset, array)
    for array in (train_dataset_array, test_dataset_array)
)

In [None]:
# Move the label columns out of both splits; afterwards train_dataset and
# test_dataset no longer contain them.
train_labels = popAndGetPredictionLabels(
    bol_extra_dataset=bol_extra_dataset, train_dataset=train_dataset
)
test_labels = popAndGetPredictionLabels(
    bol_extra_dataset=bol_extra_dataset, train_dataset=test_dataset
)

Scaling data via Standard Scaler

In [None]:
# Fit the scaler on the training split only and reuse its mean/std for the
# test split. Fitting a second scaler on the test data would scale the two
# splits with different statistics, so the same raw feature value would map
# to different model inputs at train and test time.
std_scaler = preprocessing.StandardScaler().fit(train_dataset)
normed_train_data_std = std_scaler.transform(train_dataset)
normed_test_data_std = std_scaler.transform(test_dataset)

Scaling data via Formula

In [None]:
# Min-max scale the training split with its own column min/max.
normed_train_data_form = norm(train_dataset)

# Scale the test split with the *training* min/max rather than its own, so
# both splits share one mapping from raw values to model inputs. Test values
# outside the training range therefore fall outside [0, 1].
train_feature_min = train_dataset.min()
train_feature_range = train_dataset.max() - train_feature_min
normed_test_data_form = (test_dataset - train_feature_min) / train_feature_range

# Linear Regression

In [None]:
# Linear regression only needs a train and a test set; use the min-max
# scaled variants.
normed_train_data, normed_test_data = normed_train_data_form, normed_test_data_form

In [None]:
# Rank the candidate features by absolute correlation with each label.
dataset_corr = dataset.corr()

# Label columns in the same order that popAndGetPredictionLabels and getMeans
# use, so position i in dictList matches position i in train_labels / means.
# Previously the five base labels were hard-coded; with the extra dataset the
# extra label columns then ended up in the feature list.
if bol_extra_dataset == False:
  label_columns = ["success_rate","collision_rate","timeout_rate",
                   "average_path_length","average_time_diff"]
else:
  label_columns = ["success_rate","collision_rate","average_collision_amount",
                   "timeout_rate","timeout_collision_rate",
                   "average_path_length","average_time_diff"]

# Entries that must not appear as features: the labels themselves plus
# "width" and "height".
keyList = label_columns + ["width","height"]

# One correlation series per label with the excluded entries removed.
# errors="ignore" tolerates excluded columns that are no longer in the
# dataset (e.g. removed earlier as constant).
corr_by_label = {
    label: dataset_corr[label].drop(labels=keyList, errors="ignore")
    for label in label_columns
}

# Names kept for the five base labels.
success_rate_dict = corr_by_label["success_rate"]
collision_rate_dict = corr_by_label["collision_rate"]
timeout_rate_dict = corr_by_label["timeout_rate"]
average_path_length_dict  = corr_by_label["average_path_length"]
average_time_diff_dict  = corr_by_label["average_time_diff"]

# Per label: list of (feature, correlation) tuples, strongest absolute
# correlation first.
dictList = [
    sorted(series.items(), key=lambda x:abs(x[1]),reverse=True)
    for series in corr_by_label.values()
]

In [None]:
# Suppress all Python warnings from here on.
# NOTE(review): this also hides deprecation notices from pandas/Keras -
# consider narrowing the filter to specific categories.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Empty result table; the forward-search cell appends one row per trained model.
performance_table_forward_search = pd.DataFrame()

In [None]:
# Greedy forward feature selection with a single-unit linear Keras model.
#
# For every label and every epoch budget the search starts with no features.
# Each round trains one model per remaining candidate (current best set plus
# that candidate), keeps the candidate set with the lowest test MAE and
# repeats until no candidates are left. Every trained model adds one row to
# performance_table_forward_search. The table is never reset, so the CSV
# written for a label also contains the rows of all labels processed before.

# create feature list
dictList2 = dictList

featrueTuple = dictList2[0]

# get only training parameter names
feature_List = [i[0] for i in featrueTuple]

epochs = [200,500,1000]
for idx_dictList in range(0,len(dictList)):
  label_name = train_labels[idx_dictList][0]
  y_train = train_labels[idx_dictList][1]
  # 1-D float targets. predict() returns shape (n, 1); comparing that against
  # a 1-D target would broadcast to an (n, n) matrix and silently corrupt the
  # squared-error metrics, so both sides are flattened before scoring.
  y_test = np.asarray(test_labels[idx_dictList][1], dtype=float).ravel()
  base_mean = means[idx_dictList][1]
  n = len(y_test)

  print("##################################")
  print("index", idx_dictList)
  print("##################################")
  for epoch in epochs:
    features = feature_List.copy()
    best_features = []
    while features:
      parameterList = []
      performanceList = []
      for feature in features:
        # candidate = best set found so far + one additional feature
        train_params = best_features + [feature]
        parameterList.append(train_params)

        # get subset of training data
        subset_normed_train_data = normed_train_data[train_params]
        subset_nomred_test_data = normed_test_data[train_params]

        linear_model = keras.Sequential()
        # Create model, input dim is the number of input variables
        linear_model.add(layers.Dense(1, input_dim =len(train_params), activation = "linear"))

        linear_model.compile(loss="mse", optimizer= "rmsprop", metrics= ["mse"])

        # train model
        linear_model.fit(subset_normed_train_data, y_train, epochs = epoch, verbose = 0)

        # make prediction on test data (flattened, see y_test above)
        y_pred = np.asarray(linear_model.predict(subset_nomred_test_data)).ravel()

        # evaluate test data
        meanSquared = mse(y_pred, y_test)
        meanAbsolute = mean_absolute_error(y_test, y_pred)

        x = len(train_params)

        R2 = r2_score(y_test, y_pred)
        # Adjusted R2 is undefined when n - x - 1 == 0; report NaN instead of
        # raising ZeroDivisionError.
        Adjusted_R2 = 1-((1-R2)*(n-1))/(n-x-1) if n-x-1 != 0 else float("nan")
        SSE = np.sum((y_test - y_pred) ** 2)
        RMSE = meanSquared**0.5

        row = {
            "Label": label_name,
            "number_of_features": len(train_params),
            "features": train_params,
            "epochs": epoch,
            "base_mean": base_mean,
            "meanSquared": meanSquared,
            "meanAbsolute": meanAbsolute,
            "R2": R2,
            "adj. R2": Adjusted_R2,
            "SSE": SSE,
            "RMSE": RMSE,
            "Diff_base_MAE": base_mean-meanAbsolute
        }
        # DataFrame.append was removed in pandas 2.0; concatenating a one-row
        # frame is the equivalent. The dict is wrapped in a list so the
        # "features" list stays in a single cell.
        performance_table_forward_search = pd.concat(
            [performance_table_forward_search, pd.DataFrame([row])],
            ignore_index=True
        )

        performanceList.append(meanAbsolute)
        print("##################################")
        print("Finished |",epoch," |",label_name," |",train_params,)
        print("##################################")

      # select best feature space: the candidate set with the lowest test MAE
      index_best_features = performanceList.index(min(performanceList))
      best_features = parameterList[index_best_features]
      # remove the selected features from the remaining candidates
      features = [element for element in features if element not in best_features]
    print("end of while")

  # all epoch budgets done for this label: save and download the table
  dt = datetime.now()
  postfix = dt.isoformat()+" "+ label_name+".csv"
  csv_path = "/content/sample_data/performance_table_linear_regression "+ postfix
  performance_table_forward_search.to_csv(csv_path)
  files.download(csv_path)

##################################
index 0
##################################
##################################
Finished | 200  | success_rate  | ['dyn_obstacle_occupation']
##################################


Linear Regression single run

In [None]:
# Use the min-max scaled splits for the single run.
normed_train_data, normed_test_data = normed_train_data_form, normed_test_data_form

In [None]:
# Start from an empty result table for the single run.
performance_table_forward_search = pd.DataFrame()

In [None]:
# Lift pandas' column display limit so wide frames are printed in full.
pd.set_option('display.max_columns', None)

In [None]:
# Train and evaluate one linear model on a fixed feature set for the label
# selected by idx_of_performance_metric, then save the single result row.
train_params = ['map_size', 'num_static_obstacles', 'robot_max_speed',
                'crowdnav', 'rlca', 'indoor_map_type', 'mean_angle_info',
                'corridor_width', 'robot_radius', 'iterations']
idx_of_performance_metric = 4
epoch = 500
# Follows the selected label instead of a hard-coded index.
label_array = output_test_array[idx_of_performance_metric]

label_name = train_labels[idx_of_performance_metric][0]
y_train = train_labels[idx_of_performance_metric][1]
# 1-D float targets. predict() returns shape (n, 1); comparing that against a
# 1-D target would broadcast to an (n, n) matrix and corrupt the
# squared-error metrics, so both sides are flattened before scoring.
y_test = np.asarray(test_labels[idx_of_performance_metric][1], dtype=float).ravel()
base_mean = means[idx_of_performance_metric][1]

# get subset of training data
subset_normed_train_data = normed_train_data[train_params]
subset_nomred_test_data = normed_test_data[train_params]

linear_model = keras.Sequential()
# Create model, input dim is the number of input variables
linear_model.add(layers.Dense(1, input_dim =len(train_params), activation = "linear"))

linear_model.compile(loss="mse", optimizer= "rmsprop", metrics= ["mse"])

# train model
linear_model.fit(subset_normed_train_data, y_train, epochs = epoch, verbose = 0)

# make prediction on test data (flattened, see y_test above)
y_pred = np.asarray(linear_model.predict(subset_nomred_test_data)).ravel()

# evaluate test data
meanSquared = mse(y_pred, y_test)
meanAbsolute = mean_absolute_error(y_test, y_pred)

n = len(y_test)
x = len(train_params)

R2 = r2_score(y_test, y_pred)
# Adjusted R2 is undefined when n - x - 1 == 0; report NaN instead of raising.
Adjusted_R2 = 1-((1-R2)*(n-1))/(n-x-1) if n-x-1 != 0 else float("nan")
# SSE against the targets of the selected label (it was previously computed
# against label 0 regardless of idx_of_performance_metric).
SSE = np.sum((y_test - y_pred) ** 2)
RMSE = meanSquared**0.5

row = {
    "Label": label_name,
    "number_of_features": len(train_params),
    "features": train_params,
    "epochs": epoch,
    "base_mean": base_mean,
    "meanSquared": meanSquared,
    "meanAbsolute": meanAbsolute,
    "R2": R2,
    "adj. R2": Adjusted_R2,
    "SSE": SSE,
    "RMSE": RMSE,
    "Diff_base_MAE": base_mean-meanAbsolute
}
# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# is the equivalent. The dict is wrapped in a list so the "features" list
# stays in a single cell.
performance_table_forward_search = pd.concat(
    [performance_table_forward_search, pd.DataFrame([row])],
    ignore_index=True
)

dt = datetime.now()

postfix = dt.isoformat()+" "+ label_name+".csv"
performance_table_forward_search.to_csv("/content/sample_data/performance_table_linear_regression "+ postfix)
