In [None]:
import pathlib
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn import svm
from sklearn import model_selection
from statsmodels.tools.eval_measures import mse
from sklearn.metrics import mean_absolute_error
import keras.layers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from google.colab import files
import re
from sklearn import preprocessing
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import KFold
pd.options.display.float_format = '{:.5f}'.format
from sklearn.utils import shuffle



In [None]:
# Load the base dataset from CSV.
datasetbig = pd.read_csv("/content/sample_data/base_dataset.csv")

# Set this flag to True when working with the extra dataset.
bol_extra_dataset = False

In [None]:
class PrintDot(keras.callbacks.Callback):
  """Keras callback that prints one dot per finished epoch as a progress indicator.

  Before the dot, an empty line is printed on every epoch whose index is a
  multiple of 100 (including epoch 0), so the dots wrap in rows of 100.
  """

  def on_epoch_end(self, epoch, logs=None):
    """Print the progress marker for `epoch`; `logs` is accepted but not used."""
    # `logs` defaults to None, mirroring the base Callback hook, so the method
    # also works when it is called without a logs dict.
    if epoch % 100 == 0:
      print("")
    print(".", end="")

def popAndGetPredictionLabels(bol_extra_dataset, train_dataset):
  """Pop the target columns out of `train_dataset` and return them.

  The columns are removed with `DataFrame.pop`, so `train_dataset` is modified
  in place and afterwards no longer contains the returned columns.

  Args:
    bol_extra_dataset: False selects the five base target columns; any other
      value selects the seven-column layout of the extra dataset.
    train_dataset: DataFrame that contains the target columns.

  Returns:
    A list of `[column_name, popped_series]` pairs, in a fixed column order.
  """
  if bol_extra_dataset == False:
    target_columns = (
        "success_rate",
        "collision_rate",
        "timeout_rate",
        "average_path_length",
        "average_time_diff",
    )
  else:
    target_columns = (
        "success_rate",
        "collision_rate",
        "average_collision_amount",
        "timeout_rate",
        "timeout_collision_rate",
        "average_path_length",
        "average_time_diff",
    )

  return [[column, train_dataset.pop(column)] for column in target_columns]

def get_group(dataset, performanceMetric):
  """Return the rows of `dataset` whose "Label" column equals `performanceMetric`."""
  label_matches = dataset["Label"] == performanceMetric
  return dataset.loc[label_matches]

def get_best_of(dataset, performanceMetric, measure):
  """Return the row(s) of one label group that have the smallest `measure`.

  Args:
    dataset: DataFrame with a "Label" column and a `measure` column.
    performanceMetric: value of "Label" that selects the group (via get_group).
    measure: name of the column to minimise.

  Returns:
    All rows of the group whose `measure` equals the group's minimum.
  """
  group = get_group(dataset, performanceMetric)
  lowest_value = group[measure].min()
  is_lowest = group[measure] == lowest_value
  return group[is_lowest]

def norm(dataset):
  """Min-max scale every numeric column of `dataset`: (x - min) / (max - min).

  The minimum and maximum are taken from `dataset.describe()`, i.e. from the
  frame that is passed in.

  Args:
    dataset: DataFrame to scale.

  Returns:
    A new DataFrame of scaled values; `dataset` itself is not modified.
  """
  stats = dataset.describe()
  column_min = stats.loc["min"]
  column_range = stats.loc["max"] - column_min
  # A constant column has a zero range, which would turn the whole column into
  # NaN (0 / 0). Dividing by 1 instead maps such a column to 0.
  column_range = column_range.replace(0, 1)
  return (dataset - column_min) / column_range

def is_unique(s):
    """Return whether every element of `s` equals the first element of `s`.

    Despite the name, this reports that the series holds a single repeated
    value, not that its values are distinct.
    """
    values = s.to_numpy()
    first_value = values[0]
    equals_first = first_value == values
    return equals_first.all()

def checkForConstants(dataset):
  """Drop every column of `dataset` that holds a single repeated value.

  Each dropped column is reported with a "Dropping <name>" line. The input
  frame is not modified; a frame without the constant columns is returned.
  """
  for column in dataset:
    column_is_constant = is_unique(dataset[column])
    if not column_is_constant:
      continue
    print("Dropping", column)
    dataset = dataset.drop(columns=column)
  return dataset

def getMeans(bol_extra_dataset, dataset):
  """Compute the mean of every target column of `dataset`.

  Args:
    bol_extra_dataset: False selects the five base target columns; any other
      value selects the seven-column layout of the extra dataset.
    dataset: DataFrame that contains the target columns.

  Returns:
    A list of `["mean_<column>", mean_value]` pairs, in a fixed column order.
  """
  if bol_extra_dataset == False:
    target_columns = (
        "success_rate",
        "collision_rate",
        "timeout_rate",
        "average_path_length",
        "average_time_diff",
    )
  else:
    target_columns = (
        "success_rate",
        "collision_rate",
        "average_collision_amount",
        "timeout_rate",
        "timeout_collision_rate",
        "average_path_length",
        "average_time_diff",
    )

  return [["mean_" + column, dataset[column].mean()] for column in target_columns]

def get_numpy_labels(bol_extra_dataset, dataset):
  """Slice the target columns out of a 2-D array by fixed column position.

  The targets are read from consecutive columns starting at position 2: five
  columns (2..6) when `bol_extra_dataset` is False, otherwise seven (2..8).
  The caller must make sure these positions really hold the target columns.

  Args:
    bol_extra_dataset: False for the base layout, anything else for the extra one.
    dataset: 2-D NumPy array with one row per sample.

  Returns:
    A list of arrays, each holding one single-column slice of `dataset`.
  """
  first_label_position = 2
  label_count = 5 if bol_extra_dataset == False else 7
  label_positions = range(first_label_position, first_label_position + label_count)
  return [np.hstack([dataset[:, position:position + 1]]) for position in label_positions]

In [None]:
# Shuffle the rows with a fixed seed so the resulting order is reproducible.
datasetbig = shuffle(datasetbig, random_state=0)

In [None]:
# Do not truncate the column list when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

In [None]:
# Subsets selected through indicator columns (rows where the column equals 1).
rlca = datasetbig.loc[datasetbig["rlca"] == 1]
crowdnav = datasetbig.loc[datasetbig["crowdnav"] == 1]
dwa = datasetbig.loc[datasetbig["dwa"] == 1]

# Subsets selected by map-type indicator.
indoor = datasetbig.loc[datasetbig["indoor_map_type"] == 1]
outdoor = datasetbig.loc[datasetbig["outdoor_map_type"] == 1]

In [None]:
# choose dataset to work with
# Every later cell operates on `dataset`; here it is the full shuffled frame.
dataset = datasetbig

In [None]:
# Remove the "teb" column from the working frame.
dataset = dataset.drop(columns="teb")

In [None]:
# Mean of every target column, computed on the whole working frame (before any split).
means = getMeans(bol_extra_dataset=bol_extra_dataset, dataset=dataset)

In [None]:
# Drop columns that hold a single repeated value.
dataset = checkForConstants(dataset=dataset)

In [None]:
# NumPy view of the "robot_max_speed" column.
nump = dataset.loc[:, "robot_max_speed"].to_numpy()

In [None]:
# 80 % of the rows are sampled (seeded) for training; the remaining rows form the test split.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

# From the training rows, 80 % are sampled again; the rows that were NOT sampled become
# the "predict" split and are then removed from the training split.
temp = train_dataset.sample(frac=0.8, random_state=0)
predict_dataset = train_dataset.drop(index=temp.index)
train_dataset = train_dataset.drop(index=predict_dataset.index)

In [None]:
# Plain NumPy copies of the three splits (target columns are still included here).
train_dataset_array = np.array(train_dataset)
test_dataset_array = np.array(test_dataset)
predict_dataset_array = np.array(predict_dataset)

# Target columns in the same order that getMeans and popAndGetPredictionLabels use.
label_columns = ["success_rate", "collision_rate", "timeout_rate",
                 "average_path_length", "average_time_diff"]
if bol_extra_dataset:
  label_columns = ["success_rate", "collision_rate", "average_collision_amount",
                   "timeout_rate", "timeout_collision_rate",
                   "average_path_length", "average_time_diff"]

# One (n_rows, 1) array per target. The columns are selected by NAME instead of by
# fixed position: columns were dropped from `dataset` in earlier cells ("teb" and any
# constant column), which can shift positions and silently misalign the targets.
output_train_array = [train_dataset[[column]].to_numpy() for column in label_columns]
output_test_array = [test_dataset[[column]].to_numpy() for column in label_columns]
output_predict_array = [predict_dataset[[column]].to_numpy() for column in label_columns]

In [None]:
# Pop the target columns out of each split. The three frames are modified in place
# and hold only feature columns afterwards, so this cell must be run exactly once
# per split.
train_labels, test_labels, predict_labels = (
    popAndGetPredictionLabels(bol_extra_dataset, split)
    for split in (train_dataset, test_dataset, predict_dataset)
)

Scaling the data with scikit-learn's `StandardScaler`

In [None]:
# Fit the scaler on the TRAINING features only and reuse its mean/std for the other
# splits. Fitting a separate scaler per split would put each split on a different
# scale than the one the model is trained on.
std_scaler = preprocessing.StandardScaler().fit(train_dataset)

# Wrap the results in DataFrames (same index/columns as the inputs) so that both
# scaling variants produce the same kind of object for the cells further down.
normed_train_data_std = pd.DataFrame(
    std_scaler.transform(train_dataset),
    columns=train_dataset.columns, index=train_dataset.index)
normed_test_data_std = pd.DataFrame(
    std_scaler.transform(test_dataset),
    columns=test_dataset.columns, index=test_dataset.index)
normed_predict_data_std = pd.DataFrame(
    std_scaler.transform(predict_dataset),
    columns=predict_dataset.columns, index=predict_dataset.index)

Scaling via formula (min-max normalization)

In [None]:
# Min-max scaling, (x - min) / (max - min), with min and max taken from the TRAINING
# features only. The same statistics are applied to every split so that all of them
# share the scale the model is trained on; test/predict values can therefore fall
# slightly outside [0, 1].
train_stats = train_dataset.describe()
train_min = train_stats.loc["min"]
# A zero range (constant training column) would yield NaN; divide by 1 instead.
train_range = (train_stats.loc["max"] - train_min).replace(0, 1)

normed_train_data_form = (train_dataset - train_min) / train_range
normed_test_data_form = (test_dataset - train_min) / train_range
normed_predict_data_form = (predict_dataset - train_min) / train_range

# DNN

In [None]:
# Choose which scaled feature set feeds the network. Exactly one flag must be True.
formular = True
standard_scaler = False

if formular and not standard_scaler:
  normed_train_data = normed_train_data_form
  normed_test_data = normed_test_data_form
  normed_predict_data = normed_predict_data_form
elif standard_scaler and not formular:
  normed_train_data = normed_train_data_std
  normed_test_data = normed_test_data_std
  normed_predict_data = normed_predict_data_std
else:
  # Fail here instead of leaving normed_* undefined (or stale from an earlier run).
  raise ValueError("Set exactly one of `formular` / `standard_scaler` to True.")


In [None]:
def build_model():
    """Build and compile a small fully connected regression network.

    Architecture: Dense(24, relu) -> Dense(16, relu) -> Dense(1). The input width
    is read from the global `train_dataset` (its number of columns). The model is
    compiled with RMSprop (0.001), MAE loss and MAE as the only metric.

    Returns:
        The compiled Keras model.
    """
    feature_count = len(train_dataset.keys())

    model = keras.Sequential()
    model.add(layers.Dense(units=24, activation=tf.nn.relu, input_shape=[feature_count]))
    model.add(layers.Dense(units=16, activation=tf.nn.relu))
    model.add(layers.Dense(units=1))

    model.compile(
        loss="mae",
        optimizer=tf.keras.optimizers.RMSprop(0.001),
        metrics=["mae"],
    )
    return model


In [None]:
# Re-import so that `layers` is bound to the Keras layers module when this cell runs,
# even if the name was rebound to something else earlier in the kernel session.
from tensorflow.keras import layers
# Build one model instance; architecture and compile settings live in build_model().
model = build_model()

In [None]:
# Start from an empty frame; the training loop concatenates one result row per run onto it.
performanceOverview = pd.DataFrame()
performanceOverview

In [None]:
# Train one model per (early-stopping patience, target column) pair, score it on the
# test and predict splits and collect one summary row per run in `performanceOverview`.
patienceNumbers = [20,30,40,80]
EPOCHS = 1000  # upper bound only; early stopping normally ends training earlier

# Number of input features; np.shape works for DataFrames and NumPy arrays alike.
feature_count = np.shape(normed_train_data)[1]

for patienceNumber in patienceNumbers:
  for label_idx, (label_name, label_values) in enumerate(train_labels):
    print(label_name)
    model = build_model()

    early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=patienceNumber)

    history = model.fit(
        normed_train_data,
        label_values,
        epochs=EPOCHS,
        validation_split=0.2,
        verbose=0,
        batch_size=64,
        callbacks=[early_stop, PrintDot()])

    # Keep only the last epoch of the training history as part of the summary row.
    hist = pd.DataFrame(history.history)
    hist["epoch"] = history.epoch
    histogram_data = hist.tail(1).reset_index()

    # Named `layer_count` (not `layers`) so the Keras `layers` module that
    # build_model() relies on is not shadowed.
    layer_count = len(model.layers)
    params = model.count_params()

    # The model is compiled with metrics=["mae"], so evaluate() returns [loss, mae].
    # The MSE on the test split is computed explicitly from the test predictions.
    test_targets = test_labels[label_idx][1]
    evaluation_loss, evaluation_mae = model.evaluate(normed_test_data, test_targets, verbose=0)
    evaluation_mse = mean_squared_error(test_targets, model.predict(normed_test_data, verbose=0))

    prediction = model.predict(normed_predict_data)
    predict_targets = output_predict_array[label_idx]
    base_mean = means[label_idx][1]

    # Constant series holding the target mean, one entry per row of the predict split.
    mean_series = pd.Series(np.full(len(predict_labels[label_idx][1]), base_mean))

    prediction_R2  = r2_score(predict_targets, prediction)
    prediction_MSE = mse(predict_targets, prediction)
    prediction_MAE = mean_absolute_error(predict_targets, prediction)
    MAE_mean = mean_absolute_error(mean_series, prediction)

    # Adjusted R2 uses the size of the sample R2 was computed on (the predict split),
    # not the size of the training split. Undefined when there are too few rows.
    n = len(prediction)
    degrees_of_freedom = n - feature_count - 1
    if degrees_of_freedom > 0:
      Adjusted_R2 = 1 - ((1 - prediction_R2) * (n - 1)) / degrees_of_freedom
    else:
      Adjusted_R2 = np.nan
    SSE = np.sum((predict_targets - prediction) ** 2)
    RMSE = prediction_MSE.mean() ** 0.5
    MAPE = mean_absolute_percentage_error(predict_targets, prediction)

    model_data = {
          "Datapoints":[len(train_dataset.index)],
          "Label":[label_name],
          "Layers":[layer_count],
          "Params":[params],
          "Patience":[patienceNumber],
          "base_mean":[base_mean],
          "evaluation_Loss":[evaluation_loss],
          "evaluation_mae":[evaluation_mae],
          "evaluation_mse":[evaluation_mse],
          "Diff_base_MAE": [base_mean - prediction_MAE],
          "prediction_R2"   :[prediction_R2],
          "prediction_adj.R2":[Adjusted_R2],
          "prediction_MAE"  :[prediction_MAE],
          "prediciton_MSE":[prediction_MSE.mean()],
          "SSE":[SSE],
          "RMSE":[RMSE],
          "MAPE":[MAPE],
          "MAE_mean":[MAE_mean]
      }

    model_data_df = pd.DataFrame(data=model_data)

    # Appended per run (only a handful of rows) so partial results survive an
    # interrupted training session.
    result = pd.concat([model_data_df, histogram_data], axis=1)
    performanceOverview = pd.concat([performanceOverview, result])

# Persist the collected results with a timestamped file name.
dt = datetime.now()
performanceOverview.to_csv("/content/sample_data/performanceOverview "+dt.isoformat()+".csv")


In [None]:
# Run(s) with the lowest prediction_MAE for the "average_time_diff" target.
result = get_best_of(performanceOverview, "average_time_diff", "prediction_MAE")
result.loc[:, ["base_mean", "prediction_R2", "prediction_adj.R2", "prediction_MAE", "prediciton_MSE"]]