In [1]:
from IPython.display import clear_output
! pip install tensorflow==2.4.0
!apt-get update && apt-get -qq install xxd
clear_output()

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
import numpy as np
import math as m
from matplotlib import pyplot

import copy

from sklearn.preprocessing import StandardScaler

import os
import io
import time

In [None]:
# Reset the workspace: remove previously extracted data, exported models,
# the generated constants file and any downloaded archive.
# `-f` keeps the commands silent when a path does not exist.
!rm -f -r ./data
!rm -f -r ./models
!rm -f -r ./__MACOSX
!rm -f ./constants.cc
!rm -f ./data.zip

In [3]:
#file paths

# Input folders (extracted from the zip archives in the following cells)
training_data_path = "./training_data"
unseen_data_path = "./unseen_data"
# Generated source file written by export_model
output_path = "./constants.cc"

base_path = "./"

# Folder for all model artifacts. makedirs(..., exist_ok=True) is idempotent,
# creates missing parent folders and avoids the check-then-create race.
MODELS_DIR = base_path+'models/'
os.makedirs(MODELS_DIR, exist_ok=True)
MODEL_TF = MODELS_DIR + 'model'
MODEL_NO_QUANT_TFLITE = MODELS_DIR + 'model_no_quant.tflite'
MODEL_TFLITE = MODELS_DIR + 'model.tflite'
MODEL_TFLITE_MICRO = MODELS_DIR + 'model.cc'

In [4]:
# Extract a fresh copy of the training archive. Stale folders are removed
# first (`__MACOSX` is presumably archive metadata created on macOS).
!rm -f -r ./__MACOSX
!rm -f -r ./training_data
!unzip training_data.zip
# Hide the long unzip listing.
clear_output()

In [5]:
# Extract a fresh copy of the independent ("unseen") evaluation archive,
# removing stale folders first.
!rm -f -r ./__MACOSX
!rm -f -r ./unseen_data

!unzip unseen_data.zip
# Hide the long unzip listing.
clear_output()

In [6]:
#get data to pd dataframes

def find_csv_filenames(path_to_dir, suffix=".CSV"):
    """Return the sorted paths of all entries ending in `suffix` below `path_to_dir`.

    Only the sub-directories of `path_to_dir` (at any depth) are scanned;
    entries located directly in `path_to_dir` itself are ignored. The suffix
    comparison is case-sensitive. Each result has the form "<directory>/<entry>".
    """
    matches = []
    for directory, _, _ in os.walk(path_to_dir):
        # skip the root folder itself, only its sub-folders hold samples
        if directory == path_to_dir:
            continue
        for entry in os.listdir(directory):
            if entry.endswith(suffix):
                matches.append(directory + "/" + entry)
    return sorted(matches)


def to_dataframes (data_files):
  """Read every CSV file in `data_files` into a pandas DataFrame.

  After every 100th file the cell output is cleared and the fraction of
  files read so far (rounded to two digits) is printed.

  Returns the DataFrames as a list, in the order of `data_files`.
  """
  total = len(data_files)
  frames = []

  for done, path in enumerate(data_files, start=1):
    frames.append(pd.read_csv(path))
    if done % 100 == 0:
      clear_output()
      print("progress: "+str(round(done/total, ndigits=2)))

  return frames

# Service identifiers that occur most often in the selected environments.
# process_files can drop the matching columns (named " <id>") via `without_services`.
services = [
    "0af0", "1802", "180f", "1812",
    "1826", "2222", "ec88", "fd5a",
    "fd6f", "fdd2", "fddf", "fe03",
    "fe07", "fe0f", "fe61", "fe9f",
    "fea0", "feb9", "febe", "fee0",
    "ff0d", "ffc0", "ffe0",
]

def process_files(data_frames, without_services = False, only_labels = None, remove_columns = (" services", " manufacturer_data_lengths")):
  """Turn per-sample DataFrames into flat feature vectors plus their labels.

  Parameters:
    data_frames: iterable of DataFrames; each must contain a "label" column
      whose first row holds the label of the sample.
    without_services: when True, the columns " <id>" for every id in the
      notebook-level `services` list are dropped (KeyError if one is missing).
    only_labels: optional collection of labels; frames with another label
      are skipped. A falsy value keeps every frame.
    remove_columns: columns that are dropped when present.

  Returns:
    (input, output, labels, data_columns) where `input` is a list of
    flattened (row-major) feature lists, `output` the matching labels,
    `labels` the distinct labels in order of first appearance and
    `data_columns` the stripped feature column names of the first frame
    that was kept (empty list when no frame was kept).

  The label counts are printed. The passed frames are NOT modified.
  Raises IndexError for a frame without rows.
  """
  samples = []
  sample_labels = []
  data_columns = []

  for df in data_frames:
      label = df["label"].iloc[0]

      if only_labels and label not in only_labels:
        continue

      droppable = ["label"]
      if without_services:
        droppable += [" "+serv for serv in services]
      #features used for testing that are still in some data samples
      droppable += [column for column in remove_columns if column in df.columns]

      # drop() returns a new frame, so the caller's data stays untouched
      features = df.drop(columns=droppable)

      # Take the column names from the first frame that is actually kept;
      # keying this on the position in `data_frames` returned an empty list
      # whenever the very first frame was filtered out by `only_labels`.
      if not samples:
        data_columns = [c.strip() for c in features.columns.values.tolist()]

      samples.append(features.to_numpy().flatten().tolist())
      sample_labels.append(label)

  labels_dict = {}
  for l in sample_labels:
      labels_dict[l] = labels_dict.get(l, 0) + 1

  print(labels_dict)

  labels = list(labels_dict.keys())

  return samples, sample_labels, labels, data_columns




In [7]:
#normalization

def get_normalization_params(X, rows = 5):
  """Compute mean and sample standard deviation for every feature.

  Each entry of `X` is a flattened sample of `rows` consecutive records with
  len(X[0])//rows features each; feature j of record k sits at position
  k*parameters+j. The statistics of a feature are taken over all records of
  all samples (standard deviation with the n-1 denominator).

  Returns (mean_list, std_list), one value per feature.
  """
  parameters = len(X[0])//rows
  n_values = len(X)*rows

  mean_list = []
  std_list = []

  for feature in range(parameters):
      # positions of this feature inside one flattened sample
      positions = [record*parameters+feature for record in range(rows)]

      total = 0
      for sample in X:
          for pos in positions:
              total += sample[pos]
      mean = total/n_values

      squared_dev = 0
      for sample in X:
          for pos in positions:
              squared_dev += m.pow(sample[pos] - mean, 2)
      std = m.sqrt(squared_dev/(n_values-1))

      mean_list.append(mean)
      std_list.append(std)

  return mean_list, std_list


def normalize_data(data_x, data_y, means, stds, labels, rows = 5):
  """Standardize the samples and translate their labels to indices.

  Every feature value is replaced by (value - mean) / std using the
  per-feature `means` and `stds`; features whose std is 0 are left as they
  are. `data_x` is not modified, a deep copy is normalized instead.

  Returns (normalized samples, list of indices of each label in `labels`).
  """
  parameters = len(data_x[0])//rows
  data_normalized = copy.deepcopy(data_x)

  for feature in range(parameters):
    mean, std = means[feature], stds[feature]
    if std == 0:
      # constant feature, dividing would fail
      continue
    for sample in data_normalized:
      # the feature repeats once per record, `parameters` positions apart
      for pos in range(feature, rows*parameters, parameters):
        sample[pos] = (sample[pos]-mean)/std

  labels_num = [labels.index(label) for label in data_y]
  return data_normalized, labels_num


#generate the declarations for constants.cc
def get_constants_strings(mean_list, std_list, labels):
  """Render the normalization parameters and labels as C array definitions.

  The mean and std definitions are also printed; the label definition is
  only returned.

  Returns (mean_str, std_str, labels_str).
  """
  mean_str = "const float mean_list[] = {" + ", ".join(str(value) for value in mean_list) + "};"
  print(mean_str)

  std_str = "const float std_list[] = {" + ", ".join(str(value) for value in std_list) + "};"
  print(std_str)

  # every label becomes a quoted C string literal
  quoted_labels = ["\""+label+"\"" for label in labels]
  labels_str = "const char available_env[][50] = {" + ", ".join(quoted_labels) + "};"

  return mean_str, std_str, labels_str



In [8]:
#build model with specified architecture and train with given hyperparams


def build_model (architecture, train_x, labels, opt = None):
  """Assemble and compile a classifier around the given hidden layers.

  Parameters:
    architecture: Keras layer/model holding the hidden layers (see build_layers).
    train_x: training samples; only the length of the first sample is used,
      to fix the input size.
    labels: list of class labels; its length is the number of softmax outputs.
    opt: optimizer passed to compile(). When None, a new
      SGD(learning_rate=0.01, momentum=0.9) is created for this model.

  Returns the compiled Sequential model.
  """
  # A default argument is evaluated only once, at definition time, so an
  # optimizer instance in the signature would be shared (with its state) by
  # every model built. Create a fresh one per call instead.
  if opt is None:
    opt = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

  model_compile = tf.keras.models.Sequential(
      [
        tf.keras.layers.Flatten(input_shape=(len(train_x[0]),)),
        architecture,
        tf.keras.layers.Dense(len(labels), activation='softmax')
      ]
  )
  model_compile.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

  return model_compile

def train_model(model_train, train_x, train_y, test_x, test_y, batch_size = 25, epochs = 25):
  """Fit `model_train` silently and score it on the test split.

  The test split is also used as validation data during fitting.

  Returns (history returned by fit, second value returned by evaluate).
  """
  fit_options = dict(epochs=epochs, validation_data=(test_x, test_y), verbose=0, batch_size=batch_size)
  history = model_train.fit(train_x, train_y, **fit_options)

  # evaluate yields a pair; the first entry (loss) is not needed
  scores = model_train.evaluate(test_x, test_y, verbose=0)
  _loss, test_acc = scores
  return history, test_acc

def plot_results(history):
  """Plot training/validation loss and accuracy curves of a fit history.

  `history` is the object returned by fit(); its `history` dict must hold
  'loss', 'val_loss' and an accuracy series with its 'val_' counterpart.
  """
  metrics = history.history

  # Keras names the history entries after the metric object. A model compiled
  # with a SparseCategoricalAccuracy() instance logs
  # 'sparse_categorical_accuracy' rather than 'accuracy', so fall back to the
  # first training series whose name ends in 'accuracy'.
  acc_key = 'accuracy'
  if acc_key not in metrics:
    named = [key for key in metrics if key.endswith('accuracy') and not key.startswith('val_')]
    if named:
      acc_key = named[0]

  # plot loss during training
  pyplot.subplot(211)
  pyplot.title('Loss')
  pyplot.plot(metrics['loss'], label='train')
  pyplot.plot(metrics['val_loss'], label='test')
  pyplot.legend()
  # plot accuracy during training
  pyplot.subplot(212)
  pyplot.title('Accuracy')
  pyplot.plot(metrics[acc_key], label='train')
  pyplot.plot(metrics['val_'+acc_key], label='test')
  pyplot.legend()
  pyplot.show()

def build_layers (option, activation = "relu"):
    """Build the hidden part of a network from a compact description.

    Every non-zero int in `option` adds a Dense layer with that many units
    (using `activation`); every other non-zero value adds a Dropout layer
    with that rate. Zeros are skipped.

    Returns the layers wrapped in a Sequential model.
    """
    model_layers = tf.keras.models.Sequential()
    for entry in option:
      if entry == 0:
        continue
      if isinstance(entry, int):
        layer = tf.keras.layers.Dense(entry, activation=activation)
      else:
        layer = tf.keras.layers.Dropout(entry)
      model_layers.add(layer)
    return model_layers





In [9]:
# Convert TensorFlow model to TensorFlow Lite with quantization that is feasible to execute on embedded device 
    
# Convert the model to the TensorFlow Lite format without quantization
def convert_model(model_to_convert, train_x):
  """Convert a Keras model to TensorFlow Lite, once plain and once optimized.

  Parameters:
    model_to_convert: Keras model handed to TFLiteConverter.from_keras_model.
    train_x: samples used as representative dataset for the optimized
      conversion; each one is fed as a float32 array of shape
      (1, len(train_x[0])).

  Returns (model_no_quant_tflite, model_tflite): the result of the first
  conversion without any optimization settings and the result of the second
  conversion with Optimize.DEFAULT and the representative dataset.
  """
  # First pass: the same converter object, before any optimization is configured
  converter = tf.lite.TFLiteConverter.from_keras_model(model_to_convert)
  model_no_quant_tflite = converter.convert()


  # Convert the model to the TensorFlow Lite format with quantization

  # Set the optimization flag.
  converter.optimizations = [tf.lite.Optimize.DEFAULT]
  # NOTE(review): an earlier comment here said "integer only quantization",
  # but the settings below select the TFLITE_BUILTINS op set and keep float32
  # model input/output - confirm which behavior is intended.
  converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
  converter.inference_input_type = tf.float32
  converter.inference_output_type = tf.float32
  # Provide a representative dataset to ensure we quantize correctly.

  def representative_dataset():
    # one training sample per step, as a single-row float32 batch
    for i in range(len(train_x)):
     yield([np.float32(train_x[i]).reshape(1, len(train_x[0]))])

  converter.representative_dataset = representative_dataset
  model_tflite = converter.convert()

  return model_no_quant_tflite, model_tflite



In [10]:
#predict and evaluate model

def evaluate_preds(predictions, test_x, test_y, labels, data_columns, verbose = True, means = None, stds = None, rows = 5):
  """Compare predictions with the true labels and group the samples by outcome.

  Parameters:
    predictions: one array of class scores per sample; the position of the
      highest score is the predicted class.
    test_x: normalized, flattened samples (`rows` records of
      len(data_columns) features each).
    test_y: true class indices (positions in `labels`).
    labels: list of class labels.
    data_columns: feature names of one record.
    verbose: print the per-label and overall number of wrong predictions.
    means, stds: per-feature normalization parameters used to restore the
      original values; default to the notebook-level `mean_list` / `std_list`.
    rows: number of records per sample used to name the columns.

  Returns (evaluation, accuracy). evaluation[true_label][predicted_label] is
  a DataFrame holding the de-normalized (truncated to int) samples with that
  outcome. A prediction index outside `labels` is stored under the predicted
  label "" and counted as wrong. accuracy is 0.0 for an empty `test_y`.
  """
  if means is None:
    means = mean_list
  if stds is None:
    stds = std_list

  n_columns = len(data_columns)
  # column names: all features of record 0, then all features of record 1, ...
  df_columns = [c+"_"+str(j) for j in range(rows) for c in data_columns]

  wrong_predictions = {label:0 for label in labels}
  # Collect plain rows first and build each DataFrame once at the end;
  # growing frames with DataFrame.append is quadratic and the method was
  # removed in pandas 2.0.
  collected = {label:{label_: [] for label_ in labels} for label in labels}

  for i in range(0, len(predictions)):
    pred = predictions[i].flatten().tolist()
    pred_index = pred.index(max(pred))
    pred_label = labels[pred_index] if pred_index < len(labels) else ""
    true_label = labels[test_y[i]]

    # which environments get wrongly classified to which environments
    sample = test_x[i]
    restored = [int((sample[k] * stds[k%n_columns]) + means[k%n_columns]) for k in range(0, len(sample))]
    collected[true_label].setdefault(pred_label, []).append(restored)

    if pred_label != true_label:
        wrong_predictions[true_label] += 1

  evaluation = {
      true_label: {pred_label: pd.DataFrame(records, columns=df_columns) for pred_label, records in by_prediction.items()}
      for true_label, by_prediction in collected.items()
  }

  # number of samples per true class index, counted in one pass
  totals = {}
  for label_y in test_y:
    totals[label_y] = totals.get(label_y, 0) + 1

  w_pred_str = ""
  for l in wrong_predictions:
    w_pred_str += l + ": "+ str(wrong_predictions[l])+"/"+str(totals.get(labels.index(l), 0))+", "

  n_wrong = sum(wrong_predictions.values())
  accuracy = 1- n_wrong/len(test_y) if len(test_y) else 0.0

  if verbose:
    print("predicted wrong: "+w_pred_str)
    print("predicted wrong overall: "+str(n_wrong)+"/"+str(len(test_y))+" (acc: "+str(round(accuracy*100,2))+"%)")

  return evaluation, accuracy



def predict_tflite(tflite_model, test_x):
  """Run a serialized TensorFlow Lite model on every sample of `test_x`.

  `test_x` is copied and cast to float32 (it must offer copy()/astype());
  the samples are then fed to the interpreter one at a time.

  Returns a list with the output tensor of each invocation.
  """
  # work on a float32 copy of the samples
  samples = test_x.copy()
  samples = samples.astype(np.float32)

  # set up the interpreter for the in-memory model
  interpreter = tf.lite.Interpreter(model_content=tflite_model)
  interpreter.allocate_tensors()

  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  # one invocation per sample, each wrapped as a batch of size one
  y_pred = []
  for sample in samples:
    interpreter.set_tensor(input_index, [sample])
    interpreter.invoke()
    y_pred.append(interpreter.get_tensor(output_index))

  return y_pred



In [11]:
#export model to constants.cc 

def export_model(mean_str, std_str, labels_str, verbose = True):
  """Write constants.cc: normalization constants, labels and the model bytes.

  The model is read from MODEL_TFLITE (which must already exist on disk),
  dumped as C source to MODEL_TFLITE_MICRO and combined with the given
  declaration strings into the file at `output_path`.

  Parameters:
    mean_str, std_str, labels_str: C declarations as produced by
      get_constants_strings.
    verbose: when True the cell output is cleared after the conversion.

  NOTE(review): the number of labels is taken from the notebook-level
  `labels` variable, not from a parameter.
  """
  # Convert to a C source file, i.e, a TensorFlow Lite for Microcontrollers model
  !xxd -i {MODEL_TFLITE} > {MODEL_TFLITE_MICRO}
  # Identifier derived from the model path ('/' and '.' replaced by '_'),
  # renamed to g_model inside the generated file
  REPLACE_TEXT = MODEL_TFLITE.replace('/', '_').replace('.', '_')
  !sed -i 's/'{REPLACE_TEXT}'/g_model/g' {MODEL_TFLITE_MICRO}

  if verbose:
    clear_output()

  # NOTE(review): the path is hard-coded here although the file is written to `output_path` below
  !rm -f ./constants.cc

  # NOTE(review): the array is declared as "g_modelurd" while sed renames the
  # generated symbols to "g_model..." - confirm the name expected by constants.h
  model_str = "alignas(16) const unsigned char g_modelurd[] = "
  with open(MODEL_TFLITE_MICRO, 'r') as file:
      data = file.read();
      # keep everything from the first "{" on and replace each remaining
      # "unsigned" by "const" (presumably targeting the trailing length declaration)
      model_str += data[data.index("{"): len(data)].replace("unsigned", "const")

  output_str = ""
  output_str += "#include \"constants.h\"\n"
  output_str += mean_str +"\n"
  output_str += std_str + "\n"
  output_str += labels_str + "\n"
  output_str += "const int available_env_len = "+str(len(labels)) +";\n"
  output_str += model_str

  with open(output_path, "w") as file:
    file.write(output_str)



In [12]:
#get data sets
# Read every CSV sample below the training and the unseen data folder;
# each file becomes one DataFrame.
train_data_frames = to_dataframes(find_csv_filenames(training_data_path))
test_data_frames = to_dataframes(find_csv_filenames(unseen_data_path))

# Number of samples per data set
print(len(train_data_frames))
print(len(test_data_frames))

progress: 0.98
12098
3555


In [13]:
#preparation of the main data set

# process_files deletes columns from the frames it receives, so it works on a
# deep copy and the loaded frames stay intact.
data_x, data_y, labels, parameters = process_files(copy.deepcopy(train_data_frames))

# Normalization parameters come from the training data only; they are also
# rendered as C declarations for the export.
mean_list, std_list = get_normalization_params(X= data_x)
mean_str, std_str,labels_str = get_constants_strings(mean_list= mean_list, std_list= std_list, labels=labels)
X, y_num = normalize_data(data_x=data_x, data_y=data_y, means=mean_list, stds=std_list, labels = labels)

# Feature names of one record
print(parameters)


{'street': 1189, 'park': 877, 'apartment': 1550, 'supermarket': 1495, 'clothing_store': 1315, 'train': 802, 'bus': 1123, 'gym': 350, 'car': 598, 'house': 1150, 'nature': 445, 'restaurant': 500, 'cinema': 200, 'concert': 104, 'plane': 200, 'bar': 200}
const float mean_list[] = {31.44268474127955, 4.7901636634154405, 4.803636964787568, 2.092759133741114, 9.539709042816995, 90.03927921970573, 12.839180029756985, 6.282740948917176, 50.077186311787074, 147.01858158373284, 91.59257728550173, 14.267416101835014, 10.387716978012895, 2.510233096379567, 42.05886923458423, -72.64306496941643, -86.04589188295586, -46.72296247313606, -88.2988262522731, -41.97154901636634, 7.626070424863614, 9475.404380889404, 63560.128616300215, 0.05733179037857497, -0.5648867581418416, 93268.30338898991, -0.5216234088279055, 93272.18146801124, -0.5667548355100016, 93276.25233922963, -0.5651347330137213, 93287.50385187635, -0.5569846255579435, 93283.89424698298, -0.49244503223673336, 93287.953529509, -0.51765581087

In [14]:
#preparation of the unseen data set

# Only samples whose label occurs in the training labels are kept; the label
# list and column names returned here are discarded.
test_data_x, test_data_y, _, _ = process_files(copy.deepcopy(test_data_frames), only_labels = labels)

# Normalized with the parameters computed from the training data
test_X, test_y_num = normalize_data(data_x=test_data_x, data_y=test_data_y, means=mean_list, stds=std_list, labels= labels)


{'supermarket': 710, 'bus': 507, 'street': 611, 'park': 300, 'clothing_store': 426, 'house': 101, 'train': 100, 'car': 250, 'restaurant': 250, 'nature': 200, 'apartment': 100}


In [16]:
#generate configurations to evaluate

# Candidate values for the architecture search
diff_layer_count = [2,3,4]                 # number of Dense layers per architecture
diff_layers = [25, 50, 100, 250, 300]      # units per Dense layer
diff_dropouts = [0.1]  # other rates tried before: [0, 0.05, 0.1, 0.2]
# NOTE(review): the four values below are not referenced by the visible
# cells (the grid-search call passes literal values) - confirm they are still needed.
diff_test_sizes = [0.1,0.2, 0.25, 0.3, 0.4,0.5]
diff_epochs = [25, 50, 75, 100]
diff_batch_sizes = [10, 25, 50, 100]
times = 3


def build_options (factor, start_list, result):
  """Extend every option in `result` by `factor` further items of `start_list`.

  Each step appends one item to every option, producing all combinations;
  within a step the options are grouped by the appended item, in the order
  of `start_list`. With factor 0 `result` is returned as is.
  """
  if factor != 0:
    extended = []
    for item in start_list:
      extended.extend(option + [item] for option in result)
    return build_options(factor-1, start_list, extended)
  return result


# All architectures of the form [units, dropout, units, ...]: a first Dense
# layer, one Dropout and then count-1 further Dense layers for every count in
# diff_layer_count, flattened into a single list.
architectures_multiple = [item for sublist in [build_options(count-1, diff_layers, [[l, d] for l in diff_layers for d in diff_dropouts]) for count in diff_layer_count] for item in sublist]

# One fixed layout (first and last entry of `option`) combined with every dropout rate
option = [250, 0, 250]
architectures_single = [[option[0], d, option[2]] for d in diff_dropouts]

# The list that is actually searched
architectures = architectures_multiple

print(len(architectures))
print(architectures)



775
[[25, 0.1, 25], [50, 0.1, 25], [100, 0.1, 25], [250, 0.1, 25], [300, 0.1, 25], [25, 0.1, 50], [50, 0.1, 50], [100, 0.1, 50], [250, 0.1, 50], [300, 0.1, 50], [25, 0.1, 100], [50, 0.1, 100], [100, 0.1, 100], [250, 0.1, 100], [300, 0.1, 100], [25, 0.1, 250], [50, 0.1, 250], [100, 0.1, 250], [250, 0.1, 250], [300, 0.1, 250], [25, 0.1, 300], [50, 0.1, 300], [100, 0.1, 300], [250, 0.1, 300], [300, 0.1, 300], [25, 0.1, 25, 25], [50, 0.1, 25, 25], [100, 0.1, 25, 25], [250, 0.1, 25, 25], [300, 0.1, 25, 25], [25, 0.1, 50, 25], [50, 0.1, 50, 25], [100, 0.1, 50, 25], [250, 0.1, 50, 25], [300, 0.1, 50, 25], [25, 0.1, 100, 25], [50, 0.1, 100, 25], [100, 0.1, 100, 25], [250, 0.1, 100, 25], [300, 0.1, 100, 25], [25, 0.1, 250, 25], [50, 0.1, 250, 25], [100, 0.1, 250, 25], [250, 0.1, 250, 25], [300, 0.1, 250, 25], [25, 0.1, 300, 25], [50, 0.1, 300, 25], [100, 0.1, 300, 25], [250, 0.1, 300, 25], [300, 0.1, 300, 25], [25, 0.1, 25, 50], [50, 0.1, 25, 50], [100, 0.1, 25, 50], [250, 0.1, 25, 50], [300, 0

In [18]:
#gridsearch over defined configurations
#evaluating a configuration on test data (unseen but not fully independent from training data) and completely independent data

def evaluate_option (option, train_x, test_x, train_y, test_y, test_data_X, test_data_y_numeric, times, verbose = False, opt = None):
    """Train and evaluate one configuration `times` times.

    Parameters:
      option: [architecture, epochs, batch_size]; the architecture is passed
        to build_layers.
      train_x, train_y: training split.
      test_x, test_y: held-out split of the main data set.
      test_data_X, test_data_y_numeric: independent ("unseen") data set.
      times: number of models to train.
      verbose: plot the training curves of every model.
      opt: optimizer template. None means SGD(learning_rate=0.01,
        momentum=0.9). Every model gets its own optimizer instance.

    Uses the notebook-level `labels` and `parameters`.

    Returns ([mean average, mean test, mean unseen accuracy],
    quantized model with the best average accuracy,
    per-run [average, test, unseen] accuracies, all quantized models).
    """
    # np.array already copies its input, so converting once up front replaces
    # the per-run deepcopy + conversion (predict_tflite copies again anyway).
    test_x_arr = np.array(test_x)
    unseen_x_arr = np.array(test_data_X)

    accs = []
    models = []
    for i in range(0, times):
      # An optimizer keeps state for the model it trains. A single instance
      # (previously created once in the signature) must not be shared between
      # the models built here, so every run gets a fresh one.
      if opt is None:
        run_opt = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
      elif hasattr(opt, "get_config"):
        run_opt = opt.__class__.from_config(opt.get_config())
      else:
        # e.g. an optimizer given by name; compile() resolves it per model
        run_opt = opt

      current_model = build_model(architecture= build_layers(option[0]), train_x = train_x, labels= labels, opt = run_opt)
      history, acc_normal = train_model(current_model, train_x = train_x, train_y = train_y, test_x = test_x, test_y = test_y, epochs=option[1], batch_size=option[2])
      if verbose:
        plot_results(history)
      # only the quantized model is evaluated and kept
      _, model_quant = convert_model(model_to_convert=current_model, train_x=train_x)
      models.append(model_quant)
      eval_quant, acc_quant = evaluate_preds(predictions = predict_tflite(model_quant, test_x_arr), test_x = test_x, test_y = test_y, labels=labels, data_columns=parameters)
      test_eval_quant, test_acc_quant = evaluate_preds(predictions = predict_tflite(model_quant, unseen_x_arr), test_x = test_data_X, test_y = test_data_y_numeric, labels=labels, data_columns = parameters)
      accs.append([(acc_quant + test_acc_quant)/2, acc_quant, test_acc_quant])
    average_accs = [acc[0] for acc in accs]
    test_accs = [acc[1] for acc in accs]
    unseen_accs = [acc[2] for acc in accs]
    return [sum(average_accs)/times, sum(test_accs)/times, sum(unseen_accs)/times] , models[average_accs.index(max(average_accs))], accs, models


def grid_search(architecture_options, X, y_numeric,  test_data_X, test_data_y_numeric, test_set_options = (0.25,), epochs_options = (25,), batch_size_options = (25,), times = 1, random_state = None):
  """Evaluate every combination of architecture, epochs, batch size and test size.

  Parameters:
    architecture_options: list of architectures (see build_layers).
    X, y_numeric: normalized main data set and its class indices.
    test_data_X, test_data_y_numeric: independent ("unseen") data set.
    test_set_options: test-set fractions; one train/test split per fraction.
    epochs_options, batch_size_options: values combined with every architecture.
    times: number of models trained per configuration.
    random_state: forwarded to train_test_split; set it for reproducible splits.

  Whenever a configuration beats the best average accuracy so far, its
  quantized model is written to MODEL_TFLITE and exported via export_model.
  A RuntimeError raised while evaluating a configuration is reported and
  the configuration is skipped.

  Returns (model with the best accuracy on the unseen data,
  list of [[option, test_size], accuracies]).
  """
  results = []
  options = [[arch, epochs, batch_size] for arch in architecture_options  for epochs in epochs_options for batch_size in batch_size_options]
  total_runs = len(options)*len(test_set_options)
  best_option = []
  best_accuracy = [0,0,0]
  best_unseen_option = []
  best_unseen_model = None
  best_unseen_accuracy = [0,0,0]

  for t, test_set_size in enumerate(test_set_options):
    train_x, test_x, train_y, test_y = train_test_split(
            X, y_numeric, test_size=test_set_size, random_state=random_state)

    for i, option in enumerate(options):
      clear_output()

      # show the most recently exported constants file together with the progress
      if os.path.isfile(output_path):
        with open(output_path, mode="r") as file:
          print(file.read())
      print("best option: "+ str(best_option)+ ", accuracy: "+str(round(best_accuracy[0], 4))+ " (test: "+str(round(best_accuracy[1], 4))+"; unseen: "+str(round(best_accuracy[2], 4))+")")
      print("best unseen option: "+ str(best_unseen_option)+ ", accuracy: "+str(round(best_unseen_accuracy[0], 4))+ " (test: "+str(round(best_unseen_accuracy[1], 4))+"; unseen: "+str(round(best_unseen_accuracy[2], 4))+")\n")
      print(results)

      print("\n\noption: "+str(option)+", test_size: "+str(test_set_size)+ " ("+str(t*len(options)+i)+"/"+str(total_runs)+ ")\n\n")

      try:

        acc, model_quant, _, _ = evaluate_option(option,train_x, test_x, train_y, test_y, test_data_X, test_data_y_numeric, times)
        chosen_option = [option, test_set_size]
        results.append([chosen_option, acc])

        if acc[0] > best_accuracy[0]:
          # close (and flush) the file before export_model reads it from disk
          with open(MODEL_TFLITE, "wb") as model_file:
            model_file.write(model_quant)
          export_model(mean_str, std_str, labels_str)

          best_accuracy = acc
          best_option = chosen_option

        if acc[2] > best_unseen_accuracy[2]:
          best_unseen_model = model_quant
          best_unseen_accuracy = acc
          best_unseen_option = chosen_option
      except RuntimeError as err:
        # keep the search running, but do not hide what went wrong
        print("runtime error: "+str(err))
  return best_unseen_model, results
    


In [None]:

# Search all generated architectures with fixed training settings
# (25 epochs, batch size 20, one model per configuration, 20% test split).
model,results = grid_search(architectures, X, y_num, test_X, test_y_num, epochs_options = [25], batch_size_options = [20], times = 1, test_set_options = [0.2])
print(results)

In [21]:
#build single model

# [architecture, epochs, batch_size]; the architecture is Dense(250), Dropout(0.1), Dense(250)
selected_option = [[250, 0.1, 250], 50, 50]
test_size = 0.2
# number of models trained for this configuration
ntimes = 5

train_x, test_x, train_y, test_y = train_test_split(
          X, y_num, test_size=test_size)
acc, model_quant, accs, models = evaluate_option(selected_option,train_x, test_x, train_y, test_y, test_X, test_y_num, ntimes, verbose=False)

print("option: "+ str(selected_option)+ ", accuracy: "+str(round(acc[0], 4))+ " (test: "+str(round(acc[1], 4))+"; unseen: "+str(round(acc[2], 4))+")")
print("all models: \n"+str(accs))

# Write the best model with a context manager so the file is closed and
# flushed before export_model reads it back from disk.
with open(MODEL_TFLITE, "wb") as model_file:
  model_file.write(model_quant)
export_model(mean_str=mean_str, std_str=std_str, labels_str=labels_str, verbose = False)


INFO:tensorflow:Assets written to: /tmp/tmpo0p_3mjt/assets


INFO:tensorflow:Assets written to: /tmp/tmpo0p_3mjt/assets


INFO:tensorflow:Assets written to: /tmp/tmprqsnj1cw/assets


INFO:tensorflow:Assets written to: /tmp/tmprqsnj1cw/assets


predicted wrong: street: 23/211, park: 35/185, apartment: 5/325, supermarket: 32/298, clothing_store: 21/259, train: 5/156, bus: 16/250, gym: 0/72, car: 21/107, house: 1/234, nature: 7/82, restaurant: 2/105, cinema: 0/34, concert: 0/18, plane: 0/36, bar: 1/48, 
predicted wrong overall: 169/2420 (acc: 93.02%)
predicted wrong: street: 68/611, park: 116/300, apartment: 13/100, supermarket: 189/710, clothing_store: 237/426, train: 0/100, bus: 48/507, gym: 0/0, car: 61/250, house: 26/101, nature: 73/200, restaurant: 6/250, cinema: 0/0, concert: 0/0, plane: 0/0, bar: 0/0, 
predicted wrong overall: 837/3555 (acc: 76.46%)
INFO:tensorflow:Assets written to: /tmp/tmp0og86cpi/assets


INFO:tensorflow:Assets written to: /tmp/tmp0og86cpi/assets


INFO:tensorflow:Assets written to: /tmp/tmp35t5_t33/assets


INFO:tensorflow:Assets written to: /tmp/tmp35t5_t33/assets


predicted wrong: street: 16/211, park: 28/185, apartment: 4/325, supermarket: 38/298, clothing_store: 22/259, train: 5/156, bus: 25/250, gym: 2/72, car: 17/107, house: 1/234, nature: 6/82, restaurant: 1/105, cinema: 0/34, concert: 0/18, plane: 0/36, bar: 4/48, 
predicted wrong overall: 169/2420 (acc: 93.02%)
predicted wrong: street: 66/611, park: 100/300, apartment: 12/100, supermarket: 201/710, clothing_store: 244/426, train: 0/100, bus: 58/507, gym: 0/0, car: 67/250, house: 45/101, nature: 37/200, restaurant: 9/250, cinema: 0/0, concert: 0/0, plane: 0/0, bar: 0/0, 
predicted wrong overall: 839/3555 (acc: 76.4%)
INFO:tensorflow:Assets written to: /tmp/tmpsg60abjy/assets


INFO:tensorflow:Assets written to: /tmp/tmpsg60abjy/assets


INFO:tensorflow:Assets written to: /tmp/tmpcwkvr4_a/assets


INFO:tensorflow:Assets written to: /tmp/tmpcwkvr4_a/assets


predicted wrong: street: 22/211, park: 41/185, apartment: 6/325, supermarket: 27/298, clothing_store: 29/259, train: 6/156, bus: 21/250, gym: 0/72, car: 16/107, house: 1/234, nature: 5/82, restaurant: 0/105, cinema: 0/34, concert: 0/18, plane: 0/36, bar: 1/48, 
predicted wrong overall: 175/2420 (acc: 92.77%)
predicted wrong: street: 72/611, park: 121/300, apartment: 14/100, supermarket: 161/710, clothing_store: 242/426, train: 0/100, bus: 89/507, gym: 0/0, car: 53/250, house: 42/101, nature: 60/200, restaurant: 2/250, cinema: 0/0, concert: 0/0, plane: 0/0, bar: 0/0, 
predicted wrong overall: 856/3555 (acc: 75.92%)
INFO:tensorflow:Assets written to: /tmp/tmpimb6n4mn/assets


INFO:tensorflow:Assets written to: /tmp/tmpimb6n4mn/assets


INFO:tensorflow:Assets written to: /tmp/tmpj1p7m_9q/assets


INFO:tensorflow:Assets written to: /tmp/tmpj1p7m_9q/assets


predicted wrong: street: 16/211, park: 38/185, apartment: 5/325, supermarket: 34/298, clothing_store: 28/259, train: 6/156, bus: 25/250, gym: 1/72, car: 21/107, house: 1/234, nature: 6/82, restaurant: 9/105, cinema: 0/34, concert: 0/18, plane: 0/36, bar: 1/48, 
predicted wrong overall: 191/2420 (acc: 92.11%)
predicted wrong: street: 57/611, park: 104/300, apartment: 29/100, supermarket: 189/710, clothing_store: 246/426, train: 0/100, bus: 75/507, gym: 0/0, car: 67/250, house: 26/101, nature: 73/200, restaurant: 25/250, cinema: 0/0, concert: 0/0, plane: 0/0, bar: 0/0, 
predicted wrong overall: 891/3555 (acc: 74.94%)
INFO:tensorflow:Assets written to: /tmp/tmpw32ne3cp/assets


INFO:tensorflow:Assets written to: /tmp/tmpw32ne3cp/assets


INFO:tensorflow:Assets written to: /tmp/tmpkqy3czfo/assets


INFO:tensorflow:Assets written to: /tmp/tmpkqy3czfo/assets


predicted wrong: street: 12/211, park: 39/185, apartment: 6/325, supermarket: 34/298, clothing_store: 21/259, train: 4/156, bus: 39/250, gym: 1/72, car: 19/107, house: 1/234, nature: 6/82, restaurant: 0/105, cinema: 0/34, concert: 0/18, plane: 0/36, bar: 3/48, 
predicted wrong overall: 185/2420 (acc: 92.36%)
predicted wrong: street: 66/611, park: 133/300, apartment: 20/100, supermarket: 179/710, clothing_store: 240/426, train: 0/100, bus: 88/507, gym: 0/0, car: 54/250, house: 53/101, nature: 77/200, restaurant: 12/250, cinema: 0/0, concert: 0/0, plane: 0/0, bar: 0/0, 
predicted wrong overall: 922/3555 (acc: 74.06%)
option: [[250, 0.1, 250], 50, 50], accuracy: 0.841 (test: 0.9265; unseen: 0.7556)
all models: 
[[0.8473611256407574, 0.9301652892561983, 0.7645569620253164], [0.8470798316885774, 0.9301652892561983, 0.7639943741209564], [0.8434491636735595, 0.9276859504132231, 0.7592123769338959], [0.8352207343864421, 0.9210743801652892, 0.7493670886075949], [0.8321003475491393, 0.9235537190

In [22]:
#classification with sklearn

# Baseline: the same task with scikit-learn's MLPClassifier (two hidden layers of 250 units)
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(hidden_layer_sizes=(250,250), random_state=1, max_iter=300)
train_x, test_x, train_y, test_y = train_test_split(
        X, y_num, test_size=0.1)

clf = clf.fit(train_x, train_y)

# Results are stored as `evaluation` so the builtin `eval` is not shadowed
# for the rest of the kernel session.

# held-out split of the main data set
predict = clf.predict_proba(test_x)
evaluation, acc = evaluate_preds(predict, test_x, test_y, labels, parameters)

# independent ("unseen") data set
predict = clf.predict_proba(test_X)
evaluation, acc = evaluate_preds(predict, test_X, test_y_num, labels, parameters)

predicted wrong: street: 7/131, park: 19/97, apartment: 0/132, supermarket: 10/152, clothing_store: 9/141, train: 1/81, bus: 7/114, gym: 1/40, car: 2/54, house: 0/107, nature: 1/37, restaurant: 1/59, cinema: 0/13, concert: 0/13, plane: 0/18, bar: 0/21, 
predicted wrong overall: 58/1210 (acc: 95.21%)
predicted wrong: street: 74/611, park: 116/300, apartment: 20/100, supermarket: 190/710, clothing_store: 230/426, train: 0/100, bus: 66/507, gym: 0/0, car: 49/250, house: 34/101, nature: 59/200, restaurant: 6/250, cinema: 0/0, concert: 0/0, plane: 0/0, bar: 0/0, 
predicted wrong overall: 844/3555 (acc: 76.26%)
