# Libraries to Install
- Built in Anaconda Environment
- Installed Keras and Tensorflow on top
- Using Tensorflow as the backend. (Set this by using the file explorer to navigate to "%USERPROFILE%/.keras/keras.json" and changing "backend" to "tensorflow")

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

from keras.models import load_model

import seaborn as sns
from sklearn.externals import joblib

# Load Files for Prediction
Place the test data in the "TEST DATA HERE" folder: the features CSV under "features/" and the labels CSV under "labels/".

In [23]:
# Replace this path with the location of the features CSV to score.
filename = (
    "./TEST DATA HERE/features/"
    + "part-00000-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv"
)
# header=0: the first row of the file supplies the column names.
data_features = pd.read_csv(filepath_or_buffer=filename, header=0)

In [24]:
# Replace this path with the location of the labels CSV that matches the
# features file loaded above.
filename = (
    "./TEST DATA HERE/labels/"
    + "part-00000-e9445087-aa0a-433b-a7f6-7f4c19d78ad6-c000.csv"
)
# header=0: the first row of the file supplies the column names.
data_labels = pd.read_csv(filepath_or_buffer=filename, header=0)

# Process Data

In [25]:
def clean_features(df):
    """Rename the raw telemetry columns and drop unusable rows.

    The eleven columns of ``df`` are renamed positionally to the names used
    by the rest of the notebook. Rows containing missing values and exact
    duplicate rows are then removed, and only rows satisfying all of
    ``accuracy < 20``, ``second < 1e6`` and ``0 < speed < 60`` are kept.

    Args:
        df: Raw features DataFrame with exactly eleven columns, ordered to
            match the names assigned below.

    Returns:
        A new, filtered DataFrame. The caller's DataFrame is left untouched,
        including its original column names.

    Raises:
        ValueError: Raised by pandas when ``df`` does not have eleven
            columns.
    """
    # Rename on a shallow copy: assigning to ``df.columns`` directly would
    # silently rename the caller's DataFrame as a side effect.
    df = df.copy(deep=False)
    # rename to a more uniform convention
    df.columns = ['bookingID', 'accuracy', 'bearing',
                  'acceleration_x', 'acceleration_y', 'acceleration_z',
                  'gyro_x', 'gyro_y', 'gyro_z',
                  'second', 'speed']
    # remove rows with missing values, then exact duplicate rows
    df = df.dropna()
    df = df.drop_duplicates()
    # keep only readings inside the accepted accuracy / time / speed ranges
    in_range = (
        (df["accuracy"] < 20)
        & (df["second"] < 1e6)
        & (df["speed"] > 0)
        & (df["speed"] < 60)
    )
    return df.loc[in_range]

def clean_labels(df):
    """Reduce the labels table to exactly one row per ``bookingID``.

    Rows with missing values and exact duplicate rows are dropped first.
    When a ``bookingID`` still carries conflicting labels, the row with the
    highest ``label`` value is kept, so a booking labelled both 0 and 1 ends
    up as 1. The original row order and index are preserved.

    NOTE(review): this assumes the larger label value (1) is the "dangerous"
    class mentioned in the original comment -- confirm against the data
    dictionary.

    Args:
        df: Labels DataFrame with at least ``bookingID`` and ``label``
            columns.

    Returns:
        A new DataFrame with one row per ``bookingID``.
    """
    df = df.dropna()
    df = df.drop_duplicates()
    # Some booking IDs have multiple labels; resolve them to the highest
    # label instead of whichever row happens to come last in the file.
    highest_label = df.groupby("bookingID")["label"].transform("max")
    df = df[(df["label"] == highest_label).values]
    # Rows that differ only in other columns can still share a bookingID.
    return df.drop_duplicates("bookingID", keep="last")

def process_data(features, labels):
    """Clean both inputs and join them on ``bookingID``.

    Args:
        features: Raw features DataFrame, passed to ``clean_features``.
        labels: Raw labels DataFrame, passed to ``clean_labels``.

    Returns:
        The inner join of the cleaned features and the cleaned labels on
        ``bookingID``; rows whose ``bookingID`` is absent from either side
        are dropped.
    """
    cleaned_features = clean_features(features)
    cleaned_labels = clean_labels(labels)
    # Inner join so every remaining feature row has a label attached.
    return pd.merge(cleaned_features, cleaned_labels,
                    how="inner", on="bookingID")

# Build the cleaned, labelled dataset used by every cell below.
data_combined = process_data(features=data_features, labels=data_labels)

# Load Models

In [26]:
# Load the serialised scaler from the working directory; it is applied to the
# feature matrix before prediction.
# NOTE(review): presumably this is the scaler fitted during training -- confirm.
# NOTE(review): joblib files are pickle-based; only load artefacts you trust.
scaler = joblib.load("standard_scaler.pkl")

In [27]:
# Load the saved Keras model (HDF5 file) from the working directory.
keras_model = load_model("train_model_all_data.h5")

# Make Predictions

In [29]:
# Feature matrix: the eleven cleaned telemetry columns, in the order listed.
# NOTE(review): "bookingID" is included as a model input; presumably the
# scaler and model were fitted with it, so it must stay -- confirm.
X = data_combined.loc[:, [
    "bookingID",
    "accuracy",
    "bearing",
    "acceleration_x", "acceleration_y", "acceleration_z",
    "gyro_x", "gyro_y", "gyro_z",
    "second",
    "speed",
]]
# Target: kept as a single-column DataFrame rather than a Series.
y = data_combined.loc[:, ["label"]]

In [30]:
# Scale the features with the loaded scaler, then ask the model for a
# predicted class per row.
X_standardised = scaler.transform(X)
# NOTE(review): predict_classes exists only on older Keras Sequential models;
# confirm the installed Keras version still provides it.
y_pred = keras_model.predict_classes(X_standardised)

  """Entry point for launching an IPython kernel.


In [31]:
# Evaluate the model on the standardised features against the true labels;
# the notebook displays the returned values as the cell output.
keras_model.evaluate(X_standardised, y)



[0.5828630390650341, 0.7218876452302406, 0.19385076910205712]

In [33]:
# Confusion matrix of true labels (first argument) versus predicted classes.
print(confusion_matrix(y, y_pred))

[[876210  21162]
 [334376  46649]]


In [35]:
# Per-class precision, recall, F1 and support for the predictions.
print(classification_report(y, y_pred))

              precision    recall  f1-score   support

           0       0.72      0.98      0.83    897372
           1       0.69      0.12      0.21    381025

   micro avg       0.72      0.72      0.72   1278397
   macro avg       0.71      0.55      0.52   1278397
weighted avg       0.71      0.72      0.65   1278397

