# Train ML models that can classify HubAndSpoke and Routed vehicles

In [None]:
import pandas as pd
import numpy as np

In [None]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [None]:
# Authenticate the Colab user, then build a PyDrive client that uses the
# application-default credentials obtained from that login.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [None]:
# Reference the Drive file by its id and save its content locally as Features.json.
downloaded = drive.CreateFile({'id':"1sLSDafMwIhJxDipgCVgrs67UShNSBZM_"})
downloaded.GetContentFile('Features.json') 

### Step 1: Load feature dataset

In [None]:
# Read the local Features.json file into a dataframe.
df = pd.read_json('Features.json')

In [None]:
# How many vehicles are in the dataframe?
len(df)

In [None]:
# Preview the first rows of the loaded dataframe.
df.head()

In [None]:
# Count how many rows carry each GeoLabel value (class balance check).
df.GeoLabel.value_counts()

In [None]:
def array1D(arr2D):
    """Flatten a two-level nested iterable into one flat list.

    Args:
        arr2D: Iterable whose items are themselves iterable (e.g. a list of rows).

    Returns:
        A new list holding every inner element, row by row, in original order.
    """
    return [value for row in arr2D for value in row]

In [None]:
# Flatten each vehicle's nested feature grid into a single 1-D list.
df['Feature1D'] = df['Features'].apply(array1D)

### Step 2: Create train and test sets

In [None]:
from sklearn.model_selection import train_test_split

# One flat feature vector (as a NumPy array) per vehicle.
X = [np.array(x) for x in df.Feature1D.values]
# Labels are scalars, so keep them as plain values rather than wrapping each one
# in a zero-dimensional array.
y = df.GeoLabel.tolist()
# NOTE(review): test_size=0.70 leaves only 30% of the rows for training -- confirm
# that this, and not a 70/30 train/test split, is what is intended.
# random_state pins the shuffle so the split (and every metric computed from it)
# is reproducible across notebook runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.70, random_state=42
)

In [None]:
# Number of examples in training set.
len(X_train)

In [None]:
# Number of examples in the test set
len(X_test)

### Step 3: Train models and show accuracy

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [None]:
# Classifier used for the rest of this step; the commented-out SVC line is an
# alternative that can be swapped in.
#clf = SVC()
clf = RandomForestClassifier()

In [None]:
# Fit the classifier on the training split.
clf.fit(X_train, y_train)

In [None]:
# Predict a label for every example in the test split.
pred = clf.predict(X_test)

In [None]:
# Show accuracy on the test set.
from sklearn.metrics import accuracy_score
# scikit-learn metrics take the ground truth first: (y_true, y_pred).
accuracy_score(y_test, pred)

In [None]:
from sklearn.metrics import confusion_matrix
# Pass the ground truth first so that rows are the true labels and columns are the
# predicted labels; with the arguments swapped the matrix comes out transposed.
confusion_matrix(y_test, pred)

### Step 4: Show where the model makes right and wrong predictions

In [None]:
# Preview the source dataframe again before building the per-prediction table.
df.head()

In [None]:
# Per-example results table for the test split: prediction, true label,
# a mismatch flag, and the feature vector the prediction was made from.
df_test = pd.DataFrame()
df_test["Predicted"] = pred
df_test["GeoLabel"] = y_test
# True wherever the predicted label differs from the true label.
df_test['Wrong'] = df_test["Predicted"].ne(df_test["GeoLabel"])
df_test['Features'] = X_test

In [None]:
# Helper for rendering a 2-D feature grid as a matplotlib image.
import matplotlib.pyplot as plt
# Side length, in inches, of the square figure created by matplot_image.
img_dim_inches = 2
# Resolution (dots per inch) of that figure.
img_dpi = 100
def matplot_image(data, i, j):
    """Draw ``data`` as a small blue-scale pcolor plot and display it.

    Args:
        data: 2-D array-like handed straight to ``Axes.pcolor``.
        i: Unused; kept so existing callers keep working.
        j: Unused; kept so existing callers keep working.
    """
    side = img_dim_inches
    fig = plt.figure(figsize=[side, side], dpi=img_dpi)
    ax = fig.add_axes([0.2, 0.2, 0.8, 0.8])

    tick_positions = [0, 10, 20]
    ax.set_xticks(tick_positions)
    ax.set_yticks(tick_positions)

    # Each axis gets its own tick colour.
    for axis_name, tick_color in (('x', '#24477b'), ('y', '#00aeef')):
        ax.tick_params(axis=axis_name, colors=tick_color)

    # Paint all four spines white, matching the white face colour set below.
    for edge in ('bottom', 'top', 'left', 'right'):
        ax.spines[edge].set_color('white')

    ax.pcolor(data, cmap=plt.cm.Blues)
    ax.set_facecolor('white')
    plt.show()
    # Close the figure once shown so repeated calls do not pile up open figures.
    plt.close(fig)

In [None]:
# Print some predictions that are correct (at most five).
# Filter once and take both the label and the image from the same filtered rows;
# indexing the unfiltered table for the label would pair it with the wrong image.
# head(5) also copes with fewer than five matching rows.
correct_rows = df_test[~df_test.Wrong].head(5)
for label, features in zip(correct_rows.GeoLabel.values, correct_rows.Features.values):
    print(label)
    matplot_image(features.reshape(25, 25), 0, 0)

In [None]:
# Print some predictions that are wrong (at most five).
# Filter once and take both the label and the image from the same filtered rows;
# indexing the unfiltered table for the label would pair it with the wrong image.
# head(5) also copes with fewer than five misclassified rows.
wrong_rows = df_test[df_test.Wrong].head(5)
for label, features in zip(wrong_rows.GeoLabel.values, wrong_rows.Features.values):
    # The printed value is the true label of the misclassified example.
    print(label)
    matplot_image(features.reshape(25, 25), 0, 0)

### Step 5: Train a simple NN model

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
# Delete the Graph directory (the TensorBoard log_dir) so logs from earlier runs
# do not mix with the new ones.
!rm -rf Graph

In [None]:
# TensorBoard callback writing to ./Graph: graph and images on, histograms off.
tb = keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)

In [None]:
# Small fully connected classifier: 25x25 input -> 32 -> 8 -> 2-way softmax.
model = tf.keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=(25, 25)))
model.add(keras.layers.Dense(32, activation=tf.nn.relu))
model.add(keras.layers.Dense(8, activation=tf.nn.relu))
model.add(keras.layers.Dense(2, activation=tf.nn.softmax))

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Reshape every flat training vector into the 25x25 grid the network expects.
training_grids = [np.reshape(vector, (25, 25)) for vector in X_train]
X_2D = np.array(training_grids)
# Encode the labels as integers: 'Routed' -> 1, anything else -> 0.
y_2D = np.array([int(label == 'Routed') for label in y_train])

In [None]:
# Train for 40 epochs, logging to TensorBoard through the `tb` callback.
model.fit(X_2D, y_2D, epochs=40, callbacks=[tb])

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

In [None]:
# Open TensorBoard inline, pointed at the ./Graph log directory.
%tensorboard --logdir  './Graph'