In [1]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

# Appendix - More variables introduced

In [2]:
# Data reading and processing
import re
import time
import pandas as pd
import numpy as np
import warnings
from datetime import datetime

import os
from os import path

# Visualisation
from matplotlib import pyplot as plt
from pandas_ml import ConfusionMatrix

# Modelling
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBClassifier
from sklearn.multioutput import MultiOutputRegressor

from sklearn.metrics import accuracy_score, r2_score

# Suppress warnings
warnings.filterwarnings("ignore")

In [3]:
# Setting current directory as same directory where code is stored
try:
    CUR_DIR = dirname(realpath(__file__))
except NameError:
    CUR_DIR = os.getcwd()

In [4]:
## Calculating distance        
def pythagoras(long1, long2, lat1, lat2):
    import math
    a = abs(long1-long2)**2
    b = abs(lat1-lat2)**2
    return math.sqrt(a+b)

## Data filtering and processing

In [5]:
# Import data files
fp = os.path.join(CUR_DIR,"input")

train = pd.DataFrame.from_csv(path.join(fp, 'trainingData.csv'),index_col=None)
test = pd.DataFrame.from_csv(path.join(fp, 'validationData.csv'),index_col=None)
print('Training dataset (length, width) = {}'.format(str(train.shape)))
print('Validation dataset (length, width) = {}'.format(str(test.shape)))

Training dataset (length, width) = (19937, 529)
Validation dataset (length, width) = (1111, 529)


In [6]:
sub_train = train[[x for x in train.columns if 'WAP' not in x]]
sub_train['max_signal'] = train.loc[:,[x for x in train.columns if 'WAP' in x]].replace(100,np.NaN).max(axis=1)
nan_sub = sub_train.loc[np.isnan(sub_train['max_signal']),]
nan_rows = sub_train['max_signal'].isnull().sum()
nan_rows_index = nan_sub.index.tolist()
train = train.iloc[[x for x in train.index if x not in nan_rows_index]]
wap_max = train[[x for x in train.columns if 'WAP' in x]].replace(100,np.nan).max(axis=0).reset_index()
wap_max = wap_max.rename(columns={'index':'wap_name', 0: 'max_signal'}).fillna(10)
cols_torm = wap_max.loc[wap_max['max_signal']==10,'wap_name'].tolist()
train = train.drop(train[cols_torm],axis=1)
test = test.drop(test[cols_torm],axis=1)

# Creates single variable combining both targets
train['target'] = 'B' + train['BUILDINGID'].astype(str) + ' x F' + train['FLOOR'].astype(str)
test['target'] = 'B' + test['BUILDINGID'].astype(str) + ' x F' + test['FLOOR'].astype(str)

## Baseline Models

### Classification Model

In [7]:
st = time.time()
original_class_params = {'gamma': 0,
 'learning_rate': 0.21164182169390228,
 'max_depth': 3,
 'min_child_weight': 3,
 'n_estimators': 394,
 'objective': 'multi:softprob'}
class_xg = XGBClassifier(**original_class_params,num_classes=13,seed=20,n_jobs=-1)
class_xg.fit(train[[x for x in train.columns if 'WAP' in x]], train['target'].ravel())
class_preds = class_xg.predict(test[[x for x in train.columns if 'WAP' in x]])
class_acc = accuracy_score(test['target'].ravel(), class_preds)
print("Accuracy: {:.2f}%".format(class_acc*100))
print("\tTime elapsed: {:.2f} seconds".format(time.time()-st))

actual_class = test['target'].tolist()
bldg_results = []
floor_results = []
for i, pred in enumerate(class_preds):
    pred = pred.split(' x ')
    act = actual_class[i].split(' x ')
    bldg_results.append(pred[0] == act[0])
    floor_results.append(pred[1] == act[1])


pred_bldg = sum(bldg_results)/len(bldg_results)
pred_floor = sum(floor_results)/len(floor_results)
print("\tBuilding prediction accuracy: %.2f%%" % (pred_bldg*100))
print("\tFloor prediction accuracy: %.2f%%" % (pred_floor*100))

Accuracy: 90.01%
	Time elapsed: 1383.86 seconds
	Building prediction accuracy: 99.82%
	Floor prediction accuracy: 90.01%


### Regression Model

In [8]:
reg_knn = MultiOutputRegressor(KNeighborsRegressor(n_neighbors=3))
reg_knn.fit(train[[x for x in train.columns if 'WAP' in x]], train[['LATITUDE','LONGITUDE']].values)
reg_preds = reg_knn.predict(test[[x for x in train.columns if 'WAP' in x]])
reg_preds = tuple(map(tuple, reg_preds))

rsq = r2_score(test[['LATITUDE','LONGITUDE']].values, reg_preds)
print('R-squared score: %.4f' % rsq)

actual_ll = tuple(map(tuple, test[['LATITUDE','LONGITUDE']].values))

dist_results = []
for i, pred in enumerate(reg_preds):
    dist_results.append(pythagoras(lat1=pred[0],lat2=actual_ll[i][0],long1=pred[1],long2=actual_ll[i][1]))

pred_radius = sum(dist_results) / float(len(dist_results))
print("Accuracy radius: %.2fm" % pred_radius)

R-squared score: 0.9722
Accuracy radius: 12.94m


## Adding variables to modelling

In [13]:
train['time_weekday'] = train['TIMESTAMP'].apply(lambda x: int(datetime.fromtimestamp(x).strftime("%w")))
train['time_hour'] = train['TIMESTAMP'].apply(lambda x: int(datetime.fromtimestamp(x).strftime("%H")))

test['time_weekday'] = test['TIMESTAMP'].apply(lambda x: int(datetime.fromtimestamp(x).strftime("%w")))
test['time_hour'] = test['TIMESTAMP'].apply(lambda x: int(datetime.fromtimestamp(x).strftime("%H")))

### 1. Time of day

In [19]:
print("Classification Model -")
class_xg.fit(train[[x for x in train.columns if 'WAP' in x or x == 'time_hour']], train['target'].ravel())
class_preds_tuned = class_xg.predict(test[[x for x in train.columns if 'WAP' in x or x == 'time_hour']])

pred_class_acc = accuracy_score(test['target'].ravel(), class_preds_tuned)
print("\tOverall accuracy score: %.2f%% (%.2f%% from original)" % ((pred_class_acc*100),((pred_class_acc-class_acc)/class_acc*100)))

bldg_results_new = []
floor_results_new = []
for i, pred in enumerate(class_preds_tuned):
    pred = pred.split(' x ')
    act = actual_class[i].split(' x ')
    bldg_results_new.append(pred[0] == act[0])
    floor_results_new.append(pred[1] == act[1])

pred_bldg_new = sum(bldg_results_new)/len(bldg_results_new)
pred_floor_new = sum(floor_results_new)/len(floor_results_new)
print("\tBuilding prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_bldg_new*100), ((pred_bldg_new-pred_bldg)/pred_bldg*100)))
print("\tFloor prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_floor_new*100), ((pred_floor_new-pred_floor)/pred_floor*100)))

print("Regression Model -")
reg_knn.fit(train[[x for x in train.columns if 'WAP' in x or x == 'time_hour']], train[['LATITUDE','LONGITUDE']].values)
reg_preds_tuned = reg_knn.predict(test[[x for x in train.columns if 'WAP' in x or x == 'time_hour']])

new_rsq = r2_score(test[['LATITUDE','LONGITUDE']].values, reg_preds_tuned)
print('\tR-squared score: %.4f (%.2f%% from original)' % (new_rsq,((new_rsq-rsq)/rsq*100)))

dist_results = []
for i, pred in enumerate(reg_preds):
    dist_results.append(pythagoras(lat1=pred[0],lat2=actual_ll[i][0],long1=pred[1],long2=actual_ll[i][1]))

pred_radius_new = sum(dist_results) / float(len(dist_results))
print("\tAccuracy radius: %.2fm (%.2f%% from original)" % (pred_radius_new, ((pred_radius-pred_radius_new)/pred_radius)*100))

Classification Model -
	Overall accuracy score: 68.50% (-23.90% from original)
	Building prediction accuracy: 98.20% (-1.62% from original)
	Floor prediction accuracy: 68.95% (-23.40% from original)
Regression Model -
	R-squared score: 0.9723 (0.01% from original)
	Accuracy radius: 12.94m (0.00% from original)


### 2. Day of Week

In [21]:
class_xg.fit(train[[x for x in train.columns if 'WAP' in x or x == 'time_weekday']], train['target'].ravel())
class_preds_tuned = class_xg.predict(test[[x for x in train.columns if 'WAP' in x or x == 'time_weekday']])
print("Classification Model - ")
pred_class_acc = accuracy_score(test['target'].ravel(), class_preds_tuned)
print("\tOverall accuracy score: %.2f%% (%.2f%% from original)" % ((pred_class_acc*100),((pred_class_acc-class_acc)/class_acc*100)))

bldg_results_new = []
floor_results_new = []
for i, pred in enumerate(class_preds_tuned):
    pred = pred.split(' x ')
    act = actual_class[i].split(' x ')
    bldg_results_new.append(pred[0] == act[0])
    floor_results_new.append(pred[1] == act[1])

pred_bldg_new = sum(bldg_results_new)/len(bldg_results_new)
pred_floor_new = sum(floor_results_new)/len(floor_results_new)
print("\tBuilding prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_bldg_new*100), ((pred_bldg_new-pred_bldg)/pred_bldg*100)))
print("\tFloor prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_floor_new*100), ((pred_floor_new-pred_floor)/pred_floor*100)))

reg_knn.fit(train[[x for x in train.columns if 'WAP' in x or x == 'time_weekday']], train[['LATITUDE','LONGITUDE']].values)
reg_preds_tuned = reg_knn.predict(test[[x for x in train.columns if 'WAP' in x or x == 'time_weekday']])

print("Regression Model -")
new_rsq = r2_score(test[['LATITUDE','LONGITUDE']].values, reg_preds_tuned)
print('\tR-squared score: %.4f (%.2f%% from original)' % (new_rsq,((new_rsq-rsq)/rsq*100)))

dist_results = []
for i, pred in enumerate(reg_preds):
    dist_results.append(pythagoras(lat1=pred[0],lat2=actual_ll[i][0],long1=pred[1],long2=actual_ll[i][1]))

pred_radius_new = sum(dist_results) / float(len(dist_results))
print("\tAccuracy radius: %.2fm (%.2f%% from original)" % (pred_radius_new, ((pred_radius-pred_radius_new)/pred_radius)*100))

Classification Model - 
	Overall accuracy score: 83.71% (-7.00% from original)
	Building prediction accuracy: 97.93% (-1.89% from original)
	Floor prediction accuracy: 83.80% (-6.90% from original)
Regression Model -
	R-squared score: 0.9722 (0.00% from original)
	Accuracy radius: 12.94m (0.00% from original)


### 3. Phone Model

In [22]:
class_xg.fit(train[[x for x in train.columns if 'WAP' in x or x == 'PHONEID']], train['target'].ravel())
class_preds_tuned = class_xg.predict(test[[x for x in train.columns if 'WAP' in x or x == 'PHONEID']])
print("Classification Model - ")
pred_class_acc = accuracy_score(test['target'].ravel(), class_preds_tuned)
print("\tOverall accuracy score: %.2f%% (%.2f%% from original)" % ((pred_class_acc*100),((pred_class_acc-class_acc)/class_acc*100)))

bldg_results_new = []
floor_results_new = []
for i, pred in enumerate(class_preds_tuned):
    pred = pred.split(' x ')
    act = actual_class[i].split(' x ')
    bldg_results_new.append(pred[0] == act[0])
    floor_results_new.append(pred[1] == act[1])

pred_bldg_new = sum(bldg_results_new)/len(bldg_results_new)
pred_floor_new = sum(floor_results_new)/len(floor_results_new)
print("\tBuilding prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_bldg_new*100), ((pred_bldg_new-pred_bldg)/pred_bldg*100)))
print("\tFloor prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_floor_new*100), ((pred_floor_new-pred_floor)/pred_floor*100)))

# reg_knn = MultiOutputRegressor(KNeighborsRegressor(n_neighbors=3))
reg_knn.fit(train[[x for x in train.columns if 'WAP' in x or x == 'PHONEID']], train[['LATITUDE','LONGITUDE']].values)
reg_preds_tuned = reg_knn.predict(test[[x for x in train.columns if 'WAP' in x or x == 'PHONEID']])

print("Regression Model -")
new_rsq = r2_score(test[['LATITUDE','LONGITUDE']].values, reg_preds_tuned)
print('\tR-squared score: %.4f (%.2f%% from original)' % (new_rsq,((new_rsq-rsq)/rsq*100)))

dist_results = []
for i, pred in enumerate(reg_preds):
    dist_results.append(pythagoras(lat1=pred[0],lat2=actual_ll[i][0],long1=pred[1],long2=actual_ll[i][1]))

pred_radius_new = sum(dist_results) / float(len(dist_results))
print("\tAccuracy radius: %.2fm (%.2f%% from original)" % (pred_radius_new, ((pred_radius-pred_radius_new)/pred_radius)*100))

Classification Model - 
	Overall accuracy score: 81.01% (-10.00% from original)
	Building prediction accuracy: 98.38% (-1.44% from original)
	Floor prediction accuracy: 81.10% (-9.90% from original)
Regression Model -
	R-squared score: 0.9722 (-0.00% from original)
	Accuracy radius: 12.94m (0.00% from original)


### 4. Phone Model + Time of Day

In [23]:
class_xg.fit(train[[x for x in train.columns if 'WAP' in x or x == 'PHONEID' or x == 'time_hour']], train['target'].ravel())
class_preds_tuned = class_xg.predict(test[[x for x in train.columns if 'WAP' in x or x == 'PHONEID' or x == 'time_hour']])
print("Classification Model - ")
pred_class_acc = accuracy_score(test['target'].ravel(), class_preds_tuned)
print("\tOverall accuracy score: %.2f%% (%.2f%% from original)" % ((pred_class_acc*100),((pred_class_acc-class_acc)/class_acc*100)))

bldg_results_new = []
floor_results_new = []
for i, pred in enumerate(class_preds_tuned):
    pred = pred.split(' x ')
    act = actual_class[i].split(' x ')
    bldg_results_new.append(pred[0] == act[0])
    floor_results_new.append(pred[1] == act[1])

pred_bldg_new = sum(bldg_results_new)/len(bldg_results_new)
pred_floor_new = sum(floor_results_new)/len(floor_results_new)
print("\tBuilding prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_bldg_new*100), ((pred_bldg_new-pred_bldg)/pred_bldg*100)))
print("\tFloor prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_floor_new*100), ((pred_floor_new-pred_floor)/pred_floor*100)))

reg_knn.fit(train[[x for x in train.columns if 'WAP' in x or x == 'PHONEID' or x == 'time_hour']], train[['LATITUDE','LONGITUDE']].values)
reg_preds_tuned = reg_knn.predict(test[[x for x in train.columns if 'WAP' in x or x == 'PHONEID' or x == 'time_hour']])

print("Regression Model -")
new_rsq = r2_score(test[['LATITUDE','LONGITUDE']].values, reg_preds_tuned)
print('\tR-squared score: %.4f (%.2f%% from original)' % (new_rsq,((new_rsq-rsq)/rsq*100)))

dist_results = []
for i, pred in enumerate(reg_preds):
    dist_results.append(pythagoras(lat1=pred[0],lat2=actual_ll[i][0],long1=pred[1],long2=actual_ll[i][1]))

pred_radius_new = sum(dist_results) / float(len(dist_results))
print("\tAccuracy radius: %.2fm (%.2f%% from original)" % (pred_radius_new, ((pred_radius-pred_radius_new)/pred_radius)*100))

Classification Model - 
	Overall accuracy score: 56.35% (-37.40% from original)
	Building prediction accuracy: 90.91% (-8.93% from original)
	Floor prediction accuracy: 56.80% (-36.90% from original)
Regression Model -
	R-squared score: 0.9723 (0.01% from original)
	Accuracy radius: 12.94m (0.00% from original)


### 5. Phone Model + Day of Week

In [24]:
class_xg.fit(train[[x for x in train.columns if 'WAP' in x or x == 'PHONEID' or x == 'time_weekday']], train['target'].ravel())
class_preds_tuned = class_xg.predict(test[[x for x in train.columns if 'WAP' in x or x == 'PHONEID' or x == 'time_weekday']])
print("Classification Model - ")
pred_class_acc = accuracy_score(test['target'].ravel(), class_preds_tuned)
print("\tOverall accuracy score: %.2f%% (%.2f%% from original)" % ((pred_class_acc*100),((pred_class_acc-class_acc)/class_acc*100)))

bldg_results_new = []
floor_results_new = []
for i, pred in enumerate(class_preds_tuned):
    pred = pred.split(' x ')
    act = actual_class[i].split(' x ')
    bldg_results_new.append(pred[0] == act[0])
    floor_results_new.append(pred[1] == act[1])

pred_bldg_new = sum(bldg_results_new)/len(bldg_results_new)
pred_floor_new = sum(floor_results_new)/len(floor_results_new)
print("\tBuilding prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_bldg_new*100), ((pred_bldg_new-pred_bldg)/pred_bldg*100)))
print("\tFloor prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_floor_new*100), ((pred_floor_new-pred_floor)/pred_floor*100)))

reg_knn.fit(train[[x for x in train.columns if 'WAP' in x or x == 'PHONEID' or x == 'time_weekday']], train[['LATITUDE','LONGITUDE']].values)
reg_preds_tuned = reg_knn.predict(test[[x for x in train.columns if 'WAP' in x or x == 'PHONEID' or x == 'time_weekday']])

print("Regression Model -")
new_rsq = r2_score(test[['LATITUDE','LONGITUDE']].values, reg_preds_tuned)
print('\tR-squared score: %.4f (%.2f%% from original)' % (new_rsq,((new_rsq-rsq)/rsq*100)))

dist_results = []
for i, pred in enumerate(reg_preds):
    dist_results.append(pythagoras(lat1=pred[0],lat2=actual_ll[i][0],long1=pred[1],long2=actual_ll[i][1]))

pred_radius_new = sum(dist_results) / float(len(dist_results))
print("\tAccuracy radius: %.2fm (%.2f%% from original)" % (pred_radius_new, ((pred_radius-pred_radius_new)/pred_radius)*100))

Classification Model - 
	Overall accuracy score: 73.63% (-18.20% from original)
	Building prediction accuracy: 93.70% (-6.13% from original)
	Floor prediction accuracy: 74.62% (-17.10% from original)
Regression Model -
	R-squared score: 0.9722 (-0.00% from original)
	Accuracy radius: 12.94m (0.00% from original)


### 6. Time of Day + Day of Week

In [25]:
class_xg.fit(train[[x for x in train.columns if 'WAP' in x or 'time_' in x]], train['target'].ravel())
class_preds_tuned = class_xg.predict(test[[x for x in train.columns if 'WAP' in x or 'time_' in x]])
print("Classification Model - ")
pred_class_acc = accuracy_score(test['target'].ravel(), class_preds_tuned)
print("\tOverall accuracy score: %.2f%% (%.2f%% from original)" % ((pred_class_acc*100),((pred_class_acc-class_acc)/class_acc*100)))

bldg_results_new = []
floor_results_new = []
for i, pred in enumerate(class_preds_tuned):
    pred = pred.split(' x ')
    act = actual_class[i].split(' x ')
    bldg_results_new.append(pred[0] == act[0])
    floor_results_new.append(pred[1] == act[1])

pred_bldg_new = sum(bldg_results_new)/len(bldg_results_new)
pred_floor_new = sum(floor_results_new)/len(floor_results_new)
print("\tBuilding prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_bldg_new*100), ((pred_bldg_new-pred_bldg)/pred_bldg*100)))
print("\tFloor prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_floor_new*100), ((pred_floor_new-pred_floor)/pred_floor*100)))

reg_knn.fit(train[[x for x in train.columns if 'WAP' in x or 'time_' in x]], train[['LATITUDE','LONGITUDE']].values)
reg_preds_tuned = reg_knn.predict(test[[x for x in train.columns if 'WAP' in x or 'time_' in x]])

print("Regression Model -")
new_rsq = r2_score(test[['LATITUDE','LONGITUDE']].values, reg_preds_tuned)
print('\tR-squared score: %.4f (%.2f%% from original)' % (new_rsq,((new_rsq-rsq)/rsq*100)))

dist_results = []
for i, pred in enumerate(reg_preds):
    dist_results.append(pythagoras(lat1=pred[0],lat2=actual_ll[i][0],long1=pred[1],long2=actual_ll[i][1]))

pred_radius_new = sum(dist_results) / float(len(dist_results))
print("\tAccuracy radius: %.2fm (%.2f%% from original)" % (pred_radius_new, ((pred_radius-pred_radius_new)/pred_radius)*100))

Classification Model - 
	Overall accuracy score: 61.21% (-32.00% from original)
	Building prediction accuracy: 90.91% (-8.93% from original)
	Floor prediction accuracy: 65.71% (-27.00% from original)
Regression Model -
	R-squared score: 0.9723 (0.01% from original)
	Accuracy radius: 12.94m (0.00% from original)


### 7. Combination of all 3

In [26]:
class_xg.fit(train[[x for x in train.columns if 'WAP' in x or 'time_' in x or x == 'PHONEID']], train['target'].ravel())
class_preds_tuned = class_xg.predict(test[[x for x in train.columns if 'WAP' in x or 'time_' in x or x == 'PHONEID']])
print("Classification Model - ")
pred_class_acc = accuracy_score(test['target'].ravel(), class_preds_tuned)
print("\tOverall accuracy score: %.2f%% (%.2f%% from original)" % ((pred_class_acc*100),((pred_class_acc-class_acc)/class_acc*100)))

bldg_results_new = []
floor_results_new = []
for i, pred in enumerate(class_preds_tuned):
    pred = pred.split(' x ')
    act = actual_class[i].split(' x ')
    bldg_results_new.append(pred[0] == act[0])
    floor_results_new.append(pred[1] == act[1])

pred_bldg_new = sum(bldg_results_new)/len(bldg_results_new)
pred_floor_new = sum(floor_results_new)/len(floor_results_new)
print("\tBuilding prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_bldg_new*100), ((pred_bldg_new-pred_bldg)/pred_bldg*100)))
print("\tFloor prediction accuracy: %.2f%% (%.2f%% from original)" % ((pred_floor_new*100), ((pred_floor_new-pred_floor)/pred_floor*100)))

reg_knn.fit(train[[x for x in train.columns if 'WAP' in x or 'time_' in x or x == 'PHONEID']], train[['LATITUDE','LONGITUDE']].values)
reg_preds_tuned = reg_knn.predict(test[[x for x in train.columns if 'WAP' in x or 'time_' in x or x == 'PHONEID']])

print("Regression Model -")
new_rsq = r2_score(test[['LATITUDE','LONGITUDE']].values, reg_preds_tuned)
print('\tR-squared score: %.4f (%.2f%% from original)' % (new_rsq,((new_rsq-rsq)/rsq*100)))

dist_results = []
for i, pred in enumerate(reg_preds):
    dist_results.append(pythagoras(lat1=pred[0],lat2=actual_ll[i][0],long1=pred[1],long2=actual_ll[i][1]))

pred_radius_new = sum(dist_results) / float(len(dist_results))
print("\tAccuracy radius: %.2fm (%.2f%% from original)" % (pred_radius_new, ((pred_radius-pred_radius_new)/pred_radius)*100))

Classification Model - 
	Overall accuracy score: 50.68% (-43.70% from original)
	Building prediction accuracy: 83.80% (-16.05% from original)
	Floor prediction accuracy: 57.25% (-36.40% from original)
Regression Model -
	R-squared score: 0.9723 (0.01% from original)
	Accuracy radius: 12.94m (0.00% from original)


## Conclusion
With the additional variables, the regression model largely remained the same while the classification model worsened in accuracy.

* None of the additional models could provide a 100% prediction for Building
* Day of Week and Phone Model (individually) provided the closest estimate to the original optimised model