In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
from pyproj import Proj

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.multioutput import MultiOutputClassifier, MultiOutputRegressor
from sklearn.model_selection import train_test_split
from post_processing import plot_results, evaluate_model, l2_dist
import sqlalchemy
import warnings
warnings.filterwarnings('ignore')

# Postgres query

In [3]:
DB_URL = 'postgresql+psycopg2://hospitrackadmin@hospitrack:HospiTrack34@hospitrack.postgres.database.azure.com/hospitrack'
engine = sqlalchemy.create_engine(DB_URL)
df = pd.read_sql('select * from public.demo_router_scans', con=engine)
print(df.shape)
df.head(2)

(2459, 8)


Unnamed: 0,row_id,id,timestamp,longitude,latitude,altitude,accuracy,rssi_by_bssid
0,6340,67464705.0,1582179360515,34.769125,32.073358,42.0,15.462,"{'00:0e:f4:dd:f9:de': -77.0, '00:b8:c2:12:d3:3..."
1,6341,31780742.0,1582179370668,34.76905,32.073462,42.0,13.115,"{'00:0e:f4:dd:f9:de': -63.0, '00:b8:c2:32:87:1..."


In [18]:
df['rssi_by_bssid'] = df.apply(lambda x : pd.concat([pd.DataFrame([x['timestamp']]).rename(columns={0:1}), 
                                                     pd.DataFrame((x['rssi_by_bssid']).items()).set_index(0)],
                                                    axis=0).T.set_index(0) ,axis=1)
X = pd.concat(df['rssi_by_bssid'].tolist())

myProj = Proj("+proj=utm +zone=36k, +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs")
Y = pd.DataFrame(df.apply(lambda x: myProj(x.longitude, x.latitude),
                                      axis=1).tolist(), index=df.index).rename(columns={0:'x', 1:'y'})
print(X.shape)
X.head()

(2459, 2640)


Unnamed: 0_level_0,00:0e:f4:dd:f9:de,00:b8:c2:12:d3:35,00:b8:c2:47:06:22,08:3e:5d:5a:a1:74,10:be:f5:35:7b:38,14:ae:db:47:47:dd,14:ae:db:50:55:6d,14:ae:db:b1:aa:6d,14:ae:db:ce:a0:9d,14:ae:db:ce:c8:fd,...,74:4d:28:00:3f:5a,80:2a:a8:b5:ab:c1,00:b8:c2:29:79:48,08:97:58:32:60:f2,32:cd:a7:fa:f1:dd,34:0a:91:15:21:66,80:2a:a8:b5:aa:d1,94:8f:cf:ad:8f:3b,f8:e9:03:13:55:44,90:a7:c1:13:1e:b4
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1582179000000.0,-77.0,-88.0,-79.0,-71.0,-86.0,-85.0,-85.0,-88.0,-72.0,-64.0,...,,,,,,,,,,
1582179000000.0,-63.0,,-73.0,-78.0,,-77.0,-87.0,-78.0,-79.0,-75.0,...,,,,,,,,,,
1582179000000.0,-63.0,-89.0,-74.0,-78.0,,-84.0,-86.0,-79.0,,-75.0,...,,,,,,,,,,
1582179000000.0,-75.0,,-75.0,-77.0,-91.0,-89.0,,-80.0,,-75.0,...,,,,,,,,,,
1582179000000.0,-85.0,,-68.0,-80.0,,,,-77.0,,-79.0,...,,,,,,,,,,


In [17]:
print(Y.shape)
Y.head()

(2459, 2)


Unnamed: 0,x,y
0,666981.7887,3549936.0
1,666974.482216,3549948.0
2,666974.482216,3549948.0
3,666983.811626,3549961.0
4,666987.404883,3549965.0


## Location prediction

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X.fillna(-200), Y, 
                                                  random_state=42, 
                                                  test_size=0.3, shuffle=True)

In [20]:
model = MultiOutputRegressor(RandomForestRegressor(random_state=1))
multi_target_forest = model.fit(X_train, y_train)
y_pred = multi_target_forest.predict(X_test)
y_test['pred_x'] = y_pred[:, 0]
y_test['pred_y'] = y_pred[:, 1]

In [22]:
l2dists_mean, l2dists = l2_dist((y_test['pred_x'], y_test['pred_y']), (y_test['x'], y_test['y']))
y_test['dist'] = list(l2dists)

srt = np.sort(l2dists)
n = len(srt)
prec_90 = srt[int(0.9*n)]
print("mean distances : " + str(l2dists.mean()))
print("median : " + str(srt[int(0.5*n)]))
print("90 percentile : " + str(prec_90))

mean distances : 15.489476084862766
median : 10.187214333636547
90 percentile : 31.60504905649355


In [33]:
y_test[['longitude', 'latitude']] = pd.DataFrame(y_test.apply(lambda x: myProj(x.x, x.y, inverse=True)
                                                , axis=1).tolist(), index=y_test.index)
fig = px.scatter_mapbox(y_test, lat="latitude", lon="longitude",
                         color='dist', zoom=16, height=500,)
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

# Reading UJI data

In [2]:
Trainingdf = pd.read_csv('TrainingData.csv')
Validationdf = pd.read_csv('ValidationData.csv')
df = pd.concat([Trainingdf, Validationdf], ignore_index=True)
df.head(2)

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID,RELATIVEPOSITION,USERID,PHONEID,TIMESTAMP
0,100,100,100,100,100,100,100,100,100,100,...,100,-7541.2643,4864921.0,2,1,106,2,2,23,1371713733
1,100,100,100,100,100,100,100,100,100,100,...,100,-7536.6212,4864934.0,2,1,106,2,2,23,1371713691


## Location prediction

In [3]:
X = df[df.columns[0:len(df.columns)-9]]
Y = df[['LONGITUDE','LATITUDE']]
X_train, X_test, y_train, y_test = train_test_split(X, Y, 
                                                  random_state=42, 
                                                  test_size=0.3, shuffle=True)

In [4]:
model = MultiOutputRegressor(RandomForestRegressor(random_state=1))
multi_target_forest = model.fit(X_train, y_train)
y_pred = multi_target_forest.predict(X_test)
y_test['pred_x'] = y_pred[:, 0]
y_test['pred_y'] = y_pred[:, 1]

In [5]:
l2dists_mean, l2dists = l2_dist((y_test['pred_x'], y_test['pred_y']), (y_test['LONGITUDE'], y_test['LATITUDE']))
y_test['dist'] = list(l2dists)

srt = np.sort(l2dists)
n = len(srt)
prec_90 = srt[int(0.9*n)]
print("mean distances : " + str(l2dists.mean()))
print("median : " + str(srt[int(0.5*n)]))
print("90 percentile : " + str(prec_90))

mean distances : 6.055419620106044
median : 3.5352180528246513
90 percentile : 13.507243651269079


In [None]:
fig = px.scatter_mapbox(y_test, lat="LATITUDE", lon="LONGITUDE", hover_name="USERID", hover_data=["BUILDINGID", "FLOOR", "SPACEID"],
                        color="USERID", zoom=16, height=500,)
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

## Buliding prediction

In [6]:
X = df[df.columns[0:len(df.columns)-9]]
Y = df['BUILDINGID']
X_train, X_test, y_train, y_test = train_test_split(X, Y, 
                                                  random_state=42, 
                                                  test_size=0.3, shuffle=True)

In [7]:
model = RandomForestClassifier(random_state=1)
y_pred = model.fit(X_train, y_train).predict(X_test)
evaluate_model(y_test, y_pred)

Confusion matrix:
[[1742    0    0]
 [   1 1625   13]
 [   0    0 2934]]
f1 Score :  0.9977377772836262
Accuracy Score : 0.9977830562153602
Precision Score :  0.9983383369469281
Recall Score :  0.997152735407769
MCC Score : 0.9965447948547613


## Floor prediction

In [8]:
df2 = df[df['BUILDINGID']==2]
X = df2[df2.columns[0:len(df2.columns)-9]]
Y = df2['FLOOR']
X_train, X_test, y_train, y_test = train_test_split(X, Y, 
                                                  random_state=42, 
                                                  test_size=0.3, shuffle=True)

In [9]:
model = RandomForestClassifier(random_state=1)
y_pred = model.fit(X_train, y_train).predict(X_test)
evaluate_model(y_test, y_pred)

Confusion matrix:
[[595   0   0   0   0]
 [ 13 637   5   1   0]
 [  6  19 472   5   0]
 [  0   2   7 829   0]
 [  0   0   1   7 329]]
f1 Score :  0.9770339039089333
Accuracy Score : 0.9774590163934426
Precision Score :  0.9789793854671462
Recall Score :  0.975359379997017
MCC Score : 0.9712802283034752


## Visualization with interactive map

In [10]:
myProj = Proj(init='epsg:3857')
df[['longitude_geo', 'latitude_geo']] = pd.DataFrame(df.apply(lambda x: myProj(x.LONGITUDE, x.LATITUDE, inverse=True), axis=1).tolist(), index=df.index)

In [16]:
fig = px.scatter_mapbox(df, lat="latitude_geo", lon="longitude_geo", hover_name="USERID", hover_data=["BUILDINGID", "FLOOR", "SPACEID"],
                        color="USERID", zoom=16, height=500, animation_frame='FLOOR')
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [41]:
folium.Map(zoom_start=6, tiles='https://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}{r}.png', 
           attr='&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>')
m.save('test123.html')
m