In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Import machine learning tools
from sklearn.preprocessing import StandardScaler  
from sklearn.cluster import KMeans  
from sklearn.metrics import silhouette_score  

In [2]:
# Load the sensor + weather observations from CSV.
# NOTE(review): both frames are read from the *same* file
# (df_bikelane_weather.csv), and the df_building rows displayed later in this
# notebook show psensor == BIKELANE. This looks like a copy-paste slip --
# confirm the intended building CSV and point df_building at it.
weather_csv_path = '/workspaces/2nd_semester_project/notebooks/df_bikelane_weather.csv'
df_building = pd.read_csv(weather_csv_path)
df_bikelane = pd.read_csv(weather_csv_path)

In [4]:
# Keep only the observations that have a value in 'radar_0'.
# Only this one column is checked here; the other radar columns are not
# inspected for missing values by this cell.
df_building = df_building[df_building['radar_0'].notna()]
df_bikelane = df_bikelane[df_bikelane['radar_0'].notna()]

In [5]:
# Feature frames for the first clustering step: the x, y and z columns only.
building_mag = df_building.loc[:, ["x", "y", "z"]]
bikelane_mag = df_bikelane.loc[:, ["x", "y", "z"]]

In [6]:
# Standardise the magnetic-field features. The single scaler object is
# re-fitted for each frame, so each dataset is scaled with its own statistics
# and `scaler` ends up holding the bikelane fit.
scaler = StandardScaler()
building_mag_norm = scaler.fit(building_mag).transform(building_mag)
bikelane_mag_norm = scaler.fit(bikelane_mag).transform(bikelane_mag)

# Two-cluster k-means on the standardised magnetic-field data, one model per
# dataset. random_state is fixed so repeated runs give the same labels;
# n_init is left at the library default.
building_kmeans = KMeans(n_clusters=2, random_state=0)
building_kmeans.fit(building_mag_norm)
bikelane_kmeans = KMeans(n_clusters=2, random_state=0)
bikelane_kmeans.fit(bikelane_mag_norm)

In [7]:
# Attach the k-means labels to the feature frames. `assign` returns a new
# frame, so the frames selected from df_* are never written to directly.
building_mag = building_mag.assign(mag_cluster=building_kmeans.labels_)
bikelane_mag = bikelane_mag.assign(mag_cluster=bikelane_kmeans.labels_)

# Carry the label column over to the full frames. The column is a Series, so
# pandas aligns it to the target frame on the row index.
df_building = df_building.assign(mag_cluster=building_mag['mag_cluster'])
df_bikelane = df_bikelane.assign(mag_cluster=bikelane_mag['mag_cluster'])

In [8]:
# Renaming the cluster labels to 'detection' and 'no_detection'.
# The result is assigned back to the column instead of calling
# `.replace(..., inplace=True)` on `df[col]`: that form mutates a temporary
# selection, emits a chained-assignment FutureWarning on recent pandas and
# leaves the frame unchanged when copy-on-write is enabled.
# NOTE(review): k-means label ids are arbitrary, so 1 -> 'detection' is an
# assumption about which cluster came out as which -- confirm against the
# cluster sizes / centres before relying on these names.
mag_cluster_names = {0: 'no_detection', 1: 'detection'}
df_building['mag_cluster'] = df_building['mag_cluster'].replace(mag_cluster_names)
df_bikelane['mag_cluster'] = df_bikelane['mag_cluster'].replace(mag_cluster_names)

In [9]:
# Fixing an error with the mag_cluster column type: force the column to
# strings, then turn the literal 'nan' text (what astype(str) produces for a
# missing value) back into an actual None.
# Each step is chained and assigned back to the column rather than using
# `inplace=True` on `df[col]`, which operates on a temporary selection and
# does not update the frame when pandas copy-on-write is enabled.
df_building['mag_cluster'] = (
    df_building['mag_cluster'].astype(str).replace('nan', None)
)
df_bikelane['mag_cluster'] = (
    df_bikelane['mag_cluster'].astype(str).replace('nan', None)
)

In [10]:
# How many bikelane observations fall under each magnetic-cluster label.
df_bikelane.loc[:, 'mag_cluster'].value_counts()

mag_cluster
no_detection    6793
detection        253
Name: count, dtype: int64

# Clustering with radar data

# Making a dataframe for the features we wish to cluster on

In [11]:
# Feature frames for the radar clustering step: the eight radar columns.
radar_columns = ['radar_0', 'radar_1', 'radar_2', 'radar_3', 'radar_4', 'radar_5', 'radar_6', 'radar_7']
building_radar = df_building.loc[:, radar_columns]
bikelane_radar = df_bikelane.loc[:, radar_columns]

In [12]:
# Standardise the radar features. A fresh scaler is created and then re-fitted
# for each frame, so each dataset is scaled with its own statistics.
scaler = StandardScaler()
building_radar_norm = scaler.fit(building_radar).transform(building_radar)
bikelane_radar_norm = scaler.fit(bikelane_radar).transform(bikelane_radar)

# Two-cluster k-means on the standardised radar data, one model per dataset.
# This rebinds building_kmeans / bikelane_kmeans, so after this cell those
# names refer to the radar models, not the magnetic-field ones.
building_kmeans = KMeans(n_clusters=2, random_state=0)
building_kmeans.fit(building_radar_norm)
bikelane_kmeans = KMeans(n_clusters=2, random_state=0)
bikelane_kmeans.fit(bikelane_radar_norm)

In [13]:
# Attach the radar k-means labels to the radar feature frames. `assign`
# returns a new frame, so the frames selected from df_* are never written to
# directly.
building_radar = building_radar.assign(radar_cluster=building_kmeans.labels_)
bikelane_radar = bikelane_radar.assign(radar_cluster=bikelane_kmeans.labels_)

# Carry the label column over to the full frames; the Series is aligned to the
# target frame on the row index. The labels stay as the raw 0/1 cluster ids.
df_building = df_building.assign(radar_cluster=building_radar['radar_cluster'])
df_bikelane = df_bikelane.assign(radar_cluster=bikelane_radar['radar_cluster'])

In [14]:
# How many df_building observations fall in each radar cluster (raw 0/1 ids).
df_building.loc[:, 'radar_cluster'].value_counts()

radar_cluster
0    5971
1    1075
Name: count, dtype: int64

In [15]:
# Cross-tabulate the two clusterings: number of rows for every
# (radar_cluster, mag_cluster) combination present in df_building.
df_building.groupby(by=['radar_cluster', 'mag_cluster']).size()

radar_cluster  mag_cluster 
0              detection          2
               no_detection    5969
1              detection        251
               no_detection     824
dtype: int64

In [18]:
# Show the rows where the radar cluster is 1 and the magnetic cluster is
# 'detection'.
# NOTE(review): the previous comment on this cell said "no detection" while
# the filter selects 'detection' -- confirm which group was meant to be
# inspected.
df_building.loc[
    df_building['radar_cluster'].eq(1) & df_building['mag_cluster'].eq('detection')
]

Unnamed: 0,time,battery,temperature,x,y,z,radar_0,radar_1,radar_2,radar_3,...,snr,rssi,hw_fw_version,time_hour,temperature_2m,precipitation,psensor,id,mag_cluster,radar_cluster
35,2024-03-01 05:55:01.806,3.04,3.000,-219.0,432.0,-1312.0,153.0,93.0,51.0,41.0,...,-7.2,-92.0,DataVersion.HW_2FW2_X_X,2024-03-01 05:00:00,4.7805,0.0,BIKELANE,2024-03-01 05:55:01.806_BIKELANE,detection,1
36,2024-03-01 06:04:59.793,3.06,3.125,-232.0,431.0,-1270.0,129.0,73.0,45.0,25.0,...,-2.2,-91.0,DataVersion.HW_2FW2_X_X,2024-03-01 06:00:00,3.6805,0.0,BIKELANE,2024-03-01 06:04:59.793_BIKELANE,detection,1
37,2024-03-01 06:15:03.793,3.06,4.000,-237.0,427.0,-1255.0,129.0,73.0,45.0,25.0,...,-2.2,-91.0,DataVersion.HW_2FW2_X_X,2024-03-01 06:00:00,3.6805,0.0,BIKELANE,2024-03-01 06:15:03.793_BIKELANE,detection,1
38,2024-03-01 06:25:03.793,3.06,4.250,-237.0,430.0,-1257.0,129.0,73.0,45.0,25.0,...,-2.2,-91.0,DataVersion.HW_2FW2_X_X,2024-03-01 06:00:00,3.6805,0.0,BIKELANE,2024-03-01 06:25:03.793_BIKELANE,detection,1
39,2024-03-01 07:05:02.443,3.06,4.500,-244.0,428.0,-1256.0,123.0,81.0,21.0,29.0,...,-11.0,-91.0,DataVersion.HW_2FW2_X_X,2024-03-01 07:00:00,3.5805,0.0,BIKELANE,2024-03-01 07:05:02.443_BIKELANE,detection,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6280,2024-04-24 11:34:53.493,3.06,6.875,281.0,673.0,-623.0,39.0,53.0,53.0,54.0,...,,-96.0,DataVersion.HW_2FW2_X_X,2024-04-24 11:00:00,6.3805,0.1,BIKELANE,2024-04-24 11:34:53.493_BIKELANE,detection,1
6281,2024-04-24 11:54:56.485,3.06,7.000,273.0,708.0,-637.0,40.0,56.0,55.0,50.0,...,1.2,-93.0,DataVersion.HW_2FW2_X_X,2024-04-24 11:00:00,6.3805,0.1,BIKELANE,2024-04-24 11:54:56.485_BIKELANE,detection,1
6282,2024-04-24 12:04:54.643,3.06,7.000,270.0,712.0,-633.0,41.0,55.0,53.0,52.0,...,3.5,-92.0,DataVersion.HW_2FW2_X_X,2024-04-24 12:00:00,7.5805,0.0,BIKELANE,2024-04-24 12:04:54.643_BIKELANE,detection,1
6283,2024-04-24 12:14:56.643,3.06,7.000,268.0,705.0,-626.0,41.0,55.0,53.0,52.0,...,3.5,-92.0,DataVersion.HW_2FW2_X_X,2024-04-24 12:00:00,7.5805,0.0,BIKELANE,2024-04-24 12:14:56.643_BIKELANE,detection,1
