In [67]:
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN
from geopy.distance import great_circle, lonlat, distance, geodesic
from datetime import datetime
import folium

In [68]:
# Read only the columns the analysis uses and parse the scan timestamps.
df = pd.read_csv(
    "scanned_aps_sanitized.csv",
    usecols=["created_at", "ssid", "bssid", "level", "latitude", "longitude"],
).assign(created_at=lambda scans: pd.to_datetime(scans["created_at"], format="ISO8601"))

# Michael's place -- box (47.552425, -122.266685) (47.550714, -122.262940).
# Keep scans whose coordinates fall inside the box; the lower bound of each
# axis is inclusive and the upper bound is exclusive.
df = df.loc[
    df["latitude"].between(47.550714, 47.552425, inclusive="left")
    & df["longitude"].between(-122.266685, -122.262940, inclusive="left")
]
# Optional date window, currently disabled:
# df = df.loc[(df["created_at"] >= "2024-05-01") & (df["created_at"] < "2024-06-01")]

df

Unnamed: 0,created_at,latitude,longitude,ssid,bssid,level
16,2024-03-31 20:36:50.517279,47.55160,-122.26533,FieldHouse,d4:6e:0e:35:c1:12,-80
43,2024-04-01 15:43:22.669218,47.55158,-122.26534,2F4G,4c:01:43:3f:61:03,-78
83,2024-04-01 15:43:25.604086,47.55158,-122.26534,,10:33:bf:09:1c:e1,-81
136,2024-04-01 15:43:28.640624,47.55158,-122.26534,MTBaker1375,e4:5e:1b:89:41:95,-82
152,2024-03-31 22:38:10.338355,47.55159,-122.26532,CenturyLink3280,02:83:cc:fb:cf:64,-85
...,...,...,...,...,...,...
499425,2024-11-10 02:08:30.045014,47.55159,-122.26533,ANTHC-Guest,24:d7:9c:c9:7b:c0,-66
499426,2024-11-10 02:08:20.902479,47.55159,-122.26533,ANTHC-SECURE,24:d7:9c:c9:7b:c2,-72
499427,2024-11-10 02:08:27.002868,47.55159,-122.26533,SCFWIRELESS,24:d7:9c:c9:7b:c1,-68
499428,2024-11-10 02:08:33.088071,47.55159,-122.26533,InternetOnly,b8:38:61:e3:cf:54,-81


In [116]:
def calc_distance(p1, p2):
    """Return the great-circle distance in meters between two points.

    Used as the pairwise metric for DBSCAN, which is fitted on the
    ``[latitude, longitude]`` columns, so each point is an indexable
    sequence with latitude at index 0 and longitude at index 1.

    Args:
        p1: First point as ``(latitude, longitude)``.
        p2: Second point as ``(latitude, longitude)``.

    Returns:
        The distance between ``p1`` and ``p2`` in meters.
    """
    # great_circle is used rather than geodesic because it is much faster
    # (about 10x according to the original author's note).
    # Each endpoint must take BOTH coordinates from its own point: the earlier
    # version built the second endpoint from p1's latitude, which reduced the
    # metric to a longitude-only distance.
    return great_circle((p1[0], p1[1]), (p2[0], p2[1])).meters

def clusterize(df, eps=15, min_samples=10, include_noise=False):
    """Cluster scan positions with DBSCAN and summarize each cluster.

    The points in ``df[["latitude", "longitude"]]`` are clustered using
    ``calc_distance`` as the metric, then the rows are grouped by
    ``ssid``, ``bssid`` and cluster label and aggregated.

    Args:
        df: Scans with ``ssid``, ``bssid``, ``latitude``, ``longitude`` and
            ``level`` columns. The frame is not modified.
        eps: DBSCAN neighbourhood radius, in the units returned by
            ``calc_distance`` (meters).
        min_samples: DBSCAN ``min_samples`` (neighbourhood size required for
            a core point).
        include_noise: DBSCAN labels points that belong to no cluster with
            ``-1``. They are dropped by default, because averaging scattered
            outliers does not describe a real cluster; pass ``True`` to keep
            them as a ``cluster == -1`` row.

    Returns:
        One row per (ssid, bssid, cluster) with the mean/min/max latitude and
        longitude and the mean ``level``. Empty when no cluster was found.
    """
    model = DBSCAN(
        eps=eps,  # meters
        min_samples=min_samples,
        metric=calc_distance,
    )
    started = datetime.now()
    print("Clustering {} points".format(df.index.size))
    cluster_labels = model.fit_predict(df[["latitude", "longitude"]])
    print("Time spent = {}".format(datetime.now() - started))

    # assign() returns a new frame, so the caller's data is left untouched.
    labeled = df.assign(cluster=cluster_labels)
    if not include_noise:
        labeled = labeled.loc[labeled["cluster"] != -1]

    return labeled.groupby([
        "ssid", "bssid", "cluster"
    ]).agg(
        latitude=pd.NamedAgg(column="latitude", aggfunc="mean"),
        latitude_min=pd.NamedAgg(column="latitude", aggfunc="min"),
        latitude_max=pd.NamedAgg(column="latitude", aggfunc="max"),
        longitude=pd.NamedAgg(column="longitude", aggfunc="mean"),
        longitude_min=pd.NamedAgg(column="longitude", aggfunc="min"),
        longitude_max=pd.NamedAgg(column="longitude", aggfunc="max"),
        level=pd.NamedAgg(column="level", aggfunc="mean"),
    ).reset_index()


# Scans of the SSID(s) under study; their BSSIDs are the ones that get localized.
ssid_df = df[df["ssid"].isin(["Ernie"])]

def localize(df, bssid, n_clusters=1):
    """Estimate the position of one access point from its scans.

    The scans are clustered with ``clusterize``, the clusters are ranked by
    mean signal level (strongest first) and the position is the weighted
    centroid of the top ``n_clusters`` clusters.

    Args:
        df: Scans to use; the caller is expected to pass only the rows of the
            access point being localized. The frame is not modified.
        bssid: BSSID being localized; used for logging and echoed in the result.
        n_clusters: How many of the strongest clusters contribute to the
            estimate.

    Returns:
        A dict with ``bssid``, ``ssid`` (taken from the first row of ``df``),
        ``clusters`` (the clusters used), ``all_clusters`` (every cluster,
        strongest first), the estimated ``latitude`` / ``longitude`` and the
        mean ``level`` of the clusters used; or ``None`` when ``df`` is empty
        or no cluster was produced.
    """
    print("localizing BSSID: " + bssid)
    if df.empty:
        # Nothing to cluster, and there is no first row to read the SSID from.
        return None

    ssid = df["ssid"].iloc[0]
    all_clusters = clusterize(df.copy()).sort_values("level", ascending=False)
    clusters = all_clusters.iloc[:n_clusters]
    if clusters.index.size == 0:
        return None

    # The levels in this data set are negative (e.g. -80), presumably RSSI in
    # dBm. Used directly as weights, their signs cancel and the weakest
    # cluster ends up with the largest weight. Converting to linear power
    # gives positive weights that grow with signal strength.
    weights = np.power(10.0, clusters["level"] / 10.0)
    return {
        "bssid": bssid,
        "ssid": ssid,
        "clusters": clusters,
        "all_clusters": all_clusters,
        "latitude": np.average(clusters["latitude"], weights=weights),
        "longitude": np.average(clusters["longitude"], weights=weights),
        "level": np.average(clusters["level"]),
    }

# Localize every BSSID seen under the selected SSID(s), using all of that
# BSSID's scans from `df`, and keep only the ones that produced a result.
localized_bssids = [
    result
    for result in (
        localize(df.loc[df["bssid"] == bssid], bssid, n_clusters=1)
        for bssid in ssid_df["bssid"].unique()
    )
    if result is not None
]

localizing BSSID: d0:21:f9:63:55:b0
Clustering 1168 points
Time spent = 0:00:01.060782
localizing BSSID: d0:21:f9:63:55:b1
Clustering 1192 points
Time spent = 0:00:01.167979
localizing BSSID: d0:21:f9:63:51:ad
Clustering 1255 points
Time spent = 0:00:01.243214
localizing BSSID: d0:21:f9:63:51:ac
Clustering 1021 points
Time spent = 0:00:00.855463


In [117]:
# Number of BSSIDs for which localize() returned a result (None entries were filtered out).
len(localized_bssids)

4

In [118]:

def _add_cluster_to_map(fmap, idx, cluster, color):
    """Draw one cluster on ``fmap``: a marker at its mean position plus a
    rectangle spanning its min/max latitude and longitude, both in ``color``."""
    folium.Marker(
        location=[cluster.latitude, cluster.longitude],
        tooltip=f"Cluster: {idx} => {cluster.ssid} - {cluster.bssid}",
        popup=f'level: {cluster.level}',
        icon=folium.Icon(color=color)
    ).add_to(fmap)

    folium.Rectangle(
        bounds=[
            [cluster["latitude_min"], cluster["longitude_min"]],
            [cluster["latitude_max"], cluster["longitude_max"]],
        ],
        color=color,
        fill_color=color,
        fill_opacity=0.3,
        fill=True,
        popup=f"Cluster: {idx}"
    ).add_to(fmap)


m = folium.Map(location=[47.5515, -122.265], zoom_start=21)
# Only the first localized BSSID is plotted.
for localized_bssid in localized_bssids[:1]:
    # Clusters that contributed to the estimate are drawn in blue.
    used_cluster_idxs = set(localized_bssid["clusters"].index)
    for idx, cluster in localized_bssid["clusters"].iterrows():
        _add_cluster_to_map(m, idx, cluster, "blue")

    # The estimated access-point position is drawn in green.
    # Single outer quotes keep this f-string valid on Python versions before
    # 3.12, which do not allow reusing the enclosing quote inside `{...}`.
    folium.Marker(
        location=[localized_bssid["latitude"], localized_bssid["longitude"]],
        tooltip=f'Localized: {localized_bssid["ssid"]} - {localized_bssid["bssid"]}',
        popup=f'level: {localized_bssid["level"]}',
        icon=folium.Icon(color="green")
    ).add_to(m)

    # Every remaining cluster is drawn in red.
    for idx, cluster in localized_bssid["all_clusters"].iterrows():
        if idx in used_cluster_idxs:
            continue
        _add_cluster_to_map(m, idx, cluster, "red")

m