# Imports

In [None]:
import plotly.express as px
import pymongo
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest

# Getting data from Mongo

In [None]:
import os

# Connection string for MongoDB. Credentials should not be baked into the
# notebook: set the MONGO_URI environment variable to override the local
# development default used below.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://root:example@localhost:27017/")

client = pymongo.MongoClient(MONGO_URI)
db = client.polar  # database "polar"
collection = db.trainingSession  # collection "trainingSession"
# NOTE(review): `requesting` is not read by any visible cell — confirm it is
# unused before removing it.
requesting = []

In [None]:
# Sessions that contain at least one exercise whose sport is "RUNNING".
cursor = collection.find({"exercises.sport": "RUNNING"})

# One DataFrame of recorded route points per running exercise.
dfs = []
for document in cursor:
    # The query matches when ANY exercise in the session is a run, so pick the
    # running exercises explicitly instead of assuming it is the first one.
    for exercise in document["exercises"]:
        if exercise.get("sport") != "RUNNING":
            continue
        # Exercises without samples or without any route points cannot
        # contribute latitude/longitude columns, so they are skipped.
        route = (exercise.get("samples") or {}).get("recordedRoute")
        if not route:
            continue
        df = pd.DataFrame(route)
        # Session-level start time and exercise distance are broadcast to
        # every route point of this exercise.
        df["time"] = document["startTime"]
        df["distance"] = exercise["distance"]
        dfs.append(df)

# Preprocessing

In [None]:
# Build one feature row per route: the exercise distance plus the route's
# bounding box. Latitude spans the vertical (down/up) extent and longitude the
# horizontal (left/right) extent. The order of the values is deliberate: the
# scaler downstream works on positional columns, so reordering them could
# change the outlier/cluster results.
data = [
    {
        # "distance" holds the same value on every row of a route frame.
        "distance": df["distance"].iloc[0],
        "down": df["latitude"].min(),
        "up": df["latitude"].max(),
        "right": df["longitude"].max(),
        "left": df["longitude"].min(),
    }
    for df in dfs
]
df_data = pd.DataFrame(data)

# Clustering

In [None]:
# Standardise every feature so no single column dominates the distance metric.
scaled_features = StandardScaler().fit_transform(df_data)
df_data_scaled = pd.DataFrame(scaled_features)

# Flag roughly 2% of the routes as outliers; fit_predict returns 1 for inliers
# and -1 for outliers, and only the inliers are clustered.
iso = IsolationForest(contamination=0.02, random_state=42)
outlier_preds = iso.fit_predict(df_data_scaled)
inlier_mask = outlier_preds == 1
df_no_outliers = df_data_scaled.loc[inlier_mask]

# Group the remaining routes into five clusters.
clusters = KMeans(n_clusters=5, random_state=42, n_init="auto")
clusters.fit(df_no_outliers)

# reset_index() moves the original row labels (positions in df_data) into an
# "index" column so each clustered row can be traced back to its route.
df_clusters = df_no_outliers.assign(cluster=clusters.labels_).reset_index()
df_clusters

In [None]:
import plotly.offline as pyo

pyo.init_notebook_mode()

# Label of the cluster whose routes are drawn on the map.
CLUSTER = 3

# The "index" column of df_clusters holds each route's position in `dfs`.
# Membership must be tested against those VALUES: `x in series` checks the
# Series index labels instead, which stop matching the positions in `dfs` as
# soon as any outlier row has been dropped.
cluster_route_ids = set(
    df_clusters.loc[df_clusters["cluster"] == CLUSTER, "index"].tolist()
)

# assign() returns a copy, so the frames in `dfs` are left untouched; "number"
# tags every point with its route so each route gets its own colour.
df_concat_data = [
    df.assign(number=i) for i, df in enumerate(dfs) if i in cluster_route_ids
]
if not df_concat_data:
    raise ValueError(f"No routes found in cluster {CLUSTER}")
df_concat = pd.concat(df_concat_data)

fig = px.line_map(
    df_concat, lat="latitude", lon="longitude", color="number", zoom=12, height=500
)
fig.update_layout(map_style="open-street-map", margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()