# Imports

In [None]:
import plotly.express as px
import pymongo
import pandas as pd
from geopy import distance
import numpy as np

In [None]:
# Open the connection to the local MongoDB server and pick the collection
# holding the Polar training sessions (dictionary-style access is equivalent
# to attribute access in pymongo).
client = pymongo.MongoClient("mongodb://root:example@localhost:27017/")
db = client["polar"]
collection = db["trainingSession"]
requesting = []

In [None]:
# Fetch every training session that contains at least one RUNNING exercise.
cursor = collection.find({"exercises.sport": "RUNNING"})

# One DataFrame of route samples per session that has a usable recorded route.
dfs = []
for document in cursor:
    # The filter above matches when ANY exercise in the array is a run, so
    # pick the first running exercise instead of assuming it is at index 0.
    running = next(
        (ex for ex in document["exercises"] if ex.get("sport") == "RUNNING"),
        None,
    )
    if running is None:
        continue

    # Skip sessions without samples or with a missing/empty route: an empty
    # route would produce a column-less DataFrame that breaks later cells.
    route = (running.get("samples") or {}).get("recordedRoute")
    if not route:
        continue

    df = pd.DataFrame(route)
    # Tag every sample with the start time of the session it belongs to.
    df["time"] = document["startTime"]
    dfs.append(df)

In [None]:
# Add per-step and cumulative travelled distance (in metres) to every route.
for df in dfs:
    lat, lon = df["latitude"], df["longitude"]

    # Pair each sample with the sample recorded just before it; the first
    # row has no predecessor, so its previous coordinates are NaN.
    df["prev_coords"] = list(zip(lat.shift(1), lon.shift(1)))
    df["curr_coords"] = list(zip(lat, lon))

    # Distance between consecutive samples; 0 where there is no predecessor.
    df["distance"] = [
        distance.distance(here, before).m if pd.notnull(before[0]) else 0
        for here, before in zip(df["curr_coords"], df["prev_coords"])
    ]
    df["cum_distance"] = df["distance"].cumsum()

In [None]:
# Resample every route to 40 points evenly spaced along its travelled
# distance, so that all routes become feature vectors of identical length
# (40 longitudes followed by 40 latitudes).
data = []
for df in dfs:
    total_distance = df["cum_distance"].iloc[-1]
    df_new = pd.DataFrame({"cum_distance": np.linspace(0, total_distance, 40)})

    # Linearly interpolate each coordinate at the resampled distances.
    for coord in ("longitude", "latitude"):
        df_new[coord] = np.interp(
            df_new["cum_distance"], df["cum_distance"], df[coord]
        )

    feature_row = np.concatenate(
        (df_new["longitude"].values, df_new["latitude"].values)
    )
    data.append(feature_row)

# One row per route, one column per resampled coordinate value.
df_data = pd.DataFrame(data)
df_data

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest

# Standardise every feature column before the distance-based models see it.
scaler = StandardScaler()
df_data_scaled = pd.DataFrame(scaler.fit_transform(df_data))

# Flag a small share of the routes as anomalies and keep only the rows
# predicted as 1 (scikit-learn's label for inliers).
iso = IsolationForest(random_state=42, contamination=0.02)
outlier_preds = iso.fit_predict(df_data_scaled)
is_inlier = outlier_preds == 1
df_no_outliers = df_data_scaled[is_inlier]

# df_data_pca = pd.DataFrame(MDS(n_components=10).fit_transform(df_no_outliers))
# Group the remaining routes into 10 clusters.
clusters = KMeans(n_clusters=10, n_init="auto", random_state=42)
clusters.fit(df_no_outliers)

# reset_index() moves the original row label (the route's position in
# df_data) into an "index" column; the cluster label is appended last.
df_clusters = df_no_outliers.reset_index()
df_clusters["cluster"] = clusters.labels_
df_clusters

In [None]:
import plotly.offline as pyo

pyo.init_notebook_mode()

# Cluster label (as assigned by the KMeans step) whose routes are drawn.
CLUSTER = 8

# NOTE: `x in some_series` tests the Series' *index labels*, not its values.
# After reset_index() those labels are positions inside df_clusters, which no
# longer line up with positions in `dfs` once outliers were dropped. Compare
# against the values of the "index" column instead (the original positions).
cluster_members = set(
    df_clusters.loc[df_clusters["cluster"] == CLUSTER, "index"].tolist()
)

df_concat_data = []
for i, df in enumerate(dfs):
    if i in cluster_members:
        # Remember which route each sample belongs to so that plotly draws
        # one coloured line per route.
        df["number"] = i
        df_concat_data.append(df)

# pd.concat raises an opaque ValueError on an empty list; fail with a clearer
# message of the same exception type instead.
if not df_concat_data:
    raise ValueError(f"No routes were assigned to cluster {CLUSTER}")
df_concat = pd.concat(df_concat_data)

fig = px.line_map(
    df_concat, lat="latitude", lon="longitude", color="number", zoom=12, height=500
)
fig.update_layout(map_style="open-street-map", margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()