# Imports

In [None]:
import plotly.express as px
import pymongo
import pandas as pd
from geopy import distance
import numpy as np
from collections import defaultdict

from sklearn.cluster import DBSCAN
from tqdm import tqdm

# Getting data from Mongo

In [None]:
import os

# Connection string for MongoDB. Set the MONGO_URI environment variable to
# point at another server or use other credentials without editing this cell;
# when it is unset, the original local development URI is used.
client = pymongo.MongoClient(
    os.environ.get("MONGO_URI", "mongodb://root:example@localhost:27017/")
)
# Database "polar", collection "trainingSession".
db = client.polar
collection = db.trainingSession
requesting = []

In [None]:
# Fetch every training session that contains an exercise whose sport is RUNNING.
cursor = collection.find({"exercises.sport": "RUNNING"})

# Maps a session's start time to its list of (latitude, longitude) tuples.
data = defaultdict(list)
for document in cursor:
    # Only the first exercise of each session is read.
    samples = document["exercises"][0]["samples"]
    date = document["startTime"]

    # Skip sessions without a recorded route or with a non-string start time.
    if "recordedRoute" not in samples or not isinstance(date, str):
        continue

    for sample in samples["recordedRoute"]:
        point = (sample["latitude"], sample["longitude"])
        data[date].append(point)

In [None]:
def interpolate(route, n_points=50):
    """Resample a route to ``n_points`` points evenly spaced along its length.

    The cumulative distance travelled (in metres, via ``geopy``) is computed
    for every input point; latitude and longitude are then linearly
    interpolated at ``n_points`` equally spaced cumulative distances between
    the start and the end of the route.

    Args:
        route: Iterable of ``(latitude, longitude)`` pairs in travel order.
        n_points: Number of points in the resampled route.

    Returns:
        A list of ``n_points`` ``(latitude, longitude)`` tuples. A route with
        a single point yields that point repeated ``n_points`` times.

    Raises:
        ValueError: If ``route`` contains no points.
    """
    points = [(lat, lon) for lat, lon in route]
    if not points:
        raise ValueError("route must contain at least one point")

    # Length of the step leading to each point; the first point has no
    # predecessor, so its step is 0.
    step_lengths = [0.0]
    step_lengths.extend(
        distance.distance(current, previous).m
        for previous, current in zip(points, points[1:])
    )
    cum_distance = np.cumsum(step_lengths)

    latitudes = [lat for lat, _ in points]
    longitudes = [lon for _, lon in points]

    # Target positions along the route, from its start to its total length.
    cum_distance_new = np.linspace(0, cum_distance[-1], n_points)
    latitude_new = np.interp(cum_distance_new, cum_distance, latitudes)
    longitude_new = np.interp(cum_distance_new, cum_distance, longitudes)
    return list(zip(latitude_new, longitude_new))

In [None]:
def compute_distance(route_1, route_2, window_percent=0.5, *, metric=None):
    """Return the dynamic-time-warping (DTW) distance between two routes.

    Only cells within a band of half-width ``window`` around the diagonal of
    the cost matrix are evaluated. ``window`` is the larger of the length
    difference of the routes and ``window_percent`` times the longer length,
    so the final cell ``(n, m)`` always lies inside the band.

    Args:
        route_1: Sequence of points (``(latitude, longitude)`` pairs when the
            default metric is used).
        route_2: Sequence of points of the same kind as ``route_1``.
        window_percent: Band half-width as a fraction of the longer route.
        metric: Optional callable ``metric(point_a, point_b)`` returning the
            cost of matching two points. Defaults to the ``geopy`` distance
            between the points in metres.

    Returns:
        The accumulated cost of the cheapest warping path inside the band.
        ``inf`` is returned when exactly one of the routes is empty.
    """
    if metric is None:

        def metric(point_a, point_b):
            return distance.distance(point_a, point_b).m

    n = len(route_1)
    m = len(route_2)
    window = max(abs(n - m), int(window_percent * max(n, m)))

    # Row/column 0 are the "nothing matched yet" border; only (0, 0) is free.
    dtw_matrix = np.full((n + 1, m + 1), np.inf)
    dtw_matrix[0, 0] = 0

    for i in range(1, n + 1):
        start = max(1, i - window)
        # ``range`` excludes its end, so add 1 to make column ``i + window``
        # part of the band, mirroring the inclusive lower bound. Without it
        # the cell (n, m) is unreachable when m - n == window.
        end = min(m, i + window) + 1

        for j in range(start, end):
            cost = metric(route_1[i - 1], route_2[j - 1])
            dtw_matrix[i, j] = cost + min(
                dtw_matrix[i - 1, j],  # route_1 advances alone
                dtw_matrix[i, j - 1],  # route_2 advances alone
                dtw_matrix[i - 1, j - 1],  # both advance
            )

    return dtw_matrix[n, m]

In [None]:
# Replace every stored route with its 50-point resampled version.
for start_time in list(data):
    data[start_time] = interpolate(data[start_time], 50)

In [None]:
# Symmetric matrix of pairwise DTW distances between all routes, in the
# iteration order of ``data``.
N = len(data)
distance_matrix = [[0] * N for _ in range(N)]
data_list = list(data.values())
for i in tqdm(range(N)):
    # Start at i + 1: the diagonal keeps its initial 0, because matching a
    # route against itself point-by-point costs nothing, so computing it
    # would only waste one full DTW run per route.
    for j in range(i + 1, N):
        d = compute_distance(data_list[i], data_list[j])
        # Fill both halves so the matrix is symmetric.
        distance_matrix[i][j] = d
        distance_matrix[j][i] = d

# print(distance_matrix)

In [None]:
# Cluster the routes with DBSCAN, using the pairwise distances computed above
# directly instead of letting DBSCAN derive distances from feature vectors.
clusterer = DBSCAN(eps=12000, min_samples=3, metric="precomputed")
clusters = clusterer.fit_predict(distance_matrix)

# Show how many routes received each cluster label.
pd.Series(clusters).value_counts()

In [None]:
import plotly.offline as pyo

pyo.init_notebook_mode()

# Label of the cluster whose routes are drawn.
CLUSTER = 0

# One frame per route in the selected cluster; the "time" column carries the
# route's key so that every route is drawn in its own colour.
df_concat_data = [
    pd.DataFrame(route, columns=["latitude", "longitude"]).assign(time=key)
    for i, (key, route) in enumerate(data.items())
    if clusters[i] == CLUSTER
]
df_concat = pd.concat(df_concat_data)

fig = px.line_map(
    df_concat,
    lat="latitude",
    lon="longitude",
    color="time",
    zoom=12,
    height=500,
)
# Use OpenStreetMap tiles and remove the margins around the map.
fig.update_layout(
    map_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()