In [17]:
# Make the project root (the parent directory of the notebook's working
# directory) importable so that `chase_rank` can be found.
import sys

# Guard the append: re-running this cell would otherwise add a duplicate
# ".." entry to sys.path on every execution.
if ".." not in sys.path:
    sys.path.append("..")

In [90]:
import os
import json
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
from dotenv import load_dotenv
import plotly.express as px

from chase_rank.wrappers import (
    ActivityHandler
)

In [3]:
# Pull settings from the project-level .env file into the process environment.
load_dotenv("../.env")

# Activity table location: taken from the ACTIVITY_PATH environment variable
# when set, otherwise the default parquet file below.
ACTIVITY_PATH = Path(
    os.environ.get("ACTIVITY_PATH", "../data/activities.parquet")
)
# JSON file with the user_id -> name mapping (fixed location).
USER_MAP_PATH = Path("../data/user_map.json")

In [4]:
# Load the user_id -> name mapping.
# The encoding is pinned to UTF-8 so that non-ASCII names are decoded the same
# way on every platform instead of depending on the locale default.
with open(USER_MAP_PATH, encoding="utf-8") as file_pointer:
    # JSON object keys are always strings; cast them to int because the
    # lookups further down use integer user ids.
    USER_MAP = {int(user_id): name for user_id, name in json.load(file_pointer).items()}

# Initialize Handlers

In [5]:
# The wrapper object itself is not used afterwards; only the object exposed
# through its `activities` attribute is kept under the name `activities`.
activities = ActivityHandler(ACTIVITY_PATH).activities
activities

# Visualize

In [14]:
# time window with all activities of 2022; `start` and `end` are the first and
# last calendar day of the window (both inclusive)
start = datetime(year=2022, month=1, day=1)
end = datetime(year=2022, month=12, day=31)

# `end` is midnight at the *beginning* of Dec 31, so `start_date <= end` would
# drop every activity recorded later that day. Compare against the following
# midnight (exclusive) to keep the whole last day.
activities_2022 = activities[
    (activities.start_date >= start)
    & (activities.start_date < end + timedelta(days=1))
]
activities_2022

Unnamed: 0,strava_id,strava_name,user_id,distance,moving_time,elapsed_time,total_elevation_gain,sport_type,start_date,timezone,...,average_speed,max_speed,elev_high,elev_low,external_id,private,trainer,manual,commute,geometry
8285991606,8285991606,Festliches Fatzen,49971345,46118.1,7225.0,10469.0,495.0,Ride,2022-12-24 10:14:07,(GMT+01:00) Europe/Berlin,...,6.383,16.260,514.8,327.4,7434802f-c71c-4796-8b81-7a6e851d8fbd.fit,False,False,False,False,"LINESTRING (1024380.879 6220319.378, 1024010.1..."
8236222190,8236222190,Afternoon Ride,49971345,33718.2,5903.0,6379.0,451.0,Ride,2022-12-11 13:12:12,(GMT+01:00) Europe/Berlin,...,5.712,14.762,438.0,293.6,479f1618-0cde-4708-89e1-1960cd79f7e1.fit,False,False,False,False,"LINESTRING (1026321.177 6219510.184, 1026353.4..."
8205871116,8205871116,Herrenlose Hütehunde,49971345,41216.7,6687.0,13624.0,456.0,Ride,2022-12-04 08:15:56,(GMT+01:00) Europe/Berlin,...,6.164,26.250,423.6,273.0,adb1dddc-4688-4c1d-b65c-2ba00722bc49.fit,False,False,False,False,"LINESTRING (1025566.431 6219147.757, 1025467.3..."
8202561359,8202561359,Punsch & Nebel,49971345,10417.5,9611.0,14246.0,441.5,Hike,2022-12-03 12:20:01,(GMT+01:00) Europe/Berlin,...,1.084,3.202,810.0,423.3,dd6bc65c-cd61-42fc-b4b9-c39e9ec3c342-activity.fit,False,False,False,False,"LINESTRING (1056563.343 6205959.207, 1056553.3..."
8175691848,8175691848,Sonntag & Sonne,49971345,49517.2,8194.0,16799.0,810.0,Ride,2022-11-27 08:25:50,(GMT+01:00) Europe/Berlin,...,6.043,17.542,483.8,283.2,6bb4fc73-66b7-4693-a172-b42de6b9b3b5.fit,False,False,False,False,"LINESTRING (1025570.884 6219142.700, 1025471.8..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6466774513,6466774513,Afternoon Ride,59511327,35409.2,5720.0,6428.0,289.0,Ride,2022-01-03 13:58:48,(GMT+01:00) Europe/Berlin,...,6.190,15.274,411.8,339.2,2022-01-03-135848-ELEMNT BOLT 0FEF-405-0.fit,False,False,False,False,"LINESTRING (1023787.546 6216993.745, 1023709.6..."
6461642250,6461642250,Bilder machen,59511327,4433.4,2502.0,4559.0,42.4,Ride,2022-01-02 13:49:05,(GMT+01:00) Europe/Berlin,...,1.772,5.324,414.6,402.1,-activity.fit,False,False,False,False,"LINESTRING (1023765.282 6216980.263, 1023751.9..."
6461832170,6461832170,Afternoon Ride,59511327,10716.8,1521.0,1521.0,103.0,Ride,2022-01-02 13:09:51,(GMT+01:00) Europe/Berlin,...,7.046,10.870,452.2,410.2,2022-01-02-130951-ELEMNT BOLT 0FEF-404-0.fit,False,False,False,False,"LINESTRING (1016154.368 6227404.808, 1016161.0..."
6461854776,6461854776,Morning Ride,59511327,13390.3,2030.0,2815.0,122.0,Ride,2022-01-02 08:57:42,(GMT+01:00) Europe/Berlin,...,6.596,11.406,418.8,364.0,2022-01-02-085741-ELEMNT BOLT 0FEF-403-0.fit,False,False,False,False,"LINESTRING (1024042.467 6216434.263, 1024062.5..."


### Users

In [91]:
# Per-user totals for each sport_type over the selected time window.
users = (
    activities_2022[
        ["user_id", "sport_type",
         "distance", "moving_time", "elapsed_time", "total_elevation_gain"]
    ]
    .groupby(["user_id", "sport_type"])
    .sum()
    .reset_index()
    .assign(
        # add user_name for each user_id (raises KeyError for an unknown id)
        user_name=lambda df: df["user_id"].map(lambda u_id: USER_MAP[int(u_id)]),
        # express travelled distance as kilometers (vectorized)
        distance=lambda df: df["distance"] / 1000,
        # express moving_time (seconds) as timedelta
        moving_time=lambda df: pd.to_timedelta(df["moving_time"], unit="s"),
        # express elapsed_time (seconds) as timedelta
        elapsed_time=lambda df: pd.to_timedelta(df["elapsed_time"], unit="s"),
    )
)

In [92]:
# Total cycling distance per athlete (all ride-like sport types combined).
# numeric_only=True leaves the timedelta columns out of the sum.
plot_df = users[
    users.sport_type.isin(["Ride", "MountainBikeRide", "VirtualRide"])
].groupby(["user_id", "user_name"]).sum(numeric_only=True).reset_index()

px.bar(
    plot_df,
    x="distance",
    y="user_name",
    color="user_name",
    orientation="h",
    category_orders={
        # The original passed an empty list here, which has no effect.
        # Rank athletes by distance, longest first.
        # NOTE(review): plotly express is expected to lay out y-axis
        # categories top-down in this order - confirm in the rendered figure.
        "user_name": (
            plot_df.sort_values("distance", ascending=False)["user_name"]
            .unique()
            .tolist()
        )
    },
    labels={
        "user_id": "athlete",
        "user_name": "athlete",
        "distance": "distance"
    },
    template="simple_white"
)

In [43]:
# Monthly totals per user and sport_type: start_date is binned with a monthly
# frequency, the other two keys are grouped on their plain values.
months = (
    activities_2022
    .loc[:, ["user_id", "start_date", "sport_type",
             "distance", "moving_time", "elapsed_time", "total_elevation_gain"]]
    .groupby([pd.Grouper(key="start_date", freq="M"), "user_id", "sport_type"])
    .sum()
    .reset_index()
)

In [93]:
# Weekly totals per user and sport_type (start_date binned with freq="W").
weeks = (
    activities_2022[
        ["user_id", "start_date", "sport_type",
         "distance", "moving_time", "elapsed_time", "total_elevation_gain"]
    ]
    .groupby([
        pd.Grouper(key="start_date", freq="W"),
        pd.Grouper(key="user_id"),
        pd.Grouper(key="sport_type")
    ])
    .sum()
    .reset_index()
    .assign(
        # add user_name for each user_id (raises KeyError for an unknown id)
        user_name=lambda df: df["user_id"].map(lambda u_id: USER_MAP[int(u_id)]),
        # express travelled distance as kilometers (vectorized)
        distance=lambda df: df["distance"] / 1000,
        # express moving_time (seconds) as timedelta
        moving_time=lambda df: pd.to_timedelta(df["moving_time"], unit="s"),
        # express elapsed_time (seconds) as timedelta
        elapsed_time=lambda df: pd.to_timedelta(df["elapsed_time"], unit="s"),
    )
)

In [112]:
# Weekly cycling distance per athlete: keep only the ride-like sport types and
# add them up per athlete and week (numeric columns only).
plot_df = (
    weeks.loc[weeks["sport_type"].isin(["Ride", "MountainBikeRide", "VirtualRide"])]
    .groupby(["user_id", "user_name", "start_date"])
    .sum(numeric_only=True)
    .reset_index()
)

# One group of bars per week, one bar per athlete.
px.bar(
    data_frame=plot_df,
    x="start_date",
    y="distance",
    color="user_name",
    barmode="group",
    labels={
        "user_id": "athlete",
        "user_name": "athlete",
        "distance": "distance",
        "start_date": "week",
    },
    template="simple_white",
)

In [114]:
# Weekly cycling distance per athlete (all ride-like sport types combined).
# numeric_only=True leaves the timedelta columns out of the sum.
plot_df = weeks[
    weeks.sport_type.isin(["Ride", "MountainBikeRide", "VirtualRide"])
].groupby(["user_id", "user_name", "start_date"]).sum(numeric_only=True).reset_index()

px.line(
    plot_df,
    x="start_date",
    y="distance",
    # Colour by name rather than by the raw numeric user_id so the legend
    # shows athletes the same way as the bar charts in this notebook.
    color="user_name",
    line_shape="spline",
    labels={
        "user_id": "athlete",
        "user_name": "athlete",
        "distance": "distance",
        "start_date": "week"
    },
    template="simple_white"
)