# GiroE GARMIN Dataset Analysis

## 1. Import dataset (from pickle)

In [None]:
import pandas as pd
import os, json
from datetime import timedelta

# Name of the raw-data directory.
# NOTE(review): not referenced by any other visible cell — confirm it is still needed.
DATADIR = "GarminRawData"

In [None]:
import pickle

# Restore the previously pickled Garmin DataFrame.
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
pickle_path = "garmin_df.pk"
with open(pickle_path, "rb") as pickle_file:
    garmin_df = pickle.load(pickle_file)

In [None]:
# Quick overview of the loaded frame: its size, its columns and the devices it contains.
(garmin_df.shape, garmin_df.columns, garmin_df["device"].unique())

## 2. Dataset Analysis

### Export some statistics on data usage

In [None]:
# For every (day, device) pair, write the earliest timestamp, the latest
# timestamp and the number of timestamps to one CSV file each.
raw_timestamps = garmin_df.groupby(["day", "device"])["timestamp"]
raw_timestamps.min().to_csv("min.csv")
raw_timestamps.max().to_csv("max.csv")
raw_timestamps.count().to_csv("count.csv")

## 3. Dataset Filtering

### Drop data outside race time

In [None]:
# Minimum number of timestamped rows a (device, session) group must contain;
# groups below this count are collected as invalid sessions in a later cell.
# NOTE(review): the name suggests seconds, but the value is compared against a
# row count — presumably the data holds one sample per second; confirm.
THRESH_SEC = 1800

# (start, end) timestamp pairs, one per race day. A later cell keeps only the
# rows whose timestamp falls inside one of these windows, both bounds included.
# NOTE(review): the name suggests these are UTC times — confirm they use the
# same time base as the `timestamp` column.
GIROE_UTC = [
    ("2024-05-04T10:45:00", "2024-05-04T13:40:00"),
    ("2024-05-05T12:45:00", "2024-05-05T14:00:00"),
    ("2024-05-06T10:40:00", "2024-05-06T13:40:00"),
    ("2024-05-07T09:45:00", "2024-05-07T14:00:00"),
    ("2024-05-08T11:00:00", "2024-05-08T14:00:00"),
    ("2024-05-09T10:25:00", "2024-05-09T13:16:00"),
    ("2024-05-10T09:20:00", "2024-05-10T10:46:00"),
    ("2024-05-11T09:50:00", "2024-05-11T13:05:00"),
    ("2024-05-12T11:07:00", "2024-05-12T13:20:00"),
    ("2024-05-14T09:10:00", "2024-05-14T13:30:00"),
    ("2024-05-15T11:10:00", "2024-05-15T13:33:00"),
    ("2024-05-16T10:40:00", "2024-05-16T13:50:00"),
    ("2024-05-17T11:15:00", "2024-05-17T13:45:00"),
    ("2024-05-18T09:10:00", "2024-05-18T11:00:00"),
    ("2024-05-19T09:45:00", "2024-05-19T13:45:00"),
    ("2024-05-21T10:55:00", "2024-05-21T13:20:00"),
    ("2024-05-22T10:15:00", "2024-05-22T13:50:00"),
    ("2024-05-23T10:40:00", "2024-05-23T13:55:00"),
    ("2024-05-24T10:40:00", "2024-05-24T13:26:00"),
    ("2024-05-26T11:40:00", "2024-05-26T13:10:00"),
]

# utc_limits = pd.DataFrame.from_dict(GIROE_UTC)
# utc_limits[1]=utc_limits[1].swifter.apply(lambda x: (pd.to_datetime(x)+timedelta(hours=2)).isoformat())
# utc_limits

Identify the sessions that contain fewer than `THRESH_SEC` data points

In [None]:
# Number of non-null values of every column, per (device, session) group.
counts = garmin_df.groupby(["device", "session"]).count()

# Session ids of the groups whose timestamp count is below THRESH_SEC.
# NOTE(review): this result is only displayed; no other visible cell uses it
# to filter the data — confirm whether that is intended.
too_short = counts["timestamp"] < THRESH_SEC
sessions_invalid = counts[too_short].index.get_level_values("session")
sessions_invalid

Get only race points

In [None]:
# Keep only the rows whose timestamp lies inside one of the race windows
# (both bounds included). The per-window slices are concatenated in the order
# of GIROE_UTC.
giroe_df = pd.concat(
    [
        garmin_df.loc[
            (garmin_df.timestamp >= race_start) & (garmin_df.timestamp <= race_end)
        ]
        for race_start, race_end in GIROE_UTC
    ]
)

# Keep only the rows whose `day` column agrees with the day of the month of
# their timestamp. A boolean mask is used rather than drop(index=...): dropping
# by label would also remove valid rows that share an index label with a
# mismatching row whenever the index is not unique.
# .copy() yields an independent frame, so later edits do not act on a slice.
giroe_df = giroe_df.loc[giroe_df.day == giroe_df.timestamp.dt.day].copy()

giroe_df.shape

Save some statistics.

In [None]:
# Same statistics as for the raw data, restricted to the race-time rows:
# earliest timestamp, latest timestamp and timestamp count per (day, device).
race_timestamps = giroe_df.groupby(["day", "device"])["timestamp"]
race_timestamps.min().to_csv("giroe_min.csv")
race_timestamps.max().to_csv("giroe_max.csv")
race_timestamps.count().to_csv("giroe_count.csv")

### Export Filtered data

In [None]:
# Show every available column so the export selection below can be checked against it.
display(giroe_df.columns)

# Columns written to the exported CSV files, in output order.
# Not exported: unknown_87, unknown_141, cadence, fractional_cadence,
# unknown_107, unknown_135, unknown_136, unknown_143, day, unknown_137,
# unknown_138.
ExportColumns = (
    ["device", "session", "timestamp"]
    + [
        "position_lat",
        "position_long",
        "distance",
        "enhanced_altitude",
        "heart_rate",
    ]
    + ["SystemBattery"]
    + [
        "PositionLatitude",
        "PositionLongitude",
        "PositionAltitude",
        "PositionSpeed",
        "PositionHeading",
        "PositionAccuracy",
    ]
    + [
        "SensorAltitude",
        "SensorSpeed",
        "SensorHeading",
        "SensorPressure",
        "SensorAccelerationX_HD",
        "SensorAccelerationY_HD",
        "SensorAccelerationZ_HD",
        "SensorHeartrate",
        "SensorTemperature",
    ]
    + ["enhanced_speed"]
)

Export all the data to multiple CSV files

In [None]:
outputFolder = "outputs"

# exist_ok makes the separate existence check unnecessary.
os.makedirs(outputFolder, exist_ok=True)

# One file holding the race points of every day and every device.
giroe_df[ExportColumns].to_csv(os.path.join(outputFolder, "giroe.csv"), index=False)

# One file per (day, device) pair.
for day in giroe_df.day.unique():
    day_df = giroe_df.loc[giroe_df.day == day]
    for device in day_df.device.unique():
        fName = f"giroe_{day}_{device}.csv"
        # Select on the device as well as on the day: the mask used to test the
        # day twice, so every per-device file held all devices of that day.
        day_df.loc[day_df.device == device, ExportColumns].to_csv(
            os.path.join(outputFolder, fName), index=False
        )


## 4. Map Plotting

In [None]:
# Value of the `day` column whose sessions are plotted.
DAY = 24

# Select the day and discard rows without a position before building the
# geometries, so no points are created for rows that are thrown away anyway.
day_points = giroe_df.loc[giroe_df.day == DAY].dropna(
    subset=["position_lat", "position_long"]
)

# points_from_xy expects x (longitude) first and y (latitude) second; the
# arguments used to be passed the other way round.
# NOTE(review): `gpd` (geopandas) is not imported in any visible cell —
# confirm it is imported before this cell runs.
gdf = gpd.GeoDataFrame(
    day_points,
    geometry=gpd.points_from_xy(
        day_points["position_long"], day_points["position_lat"]
    ),
)

# Session ids present on the selected day, and how many there are.
sessions = list(gdf.groupby("session").groups.keys())
print(len(sessions))

In [None]:
# One colour per plotted session, used for the circle markers on the map.
# Circle markers take CSS colour names. "lightred" and "darkpurple" are
# folium.Icon palette names rather than CSS colours, so they are replaced by
# the close CSS equivalents "lightcoral" and "indigo".
colors = [
    "red",
    "blue",
    "gray",
    "darkred",
    "lightcoral",
    "orange",
    "beige",
    "green",
    "darkgreen",
    "lightgreen",
    "darkblue",
    "lightblue",
    "purple",
    "indigo",
    "pink",
    "cadetblue",
    "lightgray",
    "black",
]
# Draw every session of the selected day on one folium map, one colour per
# session. The map is centred on the mean position of the first session.
# NOTE(review): `folium` is not imported in any visible cell — confirm it is
# imported before this cell runs.
map_plot = None  # stays None (nothing is displayed) when there is no session

# Group once instead of re-grouping the frame for every session.
points_by_session = gdf.groupby("session")

for i, session in enumerate(sessions):
    mapped_points = points_by_session.get_group(session)

    if map_plot is None:
        map_plot = folium.Map(
            location=[
                mapped_points.position_lat.mean(),
                mapped_points.position_long.mean(),
            ],
            tiles="OpenStreetMap",
            zoom_start=12,
            width="100%",
            height="100%",
        )

    # Wrap around the palette so more sessions than colours cannot raise
    # an IndexError.
    session_color = colors[i % len(colors)]

    # Adding a marker is a pure side effect, so a plain loop is used instead of
    # an accelerated apply: every marker is added exactly once, in this process.
    for lat, lon, timestamp in zip(
        mapped_points["position_lat"],
        mapped_points["position_long"],
        mapped_points["timestamp"],
    ):
        folium.CircleMarker(
            location=[lat, lon],
            radius=2,
            weight=5,
            fill=True,
            fill_color=session_color,
            color=session_color,
            popup=folium.Popup(
                f"{i}@{timestamp}",
            ),
        ).add_to(map_plot)

map_plot