# Strava Exploratory Data Analysis (EDA)

This notebook provides an interactive environment to explore your Strava activity data.  
It loads the data with the project's `ActivityParser`, explores it with pandas, matplotlib, and seaborn, and maps routes with the project's `RouteMapVisualizer`.

In [None]:
# Enable IPython's autoreload extension (mode 2) so edits to the local project
# modules are picked up live instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Add the current directory to the import path so the local project modules
# can be imported. The membership check keeps this cell idempotent: re-running
# it does not pile up duplicate '.' entries in sys.path.
if '.' not in sys.path:
    sys.path.append('.')

from data_processor import ActivityParser
from map_visualizer import RouteMapVisualizer

# Plot defaults for the notebook: seaborn's "whitegrid" style and a 12x6
# default figure size (individual cells may still pass their own figsize).
sns.set_style("whitegrid")
plt.rc('figure', figsize=(12, 6))

## 1. Load Data

We'll use the `ActivityParser` to load all activities from the `activities/` directory.

In [None]:
# Value passed as `limit` to the parser. None loads all activities; presumably
# an integer caps how many are parsed (confirm against ActivityParser), which
# is handy for a quicker run while iterating on the notebook.
ACTIVITY_LIMIT = None

parser = ActivityParser()

print("Loading activities...")
routes = parser.parse_all_activities(limit=ACTIVITY_LIMIT)

print(f"Loaded {len(routes)} activities")

# The statistics, plots, and map below all index into the loaded activities,
# so flag an empty load here rather than letting a later cell fail with an
# opaque KeyError/ValueError.
if len(routes) == 0:
    print("Warning: no activities were loaded; the cells below need at least one activity.")

In [None]:
# One dict per activity, in the same order as `routes`, so DataFrame row i
# corresponds to routes[i].
data = [route.to_dict() for route in routes]

df = pd.DataFrame(data)
df.head()

## 2. Basic Statistics

In [None]:
# Summary statistics for the distance and elevation columns.
df.loc[
    :,
    ['distance_km', 'elevation_gain', 'elevation_loss'],
].describe()

## 3. Visualizations

In [None]:
# Horizontal bar chart of how many activities exist per type; bars follow the
# order produced by value_counts() on the activity_type column.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(
    data=df,
    y='activity_type',
    order=df['activity_type'].value_counts().index,
    ax=ax,
)
ax.set_title('Number of Activities by Type')
plt.show()

In [None]:
# Overlaid step histograms (with KDE curves) of distance, one per activity type.
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(
    data=df,
    x='distance_km',
    hue='activity_type',
    element='step',
    kde=True,
    ax=ax,
)
ax.set(
    title='Distance Distribution by Activity Type',
    xlabel='Distance (km)',
)
plt.show()

## 4. Map Visualization

In [None]:
visualizer = RouteMapVisualizer()

# Find the activity with the greatest distance. idxmax() returns an index
# *label*; it is used as a list position here, which relies on `df` still
# carrying the default 0..n-1 index it got when it was built from `routes`.
longest_route_idx = df['distance_km'].idxmax()
longest_route = routes[longest_route_idx]

print(
    "Visualizing longest route: "
    f"{longest_route.name} ({longest_route.distance_km:.2f} km)"
)

# Build the map for that single route from its own attributes.
m = visualizer.create_route_map(
    coordinates=longest_route.coordinates,
    route_name=longest_route.name,
    distance_km=longest_route.distance_km,
    elevation_gain=longest_route.elevation_gain,
)

# Leaving the map object as the cell's last expression renders it inline.
m