In [None]:
import pandas as pd

# Load the Strava activities parquet file into a DataFrame.
# NOTE(review): the absolute "/content/..." path looks environment-specific — confirm it exists where this runs.
df = pd.read_parquet("/content/strava_activities_filtered.parquet")
# Bare last expression: the notebook renders the first rows of the frame as the cell output.
df.head()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise correlations, computed over the numeric columns only.
numeric_df = df.select_dtypes(include='number')
corr_matrix = numeric_df.corr()

# Annotated heatmap of the correlation matrix (two-decimal labels, diverging palette).
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    cbar=True,
    ax=ax,
)
plt.show()

In [None]:
# Number of rows per activity type (value_counts sorts the most frequent first).
activity_counts = df['activity_type'].value_counts()

# Bar chart of the counts with tick labels tilted 45 degrees for readability.
fig, ax = plt.subplots(figsize=(10, 6))
activity_counts.plot.bar(ax=ax, color='green', rot=45)
ax.set_xlabel('Activity Type')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Activity names to exclude from the length analysis.
# NOTE(review): these look like Strava's auto-generated default titles — confirm the list is complete.
exclude_names = [
    "Morning Run", "Stretching", "Night Run", "Afternoon Run",
    "Morning Walk", "Afternoon Walk", "Afternoon Swim", "Lunch Swim",
    "Morning Ride", "Morning Swim", "Evening Swim", "Evening Run"
]

# A value counts as "present" only if it is non-missing AND non-blank.
# The notna() check matters: astype(str) alone turns missing values into the
# literal strings 'nan' / 'None', which would otherwise pass the blank test.
has_name = df['activity_name'].notna() & (df['activity_name'].astype(str).str.strip() != '')
has_description = (
    df['activity_description'].notna()
    & (df['activity_description'].astype(str).str.strip() != '')
)
is_excluded_name = df['activity_name'].isin(exclude_names)

# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of df (avoids SettingWithCopyWarning).
df_filtered = df[has_name & has_description & ~is_excluded_name].copy()

# Character counts of the raw (unstripped) name and description.
df_filtered['name_length'] = df_filtered['activity_name'].astype(str).str.len()
df_filtered['description_length'] = df_filtered['activity_description'].astype(str).str.len()

# Zen-inspired color palette
name_color = "#6B8E23"        # Olive green
description_color = "#4682B4" # Steel blue

# Overlay both length distributions; descriptions are drawn first so the
# name histogram is painted on top of them.
plt.figure(figsize=(12,6))
sns.histplot(df_filtered['description_length'], color=description_color, label='Activity Description', bins=10, alpha=0.8)
sns.histplot(df_filtered['name_length'], color=name_color, label='Activity Name', bins=10, alpha=0.9)
plt.xlabel('Length (characters)', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.legend()
sns.despine()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Reload the activities, parse the local start timestamp, and derive the
# start hour from it — all in one chained expression.
df = (
    pd.read_parquet("/content/strava_activities_filtered.parquet")
    .assign(
        activity_start_date_local=lambda frame: pd.to_datetime(frame['activity_start_date_local']),
        hour=lambda frame: frame['activity_start_date_local'].dt.hour,
    )
)

# Define time of day function
def time_of_day(hour):
    """Map an hour of the day to a coarse time-of-day label.

    Parameters
    ----------
    hour : int or float
        Hour on a 24-hour clock. Missing values (None, NaN, pd.NA) are accepted.

    Returns
    -------
    str or None
        'Early Morning' for 3 <= hour < 7, 'Late Morning' for 7 <= hour < 12,
        'Afternoon' for 12 <= hour < 17, 'Evening' for 17 <= hour < 21 and
        'Night' for every other hour. None when ``hour`` is missing.
    """
    # A missing hour fails every range comparison below and would otherwise
    # fall through to 'Night', silently inflating that bucket.
    if pd.isna(hour):
        return None
    if 3 <= hour < 7:
        return 'Early Morning'
    if 7 <= hour < 12:
        return 'Late Morning'
    if 12 <= hour < 17:
        return 'Afternoon'
    if 17 <= hour < 21:
        return 'Evening'
    return 'Night'

# Label every activity with its time-of-day bucket.
df['time_of_day'] = df['hour'].apply(time_of_day)

# Left-to-right order of the buckets on the x-axis.
time_of_day_order = ['Early Morning', 'Late Morning', 'Afternoon', 'Evening', 'Night']

# Define custom colors for each time of day (same order as time_of_day_order)
colors = ['#FFB347',  # Early Morning - orange
          '#FFD700',  # Late Morning - gold
          '#87CEEB',  # Afternoon - sky blue
          '#FF69B4',  # Evening - hot pink
          '#4B0082']  # Night - indigo

plt.figure(figsize=(8,5))
# seaborn 0.13 deprecated passing `palette` without `hue`. Assigning the x
# variable to `hue` (with the legend turned off) is the documented replacement
# and draws the same per-bar colors. The `legend` argument needs seaborn >= 0.13.
# The dict palette binds each color to its label by name instead of by position.
sns.countplot(
    x='time_of_day',
    hue='time_of_day',
    data=df,
    order=time_of_day_order,
    hue_order=time_of_day_order,
    palette=dict(zip(time_of_day_order, colors)),
    legend=False,
)
plt.ylabel('Number of Activities')
plt.xlabel('')
plt.show()