# Mount Diablo Challenge: Data Visualization

This notebook creates comprehensive visualizations of the Mount Diablo Challenge race data.

## Visualizations:
1. Year-over-year performance trends
2. Wind speed impact on race times
3. Weather comparisons (start vs summit)
4. Custom publication-ready charts
5. Detailed single-year analysis
6. Chart export

In [None]:
# Setup
import sys
import os

# Make the project root and its src/ directory importable.
# NOTE: this assumes the kernel's working directory is a direct child of the
# project root (e.g. a notebooks/ folder); adjust if launched from elsewhere.
project_root = os.path.dirname(os.getcwd())
# The root is inserted first so that src/ ends up in front of it, giving the
# lookup order src/ -> project root. Removing an existing entry before
# re-inserting keeps the cell idempotent: re-running it does not pile up
# duplicate sys.path entries, and the two directories always take precedence.
for import_dir in (project_root, os.path.join(project_root, 'src')):
    if import_dir in sys.path:
        sys.path.remove(import_dir)
    sys.path.insert(0, import_dir)

from analysis import DiabloAnalyzer
import config

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set style
sns.set_style('whitegrid')
plt.rcParams['figure.dpi'] = 100
plt.rcParams['savefig.dpi'] = 300

%matplotlib inline

In [None]:
# Initialize analyzer
analyzer = DiabloAnalyzer()

# Load the two tables used by every later cell
df_race = analyzer.get_race_results_df()
df_weather = analyzer.get_weather_data_df()

print(f"Race results: {len(df_race)} records")
print(f"Weather data: {len(df_weather)} records")
# .tolist() turns NumPy scalars into plain Python values; without it NumPy >= 2
# prints each year as "np.int64(2018)" instead of "2018".
print(f"Years: {sorted(df_race['year'].unique().tolist())}")

## 1. Year-over-Year Performance Trends

Visualize how race times have evolved across years.

In [None]:
# Draw the analyzer's built-in yearly time-distribution plot.
# The return value is stored in `fig` (presumably a matplotlib Figure —
# confirm against DiabloAnalyzer); plt.show() renders the open figure inline.
fig = analyzer.plot_yearly_times_distribution()
plt.show()

## 2. Wind Speed Impact

Compare race performance with wind conditions.

In [None]:
# Draw the analyzer's built-in race-times-versus-wind plot.
# The return value is stored in `fig` (presumably a matplotlib Figure —
# confirm against DiabloAnalyzer); plt.show() renders the open figure inline.
fig = analyzer.plot_times_with_wind()
plt.show()

## 3. Weather Comparison: Start vs Summit

Compare weather conditions at the start and summit locations.

In [None]:
# Draw the analyzer's built-in wind comparison plot.
# The return value is stored in `fig` (presumably a matplotlib Figure —
# confirm against DiabloAnalyzer); plt.show() renders the open figure inline.
fig = analyzer.plot_wind_comparison()
plt.show()

## 4. Custom Visualizations

Create custom publication-ready visualizations.

In [None]:
# Heatmap with one column per year and one row per summary statistic
stats = analyzer.calculate_yearly_statistics()

# Drawn only when more than one year of (non-empty) statistics is available
if len(stats) > 1 and not stats.empty:
    fig, ax = plt.subplots(figsize=(12, 6))

    axis_label_style = {'fontsize': 12, 'fontweight': 'bold'}
    statistic_columns = ['year', 'p25', 'median', 'p75', 'mean']

    # Index the selected statistics by year and divide by 60 to get minutes
    minutes_by_year = stats[statistic_columns].set_index('year').div(60)

    # Transpose so that years run along the x axis
    sns.heatmap(
        minutes_by_year.transpose(),
        annot=True,
        fmt='.1f',
        cmap='RdYlGn_r',
        cbar_kws={'label': 'Time (minutes)'},
        ax=ax,
    )

    ax.set_xlabel('Year', **axis_label_style)
    ax.set_ylabel('Statistic', **axis_label_style)
    ax.set_title('Race Time Statistics Heatmap', fontsize=14, fontweight='bold')

    plt.tight_layout()
    plt.show()

In [None]:
# Multi-panel comprehensive dashboard

def _mark_no_data(ax, title, message='No data available'):
    """Title an empty panel and write a notice in it instead of leaving it blank."""
    ax.set_title(title, fontweight='bold', fontsize=12)
    ax.text(0.5, 0.5, message, ha='center', va='center',
            transform=ax.transAxes, color='gray')
    ax.set_xticks([])
    ax.set_yticks([])


def _set_year_ticks(ax, years):
    """Place one x tick per year so the axis never shows fractional years.

    ``years`` is a pandas Series or Index. Only numeric values are handled;
    any other dtype keeps matplotlib's default ticks.
    """
    if pd.api.types.is_numeric_dtype(years):
        ax.set_xticks(years.dropna().unique().tolist())


def _plot_weather_panel(ax, weather, column, how, kind, title, ylabel,
                        grid_axis='both', xtick_rotation=None, **plot_kwargs):
    """Plot one weather column aggregated per (year, location) on ``ax``.

    Parameters:
        ax: target matplotlib Axes.
        weather: DataFrame with 'year', 'location' and ``column`` columns.
        column: name of the weather column to aggregate.
        how: aggregation name passed to ``.agg`` (e.g. 'mean', 'max').
        kind: pandas plot kind ('bar' or 'line').
        title, ylabel: panel title and y-axis label.
        grid_axis: which grid lines to draw ('x', 'y' or 'both').
        xtick_rotation: rotation for x tick labels, or None to leave them.
        **plot_kwargs: forwarded to ``DataFrame.plot``.

    Returns:
        The year x location summary table, or None when there was nothing to
        plot (empty frame, missing column, or empty aggregate); in that case
        the panel shows a notice instead.
    """
    if weather.empty or column not in weather.columns:
        _mark_no_data(ax, title)
        return None
    summary = weather.groupby(['year', 'location'])[column].agg(how).unstack()
    if summary.empty:
        _mark_no_data(ax, title)
        return None

    summary.plot(kind=kind, ax=ax, **plot_kwargs)
    ax.set_xlabel('Year', fontweight='bold')
    ax.set_ylabel(ylabel, fontweight='bold')
    ax.set_title(title, fontweight='bold', fontsize=12)
    ax.legend(title='Location')
    ax.grid(True, alpha=0.3, axis=grid_axis)
    if kind == 'line':
        # Line plots use the year values as x coordinates; bar plots use
        # positional categories and already get one label per year.
        _set_year_ticks(ax, summary.index)
    if xtick_rotation is not None:
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=xtick_rotation)
    return summary


fig = plt.figure(figsize=(16, 12))
gs = fig.add_gridspec(3, 2, hspace=0.3, wspace=0.3)

# Yearly race-time statistics drive the first two panels
stats = analyzer.calculate_yearly_statistics()

# Panel 1: Mean and median times (full-width top row), with the IQR shaded
ax1 = fig.add_subplot(gs[0, :])
if not stats.empty:
    ax1.plot(stats['year'], stats['mean'] / 60, marker='o', label='Mean', linewidth=2, markersize=8)
    ax1.plot(stats['year'], stats['median'] / 60, marker='s', label='Median', linewidth=2, markersize=8)
    ax1.fill_between(stats['year'], stats['p25'] / 60, stats['p75'] / 60, alpha=0.2, label='IQR')
    ax1.set_xlabel('Year', fontweight='bold')
    ax1.set_ylabel('Time (minutes)', fontweight='bold')
    ax1.set_title('Race Time Trends', fontweight='bold', fontsize=12)
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    _set_year_ticks(ax1, stats['year'])
else:
    _mark_no_data(ax1, 'Race Time Trends')

# Panel 2: Number of participants
ax2 = fig.add_subplot(gs[1, 0])
if not stats.empty:
    ax2.bar(stats['year'], stats['count'], color='steelblue', alpha=0.7)
    ax2.set_xlabel('Year', fontweight='bold')
    ax2.set_ylabel('Participants', fontweight='bold')
    ax2.set_title('Participation by Year', fontweight='bold', fontsize=12)
    ax2.grid(True, alpha=0.3, axis='y')
    _set_year_ticks(ax2, stats['year'])
else:
    _mark_no_data(ax2, 'Participation by Year')

# Panel 3: Wind speeds (grouped bars, horizontal year labels)
ax3 = fig.add_subplot(gs[1, 1])
weather_summary = _plot_weather_panel(
    ax3, df_weather, 'windspeed_10m', 'mean', 'bar',
    title='Average Wind Speed by Location',
    ylabel='Avg Wind Speed (mph)',
    grid_axis='y', xtick_rotation=0, alpha=0.7,
)

# Panel 4: Temperature comparison
ax4 = fig.add_subplot(gs[2, 0])
temp_summary = _plot_weather_panel(
    ax4, df_weather, 'temperature_2m', 'mean', 'line',
    title='Average Temperature by Location',
    ylabel='Avg Temperature (°F)',
    marker='o', linewidth=2, markersize=8,
)

# Panel 5: Wind gusts (yearly maximum rather than mean)
ax5 = fig.add_subplot(gs[2, 1])
gust_summary = _plot_weather_panel(
    ax5, df_weather, 'wind_gusts_10m', 'max', 'line',
    title='Maximum Wind Gusts by Location',
    ylabel='Max Wind Gust (mph)',
    marker='s', linewidth=2, markersize=8,
)

fig.suptitle('Mount Diablo Challenge: Comprehensive Analysis Dashboard',
             fontsize=16, fontweight='bold', y=0.995)

plt.show()

## 5. Interactive Analysis

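Explore a specific year in detail: set `year_to_analyze` in the cell below (it defaults to the most recent year in the data) and re-run it to regenerate the four-panel breakdown.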

In [None]:
# Select a year to analyze in detail. Defaults to the most recent year;
# assign any other value present in df_race['year'] to explore it instead.
year_to_analyze = df_race['year'].max()

df_year = df_race[df_race['year'] == year_to_analyze].copy()
df_year['chip_time_minutes'] = df_year['chip_time_seconds'] / 60

print(f"Analyzing year: {year_to_analyze}")
print(f"Number of finishers: {len(df_year)}")

# Create detailed visualization for this year (2 x 2 grid of panels)
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
ax_dist, ax_age = axes[0]
ax_gender, ax_top = axes[1]

# Distribution of chip times with mean / median markers
ax_dist.hist(df_year['chip_time_minutes'], bins=30, edgecolor='black', alpha=0.7)
ax_dist.axvline(df_year['chip_time_minutes'].mean(), color='red',
                linestyle='--', linewidth=2, label='Mean')
ax_dist.axvline(df_year['chip_time_minutes'].median(), color='blue',
                linestyle='--', linewidth=2, label='Median')
ax_dist.set_xlabel('Chip Time (minutes)', fontweight='bold')
ax_dist.set_ylabel('Frequency', fontweight='bold')
ax_dist.set_title(f'Time Distribution ({year_to_analyze})', fontweight='bold')
ax_dist.legend()
ax_dist.grid(True, alpha=0.3, axis='y')

# Age vs Time (only rows with a known age)
if 'age' in df_year.columns and df_year['age'].notna().any():
    df_year_clean = df_year[df_year['age'].notna()]
    ax_age.scatter(df_year_clean['age'], df_year_clean['chip_time_minutes'], alpha=0.5)
    ax_age.set_xlabel('Age', fontweight='bold')
    ax_age.set_ylabel('Chip Time (minutes)', fontweight='bold')
    ax_age.set_title(f'Age vs Performance ({year_to_analyze})', fontweight='bold')
    ax_age.grid(True, alpha=0.3)
else:
    # No usable age data: hide the panel rather than show an empty frame
    ax_age.set_visible(False)

# Gender comparison
if 'gender' in df_year.columns and df_year['gender'].notna().any():
    df_year.boxplot(column='chip_time_minutes', by='gender', ax=ax_gender)
    ax_gender.set_xlabel('Gender', fontweight='bold')
    ax_gender.set_ylabel('Chip Time (minutes)', fontweight='bold')
    ax_gender.set_title(f'Times by Gender ({year_to_analyze})', fontweight='bold')
    # Clear the figure-level title that the grouped boxplot adds by default.
    # fig.suptitle targets this figure explicitly instead of relying on
    # pyplot's notion of the "current" figure.
    fig.suptitle('')
else:
    # No usable gender data: a grouped boxplot would have nothing to group
    ax_gender.set_visible(False)

# Top 10 finishers (fastest at the top)
top10 = df_year.nsmallest(10, 'chip_time_minutes')
y_pos = np.arange(len(top10))
# Keep the text before any '(' and cap it at 20 characters. Missing or
# non-string names get a placeholder / str() conversion instead of raising
# AttributeError on name.split().
top10_labels = [
    'Unknown' if pd.isna(name) else str(name).split('(')[0].strip()[:20]
    for name in top10['name']
]
ax_top.barh(y_pos, top10['chip_time_minutes'], alpha=0.7)
ax_top.set_yticks(y_pos)
ax_top.set_yticklabels(top10_labels)
ax_top.invert_yaxis()
ax_top.set_xlabel('Chip Time (minutes)', fontweight='bold')
ax_top.set_title(f'Top 10 Finishers ({year_to_analyze})', fontweight='bold')
ax_top.grid(True, alpha=0.3, axis='x')

plt.tight_layout()
plt.show()

## 6. Export Charts

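Save the analyzer's built-in plots to the charts directory (`config.CHARTS_DIR`). The custom figures built in sections 4 and 5 are displayed inline only and are not written by this cell.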

In [None]:
# Generate and save all plots
output_dir = config.CHARTS_DIR
os.makedirs(output_dir, exist_ok=True)

print("Generating all plots...")
analyzer.generate_all_plots(output_dir=output_dir)

print(f"\nAll charts saved to: {output_dir}")
print("\nGenerated files:")
# os.listdir returns entries in arbitrary order; sort them so the listing is
# stable between runs. Note that this shows every PNG in the directory,
# including files left over from earlier runs.
for file in sorted(os.listdir(output_dir)):
    if file.endswith('.png'):
        print(f"  - {file}")

In [None]:
# Close the analyzer once all plots have been produced.
# NOTE(review): presumably this releases whatever resources DiabloAnalyzer
# holds — confirm against its implementation. Cells above that use `analyzer`
# may need the setup cell re-run after this point.
analyzer.close()
print("\nVisualization complete!")

## Next Steps

- Apply custom styling from aura.build design template
- Create interactive dashboards using Plotly
- Export data for use in web visualizations
- Generate reports and presentations