In [None]:
# Chess Game Data Analysis

This notebook pulls chess game data from the Chess.com API and displays analytics for each of the sections listed below.

## Sections Covered:
1. **Overall Performance** - Win rate, rating trends
2. **Color Performance** - White vs Black statistics
3. **ELO Progression** - Rating changes over time
4. **Termination Analysis** - How games end (wins/losses)
5. **Opening Performance** - Best and worst openings
6. **Opponent Strength** - Performance against different ratings
7. **Time of Day** - Performance by time periods
8. **Mistake Analysis** - Game stage mistakes (early/middle/endgame)
9. **AI Coaching Advice** - Personalized recommendations

In [None]:
# Import required libraries
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import json

# Add project root to path to import project modules.
# The root is computed as two directories above the current working directory,
# so the `app.*` imports below only resolve when the kernel is started from a
# folder two levels beneath the project root.
# NOTE(review): presumably that folder is the notebook's own directory — confirm.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
# Insert at index 0 so the project root is searched before other sys.path entries.
sys.path.insert(0, project_root)

from app.services.chess_service import ChessService
from app.services.analytics_service import AnalyticsService

# Reached only if every import above succeeded.
# The check mark was stored mis-encoded (UTF-8 bytes decoded as Mac Roman); restored here.
print("✓ Libraries imported successfully")

In [None]:
# Configuration - Update these values
USERNAME = 'hikaru'  # Chess.com username to analyze
START_DATE = '2024-12-01'  # Analysis start date (YYYY-MM-DD)
END_DATE = '2024-12-31'    # Analysis end date (YYYY-MM-DD)
TIMEZONE = 'America/New_York'  # User timezone

# Read the OpenAI key from the kernel's environment rather than pasting a
# secret into the notebook (which would be saved and shared with it).
# Falls back to '' — the value that was previously hard-coded — when unset.
# NOTE(review): a .env file is not loaded here; the variable must already be
# present in the process environment when the kernel starts.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')

# Initialize services
chess_service = ChessService()
analytics_service = AnalyticsService(
    stockfish_path='stockfish',  # Update path if needed
    engine_depth=12,
    engine_enabled=True,  # Set to False to skip mistake analysis
    openai_api_key=OPENAI_API_KEY,  # Needed for AI advice
    openai_model='gpt-4o-mini'
)

# Echo the active settings so the rendered notebook records what was analyzed.
print("Configuration:")
print(f"  Username: {USERNAME}")
print(f"  Date Range: {START_DATE} to {END_DATE}")
print(f"  Timezone: {TIMEZONE}")
print("✓ Services initialized")

## Step 1: Fetch Chess Games Data

Fetch games from the Chess.com API for the date range set in the configuration cell.

In [None]:
# Fetch games from Chess.com API
print(f"Fetching games for {USERNAME} from {START_DATE} to {END_DATE}...")

start = datetime.strptime(START_DATE, '%Y-%m-%d')
end = datetime.strptime(END_DATE, '%Y-%m-%d')

# `end` is midnight at the *start* of END_DATE, so comparing `game_date <= end`
# would drop every game played during END_DATE itself. Use an exclusive upper
# bound one day later so the configured end date is fully included.
end_exclusive = end + timedelta(days=1)

all_games = []

# Iterate from the first day of the start month. Keeping the start day would
# (a) skip the final month whenever END_DATE's day-of-month is smaller than
# START_DATE's, and (b) raise ValueError in replace() for days 29-31 when the
# following month is shorter.
current = start.replace(day=1)

# Fetch games for each month in the range
while current <= end:
    month_label = current.strftime('%Y-%m')
    try:
        games = chess_service.get_games_by_month(USERNAME, current.year, current.month)

        # Filter games by date range. Games without 'end_time' map to the epoch
        # and therefore fall outside any realistic range.
        # NOTE(review): fromtimestamp() yields local time of the machine running
        # the kernel, not TIMEZONE — confirm that is the intended boundary.
        filtered_games = [
            game for game in games
            if start <= datetime.fromtimestamp(game.get('end_time', 0)) < end_exclusive
        ]

        all_games.extend(filtered_games)
        print(f"  ✓ {month_label}: {len(filtered_games)} games")
    except Exception as e:
        # Best effort: report the failed month and carry on with the rest.
        print(f"  ✗ {month_label}: No games or error - {str(e)}")

    # Move to next month
    if current.month == 12:
        current = current.replace(year=current.year + 1, month=1)
    else:
        current = current.replace(month=current.month + 1)

print(f"\n✓ Total games fetched: {len(all_games)}")

# Show a few fields of the first fetched game as a sanity check on the payload shape.
if all_games:
    print("\nSample game data (first game):")
    sample = all_games[0]
    white_side = sample.get('white', {})
    black_side = sample.get('black', {})
    print(f"  URL: {sample.get('url', 'N/A')}")
    print(f"  Time Class: {sample.get('time_class', 'N/A')}")
    print(f"  White: {white_side.get('username', 'N/A')} ({white_side.get('rating', 'N/A')})")
    print(f"  Black: {black_side.get('username', 'N/A')} ({black_side.get('rating', 'N/A')})")
    print(f"  Result: {white_side.get('result', 'N/A')} - {black_side.get('result', 'N/A')}")

## Step 2: Perform Comprehensive Analysis

Running detailed analysis across all sections (milestones 1-9).

In [None]:
# Perform comprehensive analysis
print("Running comprehensive analysis...")
print("Note: This may take a few minutes if mistake analysis is enabled.\n")

# Human-readable range string handed to the analysis call as `date_range`.
date_range_str = f"{START_DATE} to {END_DATE}"

analysis_results = analytics_service.analyze_detailed(
    games=all_games,
    username=USERNAME,
    timezone=TIMEZONE,
    include_mistake_analysis=True,  # Milestone 8
    include_ai_advice=False,  # Milestone 9 - requires OpenAI API key
    date_range=date_range_str
)

# The check mark was stored mis-encoded (UTF-8 bytes decoded as Mac Roman); restored here.
print("✓ Analysis complete!")
print(f"\nTotal games analyzed: {analysis_results.get('total_games', 0)}")

## Section 1: Overall Performance

General performance metrics including win rate and rating trends.

In [None]:
# Overall performance: headline metrics, W/L/D record, and a peek at per-day stats.
overall = analysis_results['sections']['overall_performance']

print("=" * 60, "OVERALL PERFORMANCE", "=" * 60, sep="\n")
print(f"Win Rate: {overall.get('win_rate', 0):.1f}%")
print(f"Rating Change: {overall.get('rating_change', 0):+.0f}")
print(f"Rating Trend: {overall.get('rating_trend', 'N/A')}")
print(f"Average Rating: {overall.get('avg_rating', 0):.0f}")

print("\nTotal Record:")
overall_record = overall.get('total', {})
for record_label, record_key in (("Wins", 'wins'), ("Losses", 'losses'), ("Draws", 'draws')):
    print(f"  {record_label}: {overall_record.get(record_key, 0)}")

# Per-day rows are optional; show only the first five when present.
daily_stats = overall.get('daily_stats', [])
if daily_stats:
    print(f"\nDaily Statistics: {len(daily_stats)} days")
    df_daily = pd.DataFrame(daily_stats)
    print("\nSample Daily Stats (first 5 days):")
    print(df_daily.head())

## Section 2: Color Performance

Performance comparison between playing White vs Black pieces.

In [None]:
# Display Color Performance
color_perf = analysis_results['sections']['color_performance']

print("=" * 60)
print("COLOR PERFORMANCE")
print("=" * 60)

white = color_perf.get('white', {})
black = color_perf.get('black', {})

# Both colors share one layout, so print them from a single loop.
# Heading glyphs were stored mis-encoded (UTF-8 read as Mac Roman); restored here.
for heading, side in (("\n🔲 WHITE PIECES:", white), ("\n⬛ BLACK PIECES:", black)):
    side_record = side.get('total', {})
    print(heading)
    print(f"  Win Rate: {side.get('win_rate', 0):.1f}%")
    print(f"  Games: {side_record.get('wins', 0)}W - {side_record.get('losses', 0)}L - {side_record.get('draws', 0)}D")

# Comparison. A zero difference used to be reported as "Black" being the
# stronger color; equal win rates are now labelled explicitly.
diff = white.get('win_rate', 0) - black.get('win_rate', 0)
if diff > 0:
    stronger = "White"
elif diff < 0:
    stronger = "Black"
else:
    stronger = "Even"
print(f"\n📊 Stronger Color: {stronger} ({abs(diff):.1f}% difference)")

## Section 3: ELO Progression

Rating changes over the analyzed period.

In [None]:
# ELO progression: net change, summary ratings, then a preview of the raw points.
elo_prog = analysis_results['sections']['elo_progression']

print("=" * 60, "ELO PROGRESSION", "=" * 60, sep="\n")
print(f"Rating Change: {elo_prog.get('rating_change', 0):+.0f}")

# The remaining summary values share one format, so drive them from a table.
for rating_label, rating_key in (
    ("Starting Rating", 'start_rating'),
    ("Ending Rating", 'end_rating'),
    ("Peak Rating", 'peak_rating'),
    ("Lowest Rating", 'lowest_rating'),
):
    print(f"{rating_label}: {elo_prog.get(rating_key, 0):.0f}")

# Data points are optional; show only the first five when present.
data_points = elo_prog.get('data_points', [])
if data_points:
    print(f"\nRating Data Points: {len(data_points)}")
    df_elo = pd.DataFrame(data_points)
    print("\nSample ELO Data (first 5 games):")
    print(df_elo.head())

## Section 4: Termination Analysis

How games end - both wins and losses.

In [None]:
# Display Termination Analysis
def _print_termination_breakdown(total, breakdown):
    """Print each termination method with its count and share, most frequent first.

    Args:
        total: Reported number of games in this group (wins or losses).
        breakdown: Mapping of termination method -> count.

    The share is computed against `total`; when `total` is 0 or missing the sum
    of the breakdown counts is used instead, and 0.0% is shown if that is also
    zero. This avoids a ZeroDivisionError and avoids dividing by a placeholder.
    """
    denominator = total or sum(breakdown.values())
    for method, count in sorted(breakdown.items(), key=lambda item: item[1], reverse=True):
        pct = (count / denominator) * 100 if denominator else 0.0
        print(f"    {method}: {count} ({pct:.1f}%)")


term_wins = analysis_results['sections']['termination_wins']
term_losses = analysis_results['sections']['termination_losses']

print("=" * 60)
print("TERMINATION ANALYSIS")
print("=" * 60)

# Wins (heading glyphs were stored mis-encoded; restored here)
print("\n✅ HOW YOU WIN:")
print(f"  Total Wins: {term_wins.get('total_wins', 0)}")
wins_breakdown = term_wins.get('breakdown', {})
_print_termination_breakdown(term_wins.get('total_wins', 0), wins_breakdown)

# Losses
print("\n❌ HOW YOU LOSE:")
print(f"  Total Losses: {term_losses.get('total_losses', 0)}")
losses_breakdown = term_losses.get('breakdown', {})
_print_termination_breakdown(term_losses.get('total_losses', 0), losses_breakdown)

## Section 5: Opening Performance

Best and worst performing chess openings.

In [None]:
# Display Opening Performance
def _print_opening_list(heading, entries, limit=5):
    """Print a heading followed by up to `limit` openings with win rate and game count.

    Args:
        heading: Line printed before the list.
        entries: List of dicts read via the 'opening', 'win_rate' and 'games' keys.
        limit: Maximum number of entries shown.

    Prints "No data available" when `entries` is empty.
    """
    print(heading)
    if not entries:
        print("  No data available")
        return
    for rank, entry in enumerate(entries[:limit], 1):
        print(f"  {rank}. {entry.get('opening', 'N/A')}")
        print(f"     Win Rate: {entry.get('win_rate', 0):.1f}% | Games: {entry.get('games', 0)}")


openings = analysis_results['sections']['opening_performance']

print("=" * 60)
print("OPENING PERFORMANCE")
print("=" * 60)

# Best openings (heading glyphs were stored mis-encoded; restored here)
best = openings.get('best_openings', [])
_print_opening_list("\n🏆 BEST OPENINGS:", best)

# Worst openings
worst = openings.get('worst_openings', [])
_print_opening_list("\n📉 WORST OPENINGS:", worst)

## Section 6: Opponent Strength Analysis

Performance against different rating ranges.

In [None]:
# Display Opponent Strength Analysis
opponent = analysis_results['sections']['opponent_strength']

print("=" * 60)
print("OPPONENT STRENGTH ANALYSIS")
print("=" * 60)
print(f"\nAverage Opponent Rating: {opponent.get('avg_opponent_rating', 0):.0f}")

# Performance by rating difference
rating_diff = opponent.get('by_rating_diff', {})
# Glyphs in the heading and in the 'similar' label were stored mis-encoded
# (UTF-8 bytes decoded as Mac Roman); the intended characters are restored.
print("\n📊 Performance by Rating Difference:")
# Display order of the buckets, from weakest to strongest opposition.
categories = ['much_lower', 'lower', 'similar', 'higher', 'much_higher']
labels = {
    'much_lower': '<<< Much Lower (-200+)',
    'lower': '<< Lower (-100 to -199)',
    'similar': '≈ Similar (±99)',
    'higher': '>> Higher (+100 to +199)',
    'much_higher': '>>> Much Higher (+200+)'
}

for cat in categories:
    data = rating_diff.get(cat, {})
    # Buckets with no games are skipped.
    if data.get('games', 0) > 0:
        print(f"\n  {labels[cat]}:")
        print(f"    Win Rate: {data.get('win_rate', 0):.1f}%")
        print(f"    Games: {data.get('games', 0)} ({data.get('wins', 0)}W-{data.get('losses', 0)}L-{data.get('draws', 0)}D)")

## Section 7: Time of Day Analysis

Performance based on when games are played.

In [None]:
# Display Time of Day Analysis
time_perf = analysis_results['sections']['time_of_day']

print("=" * 60)
print("TIME OF DAY ANALYSIS")
print("=" * 60)

# Period key -> display label. Glyphs were stored mis-encoded (UTF-8 bytes
# decoded as Mac Roman); the intended characters are restored.
time_periods = {
    'morning': '🌅 Morning (6am-12pm)',
    'afternoon': '☀️ Afternoon (12pm-6pm)',
    'evening': '🌆 Evening (6pm-10pm)',
    'night': '🌙 Night (10pm-6am)'
}

for period, label in time_periods.items():
    data = time_perf.get(period, {})
    # Periods with no games are skipped.
    if data.get('games', 0) > 0:
        print(f"\n{label}:")
        print(f"  Win Rate: {data.get('win_rate', 0):.1f}%")
        print(f"  Games: {data.get('games', 0)} ({data.get('wins', 0)}W-{data.get('losses', 0)}L-{data.get('draws', 0)}D)")
        print(f"  Avg Rating: {data.get('avg_rating', 0):.0f}")

# Find best time. Only dict entries with at least one game are candidates, so:
#  - an empty section no longer makes max() raise ValueError, and
#  - a period with zero games can no longer win and suppress the summary line.
played_periods = [
    (period_key, period_data)
    for period_key, period_data in time_perf.items()
    if isinstance(period_data, dict) and period_data.get('games', 0) > 0
]
best_time = max(played_periods, key=lambda item: item[1].get('win_rate', 0), default=None)
if best_time is not None:
    print(f"\n⭐ Best Performance: {time_periods.get(best_time[0], best_time[0])} ({best_time[1].get('win_rate', 0):.1f}% win rate)")

## Section 8: Mistake Analysis (Milestone 8)

Game stage mistake analysis using Stockfish engine (early game, middle game, endgame).

In [None]:
# Display Mistake Analysis
# The section is optional, hence .get() with an empty default.
mistake_analysis = analysis_results['sections'].get('mistake_analysis', {})

print("=" * 60)
print("MISTAKE ANALYSIS (Stockfish)")
print("=" * 60)

# Check if analysis was performed: at least one analyzed game must be reported.
# Glyphs in this cell were stored mis-encoded (UTF-8 bytes decoded as
# Mac Roman); the intended characters are restored.
sample_info = mistake_analysis.get('sample_info', {})
if sample_info.get('analyzed_games', 0) > 0:
    print("\nSample Info:")
    print(f"  Total Games: {sample_info.get('total_games', 0)}")
    print(f"  Analyzed Games: {sample_info.get('analyzed_games', 0)}")
    print(f"  Sample %: {sample_info.get('sample_percentage', 0):.1f}%")

    # Weakest stage
    print(f"\n⚠️ Weakest Stage: {mistake_analysis.get('weakest_stage', 'N/A')}")
    print(f"   Reason: {mistake_analysis.get('weakest_stage_reason', 'N/A')}")

    # Stage key -> display label, in the order the stages are printed.
    stages = {
        'early': '📘 Early Game (Moves 1-15)',
        'middle': '📗 Middle Game (Moves 16-40)',
        'endgame': '📕 Endgame (Moves 41+)'
    }

    for stage, label in stages.items():
        stage_data = mistake_analysis.get(stage, {})
        # Stages with no analyzed moves are skipped.
        if stage_data.get('total_moves', 0) > 0:
            print(f"\n{label}:")
            print(f"  Total Moves Analyzed: {stage_data.get('total_moves', 0)}")
            print(f"  Inaccuracies: {stage_data.get('inaccuracies', 0)}")
            print(f"  Mistakes: {stage_data.get('mistakes', 0)}")
            print(f"  Blunders: {stage_data.get('blunders', 0)}")
            print(f"  Missed Opportunities: {stage_data.get('missed_opps', 0)}")
            print(f"  Avg CP Loss: {stage_data.get('avg_cp_loss', 0):.1f}")

            # Critical mistake game (optional per stage)
            critical = stage_data.get('critical_mistake_game')
            if critical:
                print(f"  Worst Game: Move {critical.get('move_number', 'N/A')} - {critical.get('type', 'N/A')} ({critical.get('cp_loss', 0):.0f} CP loss)")
                print(f"    URL: {critical.get('game_url', 'N/A')}")
else:
    print("\n⚠️ Mistake analysis not available")
    print("   Reasons:")
    print("   - Engine analysis disabled")
    print("   - No games to analyze")
    print("   - Stockfish not installed or not in PATH")

## Section 9: AI Coaching Advice (Milestone 9)

AI-generated personalized coaching recommendations based on the analysis.

**Note:** Requires OpenAI API key to be configured.

In [None]:
# Display AI Coaching Advice
# The section is optional, hence .get() with an empty default.
ai_advice = analysis_results['sections'].get('ai_coaching_advice', {})

print("=" * 60)
print("AI COACHING ADVICE")
print("=" * 60)

# Glyphs in this cell were stored mis-encoded (UTF-8 bytes decoded as
# Mac Roman); the intended characters are restored.
advice_text = ai_advice.get('advice') if ai_advice else None
if advice_text:
    print("\n🤖 Personalized Recommendations:\n")
    # advice_text is known to be non-empty here, so no fallback string is needed.
    print(advice_text)

    # Metadata (printed only when present)
    metadata = ai_advice.get('metadata', {})
    if metadata:
        print("\n---")
        print(f"Generated: {metadata.get('timestamp', 'N/A')}")
        print(f"Model: {metadata.get('model', 'N/A')}")
        print(f"Tokens: {metadata.get('tokens_used', 'N/A')}")
else:
    print("\n⚠️ AI coaching advice not available")
    print("   Reasons:")
    print("   - OpenAI API key not configured")
    print("   - AI advice generation disabled")
    print("   - Analysis data insufficient")
    print("\nTo enable AI coaching:")
    print("   1. Set OPENAI_API_KEY in .env file")
    print("   2. Set include_ai_advice=True in analysis")

## Summary: Complete Analysis Results

View the complete JSON structure of all analysis results.

In [None]:
# Display complete analysis structure (abbreviated)
print("=" * 60)
print("COMPLETE ANALYSIS SUMMARY")
print("=" * 60)

print(f"\nTotal Games: {analysis_results.get('total_games', 0)}")
print("\nAvailable Sections:")
# Glyphs in this cell were stored mis-encoded (UTF-8 bytes decoded as
# Mac Roman); the intended characters are restored.
for section_name in analysis_results.get('sections', {}).keys():
    print(f"  ✓ {section_name}")

# Optional: Save to JSON file
save_json = False  # Set to True to save results

if save_json:
    output_file = f"chess_analysis_{USERNAME}_{START_DATE}_to_{END_DATE}.json"
    # Explicit UTF-8 so the export does not depend on the platform's default
    # encoding; default=str stringifies values json cannot serialize natively.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(analysis_results, f, indent=2, default=str)
    print(f"\n✓ Results saved to: {output_file}")
else:
    print("\n📝 Set save_json=True to export results to JSON file")