## 1. Setup and Imports

In [3]:
# Import necessary modules
import sys # sys is needed to modify the path
import os # os is needed to get the current working directory

# sys.path is the list of directories Python searches when resolving imports.
# The custom modules imported below live in the project's `src` and `utils`
# folders, so both are added to that list.
# The working directory is assumed to be the notebooks folder, which makes
# its parent the project root.
parent_dir = os.path.dirname(os.getcwd())

for _subdir in ('src', 'utils'):
    _module_dir = os.path.join(parent_dir, _subdir)
    # Skip directories already present so re-running this cell does not
    # keep growing sys.path with duplicate entries.
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Import custom modules
from data_fetcher import MLBDataFetcher #MLBDataFetcher is a class that fetches MLB data
from data_processor import MLBDataProcessor #MLBDataProcessor is a class that processes MLB data
from analytics import BattingAnalytics, PitchingAnalytics, TeamAnalytics # BattingAnalytics, PitchingAnalytics, TeamAnalytics are classes for performing analytics
from visualizations import StatsVisualizer # StatsVisualizer is a class for visualizing stats
from helpers import get_current_season, TEAM_IDS # get_current_season is a function that returns the current MLB season, TEAM_IDS is a dictionary of team IDs

# Display settings: passing None removes the limit on the number of columns
# and on the output width used when printing a DataFrame.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Reached only if none of the statements above in this cell raised.
print("✓ All modules imported successfully!")

SyntaxError: unterminated triple-quoted string literal (detected at line 299) (data_fetcher.py, line 272)

## 2. Fetching Player Data

Let's fetch some player statistics from the MLB API.

In [None]:
# Create the MLB data fetcher used by the following cells
fetcher = MLBDataFetcher()

# Determine the season to analyze and announce it
current_season = get_current_season()
season_message = f"Analyzing data for the {current_season} season"
print(season_message)

In [None]:
# Look up a player by name - Example: Aaron Judge
player_name = "Aaron Judge"
players = fetcher.search_players(player_name)

if not players:
    # No match: record None so later cells can skip the player-specific steps
    print(f"Player '{player_name}' not found")
    player_id = None
else:
    # Work with the first search result
    player = players[0]
    player_id = player['id']
    print(f"Found: {player['fullName']} (ID: {player_id})")

    # Position and team are read defensively; 'N/A' is shown when either
    # the nested entry or its 'name' field is missing.
    position_name = player.get('primaryPosition', {}).get('name', 'N/A')
    print(f"Position: {position_name}")
    team_name = player.get('currentTeam', {}).get('name', 'N/A')
    print(f"Team: {team_name}")

In [None]:
# Fetch hitting statistics for the player found by the search cell.
# player_stats is always defined after this cell (None when no player was
# found), so later cells can test it without relying on this branch having run.
player_stats = None

if player_id:
    player_stats = fetcher.get_player_stats(player_id, current_season, stat_group="hitting")

    if player_stats:
        print(f"\nSuccessfully fetched statistics for {player_stats.get('fullName', 'Unknown')}")
    else:
        # An empty or None response is reported instead of being announced as
        # a success (and instead of failing on .get() of a None result).
        print(f"\nNo statistics returned for player ID {player_id}")

## 3. Processing Statistics

Now let's process and structure the data.

In [None]:
# Create the MLB data processor
processor = MLBDataProcessor()

# Build the batting table only when a player was found and stats were fetched
if player_id and player_stats:
    batting_df = processor.extract_batting_stats(player_stats)

    if batting_df.empty:
        print("No batting statistics available")
    else:
        # Convert columns to numeric; the 'season' column is excluded
        batting_df = processor.convert_numeric_columns(batting_df, exclude_cols=['season'])

        print("\nBatting Statistics:")
        # Render the table as plain text without the index column
        batting_table = batting_df.to_string(index=False)
        print(batting_table)

## 4. Advanced Analytics

Calculate advanced baseball metrics.

In [None]:
# Set up the analytics helpers (pitching_analytics is used by the next cell)
batting_analytics = BattingAnalytics()
pitching_analytics = PitchingAnalytics()

# Example: derive advanced metrics from hand-entered sample data
print("\n=== Advanced Batting Metrics Example ===")

# Sample statistics
hits, doubles, triples, home_runs = 180, 35, 3, 45
walks, at_bats, strikeouts, hbp = 75, 550, 150, 5

# Singles are the hits left over once extra-base hits are removed
singles = hits - (doubles + triples + home_runs)

# Calculate metrics; OPS and ISO reuse the AVG/OBP/SLG values computed first
avg = batting_analytics.calculate_batting_average(hits, at_bats)
obp = batting_analytics.calculate_on_base_percentage(hits, walks, hbp, at_bats)
slg = batting_analytics.calculate_slugging_percentage(singles, doubles, triples, home_runs, at_bats)
ops = batting_analytics.calculate_ops(obp, slg)
iso = batting_analytics.calculate_iso(slg, avg)
babip = batting_analytics.calculate_babip(hits, home_runs, at_bats, strikeouts)

# Print every metric with three decimal places, in a fixed order
batting_report = (
    ("Batting Average (AVG)", avg),
    ("On-Base Percentage (OBP)", obp),
    ("Slugging Percentage (SLG)", slg),
    ("OPS", ops),
    ("Isolated Power (ISO)", iso),
    ("BABIP", babip),
)
for metric_label, metric_value in batting_report:
    print(f"{metric_label}: {metric_value:.3f}")

In [None]:
# Example: Pitching analytics
# Uses the pitching_analytics instance created in the batting example cell.
print("\n=== Pitching Analytics Example ===")

# Sample pitching statistics
earned_runs = 65
innings_pitched = 200.0
hits_allowed = 175
walks_allowed = 45
strikeouts_pitched = 245
home_runs_allowed = 22
# Previously passed to calculate_fip as a bare literal 8.
# NOTE(review): presumably the hit-by-pitch count (a standard FIP input) —
# confirm against the third parameter of PitchingAnalytics.calculate_fip.
hit_by_pitch_allowed = 8

# Calculate pitching metrics
era = pitching_analytics.calculate_era(earned_runs, innings_pitched)
whip = pitching_analytics.calculate_whip(walks_allowed, hits_allowed, innings_pitched)
k9 = pitching_analytics.calculate_k_per_9(strikeouts_pitched, innings_pitched)
bb9 = pitching_analytics.calculate_bb_per_9(walks_allowed, innings_pitched)
k_bb = pitching_analytics.calculate_k_bb_ratio(strikeouts_pitched, walks_allowed)
fip = pitching_analytics.calculate_fip(
    home_runs_allowed,
    walks_allowed,
    hit_by_pitch_allowed,
    strikeouts_pitched,
    innings_pitched,
)

# Report each metric with two decimal places
print(f"ERA: {era:.2f}")
print(f"WHIP: {whip:.2f}")
print(f"K/9: {k9:.2f}")
print(f"BB/9: {bb9:.2f}")
print(f"K/BB Ratio: {k_bb:.2f}")
print(f"FIP: {fip:.2f}")

## 5. Visualizations

Create visualizations to analyze the data.

In [None]:
# Initialize visualizer
# StatsVisualizer is imported from the project's visualizations module; the
# plotting cells that follow call its plot_* methods through this instance.
viz = StatsVisualizer()

# Sample comparison data, written one row per player
comparison_columns = ['playerName', 'homeRuns', 'rbi', 'avg', 'ops', 'stolenBases']
comparison_rows = [
    ('Judge', 45, 115, 0.285, 0.950, 8),
    ('Trout', 38, 95, 0.295, 0.920, 12),
    ('Ohtani', 44, 105, 0.275, 0.940, 15),
    ('Betts', 35, 98, 0.310, 0.900, 22),
]
comparison_data = pd.DataFrame(comparison_rows, columns=comparison_columns)

# Show the table under a short heading
print("Player Comparison Data:")
print(comparison_data)

In [None]:
# Batting comparison chart for the sample players, one entry per metric listed
batting_metrics = ['homeRuns', 'rbi', 'ops']
fig = viz.plot_batting_comparison(comparison_data, metrics=batting_metrics, figsize=(15, 5))
plt.show()

In [None]:
# Scatter plot comparison: batting average on x, home runs on y,
# points labelled by player name, correlation display switched on
scatter_options = dict(
    x_metric='avg',
    y_metric='homeRuns',
    label_col='playerName',
    show_correlation=True,
)
fig = viz.plot_scatter_comparison(comparison_data, **scatter_options)
plt.show()

In [None]:
# Career trajectory sample data, written one row per season
career_columns = ['season', 'homeRuns', 'avg', 'ops']
career_rows = [
    (2019, 27, 0.272, 0.921),
    (2020, 9, 0.257, 0.756),
    (2021, 39, 0.287, 0.916),
    (2022, 62, 0.311, 1.111),
    (2023, 37, 0.267, 0.882),
    (2024, 45, 0.285, 0.950),
]
career_data = pd.DataFrame(career_rows, columns=career_columns)

# Plot the home-run column of the career sample data for the named player
trajectory_options = {'metric': 'homeRuns', 'player_name': 'Aaron Judge'}
fig = viz.plot_career_trajectory(career_data, **trajectory_options)
plt.show()

## 6. Player Comparisons and Rankings

In [None]:
# Correlation heatmap over every numeric column of the comparison data
heatmap_metrics = ['homeRuns', 'rbi', 'avg', 'ops', 'stolenBases']
heatmap_title = 'Player Statistics Correlation Matrix'
fig = viz.plot_heatmap(comparison_data, metrics=heatmap_metrics, title=heatmap_title)
plt.show()

In [None]:
# Radar chart for an individual player.
# Each score is a hand-entered value labelled as a normalized stat.
player_metrics = dict(
    Power=0.92,       # Normalized home run rate
    Contact=0.75,     # Normalized batting average
    Patience=0.85,    # Normalized walk rate
    Speed=0.35,       # Normalized stolen bases
    Production=0.88,  # Normalized RBI
)

# The axes are the dict's keys in insertion order:
# Power, Contact, Patience, Speed, Production
radar_axes = list(player_metrics)

fig = viz.plot_radar_chart(player_metrics, metrics=radar_axes, player_name='Aaron Judge')
plt.show()

## Summary

This notebook demonstrated:
- Fetching player data from the MLB Stats API
- Processing and structuring baseball statistics
- Calculating advanced metrics (OPS, ISO, BABIP, FIP, etc.)
- Creating various visualizations for analysis
- Comparing players across multiple dimensions

### Next Steps
- Fetch team-level statistics
- Analyze historical trends
- Build predictive models
- Export analysis reports

In [None]:
# Closing banner: a blank line, then the message framed by two 50-character rules
banner_rule = "=" * 50
print(f"\n{banner_rule}\nAnalysis Complete!\n{banner_rule}")