In [1]:
import statsapi
import pandas as pd
import numpy as np
from datetime import datetime
import re

def parse_box_score(box_score):
    """Parse a traditional text linescore into per-team structured data.

    The first line of *box_score* is treated as the column header and is
    skipped. Every following non-blank line is one team row: its last three
    whitespace-separated tokens are runs, hits and errors, the run of
    numeric tokens (or ``x``/``X`` placeholders) directly before them is the
    inning-by-inning scores, and whatever precedes that run is the team name.

    The number of innings is derived from each row itself instead of from
    the digit groups found in the header. A row that carries fewer (or more)
    score tokens than the header suggests therefore no longer leaks score
    tokens into the team name, or team-name tokens into the scores.

    Args:
        box_score: Multi-line linescore text (a header line followed by one
            line per team).

    Returns:
        A dict mapping team name to a dict with the keys
        ``'Inning Scores'`` (list of str), ``'Runs'``, ``'Hits'`` and
        ``'Errors'`` (str). Values are kept as the strings found in the
        text; no numeric conversion is performed.

    Raises:
        ValueError: If a non-blank team row has fewer than three tokens and
            therefore cannot contain runs, hits and errors.
    """
    lines = box_score.strip().split('\n')
    team_scores = {}

    for line in lines[1:]:
        parts = line.split()
        if not parts:
            # Blank separator line: nothing to parse.
            continue
        if len(parts) < 3:
            raise ValueError(f"Malformed linescore row: {line!r}")

        *leading, runs, hits, errors = parts

        # Walk backwards over the trailing score tokens; the first token
        # that is not a score marks the end of the team name.
        # NOTE(review): a team name whose final word is purely numeric would
        # be read as a score here - confirm no such names occur upstream.
        # NOTE(review): two scores rendered without a separating space
        # cannot be told apart by whitespace splitting - confirm whether the
        # upstream formatter can emit that for double-digit innings.
        split_at = len(leading)
        while split_at > 0 and (
            leading[split_at - 1].isdigit() or leading[split_at - 1] in ('x', 'X')
        ):
            split_at -= 1

        team_name = ' '.join(leading[:split_at])
        team_scores[team_name] = {
            'Inning Scores': leading[split_at:],
            'Runs': runs,
            'Hits': hits,
            'Errors': errors
        }

    return team_scores

# Analysis window, given as YYYY-MM-DD strings
start_date = '2023-04-01'
end_date = '2023-04-30'

# Schedule entries for every game between start_date and end_date
sched = statsapi.schedule(start_date=start_date, end_date=end_date)

# Flat rows of (team, game id, row type, inning number, score)
data = []

# Walk the schedule one game at a time
for game in sched:
    game_id = game['game_id']
    print(f"Fetching linescore for game ID {game_id}")

    try:
        linescore = statsapi.linescore(game_id)
        if not isinstance(linescore, str):
            # parse_box_score only understands the text rendering
            print("Invalid format or missing innings data")
            continue

        parsed_data = parse_box_score(linescore)
        for team, stats in parsed_data.items():
            # Offensive rows: the team's own inning-by-inning scores
            data.extend(
                (team, game_id, 'Offensive', inning, score)
                for inning, score in enumerate(stats['Inning Scores'], start=1)
            )
            # Defensive rows: the scores of every other team in this game
            for opponent_team, opponent_stats in parsed_data.items():
                if team == opponent_team:
                    continue
                data.extend(
                    (team, game_id, 'Defensive', inning, score)
                    for inning, score in enumerate(
                        opponent_stats['Inning Scores'], start=1
                    )
                )
    except Exception as e:
        print(f"Error fetching linescore for game ID {game_id}: {e}")

# Build the frame; every column except 'Score' becomes an index level
columns = ['Team', 'Game ID', 'Type', 'Inning', 'Score']
df = pd.DataFrame(data, columns=columns)
df = df.set_index(columns[:-1])

# Show the result
print(df)


Fetching linescore for game ID 718760
Fetching linescore for game ID 718759
Fetching linescore for game ID 718758
Fetching linescore for game ID 718752
Fetching linescore for game ID 718753
Fetching linescore for game ID 718762
Fetching linescore for game ID 718757
Fetching linescore for game ID 718761
Fetching linescore for game ID 718756
Fetching linescore for game ID 718754
Fetching linescore for game ID 718751
Fetching linescore for game ID 718755
Fetching linescore for game ID 718750
Fetching linescore for game ID 718749
Fetching linescore for game ID 718746
Fetching linescore for game ID 718742
Fetching linescore for game ID 718748
Fetching linescore for game ID 718743
Fetching linescore for game ID 718747
Fetching linescore for game ID 718744
Fetching linescore for game ID 718741
Fetching linescore for game ID 718745
Fetching linescore for game ID 718740
Fetching linescore for game ID 718737
Fetching linescore for game ID 718738
Fetching linescore for game ID 718734
Fetching lin