In [None]:
import requests
from datetime import timedelta, date, datetime
import json
import pandas as pd
import os
import csv
import sys
import time

In [None]:
sys.path.append('../../code')  # Add the relative '../../code' directory to the import search path; adjust if this notebook is moved
# scrape_new is a project-local module (presumably located in the directory added above).
from scrape_new import daterange, get_daily_game_ids

In [None]:
def extract_team_batting_stats(team_data, team_type, stats_list, custom_game_id):
    """Append one row of team-level batting and fielding stats to ``stats_list``.

    Args:
        team_data: Mapping that holds one entry per team (keyed by
            ``team_type``), each with a nested ``statistics`` mapping
            containing ``hitting`` and ``fielding`` sections.
        team_type: Key of the team to read, e.g. ``'home'`` or ``'away'``.
            It is also stored in the row's ``home_away`` field.
        stats_list: List the row dict is appended to (mutated in place).
        custom_game_id: Identifier stored in the row's ``game_id`` field.

    Returns:
        The same ``stats_list`` object. Nothing is appended when the team has
        no hitting statistics. Any individual value that is missing from the
        payload is recorded as ``'N/A'`` instead of raising ``KeyError``.
    """
    missing = 'N/A'

    def _lookup(mapping, *keys):
        # Walk nested dicts key by key; fall back to 'N/A' as soon as a level
        # is absent or is not a dict, so partial payloads never raise.
        current = mapping
        for key in keys:
            if not isinstance(current, dict) or key not in current:
                return missing
            current = current[key]
        return current

    # Team-level hitting statistics (plus fielding for putouts/assists).
    statistics = (team_data.get(team_type) or {}).get('statistics') or {}
    hitting = statistics.get('hitting') or {}
    fielding = statistics.get('fielding') or {}

    if not hitting:
        return stats_list

    stats_list.append({
        'home_away': team_type,
        'ab': _lookup(hitting, 'overall', 'ab'),
        'r': _lookup(hitting, 'overall', 'runs', 'total'),
        'h': _lookup(hitting, 'overall', 'onbase', 'h'),
        'rbi': _lookup(hitting, 'overall', 'rbi'),
        'bb': _lookup(hitting, 'overall', 'onbase', 'bb'),
        'so': _lookup(hitting, 'overall', 'outs', 'ktotal'),
        'pa': _lookup(hitting, 'overall', 'ap'),
        'batting_avg': _lookup(hitting, 'overall', 'avg'),
        'onbase_perc': _lookup(hitting, 'overall', 'obp'),
        'slugging_perc': _lookup(hitting, 'overall', 'slg'),
        'onbase_plus_slugging': _lookup(hitting, 'overall', 'ops'),
        'pitches': _lookup(hitting, 'overall', 'pitch_count'),
        'strikes_total': _lookup(hitting, 'overall', 'pitches', 'ktotal'),
        # The following columns are not read from the payload; they are
        # always written as 'N/A' placeholders.
        'wpa_bat': missing,
        'leverage_index_avg': missing,
        'wpa_bat_pos': missing,
        'wpa_bat_neg': missing,
        're24_bat': missing,
        'po': _lookup(fielding, 'overall', 'po'),
        'a': _lookup(fielding, 'overall', 'a'),
        'game_id': custom_game_id,
    })

    return stats_list

In [None]:
def _write_team_batting_csv(rows, file_path):
    """Write the team batting rows to ``file_path`` as CSV with a fixed column order."""
    fieldnames = [
        'home_away',
        'ab',  # At bats
        'r',   # Runs
        'h',   # Hits
        'rbi', # Runs batted in
        'bb',  # Walks (Base on Balls)
        'so',  # Strikeouts
        'pa',  # Plate appearances
        'batting_avg',
        'onbase_perc',
        'slugging_perc',
        'onbase_plus_slugging',
        'pitches',  # Total pitches faced by the team (if applicable)
        'strikes_total',  # Total strikes
        'wpa_bat',
        'leverage_index_avg',
        'wpa_bat_pos',
        'wpa_bat_neg',
        're24_bat',
        'po',  # Putouts (Fielding)
        'a',   # Assists (Fielding)
        'game_id'
    ]

    # Create the target directory if needed; a bare filename has no directory
    # component and os.makedirs('') would raise.
    output_dir = os.path.dirname(file_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(file_path, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    print(f"Team batting data written to '{file_path}'")


def fetch_and_store_batting_stats(api_key, start_date, delay_seconds=1,
                                  file_path='../../data/batting-test-1.csv',
                                  max_games_per_day=1, request_timeout=30):
    """Download team batting stats per game and write them to a CSV file.

    For every date produced by ``daterange(start_date)`` the game ids are
    looked up with ``get_daily_game_ids`` and the extended summary of each
    selected game is fetched. One row per team (home and away) is collected
    via ``extract_team_batting_stats`` and all rows are written to
    ``file_path`` at the end.

    Args:
        api_key: API key used for the game-id lookup and the summary request.
        start_date: Passed to ``daterange``; each yielded value must expose
            ``year``, ``month`` and ``day``.
        delay_seconds: Pause after each game request (successful or not) to
            manage API rate limits.
        file_path: Destination CSV path; its directory is created if missing.
        max_games_per_day: Maximum number of games processed per date. The
            default of 1 keeps the original "first game only" test behaviour;
            pass ``None`` to process every game.
        request_timeout: Timeout in seconds for each HTTP request, so a
            stalled connection cannot hang the run indefinitely.

    Returns:
        None. The CSV file is written as a side effect.
    """
    team_stats = []  # Rows accumulated across all dates and games

    for current_date in daterange(start_date):
        game_ids = get_daily_game_ids(
            api_key, current_date.year, current_date.month, current_date.day
        )
        for game_id in game_ids[:max_games_per_day]:
            url = f'https://api.sportradar.com/mlb/trial/v7/en/games/{game_id}/extended_summary.json?api_key={api_key}'
            try:
                response = requests.get(url, timeout=request_timeout)
            except requests.RequestException as exc:
                # Report only the exception type: the message can embed the
                # request URL, which contains the API key.
                print(f"Failed to fetch data: {type(exc).__name__}")
                time.sleep(delay_seconds)
                continue

            if response.status_code == 200:
                data = response.json()
                game_info = data['game']
                # Named game_date so the imported datetime.date is not shadowed.
                game_date = datetime.strptime(
                    game_info['scheduled'], "%Y-%m-%dT%H:%M:%S%z"
                ).strftime("%Y%m%d")
                day_night = game_info['day_night']
                custom_game_id = f"{game_info['home']['abbr']}{game_info['away']['abbr']}{game_date}{day_night}"

                # Extract team stats for home and away
                extract_team_batting_stats(game_info, 'home', team_stats, custom_game_id)
                extract_team_batting_stats(game_info, 'away', team_stats, custom_game_id)
            else:
                print(f"Failed to fetch data: {response.status_code} {response.text}")

            # Delay after each game, whether it succeeded or failed, to manage API rate limits
            time.sleep(delay_seconds)

    _write_team_batting_csv(team_stats, file_path)

In [None]:
# Read the API key from the SPORTRADAR_API_KEY environment variable so the
# credential is never saved inside the notebook. When the variable is unset
# this falls back to an empty string, the same value the placeholder had.
api_key = os.environ.get('SPORTRADAR_API_KEY', '')
start_date = datetime(2024, 3, 1).date()
fetch_and_store_batting_stats(api_key, start_date, delay_seconds=1)