In [None]:
import requests
from datetime import timedelta, date, datetime
import json
import pandas as pd
import os
import csv
import sys
import time

In [None]:
sys.path.append('../../code')  # Make the project's 'code' directory importable; this relative path is resolved against the current working directory, so adjust it if the notebook is run from elsewhere
from scrape_new import daterange, get_daily_game_ids

In [None]:
def extract_pitchers(team_data, team_type, pitchers, custom_game_id):
    """Append one pitching record per pitcher found in ``team_data`` to ``pitchers``.

    A player contributes a record only when their
    ``statistics -> pitching -> overall`` section is present. Any individual
    statistic (or whole sub-section such as ``onbase``/``runs``/``outs``) that
    is missing from the payload is reported as ``'N/A'`` instead of raising,
    so one sparse pitching line cannot abort the whole extraction.

    Args:
        team_data: Dict for one team of a game summary; its ``'players'`` entry
            is iterated. A missing or ``None`` ``'players'`` yields no records.
        team_type: Label stored in the ``'home_away'`` column (the callers in
            this notebook pass ``'home'`` or ``'away'``).
        pitchers: List that the records are appended to (mutated in place).
        custom_game_id: Identifier stored in the ``'game_id'`` column.

    Returns:
        The same ``pitchers`` list that was passed in.
    """

    def _section(parent, key):
        """Return ``parent[key]`` when it is a dict, otherwise an empty dict."""
        value = parent.get(key)
        return value if isinstance(value, dict) else {}

    for player in team_data.get('players') or []:
        pitching_stats = _section(_section(player, 'statistics'), 'pitching')

        overall = pitching_stats.get('overall')
        if not isinstance(overall, dict):
            # No game-level pitching line: this player did not pitch (or the
            # payload is malformed), so there is nothing to record.
            continue

        # Resolve every nested section once; each falls back to {} so the
        # lookups below degrade to their defaults rather than raising KeyError.
        onbase = _section(overall, 'onbase')
        runs = _section(overall, 'runs')
        outs = _section(overall, 'outs')
        pitch_counts = _section(overall, 'pitches')
        outcome = _section(overall, 'outcome')
        in_play = _section(overall, 'in_play')

        pitchers.append({
            'home_away': team_type,
            'pitcher_name': f"{player.get('preferred_name', '')} {player.get('last_name', '')}",
            'ip': overall.get('ip_2', '0'),  # Innings pitched
            'h': onbase.get('h', 'N/A'),  # Hits
            'r': runs.get('total', 'N/A'),  # Runs
            'er': runs.get('earned', 'N/A'),  # Earned runs
            'bb': onbase.get('bb', 'N/A'),  # Walks
            'so': outs.get('ktotal', 'N/A'),  # Strikeouts
            'hr': onbase.get('hr', 'N/A'),  # Home runs
            # Season-to-date ERA; the default stays the string '0' as before.
            'earned_run_avg': _section(pitching_stats, 'seasontd').get('era', '0'),
            'batters_faced': overall.get('bf', 'N/A'),  # Batters faced
            'pitches': pitch_counts.get('count', 'N/A'),  # Total pitches
            'strikes_total': pitch_counts.get('ktotal', 'N/A'),  # Total strikes
            'strikes_contact': 'N/A',  # Not extracted from the payload
            'strikes_swinging': outcome.get('kswing', 'N/A'),  # Swinging strikes
            'strikes_looking': outcome.get('klook', 'N/A'),  # Called strikes
            'inplay_gb_total': in_play.get('groundball', 'N/A'),  # Ground balls
            'inplay_fb_total': in_play.get('flyball', 'N/A'),  # Fly balls
            'inplay_ld': in_play.get('linedrive', 'N/A'),  # Line drives
            'inplay_unk': 'N/A',  # Not extracted from the payload
            'game_score': 'N/A',  # Not extracted from the payload
            'inherited_runners': runs.get('ir', 'N/A'),
            'inherited_score': runs.get('ira', 'N/A'),
            'wpa_def': 'N/A',  # Not extracted from the payload
            'leverage_index_avg': 'N/A',  # Not extracted from the payload
            're24_def': 'N/A',  # Not extracted from the payload
            'game_id': custom_game_id,
        })

    return pitchers

In [None]:
def _fetch_game_summary(api_key, game_id, request_timeout):
    """Download one game's extended summary; return the decoded JSON, or None on any failure."""
    url = f'https://api.sportradar.com/mlb/trial/v7/en/games/{game_id}/extended_summary.json?api_key={api_key}'
    try:
        response = requests.get(url, timeout=request_timeout)
    except requests.RequestException as exc:
        # Only the exception type is printed: requests' error messages usually
        # embed the full URL, which would leak the API key into notebook output.
        print(f"Failed to fetch data for game {game_id}: {type(exc).__name__}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code} {response.text}")
        return None

    try:
        return response.json()
    except ValueError:
        print(f"Failed to decode JSON for game {game_id}")
        return None


def _build_custom_game_id(game_info):
    """Compose '<home abbr><away abbr><YYYYMMDD><day_night>' from a game-summary dict."""
    home_abbr = (game_info.get('home') or {}).get('abbr', 'unknown')
    away_abbr = (game_info.get('away') or {}).get('abbr', 'unknown')
    # 'scheduled' is parsed as an ISO-style timestamp with a UTC offset.
    game_date = datetime.strptime(game_info['scheduled'], "%Y-%m-%dT%H:%M:%S%z").strftime("%Y%m%d")
    return f"{home_abbr}{away_abbr}{game_date}{game_info['day_night']}"


def _write_pitchers_csv(pitchers, file_path):
    """Write the pitcher records to ``file_path`` as CSV, creating the parent directory if needed."""
    fieldnames = [
        'home_away',
        'pitcher_name',
        'ip',
        'h',
        'r',
        'er',
        'bb',
        'so',
        'hr',
        'earned_run_avg',
        'batters_faced',
        'pitches',
        'strikes_total',
        'strikes_contact',
        'strikes_swinging',
        'strikes_looking',
        'inplay_gb_total',
        'inplay_fb_total',
        'inplay_ld',
        'inplay_unk',
        'game_score',
        'inherited_runners',
        'inherited_score',
        'wpa_def',
        'leverage_index_avg',
        're24_def',
        'game_id'
    ]

    # os.makedirs('') raises, so only create a directory when the path has one.
    directory = os.path.dirname(file_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Explicit UTF-8 so accented player names are written the same way on every platform.
    with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(pitchers)


def fetch_and_store_pitchers(api_key, start_date, delay_seconds=1,
                             file_path='../../data/pitchers-test-1.csv',
                             request_timeout=30):
    """Collect per-game pitching lines from the Sportradar MLB API and save them as CSV.

    For every date yielded by ``daterange(start_date)`` the game ids returned
    by ``get_daily_game_ids`` are fetched one by one; the home and away
    pitchers of each game are extracted with ``extract_pitchers`` and all
    records are written to ``file_path`` at the end.

    A game whose request fails, returns a non-200 status, or has an unexpected
    payload is reported with ``print`` and skipped, so a single bad game does
    not discard the rows already collected.

    Args:
        api_key: Sportradar API key, sent as the ``api_key`` query parameter.
        start_date: First date to scrape; passed straight to ``daterange``.
        delay_seconds: Pause after every game request (successful or not) to
            stay under the API rate limit.
        file_path: Destination CSV path. Defaults to the location that was
            previously hard-coded.
        request_timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. The result is the CSV file plus progress messages on stdout.
    """
    pitchers = []

    for current_date in daterange(start_date):
        game_ids = get_daily_game_ids(api_key, current_date.year, current_date.month, current_date.day)

        for game_id in game_ids:
            data = _fetch_game_summary(api_key, game_id, request_timeout)

            if data is not None:
                try:
                    game_info = data['game']
                    custom_game_id = _build_custom_game_id(game_info)
                except (KeyError, TypeError, ValueError) as exc:
                    print(f"Skipping game {game_id}: unexpected summary format ({exc!r})")
                else:
                    # A missing team section becomes {} so extraction yields no rows instead of raising.
                    extract_pitchers(game_info.get('home') or {}, 'home', pitchers, custom_game_id)
                    extract_pitchers(game_info.get('away') or {}, 'away', pitchers, custom_game_id)

            # Delay after every request, whatever its outcome, to manage API rate limits.
            time.sleep(delay_seconds)

    _write_pitchers_csv(pitchers, file_path)
    print(f"Pitching data written to '{file_path}'")

In [None]:
# Read the Sportradar API key from the SPORTRADAR_API_KEY environment variable so the
# secret never has to be pasted into (and saved with) the notebook. When the variable
# is unset this falls back to the empty-string placeholder.
api_key = os.environ.get('SPORTRADAR_API_KEY', '')
start_date = datetime(2024, 3, 1).date()  # First date to scrape
fetch_and_store_pitchers(api_key, start_date, delay_seconds=1)