In [1]:
# Install any packages needed for model training
%pip install requests

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Set API Key from SportRadar and import needed libraries for data gathering
import os
import time
from datetime import datetime

import pandas as pd
import requests
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment
load_dotenv()

# SportRadar API key. Fail fast with a clear message instead of silently sending
# "api_key=None" on every request and getting empty results back.
API_KEY = os.getenv("API_KEY")
if not API_KEY:
    raise RuntimeError(
        "API_KEY is not set. Add API_KEY=<your SportRadar key> to a .env file "
        "or export it in the environment before running this notebook."
    )

# NOTE(review): SEASON_YEAR is 2024, but get_team_stats requests 2025 statistics
# and the example schedule date is in 2025 — confirm which season is intended.
SEASON_YEAR = 2024
# Root of the SportRadar MLB trial API (v8, English)
BASE_URL = "https://api.sportradar.com/mlb/trial/v8/en"

In [8]:
# Function to get the schedule for MLB games on a certain date
def get_games_on_date(date_str, timeout=10):
    """Fetch the SportRadar MLB schedule for a single calendar date.

    Args:
        date_str: Date formatted as "MM-DD-YYYY" (e.g. "07-02-2025").
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The list stored under the "games" key of the schedule response, or an
        empty list when the request fails, the status code is not 200, or the
        body is not a JSON object.

    Raises:
        ValueError: If date_str is not a valid "MM-DD-YYYY" date.
    """
    # Parse instead of splitting on "-" so malformed input fails with a clear
    # error and the URL segments are always zero-padded (YYYY/MM/DD)
    game_date = datetime.strptime(date_str, "%m-%d-%Y")
    url = f"{BASE_URL}/games/{game_date.strftime('%Y/%m/%d')}/schedule.json"
    try:
        # A timeout keeps a stalled connection from hanging the notebook forever
        resp = requests.get(url, params={"api_key": API_KEY}, timeout=timeout)
    except requests.RequestException:
        return []
    # Any non-200 response (bad key, rate limit, no data) is treated as "no games"
    if resp.status_code != 200:
        return []
    try:
        payload = resp.json()
    except ValueError:
        # Body was not valid JSON
        return []
    if not isinstance(payload, dict):
        return []
    return payload.get("games", [])

# Example: schedule for July 2, 2025 (dates are passed as MM-DD-YYYY)
get_games_on_date(date_str="07-02-2025")

[{'id': 'ad7077b7-4185-457b-8cf4-15982f7f26a0',
  'status': 'closed',
  'coverage': 'full',
  'game_number': 1,
  'day_night': 'N',
  'scheduled': '2025-07-03T00:05:00+00:00',
  'home_team': 'd99f919b-1534-4516-8e8a-9cd106c6d8cd',
  'away_team': '75729d34-bca7-4a0f-b3df-6f26c6ad3719',
  'attendance': 27636,
  'duration': '2:19',
  'double_header': False,
  'entry_mode': 'STOMP',
  'reference': '777268',
  'venue': {'name': 'Globe Life Field',
   'market': 'Texas',
   'capacity': 40000,
   'surface': 'turf',
   'address': '734 Stadium Drive',
   'city': 'Arlington',
   'state': 'TX',
   'zip': '76011',
   'country': 'USA',
   'id': 'b4372592-9dac-4387-864b-dfc94dffd645',
   'field_orientation': 'SW',
   'stadium_type': 'retractable',
   'time_zone': 'US/Central',
   'location': {'lat': '32.747300', 'lng': '-97.081820'}},
  'home': {'name': 'Rangers',
   'market': 'Texas',
   'abbr': 'TEX',
   'id': 'd99f919b-1534-4516-8e8a-9cd106c6d8cd',
   'win': 43,
   'loss': 44},
  'away': {'name': 

In [None]:
# Function to get the total runs for a particular game
def get_total_runs(game_id, timeout=10):
    """Return the combined runs scored by both teams in one game.

    Args:
        game_id: SportRadar game ID used in the boxscore URL.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        Home runs plus away runs from the boxscore, or None when the request
        fails, the status code is not 200, or the response does not contain
        the expected "game" -> "home"/"away" -> "runs" fields.
    """
    url = f"{BASE_URL}/games/{game_id}/boxscore.json"
    try:
        # A timeout keeps a stalled connection from hanging the notebook forever
        resp = requests.get(url, params={"api_key": API_KEY}, timeout=timeout)
    except requests.RequestException:
        return None
    if resp.status_code != 200:
        return None
    try:
        game = resp.json()["game"]
        return game["home"]["runs"] + game["away"]["runs"]
    except (KeyError, TypeError, ValueError):
        # Missing fields, null values, or a non-JSON body; catching only these
        # lets KeyboardInterrupt and genuine programming errors surface
        return None

# Example: total runs for a single game, looked up by its SportRadar game ID
get_total_runs(game_id="ad7077b7-4185-457b-8cf4-15982f7f26a0")

6

In [None]:
# Function to return overall team statistics such as ERA, Batting Average, OBP, etc.
def get_team_stats(team_id, season_year=2025, timeout=10):
    """Return a small set of regular-season team statistics.

    Args:
        team_id: SportRadar team ID used in the statistics URL.
        season_year: Season to query; defaults to 2025.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys "batting_avg", "batting_obp", "batting_slg",
        "era", "whip", "hr_allowed" and "fpct", or None when the request
        fails, the status code is not 200, or an expected field is missing
        or malformed.
    """
    url = f"{BASE_URL}/seasons/{season_year}/REG/teams/{team_id}/statistics.json"
    try:
        # A timeout keeps a stalled connection from hanging the notebook forever
        resp = requests.get(url, params={"api_key": API_KEY}, timeout=timeout)
    except requests.RequestException:
        return None
    if resp.status_code != 200:
        return None
    try:
        # Short aliases so the long nested paths are not repeated for every stat
        stats = resp.json()["statistics"]
        batting = stats["hitting"]["overall"]
        pitching = stats["pitching"]["overall"]
        fielding = stats["fielding"]["overall"]
        return {
            # Batting average comes back as a string, so parse it to a float
            "batting_avg": float(batting["avg"]),
            "batting_obp": batting["obp"],
            "batting_slg": batting["slg"],
            "era": pitching["era"],
            "whip": pitching["whip"],
            "hr_allowed": pitching["onbase"]["hr"],
            "fpct": fielding["fpct"],
        }
    except (KeyError, TypeError, ValueError):
        # Missing fields, null values, an unparsable average, or a non-JSON body;
        # catching only these lets KeyboardInterrupt and real bugs surface
        return None
    
# Example: season statistics for a single team, looked up by its SportRadar team ID
get_team_stats(team_id="25507be1-6a68-4267-bd82-e097d94b359b")

{'batting_avg': 0.255,
 'batting_obp': 0.33,
 'batting_slg': 0.45,
 'era': 4.703,
 'whip': 1.3358,
 'hr_allowed': 113,
 'fpct': 0.986}