In [2]:

# ids because mlb can't design a good api
import json
import os
import csv

# Base folder for the input JSON files; the 2022 output folders are derived from it.
userDir = 'C:/Users/jeb'
gameDir = f'{userDir}/2022/games'
boxscoreDir = f'{userDir}/2022/boxscores'
pitchingDir = f'{userDir}/2022/pitching'
battingDir = f'{userDir}/2022/batting'



# Team metadata: keep only the value stored under the top-level 'teams' key.
with open(f'{userDir}/teams.json') as f:
    teams = json.load(f)['teams']

# Season metadata: keep only the value stored under the top-level 'seasons' key.
with open(f'{userDir}/seasons.json') as f:
    seasons = json.load(f)['seasons']

def writeDataToFile(data, fileName):
    """Serialize ``data`` as JSON to ``fileName + '.json'``.

    Args:
        data: Any object that ``json.dump`` can serialize.
        fileName: Target path WITHOUT extension; '.json' is appended here.

    The parent directory of ``fileName`` is created when it does not exist.
    """
    directory = os.path.dirname(fileName)
    # dirname is '' for a bare file name and os.makedirs('') raises, so only
    # create a directory when there is one. exist_ok=True replaces the separate
    # os.path.exists check, which could race with another process.
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(fileName + '.json', "w") as json_file:
        # Write the data to the file in JSON format
        json.dump(data, json_file)
    # # uncomment to write as csv
    # with open(fileName + '.csv' , "w", newline="") as csv_file:
    #     # Create a CSV writer object
    #     writer = csv.writer(csv_file)
    #     # Write the header row
    #     writer.writerow(data[0].keys())
    #     # Write the data rows
    #     for row in data:
    #         writer.writerow(row.values())

def writeGameData(data, team):
    """Store ``data`` under ``gameDir``, using the team's 'teamCode' as the file name."""
    target = '/'.join((gameDir, team['teamCode']))
    writeDataToFile(data, target)

def writeBoxscore(data, teamAbbreviation, gameId):
    """Store ``data`` as ``<boxscoreDir>/<abbr>/<abbr>-<gameId>`` via writeDataToFile.

    ``gameId`` must already be a string; it is concatenated into the file name.
    """
    baseName = teamAbbreviation + '-' + gameId
    target = '/'.join((boxscoreDir, teamAbbreviation, baseName))
    writeDataToFile(data, target)

def removeDuplicates(list):
    """Return a new list with the first occurrence of each item, in original order.

    Items are compared with ``==`` (membership test), so unhashable items
    such as dicts are supported.
    """
    unique = []
    for candidate in list:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique

# Gather the 'league', 'division' and 'sport' entry of every team (duplicates included).
leagues, divisions, sports = [], [], []
for team in teams:
    leagues += [team['league']]
    divisions += [team['division']]
    sports += [team['sport']]

# Reduce each list to its distinct entries, keeping first-seen order.
leagues, divisions, sports = (
    removeDuplicates(leagues),
    removeDuplicates(divisions),
    removeDuplicates(sports),
)

def getTeamById(id):
    """Return the first entry of ``teams`` whose 'id' equals ``id``, or None."""
    return next((entry for entry in teams if entry.get('id') == id), None)

def getTeamByCode(teamCode):
    """Return the first entry of ``teams`` whose 'teamCode' equals ``teamCode``, or None."""
    return next((entry for entry in teams if entry.get('teamCode') == teamCode), None)

def getSeasonById(seasons, season_id):
    """Return the first season whose 'seasonId' equals ``season_id``, or None.

    Every inspected entry must have a 'seasonId' key (a missing key raises KeyError).
    """
    matches = (entry for entry in seasons if entry['seasonId'] == season_id)
    return next(matches, None)

# Look up the season record whose seasonId is the string '2022'.
# getSeasonById returns None when no entry matches, so this may be None.
mostRecentSeason = getSeasonById(seasons, '2022')
print('done loading utils')

done loading utils


In [4]:
# import requests
import statsapi

#ONE TIME POPULATION

### gets all seasons
# for division in divisions:
#     for league in leagues:
#         params = {
#             'all': True,
#             'sportId': 1,
#             'divisionId': division['id'],
#             'leagueId': league['id']
#         }
#         print(statsapi.get('seasons', params, force=False))

### gets all games in a season
def getSeasonGameData(season):
    """Fetch and store the schedule of every team for one season.

    For each entry in ``teams``, ``statsapi.schedule`` is called with the
    season's 'seasonStartDate' / 'seasonEndDate' and the team's 'id'; the
    result is passed to ``writeGameData`` together with the team.
    """
    seasonStart, seasonEnd = season['seasonStartDate'], season['seasonEndDate']
    for club in teams:
        schedule = statsapi.schedule(start_date=seasonStart, end_date=seasonEnd, team=club['id'])
        writeGameData(schedule, club)
# getSeasonGameData(mostRecentSeason)

# NOTE: after running this you need to manually rename the output folders, because the
# team codes used for the folder names differ from the abbreviations inside the game objects.
# For example, this creates the folder LAN, which is actually LAD (LA Dodgers).
### Takes the per-team games JSON files written above and fetches the boxscore of every game for each team.
def readGameData():
    """Fetch and store the boxscore of every game listed in ``gameDir``.

    Each ``*.json`` file in ``gameDir`` is read as one team's game list; the
    file name without its extension is used as the team abbreviation for the
    output path. For every entry, ``statsapi.boxscore_data`` is called with
    the entry's 'game_id' (as a string) and the result is handed to
    ``writeBoxscore``.
    """
    # sorted() gives a stable processing order across platforms.
    for file in sorted(os.listdir(gameDir)):
        currentTeamAbbr, extension = os.path.splitext(file)
        # Skip anything that is not a JSON file (e.g. the optional CSV exports),
        # because json.load would fail on it.
        if extension.lower() != '.json':
            continue
        print("Starting Reading Games for " + currentTeamAbbr)
        filePath = os.path.join(gameDir, file)
        # Read the whole list first so the file is closed before the
        # per-game statsapi calls start.
        with open(filePath, 'r') as jsonFile:
            games = json.load(jsonFile)
        for game in games:
            currentGameId = str(game['game_id'])
            data = statsapi.boxscore_data(currentGameId, timecode=None)
            writeBoxscore(data, currentTeamAbbr, currentGameId)
        print("Finished Reading Games for " + currentTeamAbbr)
        
#readGameData()



Starting Reading Games for ana
Finished Reading Games for ana
Starting Reading Games for ari
Finished Reading Games for ari
Starting Reading Games for atl


KeyboardInterrupt: 

In [26]:
import os
import json

def load_json_data(file_path):
    """Open ``file_path`` and return its parsed JSON content."""
    with open(file_path) as handle:
        return json.load(handle)


def create_team_object(team_info, team_stats):
    """Build the per-team summary stored inside a game object.

    Args:
        team_info: Mapping with at least 'id' and 'abbreviation'.
        team_stats: Mapping with at least 'batting' and 'pitching'.

    Returns:
        dict with the keys 'id', 'abbreviation', 'batting' and 'pitching'.
    """
    return {
        'id': team_info['id'],
        'abbreviation': team_info['abbreviation'],
        'batting': team_stats['batting'],
        'pitching': team_stats['pitching'],
    }


def create_game_object(folder_name, json_data):
    """Summarize one boxscore from the point of view of the folder's team.

    Args:
        folder_name: Name of the team folder the boxscore was read from; it is
            compared case-insensitively with the teams' 'abbreviation'.
        json_data: Boxscore mapping providing ``teamInfo[side]`` and
            ``[side]['teamStats']`` for the sides 'away' and 'home'.

    Returns:
        dict with 'awayTeam', 'homeTeam' (see create_team_object) and
        'outcome', which is 'win' or 'loss' for the folder's team.

    Raises:
        ValueError: if ``folder_name`` matches neither team's abbreviation,
            since no meaningful outcome can be derived in that case.
    """
    away_team_info = json_data['teamInfo']['away']
    away_team_stats = json_data['away']['teamStats']
    home_team_info = json_data['teamInfo']['home']
    home_team_stats = json_data['home']['teamStats']

    game_object = {
        'awayTeam': create_team_object(away_team_info, away_team_stats),
        'homeTeam': create_team_object(home_team_info, home_team_stats),
    }

    team_name = folder_name.lower()
    away_runs = away_team_stats['batting']['runs']
    home_runs = home_team_stats['batting']['runs']

    # Pick the run totals from the perspective of the folder's team. Only the
    # away side was handled before, so every home loss was reported as a win.
    if away_team_info['abbreviation'].lower() == team_name:
        team_runs, opponent_runs = away_runs, home_runs
    elif home_team_info['abbreviation'].lower() == team_name:
        team_runs, opponent_runs = home_runs, away_runs
    else:
        raise ValueError(
            f"Folder '{folder_name}' matches neither team in the boxscore "
            f"({away_team_info['abbreviation']} at {home_team_info['abbreviation']}); "
            "rename the folder to the team's boxscore abbreviation"
        )

    # Equal run totals are still reported as 'win', as before.
    game_object['outcome'] = 'loss' if team_runs < opponent_runs else 'win'

    return game_object


# Root folder that is expected to hold one sub-folder of boxscore JSON files per team
directory_path = r'C:\Users\jeb\2022\boxscores'

# NOTE(review): collect_unique_folder_names is not defined in the cells shown here;
# presumably it returns the team folder names under directory_path — confirm it is
# defined before this cell runs on a fresh kernel.
unique_folders = collect_unique_folder_names(directory_path)

# Maps folder name -> list of game objects built from that folder's files
games_data = {}

for folder_name in unique_folders:
    games_data[folder_name] = []
    folder_path = os.path.join(directory_path, folder_name)

    for file_name in os.listdir(folder_path):
        # Anything that is not a .json file is ignored
        if not file_name.endswith('.json'):
            continue

        file_path = os.path.join(folder_path, file_name)
        json_data = load_json_data(file_path)
        game_object = create_game_object(folder_name, json_data)
        games_data[folder_name].append(game_object)

# Print a one-line summary per game, grouped by folder
for folder_name, game_data in games_data.items():
    print(f"Folder: {folder_name}")
    for game_index, game in enumerate(game_data, start=1):
        outcome = game['outcome']
        away_team_abbreviation = game['awayTeam']['abbreviation']
        away_team_runs = game['awayTeam']['batting']['runs']
        home_team_abbreviation = game['homeTeam']['abbreviation']
        home_team_runs = game['homeTeam']['batting']['runs']

        print(f"Game {game_index}: Outcome: {outcome}, Away Team: {away_team_abbreviation} ({away_team_runs} runs), Home Team: {home_team_abbreviation} ({home_team_runs} runs)")


Folder: tb
Game 1: Outcome: loss, Away Team: TB (0 runs), Home Team: BAL (3 runs)
Game 2: Outcome: win, Away Team: TB (6 runs), Home Team: BAL (4 runs)
Game 3: Outcome: loss, Away Team: TB (3 runs), Home Team: BAL (5 runs)
Game 4: Outcome: loss, Away Team: TB (1 runs), Home Team: BAL (5 runs)
Game 5: Outcome: loss, Away Team: TB (1 runs), Home Team: BAL (2 runs)
Game 6: Outcome: loss, Away Team: TB (0 runs), Home Team: BAL (1 runs)
Game 7: Outcome: win, Away Team: TB (7 runs), Home Team: BAL (6 runs)
Game 8: Outcome: loss, Away Team: TB (3 runs), Home Team: MIL (5 runs)
Game 9: Outcome: loss, Away Team: TB (3 runs), Home Team: MIL (4 runs)
Game 10: Outcome: loss, Away Team: TB (4 runs), Home Team: NYY (10 runs)
Game 11: Outcome: win, Away Team: TB (4 runs), Home Team: NYY (2 runs)
Game 12: Outcome: win, Away Team: TB (3 runs), Home Team: NYY (1 runs)
Game 13: Outcome: loss, Away Team: TB (7 runs), Home Team: NYY (8 runs)
Game 14: Outcome: loss, Away Team: TB (6 runs), Home Team: BAL (7