In [5]:
import requests
import pandas as pd
import time
import numpy as np

In [6]:
# SEASON = "2025"
# BASE_URL = "https://statsapi.mlb.com/api/v1"

# # Step 1: Get all MLB teams
# teams_url = f"{BASE_URL}/teams?sportId=1"
# teams_data = requests.get(teams_url).json()
# teams = teams_data.get("teams", [])

# all_players = []

# # Step 2: Get roster for each team, excluding pitchers
# for team in teams:
#     team_id = team["id"]
#     team_name = team["name"]
#     roster_url = f"{BASE_URL}/teams/{team_id}/roster"
    
#     roster_data = requests.get(roster_url).json()
#     roster = roster_data.get("roster", [])
    
#     for player in roster:
#         position = player["position"]["name"]
#         if position == "Pitcher":
#             continue  # skip pitchers
        
#         all_players.append({
#             "PlayerId": player["person"]["id"],
#             "PlayerName": player["person"]["fullName"],
#             "TeamId": team_id,
#             "TeamName": team_name,
#             "Position": position
#         })
        


# # Step 3: Get hitting stats for those players
# all_player_stats = []

# for player in all_players:
#     player_id = player["PlayerId"]
#     stats_url = f"{BASE_URL}/people/{player_id}/stats"
    
#     params = {
#         "stats": "season",
#         "group": "hitting",
#         "season": SEASON,
#         "gameType": "R"
#     }
    
#     stats_data = requests.get(stats_url, params=params).json()
    
#     all_player_stats.append({
#         **player,
#         "RawStats": stats_data
#     })
    
#     time.sleep(0.05)  # polite delay

# # Step 4: Flatten the nested RawStats JSON
# flattened_list = []

# for row in all_player_stats:
#     player_info = {
#         "PlayerId": row["PlayerId"],
#         "PlayerName": row["PlayerName"],
#         "TeamName": row["TeamName"],
#         "Position": row["Position"]
#     }
    
#     raw = row["RawStats"]
    
#     if "stats" in raw and raw["stats"]:
#         splits = raw["stats"][0].get("splits", [])
#         if splits and "stat" in splits[0]:
#             stats = splits[0]["stat"]
#             combined = {**player_info, **stats}
#             flattened_list.append(combined)
#         else:

#             flattened_list.append(player_info)
#     else:
#         flattened_list.append(player_info)

# # Step 5: Convert to DataFrame
# df_flat = pd.DataFrame(flattened_list)
# df_flat.to_csv('API Output-Batters.csv', index=False)

In [7]:
SEASON = "2025"
BASE_URL = "https://statsapi.mlb.com/api/v1"
# Seconds to wait on each HTTP call; without an explicit timeout, requests
# will block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Step 1: Get all MLB teams
teams_url = f"{BASE_URL}/teams?sportId=1"
teams_response = requests.get(teams_url, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error rather than parsing an error payload and
# silently continuing with an empty team list.
teams_response.raise_for_status()
teams_data = teams_response.json()
teams = teams_data.get("teams", [])

# One dict per pitcher: PlayerId, PlayerName, TeamId, TeamName, Position.
all_players = []

# Step 2: Get roster for each team, keeping only pitchers
for team in teams:
    team_id = team["id"]
    team_name = team["name"]
    roster_url = f"{BASE_URL}/teams/{team_id}/roster"

    roster_response = requests.get(roster_url, timeout=REQUEST_TIMEOUT)
    # A failed roster call would otherwise drop that team's pitchers from the
    # output without any visible sign.
    roster_response.raise_for_status()
    roster_data = roster_response.json()
    roster = roster_data.get("roster", [])

    for player in roster:
        position = player["position"]["name"]
        if position != "Pitcher":
            continue  # skip non-pitchers

        all_players.append({
            "PlayerId": player["person"]["id"],
            "PlayerName": player["person"]["fullName"],
            "TeamId": team_id,
            "TeamName": team_name,
            "Position": position
        })
        


# Step 3: Get pitching stats for those players
all_player_stats = []

# Seconds to wait on each stats call; requests never times out by default.
STATS_TIMEOUT = 30

# The query string is the same for every player, so build it once.
params = {
    "stats": "season",
    "group": "pitching",
    "season": SEASON,
    "gameType": "R"
}

# A Session reuses the underlying connection across the per-player requests
# instead of opening a new one for every call.
with requests.Session() as session:
    for player in all_players:
        player_id = player["PlayerId"]
        stats_url = f"{BASE_URL}/people/{player_id}/stats"

        stats_response = session.get(stats_url, params=params, timeout=STATS_TIMEOUT)
        # Surface HTTP errors here: otherwise an error payload is stored as
        # RawStats and the player later looks like they simply have no stats.
        stats_response.raise_for_status()
        stats_data = stats_response.json()

        # Keep the roster fields and attach the unparsed API response.
        all_player_stats.append({
            **player,
            "RawStats": stats_data
        })

        time.sleep(0.05)  # polite delay

# Step 4: Flatten the nested RawStats JSON
# Every output row starts with these identifying fields (TeamId is not carried over).
IDENTITY_KEYS = ("PlayerId", "PlayerName", "TeamName", "Position")

flattened_list = []

for entry in all_player_stats:
    identity = {key: entry[key] for key in IDENTITY_KEYS}
    payload = entry["RawStats"]

    # Default to no stat columns; only the first split of the first stats
    # group is used when it is present and carries a "stat" mapping.
    stat_line = {}
    if "stats" in payload and payload["stats"]:
        split_rows = payload["stats"][0].get("splits", [])
        if split_rows and "stat" in split_rows[0]:
            stat_line = split_rows[0]["stat"]

    # Players without stats still get a row containing just the identity fields.
    flattened_list.append({**identity, **stat_line})

# Step 5: Build the DataFrame from the flattened rows and write it to CSV
# (index=False keeps the row index out of the file).
df_flat = pd.DataFrame(data=flattened_list)
df_flat.to_csv(path_or_buf='API Output-Pitchers.csv', index=False)