In [2]:
import pandas as pd

In [4]:
# Load the per-game input table that is scored further down in the notebook.
# NOTE(review): the path is relative to the working directory and hard-codes a
# date in the file name (it looks like the 2025-05-23 slate; confirm).
df_today = pd.read_csv('data/2025/2025_05_23_game_.csv')

In [5]:
import requests
def inning_run_1(game_id, timeout=10):
    """Return 1 if either team scored in the first inning of a game, else 0.

    Fetches the linescore for ``game_id`` from the MLB Stats API and inspects
    the first entry of its ``innings`` list.

    Args:
        game_id: Game identifier (gamePk) interpolated into the request URL.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the notebook.

    Returns:
        1 when the home or away side has more than zero runs in the first
        inning; 0 when neither scored, when no inning data is available, or
        when the request/parsing fails (the error is printed, not raised).
    """
    url = f"https://statsapi.mlb.com/api/v1/game/{game_id}/linescore"
    try:
        response = requests.get(url, timeout=timeout)
        # Surface HTTP errors (404/500) instead of parsing an error payload.
        response.raise_for_status()
        data = response.json()

        inning_scores = data.get("innings", [])
        if not inning_scores:
            return 0

        first_inning = inning_scores[0]
        # A side may lack a 'runs' entry (presumably when its half-inning has
        # not been played yet; TODO confirm against the API). Treat that as 0
        # rather than letting a KeyError hide runs scored by the other side.
        home_runs = (first_inning.get('home') or {}).get('runs') or 0
        away_runs = (first_inning.get('away') or {}).get('runs') or 0
        return 1 if (home_runs > 0 or away_runs > 0) else 0
    except Exception as e:
        # Deliberate best-effort: report the failure and fall back to 0.
        print(f"Error con gamePk {game_id}: {e}")
        return 0


In [7]:
# Models

import os
import joblib

path_model = './model'  # Folder that holds the serialized .pkl models
models = {}

# NOTE(security): joblib.load unpickles its input, which can execute arbitrary
# code. Only point path_model at a folder whose files you trust.
# sorted() makes the load order deterministic; os.listdir order is arbitrary.
for file_ in sorted(os.listdir(path_model)):
    if file_.endswith('.pkl'):
        # Strip only the trailing extension; str.replace would also remove any
        # '.pkl' that appears earlier in the file name.
        name = file_[:-len('.pkl')]
        models[name] = joblib.load(os.path.join(path_model, file_))

In [9]:
# Pick the model used for scoring. Keys of `models` are the .pkl file names
# without their extension, so this key must match a file in the model folder.
# NOTE(review): the key has a space before the underscore ('Gradient _Boosting');
# confirm it matches the file on disk and is not a typo.
model =models['Gradient _Boosting']

In [10]:
# Input columns passed to the model, in the order they are selected from the
# data frame. Keep the order unchanged.
features = [
    "home_pitcher_true_freq",
    "away_pitcher_true_freq",
    "home_pitcher_vs_team_freq",
    "away_pitcher_vs_team_freq",
    "home_pitcher_vs_team_freq_count",
    "away_pitcher_vs_team_freq_count",
    "home_pitcher_last3_freq_1st",
    "away_pitcher_last3_freq_1st",
    "home_pitcher_momentum",
    "away_pitcher_momentum",
    "home_pitcher_vs_away_team_momentum",
    "away_pitcher_vs_home_team_momentum",
    "home_team_inning1_scaled",
    "away_team_inning1_scaled",
    "umpire_inning1_scaled",
    "stadium_inning1_scaled",
]

In [27]:

# Minimum second-column probability for a game to be flagged (pred = 1).
threshold = 0.85

# Score the loaded games on the selected feature columns.
X = df_today[features]
y_proba = model.predict_proba(X)
y_pred = (y_proba[:, 1] >= threshold).astype(int)

# Copy of the input frame extended with both probability columns and the
# thresholded prediction (proba_0 / proba_1 are columns 0 and 1 of y_proba).
df_result = df_today.assign(
    proba_0=y_proba[:, 0],
    proba_1=y_proba[:, 1],
    pred=y_pred,
)

# Keep only the rows whose proba_1 reaches the threshold.
df_filtrado = df_result.loc[df_result['proba_1'] >= threshold]

# Identification and prediction columns, followed by every feature column
# that is present in the filtered frame.
columnas_resultado = ['game_id', 'home_team', 'away_team', 'proba_0', 'proba_1', 'pred']
columnas_resultado = columnas_resultado + [
    col for col in features if col in df_filtrado.columns
]


In [28]:
from IPython.display import display
# Columns shown in the summary tables.
columnas_a_mostrar = [
    'home_team',
    'away_team',
    'proba_0',
    'proba_1',
    'pred',
]

# Render the filtered games, restricted to the summary columns.
display(df_filtrado.loc[:, columnas_a_mostrar])

Unnamed: 0,home_team,away_team,proba_0,proba_1,pred
0,Boston Red Sox,Baltimore Orioles,0.086059,0.913941,1
4,Washington Nationals,San Francisco Giants,0.005782,0.994218,1


In [35]:
# Show the summary columns of df_result as the cell's output.
df_result[columnas_a_mostrar]

Unnamed: 0,home_team,away_team,proba_0,proba_1,pred
0,Boston Red Sox,Baltimore Orioles,0.086059,0.913941,1
1,Cincinnati Reds,Chicago Cubs,0.458613,0.541387,0
2,Pittsburgh Pirates,Milwaukee Brewers,0.339228,0.660772,0
3,Washington Nationals,San Francisco Giants,0.005782,0.994218,1
4,Tampa Bay Rays,Toronto Blue Jays,0.39887,0.60113,0
5,Detroit Tigers,Cleveland Guardians,0.269852,0.730148,0
6,New York Mets,Los Angeles Dodgers,0.245951,0.754049,0
7,Atlanta Braves,San Diego Padres,0.737201,0.262799,0
8,Chicago White Sox,Texas Rangers,0.259989,0.740011,0
9,Houston Astros,Seattle Mariners,0.610714,0.389286,0


In [37]:
# Lower the cutoff and keep games where either probability column reaches it.
# NOTE(review): this rebinds both `threshold` and `df_result`, so the
# unfiltered frame is no longer available afterwards, and the `pred` column
# is not recomputed for the new cutoff.
threshold = 0.75
df_result = df_result.loc[
    df_result[['proba_1', 'proba_0']].ge(threshold).any(axis=1)
]
display(df_result.loc[:, columnas_a_mostrar])

Unnamed: 0,home_team,away_team,proba_0,proba_1,pred
0,Boston Red Sox,Baltimore Orioles,0.086059,0.913941,1
3,Washington Nationals,San Francisco Giants,0.005782,0.994218,1
6,New York Mets,Los Angeles Dodgers,0.245951,0.754049,0
11,St. Louis Cardinals,Arizona Diamondbacks,0.762713,0.237287,0
