In [39]:
import pandas as pd

In [40]:
# Load the 2025 test-set CSV that every later cell reads from and adds columns to.
df_2025 = pd.read_csv(filepath_or_buffer='./data/2025/data_2025_test.csv')

In [41]:
# Columns passed to every model's predict(); the order is kept exactly as listed.
features = [
    # Starting-pitcher frequencies
    "home_pitcher_true_freq",
    "away_pitcher_true_freq",
    "home_pitcher_vs_team_freq",
    "away_pitcher_vs_team_freq",
    "home_pitcher_vs_team_freq_count",
    "away_pitcher_vs_team_freq_count",
    "home_pitcher_last3_freq_1st",
    "away_pitcher_last3_freq_1st",
    # Momentum columns
    "home_pitcher_momentum",
    "away_pitcher_momentum",
    "home_pitcher_vs_away_team_momentum",
    "away_pitcher_vs_home_team_momentum",
    # Scaled first-inning columns for team, umpire and stadium
    "home_team_inning1_scaled",
    "away_team_inning1_scaled",
    "umpire_inning1_scaled",
    "stadium_inning1_scaled",
]

In [42]:
import requests
def inning_run_1(game_id, timeout=10):
    """Return 1 if any run was scored in the first inning of a game, else 0.

    Requests the linescore for ``game_id`` from statsapi.mlb.com and looks at
    the first entry of its ``innings`` list.

    Args:
        game_id: Game identifier (gamePk) interpolated into the linescore URL.
        timeout: Seconds to wait for the HTTP request. Without a timeout the
            request can block indefinitely, stalling a whole ``apply`` run.

    Returns:
        int: 1 when the home or the away side has more than zero runs in the
        first inning; 0 when neither does, when the response has no innings,
        or when the request or parsing fails (the error is printed).

    Note:
        A failed lookup and a scoreless first inning both return 0. Check the
        printed messages before trusting labels built from this function.
    """
    url = f"https://statsapi.mlb.com/api/v1/game/{game_id}/linescore"
    try:
        response = requests.get(url, timeout=timeout)
        # Surface HTTP errors in the printed message instead of silently
        # treating an error body (which has no "innings") as "no runs".
        response.raise_for_status()
        data = response.json()

        inning_scores = data.get("innings", [])
        if not inning_scores:
            return 0

        first_inning = inning_scores[0]
        # A side without a "runs" entry counts as 0 rather than raising
        # KeyError and discarding the runs already recorded for the other side.
        home_runs = first_inning.get("home", {}).get("runs") or 0
        away_runs = first_inning.get("away", {}).get("runs") or 0
        return 1 if (home_runs > 0 or away_runs > 0) else 0
    except Exception as e:
        # Best effort: report the failing game and keep the batch running.
        print(f"Error con gamePk {game_id}: {e}")
        return 0


In [43]:
# Label each game by calling inning_run_1 on its game_id (one call per row).
df_2025['target'] = df_2025['game_id'].apply(inning_run_1)

In [44]:
# Show the frame to inspect it after the 'target' column has been added.
df_2025

Unnamed: 0,game_id,home_team,away_team,stadium,day_or_night,home_pitcher,away_pitcher,home_pitcher_true_freq,away_pitcher_true_freq,home_pitcher_vs_team_freq,...,away_team_momentum,home_pitcher_momentum,away_pitcher_momentum,home_pitcher_vs_away_team_momentum,away_pitcher_vs_home_team_momentum,home_plate_umpire_inning1_freq,stadium_inning1_freq,stadium_inning1_scaled,umpire_inning1_scaled,target
0,777869,Texas Rangers,Houston Astros,Globe Life Field,Día,Jack Leiter,Framber Valdez,0.428571,0.222222,-1.00,...,-0.522200,0.238095,-0.222222,-0.760295,0.006546,0.562500,0.423077,0.191763,0.778846,0
1,777873,Baltimore Orioles,Washington Nationals,Oriole Park at Camden Yards,Día,Zach Eflin,Michael Soroka,0.083333,0.125000,0.00,...,-0.137184,0.250000,-0.125000,-0.387184,-0.447760,0.722222,0.504950,0.699534,1.000000,1
2,777880,Philadelphia Phillies,Pittsburgh Pirates,Citizens Bank Park,Día,Mick Abel,Paul Skenes,0.000000,0.100000,-1.00,...,-0.345591,0.000000,-0.100000,-0.345591,-0.561538,0.470588,0.504854,0.698937,0.651584,0
3,777876,Boston Red Sox,Atlanta Braves,Fenway Park,Día,Brayan Bello,Spencer Schwellenbach,0.352941,0.076923,-1.00,...,-0.429222,0.313725,0.589744,-0.742948,-1.357473,0.600000,0.519231,0.788098,0.830769,1
4,777871,Toronto Blue Jays,Detroit Tigers,Rogers Centre,Día,José Berríos,Jackson Jobe,0.095238,0.500000,1.00,...,-0.313300,-0.095238,0.166667,-0.218061,-0.607939,0.000000,0.490196,0.608028,0.000000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,777812,Athletics,Los Angeles Angels,Sutter Health Park,Día,Luis Severino,Tyler Anderson,0.181818,0.166667,-1.00,...,-0.080720,-0.181818,0.500000,0.101098,-0.913008,0.600000,0.523810,0.795367,0.879104,1
62,777821,Detroit Tigers,Cleveland Guardians,Comerica Park,Noche,Jack Flaherty,Tanner Bibee,0.125000,0.368421,0.25,...,-0.339400,0.208333,-0.368421,-0.547733,0.078131,0.370370,0.504762,0.715830,0.000000,0
63,777814,Pittsburgh Pirates,Milwaukee Brewers,PNC Park,Noche,Mike Burrows,Aaron Civale,0.000000,0.466667,-1.00,...,-0.409756,0.000000,0.200000,-0.409756,-0.756660,0.542857,0.451923,0.495192,0.660341,1
64,777820,Washington Nationals,Atlanta Braves,Nationals Park,Noche,Trevor Williams,AJ Smith-Shawver,0.222222,0.400000,0.00,...,-0.429222,0.111111,-0.400000,-0.540333,0.019887,0.371429,0.403846,0.294439,0.004051,1


In [45]:
# Models

import os
import joblib

path_model = './model'  # Folder that holds the serialized .pkl models
models = {}

# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only point path_model at model files from a trusted source.
# os.listdir returns entries in arbitrary order; sorting keeps the order of
# `models` (and of anything derived from it) stable between runs and machines.
for file_ in sorted(os.listdir(path_model)):
    if file_.endswith('.pkl'):
        # Strip only the trailing extension; str.replace would also remove
        # any '.pkl' occurring earlier in the file name.
        name = file_[:-len('.pkl')]
        models[name] = joblib.load(os.path.join(path_model, file_))


In [46]:
# Run every loaded model on the same feature columns and keep its
# predictions keyed by model name.
result = {}

for name_model, model in models.items():
    result[name_model] = model.predict(df_2025[features])

In [47]:
# Attach each model's predictions to the frame as a 'pred_<model name>' column.
for model_name in result:
    pred = result[model_name]
    df_2025[f'pred_{model_name}'] = pred

In [55]:
# List the column labels of the frame, including the added 'pred_*' columns.
df_2025.columns

Index(['game_id', 'home_team', 'away_team', 'stadium', 'day_or_night',
       'home_pitcher', 'away_pitcher', 'home_pitcher_true_freq',
       'away_pitcher_true_freq', 'home_pitcher_vs_team_freq',
       'away_pitcher_vs_team_freq', 'home_pitcher_vs_team_freq_count',
       'away_pitcher_vs_team_freq_count', 'home_pitcher_last3_freq_1st',
       'away_pitcher_last3_freq_1st', 'home_team_inning1_last10_freq',
       'away_team_inning1_last10_freq', 'home_team_inning1_scaled',
       'away_team_inning1_scaled', 'home_team_momentum', 'away_team_momentum',
       'home_pitcher_momentum', 'away_pitcher_momentum',
       'home_pitcher_vs_away_team_momentum',
       'away_pitcher_vs_home_team_momentum', 'home_plate_umpire_inning1_freq',
       'stadium_inning1_freq', 'stadium_inning1_scaled',
       'umpire_inning1_scaled', 'target', 'pred_LogisticRegression',
       'pred_RandomForest', 'pred_Naive_Bayes', 'pred_SVM',
       'pred_Gradient _Boosting', 'pred_XGBoost'],
      dtype='object')

In [56]:
# Prediction columns to score, in the row order wanted for the metrics table.
# The space in "pred_Gradient _Boosting" is intentional: it matches the column
# label exactly.
model_pred = [
    "pred_LogisticRegression",
    "pred_RandomForest",
    "pred_Gradient _Boosting",
    "pred_SVM",
    "pred_Naive_Bayes",
    "pred_XGBoost",
]

In [57]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [58]:
# Metric label -> scoring function; each is called as score(y_true, y_pred).
scorers = {
    'accuracy': accuracy_score,
    'f1_score': f1_score,
    'precision': precision_score,
    'recall': recall_score,
}

# Score every prediction column against the 'target' column.
metrics = {}
for modelo in model_pred:
    y_true = df_2025['target']
    y_pred = df_2025[modelo]

    metrics[modelo] = {
        label: score(y_true, y_pred) for label, score in scorers.items()
    }

# Build a DataFrame for display; transposed so each row is a prediction
# column and each column is a metric.
df_metrics = pd.DataFrame(metrics).T

In [59]:
# Show the metrics table (one row per prediction column, one column per metric).
df_metrics  

Unnamed: 0,accuracy,f1_score,precision,recall
pred_LogisticRegression,0.454545,0.142857,0.333333,0.090909
pred_RandomForest,0.560606,0.472727,0.590909,0.393939
pred_Gradient _Boosting,0.530303,0.311111,0.583333,0.212121
pred_SVM,0.545455,0.482759,0.56,0.424242
pred_Naive_Bayes,0.5,0.652632,0.5,0.939394
pred_XGBoost,0.515152,0.238095,0.555556,0.151515
