In [None]:
import pandas as pd
import joblib

def load_model(model_path):
    """Deserialize a saved model artifact with joblib.

    Args:
        model_path: Location of the serialized model; handed unchanged to
            ``joblib.load``.

    Returns:
        Whatever object ``joblib.load`` reconstructs from that file.
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code,
    # so it should only ever be pointed at trusted artifacts.
    model = joblib.load(model_path)
    return model

def preprocess_data(file_path):
    """Read the inference CSV and keep only the columns used downstream.

    Args:
        file_path: CSV location, passed unchanged to ``pandas.read_csv``.

    Returns:
        DataFrame holding ``team`` followed by the feature columns listed
        below, in exactly that order. Any other column in the file is
        discarded.

    Raises:
        KeyError: Raised by pandas when the CSV lacks one of the listed
            columns.
    """
    # Column order is part of the contract: make_predictions feeds every
    # column except 'team' to the model in the order given here.
    wanted_columns = [
        'team', 'seed', 'rank', 'net_rating',
        'off_rating', 'off_rating_rank',
        'def_rating', 'def_rating_rank',
        'adj_tempo', 'adj_tempo_rank',
        'free_throws', 'free_throws_rank',
        'two_pt_fg', 'two_pt_fg_rank',
        'three_pt_fg', 'three_pt_fg_rank',
        'def_free_throws', 'def_free_throws_rank',
        'def_two_pt_fg', 'def_two_pt_fg_rank',
        'def_three_pt_fg', 'def_three_pt_fg_rank',
        'avg_hgt', 'avg_hgt_rank',
        'eff_hgt', 'eff_hgt_rank',
        'c_hgt', 'c_hgt_rank',
        'pf_hgt', 'pf_hgt_rank',
        'sf_hgt', 'sf_hgt_rank',
        'sg_hgt', 'sg_hgt_rank',
        'pg_hgt', 'pg_hgt_rank',
        'experience', 'experience_rank',
        'bench', 'bench_rank',
    ]
    raw = pd.read_csv(file_path)
    return raw[wanted_columns]

def make_predictions(model, X):
    """Score every row of ``X`` with ``model`` and return its probabilities.

    Args:
        model: Any object exposing a ``predict_proba`` method.
        X: DataFrame containing a ``team`` column alongside the model inputs.
            It is not modified; ``drop`` returns a new frame.

    Returns:
        The unmodified result of ``model.predict_proba`` on ``X`` with the
        ``team`` column removed.
    """
    # 'team' is stripped so that only the remaining columns reach the model.
    features = X.drop(columns='team')
    return model.predict_proba(features)

def normalize_probabilities(probs_list):
    """Chain per-round probabilities and rescale each round to peak at 1.

    Args:
        probs_list: Sequence of equal-length 1-D sequences. Each entry
            becomes one row of the working DataFrame (the caller passes one
            entry per round, earliest round first).

    Returns:
        DataFrame with one row per entry of ``probs_list``. Row ``k`` is the
        element-wise product of input rows ``0..k``, divided by the largest
        value in that product row, so every row's maximum is 1.
    """
    stacked = pd.DataFrame(probs_list)
    # Running product down the rows: a later row can never exceed an earlier
    # one for the same column while all inputs lie in [0, 1].
    running = stacked.cumprod()
    # Rescale row by row. A row whose maximum is 0 is not special-cased and
    # therefore divides by zero.
    row_peak = running.max(axis=1)
    return running.div(row_peak, axis=0)

if __name__ == "__main__":
    # Tournament stages, earliest first: (saved classifier, output column).
    # Order matters: normalize_probabilities multiplies each stage by every
    # stage listed before it.
    stages = [
        ("best_classification_model_round_32.pkl", "round_32_prob"),
        ("best_classification_model_sweet_16.pkl", "sweet_16_prob"),
        ("best_classification_model_elite_8.pkl", "elite_8_prob"),
        ("best_classification_model_final_4.pkl", "final_4_prob"),
        ("best_classification_model_championship.pkl", "championship_prob"),
        ("best_classification_model_champion.pkl", "champion_prob"),
    ]
    new_data_path = "../data/inference.csv"

    # Every classifier is loaded up front, before the input data is read.
    models = [load_model(model_file) for model_file, _ in stages]

    new_data = preprocess_data(new_data_path)

    # Keep column 1 of each predict_proba result (presumably the probability
    # of the positive class; confirm against each model's classes_ ordering).
    # All stages are scored before any probability column is added to
    # new_data, so every model sees the same feature columns.
    stage_probs = [make_predictions(model, new_data)[:, 1] for model in models]

    normalized_probs = normalize_probabilities(stage_probs)

    # Row i of normalized_probs belongs to stage i. Assignment aligns the
    # row's 0..n-1 labels with new_data's default integer index.
    for position, (_, column_name) in enumerate(stages):
        new_data[column_name] = normalized_probs.iloc[position]

    new_data.to_csv("predictions.csv", index=False)
    print("Normalized probability predictions for all models appended and saved to predictions.csv")

Normalized probability predictions for all models appended and saved to predictions.csv
