In [1]:
import xgboost as xgb
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Read the daily prediction input into a DataFrame.
data = pd.read_csv('/Users/nickdimmitt/Desktop/lumber/ncaab/daily-predict.csv')

# Columns that are converted to integer codes before being fed to the models.
categorical_columns = ['Conf_home', 'Conf_away']

# Each column gets its own encoder, fit on the values present in this file.
# The encoded values overwrite the originals in `data`, so everything derived
# from `data` afterwards carries the integer codes.
# NOTE(review): because the encoder is refit here, the codes depend on which
# values appear in this file -- confirm they match the encoding that was used
# when the models were trained.
for col in categorical_columns:
    le = LabelEncoder()
    encoded = le.fit_transform(data[col])
    data[col] = encoded

# Feature matrix: every column except the 'home' and 'away' columns.
X = data.drop(['home', 'away'], axis=1)

# Saved XGBoost models to score with. The position in this list determines the
# output column name: the N-th path produces the column 'prediction_model_N'.
model_files = [
    'models/away_1h_score_xgb_model.json',
    'models/home_1h_score_xgb_model.json',
    'models/away_2h_score_xgb_model.json',
    'models/home_2h_score_xgb_model.json',
    'models/total_xgb_model.json',
]

# The DMatrix depends only on X, so build it once and reuse it for every model
# instead of reconstructing it on each loop iteration.
dmatrix = xgb.DMatrix(X)

# Maps 'prediction_model_N' -> array of predictions (one value per row of X).
predictions = {}

for idx, model_file in enumerate(model_files):
    # Restore the trained booster from its saved model file.
    model = xgb.Booster()
    model.load_model(model_file)

    # Score every row with this model.
    preds = model.predict(dmatrix)

    # Column numbering is 1-based and follows the order of model_files.
    predictions[f'prediction_model_{idx+1}'] = preds

# One column per model, named by the keys of `predictions`.
predictions_df = pd.DataFrame.from_dict(predictions)

# Place the prediction columns to the right of the input columns; rows are
# matched on the index of the two frames.
result = pd.concat([data, predictions_df], axis='columns')

# Write the combined table to the working directory without the row index,
# then print it.
result.to_csv('predictions_with_models.csv', index=False)
print(result)


                      home                 away  Rk_home  Conf_home  \
0                 Canisius     Western Michigan      339          9   
1           William & Mary     Georgia Southern      229          6   
2               New Mexico           St. John's       53         11   
3                 Campbell                 Navy      305          6   
4                Stonehill           Lindenwood      348         12   
5                Princeton            Merrimack       91          8   
6             Saint Mary's             Nebraska       42         20   
7                  Clemson            Boise St.       35          0   
8         Jacksonville St.     Coastal Carolina      232          7   
9                  Harvard             Colorado      261          8   
10              Coppin St.             Miami FL      363         10   
11               Manhattan  Fairleigh Dickinson      308          9   
12      North Carolina A&T          The Citadel      262          6   
13    

In [2]:
# Export the combined input + predictions table to a second CSV.
# to_csv is left at its default index=True here, so the row index is written
# as the first column of this file.
daily_preds_path = "/Users/nickdimmitt/Desktop/lumber/ncaab/daily-preds-2.csv"
result.to_csv(daily_preds_path)