In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('/Users/colesprouse/Desktop/Projects/Personal Projects/sports_modeling/data/train_df.csv')

In [4]:
df.drop('Unnamed: 0',axis=1,inplace=True)

In [6]:
df.columns

Index(['point_differential', 'home_offensive_efficiency',
       'away_offensive_efficiency', 'home_defensive_efficiency',
       'away_defensive_efficiency'],
      dtype='object')

In [8]:
data = df

In [10]:
pip install lightgbm

Collecting lightgbm
  Obtaining dependency information for lightgbm from https://files.pythonhosted.org/packages/56/f3/6f77fe5fa45722582e52efc2833288fa670abfbec57eea433e83820f5b90/lightgbm-4.1.0-py3-none-macosx_10_15_x86_64.macosx_11_6_x86_64.macosx_12_5_x86_64.whl.metadata
  Downloading lightgbm-4.1.0-py3-none-macosx_10_15_x86_64.macosx_11_6_x86_64.macosx_12_5_x86_64.whl.metadata (19 kB)
Downloading lightgbm-4.1.0-py3-none-macosx_10_15_x86_64.macosx_11_6_x86_64.macosx_12_5_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: lightgbm
Successfully installed lightgbm-4.1.0
Note: you may need to restart the kernel to use updated packages.


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

# Preparing the data
X = data.drop(['point_differential'], axis=1)
y = data['point_differential']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initializing models
rf_model = RandomForestRegressor(random_state=42)
xgb_model = XGBRegressor(random_state=42)

# Training and evaluating models
models = [rf_model, xgb_model]
model_names = ['Random Forest', 'XGBoost']
r2_scores = {}

for model, name in zip(models, model_names):
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    r2_scores[name] = r2_score(y_test, y_pred)

r2_scores



from sklearn.metrics import mean_squared_error
import numpy as np

# RMSE scores for the models
rmse_scores = {}

for model, name in zip(models, model_names):
    y_pred = model.predict(X_test_scaled)
    rmse_scores[name] = np.sqrt(mean_squared_error(y_test, y_pred))

# Predicting point differentials for a few samples from the test set
sample_indices = [0, 1, 2, 3, 4]  # Sample index for prediction
sample_X_test = X_test_scaled[sample_indices]
sample_predictions = {}

for model, name in zip(models, model_names):
    sample_predictions[name] = model.predict(sample_X_test)

rmse_scores, sample_predictions



{'Random Forest': 0.9973426576180144, 'XGBoost': 0.9938437763315187}

### Read in todays today

In [15]:
nov_16 = pd.read_csv('/Users/colesprouse/Desktop/Projects/Personal Projects/sports_modeling/data/11_16Final.csv')

In [20]:
nov_16.drop('Unnamed: 0',axis=1,inplace=True)
nov_16.dropna(inplace=True)

In [25]:

def predict_point_differential(input_df, scaler, rf_model):
    # Extracting the necessary features
    features = input_df[['home_offensive_efficiency', 'away_offensive_efficiency',
                         'home_defensive_efficiency', 'away_defensive_efficiency']]
    
    # Scaling the features
    features_scaled = scaler.transform(features)
    
    # Predicting the point differential
    predicted_diff = rf_model.predict(features_scaled)
    
    # Constructing the output dataframe
    output_df = input_df[['home_team', 'away_team', 'home_point_spread']].copy()
    output_df['predicted_point_differential'] = predicted_diff
    
    return output_df


In [27]:
rdf = predict_point_differential(nov_16,scaler,rf_model)

In [39]:
def bet_suggestions(df):
    """
    Adds a 'Bet Suggestion' column to the dataframe based on the logic.

    Parameters:
    df (DataFrame): DataFrame with columns 'home_point_spread' and 'predicted_point_differential'.

    Returns:
    DataFrame: Original DataFrame with an added 'Bet Suggestion' column.
    """
    bet_suggestions = []

    for _, row in df.iterrows():
        spread_abs = abs(row['home_point_spread'])
        ppd = row['predicted_point_differential']

        if ppd > 0:
            if spread_abs < ppd:
                suggestion = f"Bet on {row['home_team']}"
            else:
                suggestion = f"Bet on {row['away_team']}"
        else:  # ppd < 0
            if spread_abs > abs(ppd):
                suggestion = f"Bet on {row['away_team']}"
            else:
                suggestion = f"Bet on {row['home_team']}"

        bet_suggestions.append(suggestion)

    df['Bet Suggestion'] = bet_suggestions
    return df

# Apply the function to your dataframe
result_df = bet_suggestions(rdf)


In [40]:
result_df

Unnamed: 0,home_team,away_team,home_point_spread,predicted_point_differential,Bet Suggestion
1,Montana,North Dakota State,-8.0,8.57,Bet on Montana
2,Minnesota,Missouri,2.0,9.43,Bet on Minnesota
3,William & Mary,Omaha,1.0,7.79,Bet on William & Mary
4,Notre Dame,Auburn,14.5,-3.29,Bet on Auburn
6,North Florida,Presbyterian,7.5,6.3,Bet on Presbyterian
7,New Mexico,UT Arlington,-14.0,-13.96,Bet on UT Arlington
8,California,Montana State,-13.5,4.18,Bet on Montana State
9,Hawaii,Niagara,-13.5,11.3,Bet on Niagara
10,Wake Forest,Utah,6.0,6.45,Bet on Wake Forest
11,Southern Illinois,Chicago State,-12.5,8.59,Bet on Chicago State


In [41]:
pip install joblib

Note: you may need to restart the kernel to use updated packages.


In [42]:
import joblib

# Save the scaler
joblib.dump(scaler, '/Users/colesprouse/Desktop/Projects/Personal Projects/sports_modeling/scaler.joblib')

# Save the Random Forest model
joblib.dump(rf_model, '/Users/colesprouse/Desktop/Projects/Personal Projects/sports_modeling/random_forest_model.joblib')


['/Users/colesprouse/Desktop/Projects/Personal Projects/sports_modeling/random_forest_model.joblib']