In [7]:
import warnings 
import pandas as pd
import numpy as np
import nfl_data_py as nfl
import datetime as dt
import copy
import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_columns', None)
# Suppress FutureWarnings
warnings.filterwarnings("ignore", category=FutureWarning)


# Background/Ideas

- Features will be based off seasonal,weekly,career, and last 5 based values
- Idea is that certain players which can be differentiated by career based values paired with weekly performance and or seasonal (team strength proxy) can be paired to build something relatively predictive.
- Interactivity can be dependent on clicking and choosing assortment of players and identifying/projecting current projections.
- Data seems to get updated weekly so these predictions would change over time as well.

# Data

This section focuses on pulling the data and prepping/aggregating the dependent variable. (Fantasy Points)

In [8]:
roster_data = nfl.import_seasonal_rosters([2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,2005,2004,2003,2002,2001,2000,1999])
pbp_df = pd.DataFrame(nfl.import_pbp_data([2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,2005,2004,2003,2002,2001,2000,1999]))
weekly_df = pd.DataFrame(nfl.import_weekly_data([2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,2005,2004,2003,2002,2001,2000,1999]))
# injuries_df = pd.DataFrame(nfl.import_injuries([2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,2005,2004,2003,2002,2001,2000]))
schedules_df = pd.DataFrame(nfl.import_schedules([2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,2005,2004,2003,2002,2001,2000,1999]))

2024 done.
2023 done.
2022 done.
2021 done.
2020 done.
2019 done.
2018 done.
2017 done.
2016 done.
2015 done.
2014 done.
2013 done.
2012 done.
2011 done.
2010 done.
2009 done.
2008 done.
2007 done.
2006 done.
2005 done.
2004 done.
2003 done.
2002 done.
2001 done.
2000 done.
1999 done.
Downcasting floats.
Downcasting floats.


# Base Data Transformations

- Getting General RB and WR stats and creating complete dataframe.

In [171]:
## Basic PBP Passing Stats

def get_opposing_team(df):
    if df['home_team'] == df['team']:
        val = df['away_team']
    elif df['away_team'] == df['team']:
        val = df['home_team']
    else:
        val = None

    return val




# team = roster_data[roster_data['depth_chart_position'].isin(['RB','WR'])][['season','player_id','team','depth_chart_position']]

team = roster_data[['season','player_id','team','depth_chart_position']]

receiver_rusher_stats =  pbp_df[(pbp_df['receiver_player_id'].notnull()) | (pbp_df['rusher_player_id'].notnull())]
                         

receiver_rusher_stats['two_points'] = np.where(receiver_rusher_stats['two_point_conv_result'] == 'success',1,0)
                         
receiver_rusher_stats.rename(columns = {'complete_pass':'reception'},inplace = True)



receiver_stats= receiver_rusher_stats.groupby(['game_id', 'game_date', 'week','div_game','posteam','defteam', 'home_team', 'away_team', 'weather', 'stadium',  'spread_line', 'total_line', 'roof', 'surface', 'temp', 'wind', 'home_coach', 'away_coach', 'receiver_player_id', 'receiver_player_name','season']).agg({
    'passing_yards': 'sum',
    'air_yards': 'sum',
    'pass_touchdown': 'sum', 
    'pass_attempt': 'sum',
    'reception': 'sum',
    'interception': 'sum', #the passing stats are duplicated for receivers
    'rush_attempt': 'sum',
    'rushing_yards': 'sum',# Sum passing yards
    'rush_touchdown': 'sum',
    'lateral_rush': 'sum',
    'receiving_yards': 'sum',
    'yards_after_catch':'sum',
    'touchdown':'sum',
    'fumble': 'sum',
    'two_points': 'sum'
}).reset_index()


rushing_stats = receiver_rusher_stats.groupby(['game_id', 'game_date', 'week', 'div_game','posteam','defteam', 'home_team', 'away_team', 'weather', 'stadium',  'spread_line', 'total_line', 'roof', 'surface', 'temp', 'wind', 'home_coach', 'away_coach', 'rusher_player_id', 'rusher_player_name','season']).agg({
    'passing_yards': 'sum',
    'air_yards': 'sum',
    'pass_touchdown': 'sum', 
    'pass_attempt': 'sum',
    'reception': 'sum',
    'interception': 'sum',
    'rush_attempt': 'sum',
    'rushing_yards': 'sum',# Sum passing yards
    'rush_touchdown': 'sum',
    'lateral_rush': 'sum',
    'receiving_yards': 'sum',
    'yards_after_catch': 'sum',
    'touchdown':'sum',
    'fumble': 'sum',
    'two_points': 'sum'
}).reset_index()


## Grabbing seasonal info


team['team'] = team['team'].replace({'OAK':'LV', 'STL':'LA', 'SD':'LAC','HST':'HOU', 'BLT':'BAL', 'CLV':'CLE','SL':'LA','ARZ':'ARI'})


# team.rename(columns = {'player_id':'passer_player_id'},inplace = True)

## Standardizing Columns
rushing_stats.rename(columns = {'rusher_player_id':'player_id'}, inplace = True)

receiver_stats.rename(columns = {'receiver_player_id':'player_id'}, inplace = True)

rushing_stats.rename(columns = {'rusher_player_name':'player_name'}, inplace = True)

receiver_stats.rename(columns = {'receiver_player_name':'player_name'}, inplace = True)


rusher_receiver_df = pd.concat([receiver_stats,rushing_stats])




rusher_receiver_df = rusher_receiver_df.merge(team, on = ['player_id','season'], how = 'inner')


## Aggregate average score to opposition 

rusher_receiver_df['opponent_team'] = rusher_receiver_df.apply(get_opposing_team,axis = 1)

print('Number Missing Opponent:' + str(rusher_receiver_df[rusher_receiver_df['opponent_team'].isna()].shape[0]))
rusher_receiver_df = rusher_receiver_df[~rusher_receiver_df['opponent_team'].isna()]



game_score_info = schedules_df[['season','home_score','away_score','game_id']].copy()




rusher_receiver_df = rusher_receiver_df.merge(game_score_info, on = ['game_id','season'], how = 'left')



rusher_receiver_df = rusher_receiver_df.groupby(['game_id', 'game_date', 'week', 'div_game', 'posteam','defteam','home_team', 'away_team', 'weather', 'stadium',  'spread_line', 'total_line', 'roof', 'surface', 'temp', 'wind', 'home_coach', 'away_coach', 'player_id', 'player_name','season','home_score','away_score','team','depth_chart_position']).agg({
    'passing_yards': 'sum',
    'air_yards': 'sum',
    'pass_touchdown': 'sum', 
    'pass_attempt': 'sum',
    'reception': 'sum',
    'interception': 'sum',
    'rush_attempt': 'sum',
    'rushing_yards': 'sum',# Sum passing yards
    'rush_touchdown': 'sum',
    'lateral_rush': 'sum',
    'receiving_yards': 'sum',
    'yards_after_catch': 'sum',
    'touchdown':'sum',
    'fumble': 'sum',
    'two_points': 'sum'
}).reset_index()


#Checking the passing stats dataframe
rusher_receiver_df.head(2)

  receiver_rusher_stats['two_points'] = np.where(receiver_rusher_stats['two_point_conv_result'] == 'success',1,0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  receiver_rusher_stats['two_points'] = np.where(receiver_rusher_stats['two_point_conv_result'] == 'success',1,0)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  receiver_rusher_stats.rename(columns = {'complete_pass':'reception'},inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guid

Number Missing Opponent:1972


Unnamed: 0,game_id,game_date,week,div_game,posteam,defteam,home_team,away_team,weather,stadium,spread_line,total_line,roof,surface,temp,wind,home_coach,away_coach,player_id,player_name,season,home_score,away_score,team,depth_chart_position,passing_yards,air_yards,pass_touchdown,pass_attempt,reception,interception,rush_attempt,rushing_yards,rush_touchdown,lateral_rush,receiving_yards,yards_after_catch,touchdown,fumble,two_points
0,2001_01_ATL_SF,2001-09-09,1,1,ATL,SF,SF,ATL,"partly cloudy Temp: 68° F, Humidity: 63%, Wind...",3COM Park,3.5,46.0,outdoors,grass,68.0,12.0,Steve Mariucci,Dan Reeves,00-0000316,J.Anderson,2001,16.0,13.0,ATL,RB,8.0,0.0,0.0,2.0,1.0,0.0,26.0,86.0,1.0,0.0,8.0,0.0,1.0,0.0,0
1,2001_01_ATL_SF,2001-09-09,1,1,ATL,SF,SF,ATL,"partly cloudy Temp: 68° F, Humidity: 63%, Wind...",3COM Park,3.5,46.0,outdoors,grass,68.0,12.0,Steve Mariucci,Dan Reeves,00-0002876,C.Chandler,2001,16.0,13.0,ATL,QB,0.0,0.0,0.0,0.0,0.0,0.0,3.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0


# Creating Fantasy Points Column


Offensive Players:

- Passing Yards: 1 point per 25 yards
- Passing Touchdowns: 4 points
- Passing Interceptions: -2 points
- Rushing Yards: 1 point per 10 yards
- Rushing Touchdowns: 6 points
- Receptions: 1 points (only if using PPR scoring)
- Receiving Yards: 1 point per 10 yards
- Receiving Touchdowns: 6 points
- 2-Point Conversions: 2 points
- Fumbles Lost: -2 points
- Fumble Recovered for a Touchdown: 6 points

In [172]:
rusher_receiver_df['fantasy_points'] = ((rusher_receiver_df['passing_yards']/25 )
                                         + (rusher_receiver_df['pass_touchdown'] * 4) + 
                                         (rusher_receiver_df['interception'] * -2) +
                                         (rusher_receiver_df['reception'] * 1) +
                                         (rusher_receiver_df['touchdown'] * 6) +
                                         (rusher_receiver_df['receiving_yards'] * .1) +
                                         (rusher_receiver_df['fumble'] * -2) +
                                         (rusher_receiver_df['two_points'] * 2))

In [81]:
rusher_receiver_df.head(5)['fantasy_points']

0     8.12
1     1.14
2    13.24
3     6.64
4     7.36
Name: fantasy_points, dtype: float64

# Last 5

In [173]:

df_rusher_receiver_game_level = rusher_receiver_df.groupby(['game_id', 'game_date', 'week', 'season', 'posteam', 'defteam', 'player_name', 'player_id']).agg({
    # Game level
    'home_team': 'first',
    'away_team': 'first',

    # Play level
    'fantasy_points': 'sum',
    'passing_yards': 'sum',
    'air_yards': 'sum',
    'pass_touchdown': 'sum', 
    'pass_attempt': 'sum',
    'reception': 'sum',
    'interception': 'sum',
    'rush_attempt': 'sum',
    'rushing_yards': 'sum',# Sum passing yards
    'rush_touchdown': 'sum',
    'lateral_rush': 'sum',
    'receiving_yards': 'sum',
    'yards_after_catch': 'sum',
    'touchdown':'sum',
    'fumble': 'sum',
    'two_points': 'sum'

})

df_rusher_receiver_game_level["home"] = df_rusher_receiver_game_level["home_team"] == df_rusher_receiver_game_level.index.get_level_values("posteam")
df_rusher_receiver_game_level.drop(columns=['home_team', 'away_team'], inplace=True)

In [174]:
def calc_agg_stats(group, fields, career=True):
    # Create a copy to avoid modifying the original
    # df = pd.DataFrame({'game_date': group['game_date']}, index=group.index)
    df = pd.DataFrame(index=group.index)
    
    # Sort chronologically
    group_sorted = group.sort_values('game_date')

    # Calculate the number of unique games for career, season, and prior season
    if career:
        df['n_games_career'] = range(len(group_sorted))

    df['n_games_season'] = group_sorted.groupby(
        group_sorted.index.get_level_values('season')
    ).cumcount()

    # df['n_games_prior_season'] = group_sorted.groupby(
    #     group_sorted.index.get_level_values('season')
    # ).transform('size').shift()



    # Calculate aggregate stats
    for field in fields:
        if career:
            # Career stats
            df[f'{field}_mean_career'] = group_sorted[field].transform(lambda x: x.expanding().mean().shift())
            df[f'{field}_total_career'] = group_sorted[field].transform(lambda x: x.expanding().sum().shift())
        
        # Season stats
        df[f'{field}_mean_season'] = group_sorted.groupby([group_sorted.index.get_level_values('season')])[field].transform(lambda x: x.expanding().mean().shift())
        df[f'{field}_total_season'] = group_sorted.groupby([group_sorted.index.get_level_values('season')])[field].transform(lambda x: x.expanding().sum().shift())

        # # Prior season stats
        # df[f'{field}_mean_prior_season'] = group_sorted.groupby([group_sorted.index.get_level_values('season') - 1])[field].transform('mean')
        
        # Last 5 games
        df[f'{field}_mean_last5'] = group_sorted[field].transform(lambda x: x.rolling(window=5, min_periods=1).mean().shift())
        df[f'{field}_total_last5'] = group_sorted[field].transform(lambda x: x.rolling(window=5, min_periods=1).sum().shift())
        # Last Game
        df[f'{field}_last'] = group_sorted[field].shift()
    return df

In [200]:
fields = ['fantasy_points','reception','rushing_yards','touchdown','receiving_yards','fumble','passing_yards','pass_touchdown','two_points']


# Apply the function
df_rusher_receiver_game_level = df_rusher_receiver_game_level.groupby(['player_name', 'player_id']).apply(calc_agg_stats, fields=fields)



KeyError: 'fantasy_points'

In [201]:
df_rusher_receiver_game_level = df_rusher_receiver_game_level.reset_index(0).reset_index(0).drop(columns = ['player_name','player_id']).reset_index()

# Opponent Last Scored

In [202]:
schedules_df_copy = schedules_df[schedules_df['game_id'].isin(schedules_df['game_id'].unique()) & (schedules_df['gameday'] >= '2001-09-09')]
schedules_df_copy.rename(columns = {'gameday':'game_date'}, inplace = True)

home_teams = schedules_df_copy[['game_id', 'game_date','season','home_team','away_score','week']].copy()

away_teams = schedules_df_copy[['game_id', 'game_date','season','away_team','home_score','week']].copy()

home_teams.rename(columns = {'home_team':'team','away_score':'points_allowed'}, inplace = True)
away_teams.rename(columns = {'away_team':'team','home_score':'points_allowed'}, inplace = True)

points_allowed_df = pd.concat([home_teams,away_teams])

points_allowed_df = points_allowed_df.groupby(['game_id', 'game_date','season','week','team']).agg({'points_allowed':'sum'})

group_sorted = points_allowed_df.sort_values('week')

pa_df = group_sorted.groupby(['team']).apply(calc_agg_stats, fields=['points_allowed']).reset_index(0).drop(columns = 'team').reset_index()[['game_id','game_date','season','week','team','points_allowed_mean_season','points_allowed_mean_last5']]


pa_df.rename(columns = {'team':'opponent_team'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedules_df_copy.rename(columns = {'gameday':'game_date'}, inplace = True)


### Features from pa_df are the oppositions points allowed until a certain measure of time

In [203]:

rusher_receiver_features = rusher_receiver_df.merge(df_rusher_receiver_game_level, how = 'inner' ,on = ['game_id','game_date','week','season','posteam','defteam','player_name','player_id'])
rusher_receiver_features['opponent_team'] = np.where(rusher_receiver_features['team'] == rusher_receiver_features['home_team'],rusher_receiver_features['away_team'],rusher_receiver_features['home_team'])
rusher_receiver_features = rusher_receiver_features.merge(pa_df , how = 'inner',on = ['game_date','season','week','opponent_team','game_id'])


rusher_receiver_features = rusher_receiver_features.fillna(0)

# Last Game Stats

In [204]:
df_combined = rusher_receiver_features.copy()

In [205]:
# Calculate the percentage of null values in each column
null_percentages = rusher_receiver_features.isnull().mean() * 100

# Sort the percentages in descending order for better readability
null_percentages = null_percentages.sort_values(ascending=False)

# Format the output to display percentages with two decimal places
null_percentages_formatted = null_percentages.apply(lambda x: f"{x:.2f}%")

# Print the results
print("Percentage of Null Values in Each Column:")
print(null_percentages_formatted)





Percentage of Null Values in Each Column:
game_id                      0.00%
touchdown_total_last5        0.00%
fumble_mean_season           0.00%
fumble_total_career          0.00%
fumble_mean_career           0.00%
                             ...  
rush_touchdown               0.00%
rushing_yards                0.00%
rush_attempt                 0.00%
interception                 0.00%
points_allowed_mean_last5    0.00%
Length: 109, dtype: object


In [None]:
# Ensure 'temp' and 'wind' are numeric
df_combined['temp'] = pd.to_numeric(df_combined['temp'], errors='coerce')
df_combined['wind'] = pd.to_numeric(df_combined['wind'], errors='coerce')

# Calculate mean 'temp' and 'wind' by stadium
temp_wind_means = (
    df_combined.groupby('stadium')[['temp', 'wind']]
    .mean()
    .reset_index()
)

# Merge the mean values back to the original DataFrame
df_combined = pd.merge(
    df_combined,
    temp_wind_means,
    on='stadium',
    how='left',
    suffixes=('', '_mean')
)

# Impute missing 'temp' and 'wind' with the group mean values
df_combined['temp'].fillna(df_combined['temp_mean'], inplace=True)
df_combined['wind'].fillna(df_combined['wind_mean'], inplace=True)

# If any missing 'temp' or 'wind' values remain, fill them with the overall mean
df_combined['temp'].fillna(df_combined['temp'].mean(), inplace=True)
df_combined['wind'].fillna(df_combined['wind'].mean(), inplace=True)

# Drop the temporary mean columns
df_combined.drop(columns=['temp_mean', 'wind_mean'], inplace=True)

# For the rest of the columns, fill missing values with 0
# Exclude 'temp' and 'wind' as they've already been imputed
columns_to_fill = df_combined.columns.difference(['temp', 'wind'])
df_combined[columns_to_fill] = df_combined[columns_to_fill].fillna(0)

# Check if any missing values remain
remaining_nulls = df_combined.isnull().sum()
if remaining_nulls.sum() > 0:
    print("Remaining null values after imputation:")
    print(remaining_nulls[remaining_nulls > 0])
else:
    print("All missing values have been imputed.")

In [207]:
x_vars = ['home_team','away_team','spread_line',
 'n_games_career',
 'n_games_season',
 'fantasy_points_mean_career',
 'fantasy_points_total_career',
 'fantasy_points_mean_season',
 'fantasy_points_total_season',
 'fantasy_points_mean_last5',
 'fantasy_points_total_last5',
 'fantasy_points_last',
 'reception_mean_career',
 'reception_total_career',
 'reception_mean_season',
 'reception_total_season',
 'reception_mean_last5',
 'reception_total_last5',
 'reception_last',
 'rushing_yards_mean_career',
 'rushing_yards_total_career',
 'rushing_yards_mean_season',
 'rushing_yards_total_season',
 'rushing_yards_mean_last5',
 'rushing_yards_total_last5',
 'rushing_yards_last',
 'touchdown_mean_career',
 'touchdown_total_career',
 'touchdown_mean_season',
 'touchdown_total_season',
 'touchdown_mean_last5',
 'touchdown_total_last5',
 'touchdown_last',
 'receiving_yards_mean_career',
 'receiving_yards_total_career',
 'receiving_yards_mean_season',
 'receiving_yards_total_season',
 'receiving_yards_mean_last5',
 'receiving_yards_total_last5',
 'receiving_yards_last',
 'fumble_mean_career',
 'fumble_total_career',
 'fumble_mean_season',
 'fumble_total_season',
 'fumble_mean_last5',
 'fumble_total_last5',
 'fumble_last',
 'passing_yards_mean_career',
 'passing_yards_total_career',
 'passing_yards_mean_season',
 'passing_yards_total_season',
 'passing_yards_mean_last5',
 'passing_yards_total_last5',
 'passing_yards_last',
 'pass_touchdown_mean_career',
 'pass_touchdown_total_career',
 'pass_touchdown_mean_season',
 'pass_touchdown_total_season',
 'pass_touchdown_mean_last5',
 'pass_touchdown_total_last5',
 'pass_touchdown_last',
 'two_points_mean_career',
 'two_points_total_career',
 'two_points_mean_season',
 'two_points_total_season',
 'two_points_mean_last5',
 'two_points_total_last5',
 'two_points_last',
 'opponent_team',
 'points_allowed_mean_season',
 'points_allowed_mean_last5']

# Feature Selection

In [219]:
from nfl_model import NFLModel


In [208]:
def get_dummy_variables(df, drop_first=True, dummy_na=False):
    """
    Converts non-numerical columns in a DataFrame to dummy variables.

    Parameters:
    - df: pandas DataFrame
        The input DataFrame containing the data.
    - drop_first: bool, default=False
        Whether to drop the first level of categorical variables to avoid the dummy variable trap.
    - dummy_na: bool, default=False
        Add a column to indicate NaNs, if False NaNs are ignored.

    Returns:
    - df_dummies: pandas DataFrame
        The DataFrame with non-numeric columns converted to dummy variables.
    """
    # Identify non-numeric columns
    non_numeric_cols = df.select_dtypes(exclude=['number', 'bool']).columns.tolist()

    # If there are no non-numeric columns, return the original DataFrame
    if not non_numeric_cols:
        print("No non-numerical columns to convert.")
        return df.copy()

    # Convert categorical variables to dummy variables
    df_dummies = pd.get_dummies(df, columns=non_numeric_cols, drop_first=drop_first, dummy_na=dummy_na)

    return df_dummies

In [209]:
# columns_to_include = df_combined.columns.difference(['game_id', 'game_date', 'player_name'])
final_df = df_combined[x_vars + ['fantasy_points']].copy()
# final_df['player_id'] = final_df['player_id'].astype('category')
final_df = get_dummy_variables(final_df)

y_var = 'fantasy_points'

In [220]:
# Initialize the model
model = NFLModel(data=final_df, target_variable=y_var)

# Preprocess data
model.preprocess_data()

# Perform feature selection
model.feature_selection()

# Evaluate models
model.evaluate_models()

# Get and print the results
results_df = model.get_results()
print(results_df)

Data preprocessing completed.
Lasso selected features: ['n_games_career', 'n_games_season', 'fantasy_points_total_season', 'fantasy_points_total_last5', 'reception_mean_career', 'reception_mean_season', 'reception_total_season', 'reception_mean_last5', 'reception_total_last5', 'reception_last', 'rushing_yards_last', 'touchdown_total_career', 'touchdown_total_season', 'receiving_yards_mean_career', 'receiving_yards_total_career', 'receiving_yards_total_last5', 'fumble_mean_career', 'fumble_total_last5', 'passing_yards_mean_career', 'pass_touchdown_total_career', 'two_points_mean_last5', 'points_allowed_mean_season', 'home_team_TEN', 'away_team_NE', 'away_team_NYJ', 'opponent_team_BUF', 'opponent_team_DET']
Elastic Net selected features: ['spread_line', 'n_games_career', 'n_games_season', 'fantasy_points_mean_career', 'fantasy_points_total_career', 'fantasy_points_total_season', 'fantasy_points_total_last5', 'reception_mean_career', 'reception_mean_season', 'reception_total_season', 'rec

# EBM

In [210]:
from interpret.glassbox import ExplainableBoostingRegressor

from sklearn.model_selection import train_test_split



In [211]:
final_Vars = [x for x in list(final_df.columns) if x != y_var]

In [212]:
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
            final_df[final_Vars], final_df[y_var], test_size= .2, random_state=1082
        )

In [214]:
ebm = ExplainableBoostingRegressor(max_bins = 40, reg_alpha = 1)


ebm.fit(X_train_raw,y_train)





## Performance

In [215]:
from sklearn.metrics import mean_absolute_error,mean_squared_error



In [216]:
preds = ebm.predict(X_test_raw)

# preds_df['actual'] = y_test['fantasy_points']



print("MAE: " + str(mean_absolute_error(preds, y_test)))

print("MSE: " + str(mean_squared_error(preds, y_test)))




MAE: 5.44361484638693
MSE: 58.2249309925513
