In [134]:
import pandas as pd
import numpy as np

df = pd.read_csv('spreadspoke_scores.csv')

df.drop(['Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24'], axis=1, inplace=True)
df.dropna(subset=['team_favorite_id'], inplace=True)
lookup_table = pd.read_csv('nfl_teams.csv')
lookup_table = lookup_table[['team_name', 'team_id']]
lookup_table.columns = ['full_name', 'abbreviation']

df['team_home_abbreviation'] = df['team_home'].map(lookup_table.set_index('full_name')['abbreviation'])
df['team_away_abbreviation'] = df['team_away'].map(lookup_table.set_index('full_name')['abbreviation'])

team_stats_df = pd.read_csv('weekly_team_data.csv')

team_stats_df = team_stats_df.rename(columns={'team': 'team_abbreviation'})

df['home_points_scored_so_far'] = df.groupby(['team_home', 'schedule_season'])['score_home'].cumsum()
df['away_points_scored_so_far'] = df.groupby(['team_away', 'schedule_season'])['score_away'].cumsum()
df['home_points_allowed_so_far'] = df.groupby(['team_home', 'schedule_season'])['score_away'].cumsum()
df['away_points_allowed_so_far'] = df.groupby(['team_away', 'schedule_season'])['score_home'].cumsum()


In [135]:
df = df[~df['schedule_week'].isin(['Superbowl', 'Wildcard', 'Division', 'Conference'])]
df['schedule_week'] = df['schedule_week'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['schedule_week'] = df['schedule_week'].astype(int)


In [136]:
df = df.merge(
    team_stats_df,
    how='left',
    left_on=['team_home_abbreviation', 'schedule_season', 'schedule_week'],
    right_on=['team_abbreviation', 'season', 'week'],
    suffixes=('', '_home')
)

df = df.merge(
    team_stats_df,
    how='left',
    left_on=['team_away_abbreviation', 'schedule_season', 'schedule_week'],
    right_on=['team_abbreviation', 'season', 'week'],
    suffixes=('', '_away')
)


In [137]:
df['winner_against_spread'] = np.where(
    ((df['score_home'] - df['score_away']) > -df['spread_favorite']) & (df['team_favorite_id'] == df['team_home_abbreviation']) |
    ((df['score_home'] - df['score_away']) > df['spread_favorite']) & (df['team_favorite_id'] == df['team_away_abbreviation']),
    1, 0
)
df.dropna(subset=['winner_against_spread', 'spread_favorite'], inplace=True)

In [138]:
df.dropna(subset=['rushing_yards_away'], inplace=True)

In [139]:
df['home_yards_so_far'] = df.groupby(['team_home', 'schedule_season'])['yards_gained'].cumsum()

In [140]:
df['away_yards_so_far'] = df.groupby(['team_away', 'schedule_season'])['yards_gained_away'].cumsum()

In [141]:
df['home_touchdowns_so_far'] = df.groupby(['team_home', 'schedule_season'])['touchdown'].cumsum()
df['away_touchdowns_so_far'] = df.groupby(['team_away', 'schedule_season'])['touchdown_away'].cumsum()

In [142]:
df['home_interceptions_so_far'] = df.groupby(['team_home', 'schedule_season'])['interception'].cumsum()
df['away_interceptions_so_far'] = df.groupby(['team_away', 'schedule_season'])['interception_away'].cumsum()
df['home_air_yards_so_far'] = df.groupby(['team_home', 'schedule_season'])['air_yards'].cumsum()
df['away_air_yards_so_far'] = df.groupby(['team_away', 'schedule_season'])['air_yards_away'].cumsum()
df['home_fumbles_so_far'] = df.groupby(['team_home', 'schedule_season'])['fumble'].cumsum()
df['away_fumbles_so_far'] = df.groupby(['team_away', 'schedule_season'])['fumble_away'].cumsum()

In [143]:
df['home_qb_scrambled_so_far'] = df.groupby(['team_home', 'schedule_season'])['qb_scramble'].cumsum()
df['away_qb_scrambled_so_far'] = df.groupby(['team_away', 'schedule_season'])['qb_scramble_away'].cumsum()

In [144]:
df['home_pass_snaps_pct_so_far'] = df.groupby(['team_home', 'schedule_season'])['pass_snaps_pct'].cumsum()
df['away_pass_snaps_pct_so_far'] = df.groupby(['team_away', 'schedule_season'])['pass_snaps_pct_away'].cumsum()

In [148]:
from sklearn.neural_network import MLPClassifier

features = [
    'home_points_scored_so_far', 'away_points_scored_so_far',
    'home_points_allowed_so_far', 'away_points_allowed_so_far',
    'home_yards_so_far', 'away_yards_so_far', 'home_touchdowns_so_far', 'away_touchdowns_so_far',
    'home_interceptions_so_far', 'away_interceptions_so_far', 'home_air_yards_so_far', 'away_air_yards_so_far',
    'home_fumbles_so_far', 'away_fumbles_so_far', 'home_qb_scrambled_so_far', 'away_qb_scrambled_so_far',
    'home_pass_snaps_pct_so_far', 'away_pass_snaps_pct_so_far'
]

for col in features:
  if col in df.columns and df[col].isnull().any():
    df[col] = df[col].fillna(df[col].mean())

X = df[features]
y = df['winner_against_spread']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1020)

X_train, X_dev, y_train, y_dev = train_test_split(X_train, y_train, test_size=0.25, random_state=112)

model = MLPClassifier(hidden_layer_sizes=(150,15), max_iter=1000, random_state=50)
model.fit(X_train, y_train)

y_pred = model.predict(X_dev)

accuracy = accuracy_score(y_dev, y_pred)
print(accuracy)

0.5664939550949913


In [149]:
y_pred_baseline = np.ones(len(y_test))

accuracy_baseline = accuracy_score(y_test, y_pred_baseline)
print(accuracy_baseline)

0.49740932642487046


In [152]:
from sklearn.linear_model import LogisticRegression

logisticmodel = LogisticRegression(max_iter=100)
X_train = X_train[['home_points_scored_so_far','home_points_allowed_so_far', 'away_points_scored_so_far', 'away_points_allowed_so_far']]
X_test = X_test[['home_points_scored_so_far','home_points_allowed_so_far', 'away_points_scored_so_far', 'away_points_allowed_so_far']]
logisticmodel.fit(X_train, y_train)

y_pred_logistic = logisticmodel.predict(X_test)

accuracy_logreg = accuracy_score(y_test, y_pred_logistic)
print(accuracy_logreg)

Logistic Regression Accuracy: 0.6442141623488774
