In [1]:
#import required modules
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
import pickle

In [2]:
dfa = pd.read_csv ('2022schedtrimmed.csv')
print(dfa)

                     For                Against
0          Buffalo Bills       Los Angeles Rams
1     New Orleans Saints        Atlanta Falcons
2       Cleveland Browns      Carolina Panthers
3    San Francisco 49ers          Chicago Bears
4    Pittsburgh Steelers     Cincinnati Bengals
..                   ...                    ...
267     Cleveland Browns    Pittsburgh Steelers
268   Kansas City Chiefs      Las Vegas Raiders
269     Los Angeles Rams       Seattle Seahawks
270    Arizona Cardinals    San Francisco 49ers
271       Dallas Cowboys  Washington Commanders

[272 rows x 2 columns]


In [3]:
#unpack pickled dataframes

modeling = pd.read_pickle("modeling-Copy1.pkl")
forecast = pd.read_pickle("forecast-Copy1.pkl")

#change differential into win/loss
modeling["Differential"] = modeling["Differential"].apply(lambda x: 1 if x > 0 else 0)
modeling.head()

Unnamed: 0_level_0,Differential,QB FOR,WR FOR,TE FOR,RB FOR,DT FOR,LT FOR,CB FOR,LB FOR,S FOR,...,DT AGAINST,LT AGAINST,CB AGAINST,LB AGAINST,S AGAINST,RT AGAINST,EDGE AGAINST,C AGAINST,LG AGAINST,RG AGAINST
gamename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Chiefs Texans 2020,1,90.2,84.5,86.1,65.7,87.6,71.2,71.9,50.4,75.4,...,59.8,75.1,64.5,68.6,76.7,59.4,87.0,67.3,59.1,53.4
Seahawks Falcons 2020,1,92.0,81.9,79.6,77.9,68.4,74.1,87.6,76.4,73.4,...,90.1,79.7,56.8,76.6,72.0,53.0,72.0,72.1,87.7,47.0
Bills Jets 2020,1,64.1,78.3,60.2,69.3,65.1,73.1,74.3,65.6,77.7,...,81.3,74.4,79.0,73.9,87.9,60.6,73.1,72.0,56.1,65.6
Raiders Panthers 2020,1,79.9,75.5,83.2,87.1,73.1,64.9,67.4,79.0,69.3,...,83.7,79.3,63.8,65.9,76.4,76.2,54.7,63.4,58.6,63.6
Bears Lions 2020,1,76.6,80.8,58.0,66.6,91.7,58.6,62.5,61.9,66.8,...,85.9,75.5,82.8,72.3,74.4,74.4,83.0,74.9,64.9,78.7


In [4]:
#split modeling data in to test and train (75/25)
X = modeling.drop("Differential", axis = 1)
Y = modeling["Differential"]

X_train, X_test, Y_train, Y_test = train_test_split(X,Y, random_state = 0)

In [5]:
#scaling the data
#A logistic regression model gives nice results with both training and test data

from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing

scaler = preprocessing.StandardScaler().fit(X_train)
train_scaled = scaler.transform(X_train)
test_scaled = scaler.transform(X_test)
X_scaled1 = scaler.transform(X)

scaler2 = preprocessing.StandardScaler().fit(X)
X_scaled2 = scaler2.transform(X)
forecast_scaled = scaler2.transform(forecast)

In [6]:
#A bagged SVC gives nice results with both training and test data
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier

In [7]:
AB = BaggingClassifier(base_estimator = SVC(tol = .1, C = .9), n_jobs = -1, n_estimators = 50).fit(train_scaled,Y_train)
print("Test model: ",np.sum(np.array(AB.predict(test_scaled)) == np.array(Y_test))/len(X_test))
print("Train model: ",np.sum(np.array(AB.predict(train_scaled)) == np.array(Y_train))/len(X_train))

Test model:  0.5871212121212122
Train model:  0.8181818181818182


In [8]:
LR = LogisticRegression().fit(train_scaled,Y_train)
print("Test model: ", np.sum(np.array(LR.predict(test_scaled)) == np.array(Y_test))/len(X_test))
print("Train model: ",np.sum(np.array(LR.predict(train_scaled)) == np.array(Y_train))/len(X_train))

Test model:  0.571969696969697
Train model:  0.6515151515151515


In [9]:
#cross validating
#looks like the the model will be correct ~66% of the time on average
from sklearn.model_selection import cross_val_score
cvscore = cross_val_score(AB, X_scaled1, Y, cv = 10)
np.average(cvscore)

0.6544115004492363

In [10]:
#looks like the the model will be correct ~60% of the time on average
cvscore2 = cross_val_score(LR, X_scaled1, Y, cv = 10)
np.average(cvscore2)

0.599487870619946

In [11]:
#applying the model to forecast the 2022 season
AB = BaggingClassifier(base_estimator = SVC(tol = .1, C = .9), n_jobs = -1, n_estimators = 50).fit(X_scaled2,Y)
forecast["result_ML"] = AB.predict(forecast_scaled)

#applying the model to forecast the 2022 season
LR = LogisticRegression().fit(X_scaled2,Y)
forecast["result_madden_new"] = LR.predict(forecast_scaled)

In [12]:
#relabel and export to csv
forecast["result_ML"] = forecast["result_ML"].apply(lambda x: "W" if x > 0 else "L")
forecast["result_ML"].to_frame().to_csv("2022predictions.csv")

forecast["result_madden_new"] = forecast["result_madden_new"].apply(lambda x: "W" if x > 0 else "L")
forecast["result_madden_new"].to_frame().to_csv("2022predictionsmaddenstyle.csv")

In [13]:
#Sanity Check: is this better than just predicting the favorite to win?
#power rankings sourced from https://www.nfl.com/news/nfl-power-rankings-cowboys-up-colts-down-as-preseason-rolls-on
#not bad, model provides 3% increase in accuracy
teams = ["Buccaneers", "Chiefs", "Bills",  "Packers", "Ravens", "Rams", "Seahawks", "Browns", "49ers", "Steelers", "Titans", "Chargers", "Saints", "Patriots", "Dolphins", "Colts", "Vikings", "Cardinals", "Bears", "Cowboys", "Raiders", "Commanders", "Giants", "Broncos", "Falcons", "Eagles", "Panthers", "Jets", "Jaguars", "Bengals", "Lions", "Texans"]
rankdict = {}
for i in range(1, 33):
        rankdict[teams[i - 1]] = i
season2022 = X.tail(272)
results2022 = np.array(Y.tail(272))
season2022 = season2022.reset_index()
predictresults = np.array(season2022["gamename"].apply(lambda x: rankdict[x.split(" ")[0]] < rankdict[x.split(" ")[1]]))
sum(predictresults.astype(int) == results2022)/272

0.6286764705882353

In [14]:
madden_results = forecast["result_madden_new"]
ML_results = forecast["result_ML"]

In [15]:
madden_results
#madden_results['row_num'] = madden_results.reset_index().index


gamename
Bills Rams 2022            W
Saints Falcons 2022        W
Browns Panthers 2022       W
49ers Bears 2022           W
Steelers Bengals 2022      L
                          ..
Browns Steelers 2022       W
Chiefs Raiders 2022        L
Rams Seahawks 2022         W
Cardinals 49ers 2022       W
Cowboys Commanders 2022    W
Name: result_madden_new, Length: 272, dtype: object

In [16]:
ML_results

gamename
Bills Rams 2022            W
Saints Falcons 2022        W
Browns Panthers 2022       W
49ers Bears 2022           W
Steelers Bengals 2022      L
                          ..
Browns Steelers 2022       W
Chiefs Raiders 2022        L
Rams Seahawks 2022         W
Cardinals 49ers 2022       W
Cowboys Commanders 2022    W
Name: result_ML, Length: 272, dtype: object

In [17]:
mergedNFL = pd.concat([madden_results, ML_results], axis=1)
#mergedNFL['row_num'] = madden_results.reset_index().index

In [18]:
mergedNFL

Unnamed: 0_level_0,result_madden_new,result_ML
gamename,Unnamed: 1_level_1,Unnamed: 2_level_1
Bills Rams 2022,W,W
Saints Falcons 2022,W,W
Browns Panthers 2022,W,W
49ers Bears 2022,W,W
Steelers Bengals 2022,L,L
...,...,...
Browns Steelers 2022,W,W
Chiefs Raiders 2022,L,L
Rams Seahawks 2022,W,W
Cardinals 49ers 2022,W,W


In [19]:
dfa

Unnamed: 0,For,Against
0,Buffalo Bills,Los Angeles Rams
1,New Orleans Saints,Atlanta Falcons
2,Cleveland Browns,Carolina Panthers
3,San Francisco 49ers,Chicago Bears
4,Pittsburgh Steelers,Cincinnati Bengals
...,...,...
267,Cleveland Browns,Pittsburgh Steelers
268,Kansas City Chiefs,Las Vegas Raiders
269,Los Angeles Rams,Seattle Seahawks
270,Arizona Cardinals,San Francisco 49ers


In [20]:
#df0 = pd.concat([ML_results, madden_results])
#madden_results.merge(ML_results, on='row_num')
df3 = pd.concat([mergedNFL.reset_index(drop=True),dfa.reset_index(drop=True)], axis=1)
df3

Unnamed: 0,result_madden_new,result_ML,For,Against
0,W,W,Buffalo Bills,Los Angeles Rams
1,W,W,New Orleans Saints,Atlanta Falcons
2,W,W,Cleveland Browns,Carolina Panthers
3,W,W,San Francisco 49ers,Chicago Bears
4,L,L,Pittsburgh Steelers,Cincinnati Bengals
...,...,...,...,...
267,W,W,Cleveland Browns,Pittsburgh Steelers
268,L,L,Kansas City Chiefs,Las Vegas Raiders
269,W,W,Los Angeles Rams,Seattle Seahawks
270,W,W,Arizona Cardinals,San Francisco 49ers


In [21]:
df3

Unnamed: 0,result_madden_new,result_ML,For,Against
0,W,W,Buffalo Bills,Los Angeles Rams
1,W,W,New Orleans Saints,Atlanta Falcons
2,W,W,Cleveland Browns,Carolina Panthers
3,W,W,San Francisco 49ers,Chicago Bears
4,L,L,Pittsburgh Steelers,Cincinnati Bengals
...,...,...,...,...
267,W,W,Cleveland Browns,Pittsburgh Steelers
268,L,L,Kansas City Chiefs,Las Vegas Raiders
269,W,W,Los Angeles Rams,Seattle Seahawks
270,W,W,Arizona Cardinals,San Francisco 49ers


In [22]:
df3.to_csv('2022overallpredictions.csv')

In [23]:
# create a list of our conditions
conditions = [
    (df3['result_madden_new']== 'L') & (df3['result_ML']== 'L'),
    (df3['result_madden_new']== 'W') & (df3['result_ML']== 'W'),
    (df3['result_madden_new']== 'L') & (df3['result_ML']=='W'),
    (df3['result_madden_new']== 'W') & (df3['result_ML']=='L')
    ]

# create a list of the values we want to assign for each condition
values = ['Same','Same', 'DifferentLW', 'DifferentWL']

# create a new column and use np.select to assign values to it using our lists as arguments
df3['Madden_vs_ML'] = np.select(conditions, values)

# display updated DataFrame
df3.head()


Unnamed: 0,result_madden_new,result_ML,For,Against,Madden_vs_ML
0,W,W,Buffalo Bills,Los Angeles Rams,Same
1,W,W,New Orleans Saints,Atlanta Falcons,Same
2,W,W,Cleveland Browns,Carolina Panthers,Same
3,W,W,San Francisco 49ers,Chicago Bears,Same
4,L,L,Pittsburgh Steelers,Cincinnati Bengals,Same


In [24]:
df3.to_csv('2022overallpredictions1.csv')

In [25]:
EPA = pd.read_csv ('../3. Machine Learning Analysis/FootballEPA.csv')

In [26]:
EPA

Unnamed: 0.1,Unnamed: 0,Abbr,O_EPA/play,D_EPA/play
0,0,ARI,0.075,-0.032
1,1,ATL,-0.071,0.082
2,2,BAL,0.011,0.061
3,3,BUF,0.095,-0.12
4,4,CAR,-0.124,-0.023
5,5,CHI,-0.083,0.018
6,6,CIN,0.043,-0.019
7,7,CLE,0.018,0.02
8,8,DAL,0.061,-0.084
9,9,DEN,0.029,0.008


In [27]:
EPA['Abbr'] = EPA['Abbr'].replace({'ARI' : 'Arizona Cardinals'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'ATL' : 'Atlanta Falcons'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'BAL' : 'Baltimore Ravens'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'BUF' : 'Buffalo Bills'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'LAR' : 'Los Angeles Rams'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'NO' : 'New Orleans Saints'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'CAR' : 'Carolina Panthers'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'CHI' : 'Chicago Bears'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'CIN' : 'Cincinnati Bengals'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'CLE' : 'Cleveland Browns'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'DAL' : 'Dallas Cowboys'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'DEN' : 'Denver Broncos'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'DET' : 'Detroit Lions'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'GB' : 'Green Bay Packers'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'HOU' : 'Houston Texans'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'IND' : 'Indianapolis Colts'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'JAX' : 'Jacksonville Jaguars'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'KC' : 'Kansas City Chiefs'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'LV' : 'Las Vegas Raiders'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'MIA' : 'Miami Dolphins'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'MIN' : 'Minnesota Vikings'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'NE' : 'New England Patriots'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'NYG' : 'New York Giants'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'NYJ' : 'New York Jets'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'PHI' : 'Philadelphia Eagles'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'PIT' : 'Pittsburgh Steelers'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'SEA' : 'Seattle Seahawks'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'SF' : 'San Francisco 49ers'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'TB' : 'Tampa Bay Buccaneers'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'TEN' : 'Tennessee Titans'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'LAC' : 'Los Angeles Chargers'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'WAS' : 'Washington Commanders'}, regex=True)
EPA['Abbr'] = EPA['Abbr'].replace({'LA' : 'Los Angeles Rams'}, regex=True)
#EPA.columns.values[1] = 'For'











In [28]:
EPA

Unnamed: 0.1,Unnamed: 0,Abbr,O_EPA/play,D_EPA/play
0,0,Arizona Cardinals,0.075,-0.032
1,1,Atlanta Falcons,-0.071,0.082
2,2,Baltimore Ravens,0.011,0.061
3,3,Buffalo Bills,0.095,-0.12
4,4,Carolina Panthers,-0.124,-0.023
5,5,Chicago Bears,-0.083,0.018
6,6,Cincinnati Bengals,0.043,-0.019
7,7,Cleveland Browns,0.018,0.02
8,8,Dallas Cowboys,0.061,-0.084
9,9,Denver Broncos,0.029,0.008


In [29]:
a = df3['For']
a

0            Buffalo Bills
1       New Orleans Saints
2         Cleveland Browns
3      San Francisco 49ers
4      Pittsburgh Steelers
              ...         
267       Cleveland Browns
268     Kansas City Chiefs
269       Los Angeles Rams
270      Arizona Cardinals
271         Dallas Cowboys
Name: For, Length: 272, dtype: object

In [30]:
xyz = EPA['Abbr']
xyz

0         Arizona Cardinals
1           Atlanta Falcons
2          Baltimore Ravens
3             Buffalo Bills
4         Carolina Panthers
5             Chicago Bears
6        Cincinnati Bengals
7          Cleveland Browns
8            Dallas Cowboys
9            Denver Broncos
10            Detroit Lions
11        Green Bay Packers
12           Houston Texans
13       Indianapolis Colts
14     Jacksonville Jaguars
15       Kansas City Chiefs
16         Los Angeles Rams
17     Los Angeles Chargers
18        Las Vegas Raiders
19           Miami Dolphins
20        Minnesota Vikings
21     New England Patriots
22       New Orleans Saints
23          New York Giants
24            New York Jets
25      Philadelphia Eagles
26      Pittsburgh Steelers
27         Seattle Seahawks
28      San Francisco 49ers
29     Tampa Bay Buccaneers
30         Tennessee Titans
31    Washington Commanders
Name: Abbr, dtype: object

In [31]:
for col in EPA.columns:
    print(col)

Unnamed: 0
Abbr
O_EPA/play
D_EPA/play


In [32]:
df3

Unnamed: 0,result_madden_new,result_ML,For,Against,Madden_vs_ML
0,W,W,Buffalo Bills,Los Angeles Rams,Same
1,W,W,New Orleans Saints,Atlanta Falcons,Same
2,W,W,Cleveland Browns,Carolina Panthers,Same
3,W,W,San Francisco 49ers,Chicago Bears,Same
4,L,L,Pittsburgh Steelers,Cincinnati Bengals,Same
...,...,...,...,...,...
267,W,W,Cleveland Browns,Pittsburgh Steelers,Same
268,L,L,Kansas City Chiefs,Las Vegas Raiders,Same
269,W,W,Los Angeles Rams,Seattle Seahawks,Same
270,W,W,Arizona Cardinals,San Francisco 49ers,Same


In [33]:
df3['For'].value_counts()

Buffalo Bills            9
Kansas City Chiefs       9
Tennessee Titans         9
Houston Texans           9
Cincinnati Bengals       9
Miami Dolphins           9
New York Jets            9
Denver Broncos           9
Las Vegas Raiders        9
Los Angeles Chargers     9
Jacksonville Jaguars     9
Pittsburgh Steelers      9
Baltimore Ravens         9
New England Patriots     9
Indianapolis Colts       9
Cleveland Browns         9
Los Angeles Rams         8
Detroit Lions            8
Minnesota Vikings        8
Chicago Bears            8
Arizona Cardinals        8
San Francisco 49ers      8
Atlanta Falcons          8
Seattle Seahawks         8
Green Bay Packers        8
Philadelphia Eagles      8
Carolina Panthers        8
Washington Commanders    8
New Orleans Saints       8
Tampa Bay Buccaneers     8
New York Giants          8
Dallas Cowboys           8
Name: For, dtype: int64

In [34]:
df3['Against'].value_counts()

Los Angeles Rams         9
Minnesota Vikings        9
Philadelphia Eagles      9
Green Bay Packers        9
San Francisco 49ers      9
New York Giants          9
New Orleans Saints       9
Atlanta Falcons          9
Seattle Seahawks         9
Dallas Cowboys           9
Tampa Bay Buccaneers     9
Arizona Cardinals        9
Washington Commanders    9
Carolina Panthers        9
Chicago Bears            9
Detroit Lions            9
Cincinnati Bengals       8
New England Patriots     8
Indianapolis Colts       8
Buffalo Bills            8
Las Vegas Raiders        8
Denver Broncos           8
Pittsburgh Steelers      8
Baltimore Ravens         8
Tennessee Titans         8
Houston Texans           8
Jacksonville Jaguars     8
Cleveland Browns         8
Miami Dolphins           8
New York Jets            8
Los Angeles Chargers     8
Kansas City Chiefs       8
Name: Against, dtype: int64

In [35]:
df3['For'].value_counts()['Kansas City Chiefs']

9

In [36]:
df3.to_csv('2022overallpredictions1.csv')