# At halftime of an NFL game, enter the appropriate statistics for the home and away team. Please see a summary of the specific statistics below for any clarifications

In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import BaggingRegressor
import numpy as np
import pandas as pd

In [2]:
# loading historical NFL play data
reduced = pd.read_csv("data/final.csv")
reduced = reduced.drop(columns="Unnamed: 0")

In [3]:
reduced.head()

Unnamed: 0,scores,predscores,tmhalfsc,opphalfsc,patt,ypa,comppct,ratt,ypr,int_pct,sack_pct,sack_yd_pct,fum_pct
0,16.0,24.75,6.0,13.0,19.0,6.736842,0.578947,8.0,1.5,0.105263,0.095238,-0.52381,0.0
1,38.0,24.75,10.0,7.0,25.0,7.0,0.64,13.0,3.384615,0.0,0.038462,-0.230769,0.025641
2,20.0,17.75,13.0,10.0,11.0,4.272727,0.636364,15.0,3.466667,0.0,0.153846,-0.769231,0.071429
3,7.0,24.75,0.0,10.0,15.0,5.466667,0.666667,10.0,2.2,0.0,0.0,0.0,0.04
4,14.0,25.5,7.0,6.0,21.0,8.333333,0.714286,15.0,2.733333,0.047619,0.045455,-0.318182,0.027027


### Statistics for each team (ok to enter as fractions):
- team_predicted: 
- team_half_sc: team's total score at halftime
- team_patt: team's total number of pass attempts
- team_ypa: team's yards per pass attempt
- team_comppct: team's passing completing percentage
- team_ratt: team's total number of rushing attempts
- team_ypr: team's yards pre rushing attempt
- team_int_pct: offense's interceptions thrown / pass attempts

In [4]:
away_predicted = (37 / 2) - (5.5 / 2)
home_predicted = (37 / 2) + (5.5 / 2)
away_half_sc = 2
home_half_sc = 16
away_patt = 17
home_patt = 16
away_ypa = 3.8
home_ypa = 6.0
away_comppct = 12 / 17
home_comppct = 12 / 16
away_ratt = 15
home_ratt = 11
away_ypr = 1.7
home_ypr = 12.8
away_int_pct = 0
home_int_pct = 1 / 17

In [5]:
predscores = [home_predicted, away_predicted]
tmhalfsc = [home_half_sc, away_half_sc]
opphalfsc = [away_half_sc, home_half_sc]
patt = [home_patt, away_patt]
ypa = [home_ypa, away_ypa]
comppct = [home_comppct, away_comppct]
ratt = [home_ratt, away_ratt]
ypr = [home_ypr, away_ypr]
int_pct = [home_int_pct, away_int_pct]

In [6]:
thursday_night = np.array([predscores, tmhalfsc, opphalfsc, patt, 
                           ypa, comppct, ratt, ypr, int_pct]).T
final_score = [30, 9]
pre_game_pred = [int(x) for x in predscores]

In [7]:
# dropping statistically insignificant features
X = reduced.drop(columns=["scores", "int_pct", "sack_pct", 
                          "sack_yd_pct", "fum_pct"])
y = reduced.scores

In [8]:
bag = BaggingRegressor(LinearRegression(), n_estimators=59, max_samples=0.92,
                             bootstrap=True, n_jobs=-1)

In [9]:
bag.fit(X, y)

BaggingRegressor(base_estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=0.92, n_estimators=59, n_jobs=-1, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

In [10]:
pred = bag.predict(thursday_night)

In [11]:
pred = [int(x) for x in pred]

In [15]:
results = pd.DataFrame([pre_game_pred, pred, final_score], 
                       columns=["Home", "Away"], 
                       index=["Pre-Game Prediction", "Half-Time Prediction", 
                              "Actual"])

In [16]:
results

Unnamed: 0,Home,Away
Pre-Game Prediction,21,15
Half-Time Prediction,26,9
Actual,30,9
