<a href="https://colab.research.google.com/github/daniel-hrusovsky/Predicting-Unique-National-Football-League-Scores/blob/main/Predicting_Unique_Scores.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Mount Google Drive so files stored there can be read from this notebook

from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
from numpy import nan as na

In [3]:
#Reading Data -- acquired from Pro Football Reference

gami = pd.read_excel('/content/drive/MyDrive/Colab/ScorigamiData.xlsx')

#read_excel already returns a DataFrame, and wrapping it in pd.DataFrame(...)
#does not copy it. Take an explicit copy so the in-place cleaning that follows
#works on its own data and `gami` keeps the values exactly as they were loaded.
scorigami = gami.copy()

# Data Cleaning

In [4]:
#Fill every missing value with 0
scorigami = scorigami.fillna(0)

In [5]:
#Rescale each statistic by a fixed divisor (intended to bring values onto a [0,1] scale)
column_divisors = {
  'TO': 10, 'OppTO': 10,
  'TmScore': 100, 'OppScore': 100,
  'FirstD': 100, 'Opp1stD': 100,
  'TotYd': 1000, 'PassYd': 1000, 'RushYd': 1000,
  'OppTotYd': 1000, 'OppPassYd': 1000, 'OppRushYd': 1000,
  'TwoPtAtt': 5, 'TwoPtConv': 5,
  'OppTwoAtt': 5, 'OppTwoComp': 5,
  'Safeties': 2, 'OppSafeties': 2,
}
for column, divisor in column_divisors.items():
  scorigami[column] = scorigami[column] / divisor

#Drop the columns that are not used further in this notebook
scorigami = scorigami.drop(columns=['Season', 'SeasonDec', 'ExpOff', 'ExpDef', 'ExpSpT'])

#Preview of the cleaned former-game dataset
scorigami.head()

Unnamed: 0,TmScore,OppScore,FirstD,TotYd,PassYd,RushYd,TO,Opp1stD,OppTotYd,OppPassYd,OppRushYd,OppTO,TwoPtAtt,TwoPtConv,OppTwoAtt,OppTwoComp,Safeties,OppSafeties,Scorigami
0,0.27,0.3,0.2,0.398,0.32,0.078,0.1,0.22,0.452,0.328,0.124,0.1,0.4,0.2,0.0,0.0,0.0,0.0,0.0
1,0.13,0.38,0.14,0.162,0.094,0.068,0.1,0.22,0.452,0.321,0.131,0.1,0.2,0.0,0.0,0.0,0.0,0.0,0.0
2,0.17,0.34,0.19,0.382,0.323,0.059,0.2,0.27,0.475,0.329,0.146,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.35,0.16,0.25,0.373,0.221,0.152,0.1,0.17,0.278,0.21,0.068,0.2,0.0,0.0,0.2,0.0,0.0,0.0,0.0
4,0.24,0.22,0.16,0.224,0.141,0.083,0.2,0.2,0.367,0.184,0.183,0.2,0.0,0.0,0.2,0.0,0.0,0.0,0.0


# Modeling

In [6]:
#Hold out 20% of the games for testing; the remainder is the training set

import sklearn
from sklearn.model_selection import train_test_split

#Every column except the 'Scorigami' label is a model input
features = scorigami.drop(columns=['Scorigami'])
target = scorigami['Scorigami']
x_train, x_test, y_train, y_test = train_test_split(
  features, target, test_size=0.2, random_state=42
)

In [7]:
#Fit an MLP neural-network classifier on the training split

from sklearn.neural_network import MLPClassifier

mlp_class = MLPClassifier(random_state=42).fit(x_train, y_train)

#Score the fitted classifier on the held-out test split
mlp_class.score(x_test, y_test)



0.9710982658959537

# Predicting Future Matches

In [8]:
#Load the per-team statistics used for the forecast

TEAM_DATA_PATH = '/content/drive/MyDrive/Colab/TEAMDATA.xlsx'
team = pd.read_excel(TEAM_DATA_PATH)

#Hold the loaded data in a dataframe named `forecast`
forecast = pd.DataFrame(data=team)

#Preview of the recent team statistic dataset
forecast.head()

Unnamed: 0,Team,TmScore,TotYd,PassYd,RushYd,FirstD,TO,Safeties,TwoPtConv,TwoPtAtt
0,Chicago Bears,18.705882,301.352941,193.588235,107.764706,18.176471,0.941176,0.0001,0.03,0.235294
1,Cincinnati Bengals,28.529412,362.411765,272.764706,89.647059,22.352941,1.294118,0.058824,0.058824,0.176471
2,Buffalo Bills,31.823529,389.647059,244.058823,145.588235,23.117647,0.352941,0.058824,0.117647,0.352941
3,Denver Broncos,25.176471,323.823529,208.705882,115.117647,19.235294,1.058824,0.0001,0.03,0.058824
4,Cleveland Browns,15.058824,308.0,216.588235,91.411765,18.235294,2.117647,0.0001,0.235294,0.411765


In [9]:
#Divisor applied to each simulated statistic before prediction. These are the
#same divisors the data-cleaning cell applies to the training data. The key
#order is also the order in which the statistics are sampled.
_STAT_DIVISORS = {
  'TotYd': 1000, 'RushYd': 1000, 'PassYd': 1000, 'TmScore': 100,
  'TwoPtConv': 5, 'TwoPtAtt': 5, 'Safeties': 2, 'TO': 10, 'FirstD': 100,
}

#Opponent feature name -> the `forecast` column it is simulated from
_OPP_FEATURE_SOURCE = {
  'OppTotYd': 'TotYd', 'OppRushYd': 'RushYd', 'OppPassYd': 'PassYd',
  'OppScore': 'TmScore', 'OppTwoComp': 'TwoPtConv', 'OppTwoAtt': 'TwoPtAtt',
  'OppSafeties': 'Safeties', 'OppTO': 'TO', 'Opp1stD': 'FirstD',
}

#Column order of the frame handed to the classifier
_FEATURE_ORDER = [
  'TmScore', 'OppScore', 'FirstD', 'TotYd', 'PassYd', 'RushYd', 'TO',
  'Opp1stD', 'OppTotYd', 'OppPassYd', 'OppRushYd', 'OppTO',
  'TwoPtAtt', 'TwoPtConv', 'OppTwoAtt', 'OppTwoComp', 'Safeties', 'OppSafeties',
]


def _forecast_row(team_name):
  """Return the single `forecast` row whose Team equals `team_name`.

  Raises:
    ValueError: if `forecast` has no row, or more than one row, for that name.
  """
  rows = forecast[forecast.Team == team_name]
  if len(rows) != 1:
    raise ValueError(
      f"Expected exactly one row in forecast for team {team_name!r}, found {len(rows)}"
    )
  return rows.iloc[0]


def model(i, t, n_sims=10000, seed=None):
  """Simulate games between two teams and count the predicted scorigamis.

  For every statistic, `n_sims` Poisson samples are drawn using the value stored
  in the global `forecast` table as the rate: first for team `i`, then for
  opponent `t`. The draws are divided by the divisors in `_STAT_DIVISORS`,
  arranged in `_FEATURE_ORDER`, and classified by the global `mlp_class`.

  Args:
    i: value of `forecast.Team` identifying the team.
    t: value of `forecast.Team` identifying the opponent.
    n_sims: number of simulated games (default 10000).
    seed: optional seed. When given, draws come from a dedicated
      `np.random.default_rng(seed)` so the result is repeatable. When None,
      NumPy's global random state is used.

  Returns:
    np.int64: number of simulated games whose predicted label equals 1.

  Raises:
    ValueError: if `i` or `t` does not match exactly one row of `forecast`.
  """
  #Both np.random and a Generator expose poisson(lam, size)
  sampler = np.random if seed is None else np.random.default_rng(seed)

  #Fail early, with a readable message, on an unknown or duplicated team name
  team_row = _forecast_row(i)
  opp_row = _forecast_row(t)

  simulated = {}
  for stat, divisor in _STAT_DIVISORS.items():
    simulated[stat] = sampler.poisson(float(team_row[stat]), size=n_sims) / divisor
  for feature, stat in _OPP_FEATURE_SOURCE.items():
    draws = sampler.poisson(float(opp_row[stat]), size=n_sims)
    simulated[feature] = draws / _STAT_DIVISORS[stat]

  #Select and order the columns as listed in _FEATURE_ORDER
  predicting = pd.DataFrame(simulated, columns=_FEATURE_ORDER)

  predictions = mlp_class.predict(predicting)
  return np.int64((predictions == 1).sum())


In [10]:
#Result is how many games out of 10,000 simulated games result in a scorigami
model(i="Detroit Lions", t="Baltimore Ravens")

np.int64(119)