In [None]:
import psycopg2

import pandas as pds

import sqlalchemy

import numpy as np

import matplotlib.pyplot as plt

import scipy.stats as scipy

import statistics as stats

import math as math

import seaborn as sns

import yellowbrick as yb

# Create Database Engine

In [None]:
import os
from getpass import getpass
from urllib.parse import quote_plus

# Connection settings are read from the environment so that no secret is stored in
# the notebook. The non-secret parts fall back to the values this analysis was
# written against; the password has no fallback and is prompted for interactively
# when NFL_DB_PASSWORD is unset (set the variable for unattended runs).
db_user = os.environ.get('NFL_DB_USER', 'postgres')
db_password = os.environ.get('NFL_DB_PASSWORD') or getpass('Database password: ')
db_host = os.environ.get('NFL_DB_HOST', 'nflstats.cb6meldrm5db.us-east-1.rds.amazonaws.com')
db_port = os.environ.get('NFL_DB_PORT', '5432')
db_name = os.environ.get('NFL_DB_NAME', 'nfl_stats')

# quote_plus keeps special characters in the credentials from corrupting the URL.
db_url = 'postgresql+psycopg2://{user}:{password}@{host}:{port}/{name}'.format(
    user=quote_plus(db_user),
    password=quote_plus(db_password),
    host=db_host,
    port=db_port,
    name=db_name,
)

# pool_recycle=3600 is kept from the original configuration.
Engine = sqlalchemy.create_engine(db_url, pool_recycle=3600)

# Connection used by the cells that read from the database.
dbConnection = Engine.connect()

# Create Dataframe and QC

In [None]:
# Load every row of final_table_joined into a DataFrame through the open connection.
final_table_query = "select * from final_table_joined"
df_table = pds.read_sql(sql=final_table_query, con=dbConnection)

In [None]:
# QC: descriptive statistics of df_table, shown as the cell output.
df_table.describe()

In [None]:
# QC: preview the first 10 rows of df_table.
df_table.head(10)

In [None]:
# QC: count of missing values in each column of df_table.
df_table.isna().sum()

In [None]:
# QC: histograms of df_table's columns, drawn on one figure with figsize (20, 10).
df_table.hist(figsize=(20,10))

# Baseline Model Features

In [None]:
# Baseline inputs are the two win-percentage columns; the target is home_outcome.
features = [
    'home_win_percentage',
    'away_win_percentage',
]
target_column = 'home_outcome'

# Extract the selected columns from df_table via .values.
x = df_table.loc[:, features].values
y = df_table.loc[:, target_column].values

# Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassificationReport
from yellowbrick.model_selection import CVScores
from sklearn.model_selection import StratifiedKFold
from yellowbrick.model_selection import LearningCurve
from yellowbrick.classifier import ROCAUC
from yellowbrick.classifier import ConfusionMatrix
from yellowbrick.classifier import PrecisionRecallCurve
from yellowbrick.classifier import ClassPredictionError

# Fixed seed so the split and the fitted tree are the same on every
# Restart & Run All; without it the reported scores change from run to run.
SEED = 42

# Hold out 20% of the rows for the classification report.
splits = train_test_split(x, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = splits

DTCmodel = DecisionTreeClassifier(random_state=SEED)

# Display names for the rows of the report.
# NOTE(review): assumes home_outcome holds exactly two labels that sort as
# loss, then win - confirm against the data.
classes = ['loss', 'win']

# Classification Report
cr_visualizer = ClassificationReport(DTCmodel, classes=classes, support=True)

cr_visualizer.fit(X_train, y_train)        # Fit the visualizer and the model
cr_visualizer.score(X_test, y_test)        # Evaluate the model on the test data
cr_visualizer.show()

# Cross-Validation
# 12 stratified folds over the full x, y, scored with weighted F1.
cv = StratifiedKFold(n_splits = 12)

cv_visualizer = CVScores(DTCmodel, cv = cv, scoring = 'f1_weighted')

cv_visualizer.fit(x, y)
cv_visualizer.show()

# Gaussian NB

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassificationReport
from yellowbrick.model_selection import CVScores
from sklearn.model_selection import StratifiedKFold
from yellowbrick.model_selection import LearningCurve
from yellowbrick.classifier import ROCAUC
from yellowbrick.classifier import ConfusionMatrix
from yellowbrick.classifier import PrecisionRecallCurve
from yellowbrick.classifier import ClassPredictionError

# Fixed seed so the train/test split is the same on every Restart & Run All;
# without it the reported scores change from run to run.
SEED = 42

# Hold out 20% of the rows for the classification report.
splits = train_test_split(x, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = splits

# Display names for the rows of the report.
# NOTE(review): assumes home_outcome holds exactly two labels that sort as
# loss, then win - confirm against the data.
classes = ['loss', 'win']

GNBmodel = GaussianNB()

# Classification Report
cr_visualizer = ClassificationReport(GNBmodel, classes=classes, support=True)

cr_visualizer.fit(X_train, y_train)        # Fit the visualizer and the model
cr_visualizer.score(X_test, y_test)        # Evaluate the model on the test data
cr_visualizer.show()

# Cross-Validation
# 12 stratified folds over the full x, y, scored with weighted F1.
cv = StratifiedKFold(n_splits = 12)

cv_visualizer = CVScores(GNBmodel, cv = cv, scoring = 'f1_weighted')

cv_visualizer.fit(x, y)
cv_visualizer.show()

# Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassificationReport
from yellowbrick.model_selection import CVScores
from sklearn.model_selection import StratifiedKFold
from yellowbrick.model_selection import LearningCurve
from yellowbrick.classifier import ROCAUC
from yellowbrick.classifier import ConfusionMatrix
from yellowbrick.classifier import PrecisionRecallCurve
from yellowbrick.classifier import ClassPredictionError

# Fixed seed so the split and the fitted forest are the same on every
# Restart & Run All; without it the reported scores change from run to run.
SEED = 42

# Hold out 20% of the rows for the classification report.
splits = train_test_split(x, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = splits

RFCmodel = RandomForestClassifier(random_state=SEED)

# Display names for the rows of the report.
# NOTE(review): assumes home_outcome holds exactly two labels that sort as
# loss, then win - confirm against the data.
classes = ['loss', 'win']

# Classification Report
cr_visualizer = ClassificationReport(RFCmodel, classes=classes, support=True)

cr_visualizer.fit(X_train, y_train)        # Fit the visualizer and the model
cr_visualizer.score(X_test, y_test)        # Evaluate the model on the test data
cr_visualizer.show()

# Cross-Validation
# 12 stratified folds over the full x, y, scored with weighted F1.
cv = StratifiedKFold(n_splits = 12)

cv_visualizer = CVScores(RFCmodel, cv = cv, scoring = 'f1_weighted')

cv_visualizer.fit(x, y)
cv_visualizer.show()

# Gradient Boosting Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassificationReport
from yellowbrick.model_selection import CVScores
from sklearn.model_selection import StratifiedKFold
from yellowbrick.model_selection import LearningCurve
from yellowbrick.classifier import ROCAUC
from yellowbrick.classifier import ConfusionMatrix
from yellowbrick.classifier import PrecisionRecallCurve
from yellowbrick.classifier import ClassPredictionError

# Fixed seed so the split and the boosted model are the same on every
# Restart & Run All; without it the reported scores change from run to run.
SEED = 42

# Hold out 20% of the rows for the classification report.
splits = train_test_split(x, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = splits

GBCmodel = GradientBoostingClassifier(random_state=SEED)

# Display names for the rows of the report.
# NOTE(review): assumes home_outcome holds exactly two labels that sort as
# loss, then win - confirm against the data.
classes = ['loss', 'win']

# Classification Report
cr_visualizer = ClassificationReport(GBCmodel, classes=classes, support=True)

cr_visualizer.fit(X_train, y_train)        # Fit the visualizer and the model
cr_visualizer.score(X_test, y_test)        # Evaluate the model on the test data
cr_visualizer.show()

# Cross-Validation
# 12 stratified folds over the full x, y, scored with weighted F1.
cv = StratifiedKFold(n_splits = 12)

cv_visualizer = CVScores(GBCmodel, cv = cv, scoring = 'f1_weighted')

cv_visualizer.fit(x, y)
cv_visualizer.show()

# Voting Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassificationReport
from yellowbrick.model_selection import CVScores
from sklearn.model_selection import StratifiedKFold
from yellowbrick.model_selection import LearningCurve
from yellowbrick.classifier import ROCAUC
from yellowbrick.classifier import ConfusionMatrix

# Fixed seed so the split and every stochastic base estimator are the same on
# each Restart & Run All; without it the reported scores change from run to run.
SEED = 42

# Hold out 20% of the rows for the classification report.
splits = train_test_split(x, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = splits

# Base estimators combined by the voting ensemble.
DTCinput = DecisionTreeClassifier(criterion = 'gini', max_depth = 3, min_samples_split = 0.2, random_state = SEED)
GNBinput = GaussianNB()
RFCinput = RandomForestClassifier(random_state = SEED)
GBCinput = GradientBoostingClassifier(random_state = SEED)

# voting='hard' is kept from the original configuration.
VCmodel = VotingClassifier(estimators = [('dtc', DTCinput), ('gnb', GNBinput), ('rfc', RFCinput), ('gbc', GBCinput)], voting = 'hard')

# Display names for the rows of the report.
# NOTE(review): assumes home_outcome holds exactly two labels that sort as
# loss, then win - confirm against the data.
classes = ['loss', 'win']

# Classification Report
cr_visualizer = ClassificationReport(VCmodel, classes=classes, support=True)

cr_visualizer.fit(X_train, y_train)        # Fit the visualizer and the model
cr_visualizer.score(X_test, y_test)        # Evaluate the model on the test data
cr_visualizer.show()

# Cross-Validation
# 12 stratified folds over the full x, y, scored with weighted F1.
cv = StratifiedKFold(n_splits = 12)

cv_visualizer = CVScores(VCmodel, cv = cv, scoring = 'f1_weighted')

cv_visualizer.fit(x, y)
cv_visualizer.show()

# Bagging Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassificationReport
from yellowbrick.model_selection import CVScores
from sklearn.model_selection import StratifiedKFold
from yellowbrick.model_selection import LearningCurve
from yellowbrick.classifier import ROCAUC
from yellowbrick.classifier import ConfusionMatrix

# Fixed seed so the split and the bagging resamples are the same on every
# Restart & Run All; without it the reported scores change from run to run.
SEED = 42

# Hold out 20% of the rows for the classification report.
splits = train_test_split(x, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = splits

# Bagging ensemble built on Gaussian naive Bayes base estimators.
BCmodel = BaggingClassifier(GaussianNB(), random_state=SEED)

# Display names for the rows of the report.
# NOTE(review): assumes home_outcome holds exactly two labels that sort as
# loss, then win - confirm against the data.
classes = ['loss', 'win']

# Classification Report
cr_visualizer = ClassificationReport(BCmodel, classes=classes, support=True)

cr_visualizer.fit(X_train, y_train)        # Fit the visualizer and the model
cr_visualizer.score(X_test, y_test)        # Evaluate the model on the test data
cr_visualizer.show()

# Cross-Validation
# 12 stratified folds over the full x, y, scored with weighted F1.
cv = StratifiedKFold(n_splits = 12)

cv_visualizer = CVScores(BCmodel, cv = cv, scoring = 'f1_weighted')

cv_visualizer.fit(x, y)
cv_visualizer.show()

# AdaBoost Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassificationReport
from yellowbrick.model_selection import CVScores
from sklearn.model_selection import StratifiedKFold
from yellowbrick.model_selection import LearningCurve
from sklearn.ensemble import BaggingClassifier

# Fixed seed so the split, the booster and its base forest are the same on every
# Restart & Run All; without it the reported scores change from run to run.
SEED = 42

# Hold out 20% of the rows for the classification report.
splits = train_test_split(x, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = splits

# AdaBoost ensemble that boosts random-forest base estimators.
ABCmodel = AdaBoostClassifier(RandomForestClassifier(random_state=SEED), random_state=SEED)

# Display names for the rows of the report.
# NOTE(review): assumes home_outcome holds exactly two labels that sort as
# loss, then win - confirm against the data.
classes = ['loss', 'win']

# Classification Report
cr_visualizer = ClassificationReport(ABCmodel, classes=classes, support=True)

cr_visualizer.fit(X_train, y_train)        # Fit the visualizer and the model
cr_visualizer.score(X_test, y_test)        # Evaluate the model on the test data
cr_visualizer.show()

# Cross-Validation
# 12 stratified folds over the full x, y, scored with weighted F1.
cv = StratifiedKFold(n_splits = 12)

cv_visualizer = CVScores(ABCmodel, cv = cv, scoring = 'f1_weighted')

cv_visualizer.fit(x, y)
cv_visualizer.show()

# Stacking Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassificationReport
from yellowbrick.model_selection import CVScores
from sklearn.model_selection import StratifiedKFold
from yellowbrick.model_selection import LearningCurve
from sklearn.ensemble import BaggingClassifier

# Fixed seed so the split and every stochastic base estimator are the same on
# each Restart & Run All; without it the reported scores change from run to run.
SEED = 42

# Hold out 20% of the rows for the classification report.
splits = train_test_split(x, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = splits

# Base estimators whose predictions feed the final estimator.
DTCinput = DecisionTreeClassifier(random_state=SEED)
RFCinput = RandomForestClassifier(random_state=SEED)
GNBinput = GaussianNB()
GBCinput = GradientBoostingClassifier(random_state=SEED)
BCinput = BaggingClassifier(GaussianNB(), random_state=SEED)
ABCinput = AdaBoostClassifier(RandomForestClassifier(random_state=SEED), random_state=SEED)

estimators = [('dtc', DTCinput), ('gnb', GNBinput), ('rfc', RFCinput), ('gbc', GBCinput), ('bc', BCinput), ('abc', ABCinput)]

# Logistic regression combines the base estimators; cv=12 and
# stack_method='auto' are kept from the original configuration.
SCmodel = StackingClassifier(estimators = estimators, final_estimator = LogisticRegression(), cv = 12, stack_method = 'auto')

# Display names for the rows of the report.
# NOTE(review): assumes home_outcome holds exactly two labels that sort as
# loss, then win - confirm against the data.
classes = ['loss', 'win']

# Classification Report
cr_visualizer = ClassificationReport(SCmodel, classes=classes, support=True)

cr_visualizer.fit(X_train, y_train)        # Fit the visualizer and the model
cr_visualizer.score(X_test, y_test)        # Evaluate the model on the test data
cr_visualizer.show()

# Cross-Validation
# 12 stratified folds over the full x, y, scored with weighted F1.
cv = StratifiedKFold(n_splits = 12)

cv_visualizer = CVScores(SCmodel, cv = cv, scoring = 'f1_weighted')

cv_visualizer.fit(x, y)
cv_visualizer.show()

# Close Database Connection

In [None]:
# Release the connection, then dispose of the engine so the connections held by
# its pool are closed as well instead of lingering until the kernel exits.
dbConnection.close()
Engine.dispose()