In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [6]:
# Load and preprocess data
constructedData = pd.read_csv('../MAIN/Combining Data/combined_data.csv')
categoricalFeatures = ['pff_passCoverage', 'pff_manZone']
decisionTreeData = pd.get_dummies(constructedData, columns=categoricalFeatures)

# Define features and target
featureHeaders = ['totalDistanceTraveledByPossessionTeam', 'absoluteYardlineNumber', 'quarter', 'down', 'yardsToGo',
                  'pff_passCoverage_2-Man', 'pff_passCoverage_Bracket', 'pff_passCoverage_Cover 6-Left',
                  'pff_passCoverage_Cover-0', 'pff_passCoverage_Cover-1', 'pff_passCoverage_Cover-1 Double',
                  'pff_passCoverage_Cover-2', 'pff_passCoverage_Cover-3', 'pff_passCoverage_Cover-3 Cloud Left',
                  'pff_passCoverage_Cover-3 Cloud Right', 'pff_passCoverage_Cover-3 Double Cloud',
                  'pff_passCoverage_Cover-3 Seam', 'pff_passCoverage_Cover-6 Right', 'pff_passCoverage_Goal Line',
                  'pff_passCoverage_Miscellaneous', 'pff_passCoverage_Prevent', 'pff_passCoverage_Quarters',
                  'pff_passCoverage_Red Zone', 'pff_manZone_Man', 'pff_manZone_Other', 'pff_manZone_Zone']
X = decisionTreeData[featureHeaders]
y = decisionTreeData['isDropback']

# Train-test split
XTrain, XTest, yTrain, yTest = train_test_split(X, y, test_size=0.3, random_state=42)

In [25]:
# Decision Tree Classifier (Baseline)
print("=== Decision Tree Classifier ===")
clfObj = DecisionTreeClassifier(max_depth=3)
clf = clfObj.fit(XTrain, yTrain)
yPred = clf.predict(XTest)
print(f"Accuracy of Decision Tree: {int(accuracy_score(yTest, yPred)*100)}%")

# Random Forest Classifier
print("\n=== Random Forest Classifier ===")
rf = RandomForestClassifier(n_estimators=100, random_state=42, bootstrap=True)
rf.fit(XTrain, yTrain)
yPredRF = rf.predict(XTest)
print(f"Accuracy of Random Forest: {int(accuracy_score(yTest, yPredRF)*100)}%")

=== Decision Tree Classifier ===
Accuracy of Decision Tree: 63%

=== Random Forest Classifier ===
Accuracy of Random Forest: 65%


In [26]:
# Drop rows with NaN values
XTrain = XTrain.dropna()
yTrain = yTrain.loc[XTrain.index]
XTest = XTest.dropna()
yTest = yTest.loc[XTest.index]

# Gradient Boosting Classifier (e.g., XGBoost-like approach)
print("\n=== Gradient Boosting Classifier ===")
gb = GradientBoostingClassifier(random_state=42)
gb.fit(XTrain, yTrain)
yPredGB = gb.predict(XTest)
print(f"Accuracy of Gradient Boosting: {int(accuracy_score(yTest, yPredGB)*100)}%")

# Support Vector Machine (SVM)
print("\n=== Support Vector Machine ===")
svm = SVC(kernel='linear', random_state=42)  # Linear kernel for simplicity
svm.fit(XTrain, yTrain)
yPredSVM = svm.predict(XTest)
print(f"Accuracy of SVM: {int(accuracy_score(yTest, yPredSVM)*100)}%")


=== Gradient Boosting Classifier ===
Accuracy of Gradient Boosting: 67%

=== Support Vector Machine ===
Accuracy of SVM: 65%
