# Previous Models

To find the best model, we went through multiple variations of random forest, linear regression, and other models. This notebook walks through some of those earlier models. For the final model that was used, please refer to `final_model.ipynb`.

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

## Random forest on all features with total distance

In [5]:
# Read the combined CSV and expand the two PFF coverage columns into indicator columns.
constructedData = pd.read_csv('Combining Data/combined_data.csv')
categoricalFeatures = ['pff_passCoverage', 'pff_manZone']
decisionTreeData = pd.get_dummies(constructedData, columns=categoricalFeatures)

# Model inputs, grouped by kind. The concatenation order below is the column order of X.
situationFeatures = [
    'totalDistanceTraveledByPossessionTeam',
    'absoluteYardlineNumber', 'quarter', 'down', 'yardsToGo',
]
passCoverageFeatures = [
    'pff_passCoverage_2-Man', 'pff_passCoverage_Bracket', 'pff_passCoverage_Cover 6-Left',
    'pff_passCoverage_Cover-0', 'pff_passCoverage_Cover-1', 'pff_passCoverage_Cover-1 Double',
    'pff_passCoverage_Cover-2', 'pff_passCoverage_Cover-3', 'pff_passCoverage_Cover-3 Cloud Left',
    'pff_passCoverage_Cover-3 Cloud Right', 'pff_passCoverage_Cover-3 Double Cloud',
    'pff_passCoverage_Cover-3 Seam', 'pff_passCoverage_Cover-6 Right', 'pff_passCoverage_Goal Line',
    'pff_passCoverage_Miscellaneous', 'pff_passCoverage_Prevent', 'pff_passCoverage_Quarters',
    'pff_passCoverage_Red Zone',
]
manZoneFeatures = ['pff_manZone_Man', 'pff_manZone_Other', 'pff_manZone_Zone']
featureHeaders = situationFeatures + passCoverageFeatures + manZoneFeatures

# Feature matrix and the 'isDropback' target column.
X = decisionTreeData[featureHeaders]
y = decisionTreeData['isDropback']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
XTrain, XTest, yTrain, yTest = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# Decision Tree Classifier (Baseline)
# A shallow tree (depth 3) fitted on the split produced by the preceding cell.
print("=== Decision Tree Classifier ===")
# Seed the tree like the forest and the split: scikit-learn permutes the candidate
# features at every split, so without random_state tied splits may resolve
# differently from run to run.
clfObj = DecisionTreeClassifier(max_depth=3, random_state=42)
clf = clfObj.fit(XTrain, yTrain)
yPred = clf.predict(XTest)
# Format with two decimals rather than int(...), which truncated the percentage
# (e.g. 63.9% was shown as 63%).
print(f"Accuracy of Decision Tree: {accuracy_score(yTest, yPred)*100:.2f}%")

# Random Forest Classifier
# 100 bootstrapped trees evaluated on the same held-out rows as the baseline.
print("\n=== Random Forest Classifier ===")
rf = RandomForestClassifier(n_estimators=100, random_state=42, bootstrap=True)
rf.fit(XTrain, yTrain)
yPredRF = rf.predict(XTest)
print(f"Accuracy of Random Forest: {accuracy_score(yTest, yPredRF)*100:.2f}%")

=== Decision Tree Classifier ===
Accuracy of Decision Tree: 63%

=== Random Forest Classifier ===
Accuracy of Random Forest: 65%


## Random Forest on all distances for each player position

### For all positions

In [8]:
# Load and preprocess data
constructedData = pd.read_csv('Combining Data/combined_data.csv')
categoricalFeatures = ['pff_passCoverage', 'pff_manZone']
decisionTreeData = pd.get_dummies(constructedData, columns=categoricalFeatures)

# Define features and target
# Pick up every per-position distance column by prefix so the feature set really
# covers all positions (including distance_RB, distance_T, distance_TE and
# distance_WR) instead of relying on a hand-typed list that can fall out of sync
# with the CSV.
distanceFeatures = [col for col in decisionTreeData.columns if col.startswith('distance_')]
featureHeaders = distanceFeatures + [
                  'absoluteYardlineNumber', 'quarter', 'down', 'yardsToGo',
                  'pff_passCoverage_2-Man', 'pff_passCoverage_Bracket', 'pff_passCoverage_Cover 6-Left',
                  'pff_passCoverage_Cover-0', 'pff_passCoverage_Cover-1', 'pff_passCoverage_Cover-1 Double',
                  'pff_passCoverage_Cover-2', 'pff_passCoverage_Cover-3', 'pff_passCoverage_Cover-3 Cloud Left',
                  'pff_passCoverage_Cover-3 Cloud Right', 'pff_passCoverage_Cover-3 Double Cloud',
                  'pff_passCoverage_Cover-3 Seam', 'pff_passCoverage_Cover-6 Right', 'pff_passCoverage_Goal Line',
                  'pff_passCoverage_Miscellaneous', 'pff_passCoverage_Prevent', 'pff_passCoverage_Quarters',
                  'pff_passCoverage_Red Zone', 'pff_manZone_Man', 'pff_manZone_Other', 'pff_manZone_Zone']
X = decisionTreeData[featureHeaders]
y = decisionTreeData['isDropback']

# Train-test split (30% held out, fixed seed so the split is repeatable)
XTrain, XTest, yTrain, yTest = train_test_split(X, y, test_size=0.3, random_state=42)

### For 3 most important positions

In [10]:
# Read the combined CSV and expand the two PFF coverage columns into indicator columns.
constructedData = pd.read_csv('Combining Data/combined_data.csv')
categoricalFeatures = ['pff_passCoverage', 'pff_manZone']
decisionTreeData = pd.get_dummies(constructedData, columns=categoricalFeatures)

# Model inputs, grouped by kind. The concatenation order below is the column order of X.
positionDistanceFeatures = ['distance_RB', 'distance_QB', 'distance_WR']
situationFeatures = ['absoluteYardlineNumber', 'quarter', 'down', 'yardsToGo']
passCoverageFeatures = [
    'pff_passCoverage_2-Man', 'pff_passCoverage_Bracket', 'pff_passCoverage_Cover 6-Left',
    'pff_passCoverage_Cover-0', 'pff_passCoverage_Cover-1', 'pff_passCoverage_Cover-1 Double',
    'pff_passCoverage_Cover-2', 'pff_passCoverage_Cover-3', 'pff_passCoverage_Cover-3 Cloud Left',
    'pff_passCoverage_Cover-3 Cloud Right', 'pff_passCoverage_Cover-3 Double Cloud',
    'pff_passCoverage_Cover-3 Seam', 'pff_passCoverage_Cover-6 Right', 'pff_passCoverage_Goal Line',
    'pff_passCoverage_Miscellaneous', 'pff_passCoverage_Prevent', 'pff_passCoverage_Quarters',
    'pff_passCoverage_Red Zone',
]
manZoneFeatures = ['pff_manZone_Man', 'pff_manZone_Other', 'pff_manZone_Zone']
featureHeaders = (
    positionDistanceFeatures
    + situationFeatures
    + passCoverageFeatures
    + manZoneFeatures
)

# Feature matrix and the 'isDropback' target column.
X = decisionTreeData[featureHeaders]
y = decisionTreeData['isDropback']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
XTrain, XTest, yTrain, yTest = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42,
)

### Random forest

In [11]:
# Random Forest Classifier
# NOTE: this cell trains on whatever XTrain/yTrain the most recently executed
# feature-definition cell produced, so run the intended feature cell first.
print("\n=== Random Forest Classifier ===")
# class_weight='balanced' weights each class inversely to its frequency in yTrain.
rf = RandomForestClassifier(n_estimators=100, random_state=42, bootstrap=True, class_weight='balanced')
rf.fit(XTrain, yTrain)
yPredRF = rf.predict(XTest)
# Format with two decimals rather than int(...), which truncated the percentage
# (e.g. 69.9% would have been shown as 69%).
print(f"Accuracy of Random Forest: {accuracy_score(yTest, yPredRF)*100:.2f}%")

# Per-class precision, recall, F1 and support on the held-out rows.
print(classification_report(yTest, yPredRF))


=== Random Forest Classifier ===
Accuracy of Random Forest: 69%
              precision    recall  f1-score   support

       False       0.63      0.54      0.58      1933
        True       0.72      0.79      0.76      2905

    accuracy                           0.69      4838
   macro avg       0.68      0.67      0.67      4838
weighted avg       0.69      0.69      0.69      4838



## Random forest on each player position

In [12]:
# Reload the combined CSV and one-hot encode the two PFF coverage columns.
constructedData = pd.read_csv('Combining Data/combined_data.csv')
categoricalFeatures = ['pff_passCoverage', 'pff_manZone']
decisionTreeData = pd.get_dummies(constructedData, columns=categoricalFeatures)

# Columns included in every run, alongside exactly one distance column.
base_features = [
    'absoluteYardlineNumber', 'quarter', 'down', 'yardsToGo',
    'pff_passCoverage_2-Man', 'pff_passCoverage_Bracket', 'pff_passCoverage_Cover 6-Left',
    'pff_passCoverage_Cover-0', 'pff_passCoverage_Cover-1', 'pff_passCoverage_Cover-1 Double',
    'pff_passCoverage_Cover-2', 'pff_passCoverage_Cover-3', 'pff_passCoverage_Cover-3 Cloud Left',
    'pff_passCoverage_Cover-3 Cloud Right', 'pff_passCoverage_Cover-3 Double Cloud',
    'pff_passCoverage_Cover-3 Seam', 'pff_passCoverage_Cover-6 Right', 'pff_passCoverage_Goal Line',
    'pff_passCoverage_Miscellaneous', 'pff_passCoverage_Prevent', 'pff_passCoverage_Quarters',
    'pff_passCoverage_Red Zone',
    'pff_manZone_Man', 'pff_manZone_Other', 'pff_manZone_Zone',
]

# Every column whose name begins with 'distance_', in the table's column order.
distance_features = list(filter(lambda name: name.startswith('distance_'), decisionTreeData.columns))
results = dict()  # distance column name -> test-set accuracy (as a fraction)

# Train and score one forest per distance column, using the same split settings each time.
for distance_feature in distance_features:
    print(f"\n=== Evaluating with feature: {distance_feature} ===")

    # The distance column comes first, followed by the shared base columns.
    featureHeaders = [distance_feature, *base_features]
    X = decisionTreeData[featureHeaders]
    y = decisionTreeData['isDropback']
    XTrain, XTest, yTrain, yTest = train_test_split(
        X, y,
        test_size=0.3,
        random_state=42,
    )

    rf = RandomForestClassifier(n_estimators=100, random_state=42, bootstrap=True)
    yPredRF = rf.fit(XTrain, yTrain).predict(XTest)  # fit() returns the fitted estimator
    accuracy = accuracy_score(yTest, yPredRF)

    print(f"Accuracy with {distance_feature}: {accuracy*100:.2f}%")
    results[distance_feature] = accuracy




=== Evaluating with feature: distance_C ===
Accuracy with distance_C: 65.65%

=== Evaluating with feature: distance_DT ===
Accuracy with distance_DT: 63.95%

=== Evaluating with feature: distance_FB ===
Accuracy with distance_FB: 64.10%

=== Evaluating with feature: distance_FS ===
Accuracy with distance_FS: 64.01%

=== Evaluating with feature: distance_G ===
Accuracy with distance_G: 64.41%

=== Evaluating with feature: distance_ILB ===
Accuracy with distance_ILB: 64.10%

=== Evaluating with feature: distance_OLB ===
Accuracy with distance_OLB: 64.12%

=== Evaluating with feature: distance_QB ===
Accuracy with distance_QB: 66.31%

=== Evaluating with feature: distance_RB ===
Accuracy with distance_RB: 66.85%

=== Evaluating with feature: distance_T ===
Accuracy with distance_T: 65.52%

=== Evaluating with feature: distance_TE ===
Accuracy with distance_TE: 65.87%

=== Evaluating with feature: distance_WR ===
Accuracy with distance_WR: 66.33%
