## Defensive Team Classifiers Stats

In [1]:
# some useful mysklearn package import statements and reloads
import importlib

import mysklearn.myutils
importlib.reload(mysklearn.myutils)
import mysklearn.myutils as myutils

# MyPyTable comes from mysklearn/mypytable.py; reload so edits to that file are picked up
import mysklearn.mypytable
importlib.reload(mysklearn.mypytable)
from mysklearn.mypytable import MyPyTable 

# classifiers come from mysklearn/myclassifiers.py; reload so edits to that file are picked up
import mysklearn.myclassifiers
importlib.reload(mysklearn.myclassifiers)
from mysklearn.myclassifiers import MyKNeighborsClassifier, MyDummyClassifier, MyNaiveBayesClassifier

import mysklearn.myevaluation
importlib.reload(mysklearn.myevaluation)
import mysklearn.myevaluation as myevaluation

In [2]:
import numpy as np

# 1. Load the raw defensive team data into a MyPyTable
table = MyPyTable()
table.load_from_file("data/Defensive_Team_Data.csv")

# 2. Feature columns handed to the classifiers
feature_names = [
    "drb_per_game",
    "stl_per_game",
    "blk_per_game",
    "dws",
    "dbpm",
    "drb_percent",
]

# 3. Impute missing entries (e.g. "NA") in each feature column with that
#    column's average so the features are numeric. This mutates `table`.
for feature in feature_names:
    table.replace_missing_values_with_column_average(feature)

# 4. Build X: one row per table row, restricted to the selected feature
#    columns and kept in the order given by `feature_names`
col_indices = list(map(table.column_names.index, feature_names))
X = [[row[i] for i in col_indices] for row in table.data]

# 5. Build y from the "voted" column, cast to int so labels are 0/1
#    rather than 0.0/1.0
y_col = table.get_column("voted")
y = list(map(int, y_col))

# 6. Preprocessing: normalized copy of the features
#    (discretized variant is currently disabled)
X_normalized = myutils.normalize_table(X)
#X_discretized = discretize_data(X, n_bins=10)

# Quick sanity report on what was loaded
print(f"Loaded {len(X)} samples using MyPyTable.")
print(f"Features: {feature_names}")
print(f"Sample Raw: {X[0]}")
print(f"Sample Norm: {X_normalized[0]}")
#print(f"Sample Disc: {X_discretized[0]}")

Loaded 24616 samples using MyPyTable.
Features: ['drb_per_game', 'stl_per_game', 'blk_per_game', 'dws', 'dbpm', 'drb_percent']
Sample Raw: [3.8, 0.8, 0.7, 1.7, 0.2, 21.0]
Sample Norm: [0.29230769230769227, 0.21621621621621623, 0.11666666666666665, 0.26732673267326734, 0.340958605664488, 0.21]


In [3]:
# Classifiers to compare. The Naive Bayes entry is currently commented out.
knn_classifier = MyKNeighborsClassifier(n_neighbors=5)
dummy_classifier = MyDummyClassifier()
# nb_classifier = MyNaiveBayesClassifier()

# Each entry: (display name, classifier instance, feature matrix it is given)
classifiers = [
    ("Dummy Classifier", dummy_classifier, X_normalized),
    ("kNN (k=5)", knn_classifier, X_normalized),
    #("Naive Bayes", nb_classifier, X_discretized)
]

# NOTE(review): X_normalized is computed from the full table before the
# split, so whatever statistics normalize_table uses include each fold's
# test rows. Consider normalizing inside the fold using training rows only
# -- confirm against myutils.normalize_table.

n_splits = 10
# Only the row indices are used below, so the same folds serve every feature
# matrix as long as the matrices share X's row order. The folds are walked
# once per classifier, so materialize them in case the splitter ever returns
# a one-shot iterator.
folds = list(
    myevaluation.stratified_kfold_split(
        X, y, n_splits=n_splits, shuffle=True, random_state=42
    )
)


def _run_cross_validation(clf, X_dataset, y_labels, cv_folds, pos_label=1):
    """Fit and score ``clf`` on every (train, test) index pair in ``cv_folds``.

    Args:
        clf: object exposing ``fit(X_train, y_train)`` and ``predict(X_test)``.
        X_dataset: list of feature rows, indexable by the fold indices.
        y_labels: list of labels parallel to ``X_dataset``.
        cv_folds: sequence of ``(train_indices, test_indices)`` pairs.
        pos_label: label treated as positive for precision/recall/F1.

    Returns:
        tuple: ``(scores, y_true, y_pred)`` where ``scores`` maps
        "accuracy", "precision", "recall" and "f1" to lists holding one
        value per fold, and ``y_true`` / ``y_pred`` are the test labels and
        predictions pooled over all folds (in fold order).

    Raises:
        ValueError: if ``cv_folds`` yields no folds.
    """
    scores = {"accuracy": [], "precision": [], "recall": [], "f1": []}
    pooled_true = []
    pooled_pred = []

    for train_idx, test_idx in cv_folds:
        X_train = [X_dataset[i] for i in train_idx]
        y_train = [y_labels[i] for i in train_idx]
        X_test = [X_dataset[i] for i in test_idx]
        y_test = [y_labels[i] for i in test_idx]

        # The classifier is re-fit on every fold
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        scores["accuracy"].append(myevaluation.accuracy_score(y_test, y_pred))
        scores["precision"].append(
            myevaluation.binary_precision_score(y_test, y_pred, pos_label=pos_label)
        )
        scores["recall"].append(
            myevaluation.binary_recall_score(y_test, y_pred, pos_label=pos_label)
        )
        scores["f1"].append(
            myevaluation.binary_f1_score(y_test, y_pred, pos_label=pos_label)
        )

        pooled_true.extend(y_test)
        pooled_pred.extend(y_pred)

    if not scores["accuracy"]:
        # Fail here with a clear message instead of a ZeroDivisionError when
        # the per-fold means are computed.
        raise ValueError("no cross-validation folds to evaluate")

    return scores, pooled_true, pooled_pred


for name, clf, X_dataset in classifiers:
    print(f"--- Evaluating {name} ---")

    fold_scores, all_y_true, all_y_pred = _run_cross_validation(clf, X_dataset, y, folds)
    accuracies = fold_scores["accuracy"]
    precisions = fold_scores["precision"]
    recalls = fold_scores["recall"]
    f1_scores = fold_scores["f1"]

    # Reported metrics are the unweighted mean of the per-fold scores
    print(f"Accuracy:  {sum(accuracies)/len(accuracies):.3f}")
    print(f"Precision: {sum(precisions)/len(precisions):.3f}")
    print(f"Recall:    {sum(recalls)/len(recalls):.3f}")
    print(f"F1 Score:  {sum(f1_scores)/len(f1_scores):.3f}")

    # The confusion matrix is built from predictions pooled over all folds,
    # so ratios derived from it can differ slightly from the fold means above.
    matrix = myevaluation.confusion_matrix(all_y_true, all_y_pred, labels=[0, 1])
    print("Confusion Matrix (0=No, 1=Yes):")
    print("     Pred 0   Pred 1")
    print(f"True 0:  {matrix[0][0]}      {matrix[0][1]}")
    print(f"True 1:  {matrix[1][0]}      {matrix[1][1]}")
    print("\n")

--- Evaluating Dummy Classifier ---
Accuracy:  0.931
Precision: 0.000
Recall:    0.000
F1 Score:  0.000
Confusion Matrix (0=No, 1=Yes):
     Pred 0   Pred 1
True 0:  22910      0
True 1:  1706      0


--- Evaluating kNN (k=5) ---
Accuracy:  0.942
Precision: 0.615
Recall:    0.428
F1 Score:  0.504
Confusion Matrix (0=No, 1=Yes):
     Pred 0   Pred 1
True 0:  22453      457
True 1:  977      729


