## Import Libraries, Class, and Functions

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import random
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.inspection import permutation_importance

In [3]:
class BasketballNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BasketballNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [4]:
def ordinal_encode(column, data):
    mapping = {value: idx for idx, value in enumerate(data[column].unique())}
    data[column] = data[column].map(mapping)
    return data

def import_data():

    # Load the CSV data into a DataFrame
    data = pd.read_csv('nba-player-data.csv')
    
    # Remove the 'Rk' column
    if 'Rk' in data.columns:
        del data['Rk']
    
    # List of columns to be ordinally encoded
    encode = ["Player", "Pos", "Tm"]
    
    # Apply ordinal encoding to each column in 'encode'
    for each in encode:
        data = ordinal_encode(each, data)
    data = data.to_numpy() 
    temp_data = []
    for array in data:
        temp_array = []
        for item in array:
            temp_array.append(float(item))
        temp_data.append(temp_array)
    data = temp_data

    train_input = []
    train_output = []
    test_input = []
    test_output = []
    runs = len(data)
    while runs != 0:
        random_integer = random.randint(0, runs - 1)
        which_data = random.randint(0,4)
        if np.isnan(data[random_integer]).any():
                runs -= 1
                continue
        if which_data > 0:
            train_input.append(data[random_integer][0:-1])
            train_output.append(data[random_integer][-1])
        else:
            test_input.append(data[random_integer][0:-1])
            test_output.append(data[random_integer][-1])
            
        runs -= 1
        
    return train_input, train_output, test_input, test_output

## Import Data

In [6]:
# Import and preprocess data
train_input, train_output, test_input, test_output = import_data()
train_input = torch.tensor(train_input, dtype=torch.float32)
train_output = torch.tensor(train_output, dtype=torch.float32).view(-1, 1)
test_input = torch.tensor(test_input, dtype=torch.float32)
test_output = torch.tensor(test_output, dtype=torch.float32).view(-1, 1)

## Initialize Parameters 

In [8]:
# Parameters
input_size = train_input.shape[1]  # Number of features (e.g., stats)
hidden_size = 64  # Number of neurons in the hidden layer
output_size = 1  # Output is a single value representing points scored

## Create Model

In [10]:
# Create the model
model = BasketballNet(input_size, hidden_size, output_size)

In [11]:
# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

## Train Data 

Some key things to remember. Each epoche is training the full train data set. Loss is just how far off the actual output is from the forward propogation output that the network generates. 

In [14]:
# Training loop
num_epochs = 1000
train_loss = []
for epoch in range(num_epochs):
    # Forward pass
    #print(train_input)
    outputs = model(train_input)
    #print(outputs)
    loss = criterion(outputs, train_output)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if (epoch+1) % 100 == 0:
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        train_loss.append(loss.item())

Epoch [100/1000], Loss: 2.5137
Epoch [200/1000], Loss: 0.4487
Epoch [300/1000], Loss: 0.2672
Epoch [400/1000], Loss: 0.1904
Epoch [500/1000], Loss: 0.1456
Epoch [600/1000], Loss: 0.1134
Epoch [700/1000], Loss: 0.0889
Epoch [800/1000], Loss: 0.0680
Epoch [900/1000], Loss: 0.0525
Epoch [1000/1000], Loss: 0.0412


## Test the accuracy of the model

In [16]:
# Test the model
with torch.no_grad():
    predictions = model(test_input)

test_output = [float(each) for each in test_output]
predictions = [round(float(each), 2) for each in predictions]
    
# Print predictions and compare with actual values
#print("Predictions:", predictions)
#print("Actual:", test_output)

# Calculate and print Mean Absolute Error (MAE)
mae = sum(abs(to - pred) for to, pred in zip(test_output, predictions)) / len(test_output)

print(f"Mean Absolute Error (MAE): {mae}")

Predictions: [1.35, 16.88, 14.38, 7.45, 13.85, 27.37, 15.08, 14.66, 4.79, 14.74, 20.56, 3.67, 3.02, 2.24, 6.09, 20.56, 3.11, 14.61, 4.06, 9.56, 15.08, 17.24, 5.15, 3.74, 3.97, 16.38, 6.88, 13.85, 14.05, 9.84, 4.45, 1.18, 5.22, 7.54, 6.58, 2.43, 9.1, 2.73, 5.7, 0.24, 5.84, 4.14, 6.62, 12.98, 4.15, 6.61, 5.37, 13.31, 13.91, 2.88, 13.31, 4.29, 7.84, 7.38, 12.47, 2.2, 4.14, 23.84, 20.14, 24.16, 9.02, 7.01, 20.28, 3.24, 8.83, 11.08, 3.56, 5.37, 30.12, 6.93, 8.12, 2.88, 4.46, 7.52, 5.11, 7.58, 4.16, 3.62, 2.05, 11.21, 7.52, 3.78, 5.1, 4.57, 3.62, 2.63, 5.27, 19.64, 11.31, 3.34, 13.91, 5.69, 11.31, 16.55, 10.59, 14.94, 14.94, 14.61, 5.69, 14.76, 3.24, 13.32, 13.32, 0.98, 6.93, 7.73, 7.58, 7.73]
Actual: [1.399999976158142, 16.899999618530273, 14.0, 7.400000095367432, 14.0, 26.899999618530273, 15.600000381469727, 14.600000381469727, 5.5, 15.100000381469727, 19.899999618530273, 3.4000000953674316, 2.9000000953674316, 2.299999952316284, 5.900000095367432, 19.899999618530273, 3.200000047683716, 14

## Vizualizations

Confusion Matrix

In [None]:
test_output = [round(float(each), 0) for each in test_output]
predictions = [round(float(each), 0) for each in predictions]


# Compute confusion matrix
cm = confusion_matrix(test_output, predictions)

# Plot confusion matrix
sns.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()