<a href="https://colab.research.google.com/github/apurbaanik/ML_Final_Project_NBA/blob/main/ML_Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Machine Learning Final Project - NBA Prediction
Team: Anik Barua, Kashyapa Jayasekera

CS-UY 4563 B

Date: 11-16-2022

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import OneHotEncoder
%matplotlib inline

# Part 1: Import Data

In [None]:
# Read the NBA team statistics CSV straight from the project's GitHub repository
url = 'https://raw.githubusercontent.com/apurbaanik/ML_Final_Project_NBA/main/NBA_Team_Stats.csv'
df = pd.read_csv(filepath_or_buffer=url)
# Print the loaded table as a quick sanity check
print(df)

     No           Team    G   Min    Pts   Reb   Ast  Stl  Blk    To    Pf  \
0     1        Chicago  103  48.4   96.0  44.1  23.1  8.6  4.3  13.0  21.1   
1     2           Utah  102  48.3   98.6  40.8  24.7  7.6  4.8  14.7  24.3   
2     3        Phoenix   86  48.6   99.3  41.9  25.6  9.2  5.3  14.4  21.7   
3     4     L.A.Lakers   95  48.3  104.8  42.9  24.3  8.7  6.8  14.7  22.9   
4     5    San Antonio   91  48.4   92.5  44.1  21.9  6.2  6.9  15.3  21.2   
..   ..            ...  ...   ...    ...   ...   ...  ...  ...   ...   ...   
720  25     Sacramento   82  48.3  110.3  42.9  23.7  7.2  4.5  13.5  18.9   
721  26        Orlando   82  48.2  104.2  44.3  23.7  6.8  4.5  13.8  19.7   
722  27        Detroit   82  48.2  104.8  43.0  23.5  7.7  4.8  13.4  21.9   
723  28       Portland   82  48.1  106.2  42.9  22.9  8.0  4.5  13.7  21.1   
724  29  Oklahoma City   82  48.3  103.7  45.6  22.2  7.6  4.6  13.3  18.3   

     Dreb  Oreb  FGPct  3PPct  FTPct    Eff  Deff       Year  W

# Part 2: Preprocessing

In [None]:
# Preprocessing plan for the raw table:
# 1. Delete columns: No, Team, Year
# 2. Encode the Won column
# 3. Normalize

# Step 1: drop the No, Team and Year columns.
# errors='ignore' keeps this cell safe to re-run: `df` is overwritten here, so on a
# second execution the columns are already gone and a strict drop() would raise KeyError.
df = df.drop(columns=['No', 'Team', 'Year'], errors='ignore')
print(df)

       G   Min    Pts   Reb   Ast  Stl  Blk    To    Pf  Dreb  Oreb  FGPct  \
0    103  48.4   96.0  44.1  23.1  8.6  4.3  13.0  21.1  29.2  14.9  0.449   
1    102  48.3   98.6  40.8  24.7  7.6  4.8  14.7  24.3  29.5  11.3  0.483   
2     86  48.6   99.3  41.9  25.6  9.2  5.3  14.4  21.7  29.8  12.1  0.466   
3     95  48.3  104.8  42.9  24.3  8.7  6.8  14.7  22.9  29.7  13.2  0.480   
4     91  48.4   92.5  44.1  21.9  6.2  6.9  15.3  21.2  32.2  11.9  0.468   
..   ...   ...    ...   ...   ...  ...  ...   ...   ...   ...   ...    ...   
720   82  48.3  110.3  42.9  23.7  7.2  4.5  13.5  18.9  33.4   9.6  0.460   
721   82  48.2  104.2  44.3  23.7  6.8  4.5  13.8  19.7  35.2   9.1  0.434   
722   82  48.2  104.8  43.0  23.5  7.7  4.8  13.4  21.9  32.0  11.0  0.430   
723   82  48.1  106.2  42.9  22.9  8.0  4.5  13.7  21.1  32.5  10.4  0.443   
724   82  48.3  103.7  45.6  22.2  7.6  4.6  13.3  18.3  35.2  10.4  0.430   

     3PPct  FTPct    Eff  Deff  Won  
0    0.323  0.741  111.6 

In [None]:
# Feature matrix: the 16 statistic columns listed below; the target is the 'Won' column
feature_columns = [
    'G', 'Min', 'Pts', 'Reb', 'Ast', 'Stl', 'Blk', 'To',
    'Pf', 'Dreb', 'Oreb', 'FGPct', '3PPct', 'FTPct', 'Eff', 'Deff',
]
X = np.array(df[feature_columns])
# Double brackets keep y two-dimensional (one column), which OneHotEncoder expects
y = np.array(df[['Won']])

# One-hot encode y, then keep only the indicator column at index 1 so that y
# ends up as a single column with one row per sample.
# NOTE(review): which 'Won' category index 1 corresponds to depends on the values
# in the data — confirm it is the "won" class.
ohe = OneHotEncoder()
transformed = ohe.fit_transform(y)
dense_indicators = transformed.toarray()
y = dense_indicators[:, 1].reshape(X.shape[0], 1)

In [None]:
# L2-normalize the feature matrix X and print the result.
# NOTE(review): scikit-learn's Normalizer rescales each sample (row) independently,
# not each feature column — confirm this is the intended kind of normalization.
scaler = Normalizer(norm='l2')
X_Normalized = scaler.fit_transform(X)
print(X_Normalized)

[[ 0.52030187  0.24449136  0.48494154 ...  0.00374314  0.56374455
   0.0884008 ]
 [ 0.50714396  0.24014758  0.49023916 ...  0.0038185   0.57824355
   0.08700999]
 [ 0.44261081  0.25012658  0.51106109 ...  0.00384454  0.60267124
   0.06999427]
 ...
 [ 0.42150703  0.24776389  0.53870654 ...  0.00401974  0.59165194
  -0.0858435 ]
 [ 0.41717056  0.24470614  0.54028675 ...  0.00386646  0.59573991
  -0.09666147]
 [ 0.42137685  0.24820124  0.53288755 ...  0.00388489  0.58992758
  -0.09866385]]


In [None]:

# Two-stage split: first hold out 10% of all rows as the test set, then hold out
# 10% of the remaining rows as the validation set. The result is roughly
# 81% training, 9% validation and 10% test.
# NOTE(review): the split is performed on the raw X; X_Normalized is not used
# here — confirm whether the normalized features were meant to be split instead.
split_options = dict(test_size=0.1, random_state=10, shuffle=True)
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, **split_options)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, **split_options)

In [None]:
# Sanity check: print the shape of every split, one per line
for label, split in [
    ('X_train:', X_train),
    ('y_train:', y_train),
    ('X_val: \t', X_val),
    ('y_val: \t', y_val),
    ('X_test: ', X_test),
    ('y_test: ', y_test),
]:
    print(label + str(split.shape))

X_train:(586, 16)
y_train:(586, 1)
X_val: 	(66, 16)
y_val: 	(66, 1)
X_test: (73, 16)
y_test: (73, 1)


# Part 3: Models