# All-NBA Team Classification Model

This notebook applies several classification algorithms (logistic regression, KNN, SVM, etc.) and evaluates their performance on the dataset.

In [19]:
import pandas as pd
import numpy as np 
import torch
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import sklearn
from sklearn import *
from sklearn.metrics import *
import pickle

## Load and analyze dataset

Preview features (player, games started, minutes played, etc.) and training examples (the players).

In [21]:
# Path to the CSV file holding the player statistics and the All-NBA label.
DATADIR = "DataSet.csv"

# Read the CSV into a DataFrame, then preview its first five rows.
dataSet = pd.read_csv(filepath_or_buffer=DATADIR)
dataSet.head(n=5)

Unnamed: 0,Player,G,GPnS%,GPnSround%,GS,MP,FG,FGA,FG%,3P,...,STL,BLK,TOV,PF,PTS,PER,WS,BPM,VORP,All-NBA?
0,Precious Achiuwa,61,0.055556,0.0556,4,12.1,2.0,3.7,0.544,0.0,...,0.3,0.5,0.7,1.5,5.0,14.2,1.3,-4.5,-0.5,0
1,Jaylen Adams,7,0.0,0.0,0,2.6,0.1,1.1,0.125,0.0,...,0.0,0.0,0.0,0.1,0.3,-6.5,-0.1,-19.8,-0.1,0
2,Steven Adams,58,0.805556,0.8056,58,27.7,3.3,5.3,0.614,0.0,...,0.9,0.7,1.3,1.9,7.6,15.1,4.0,-0.8,0.5,0
3,Bam Adebayo,64,0.888889,0.8889,64,33.5,7.1,12.5,0.57,0.0,...,1.2,1.0,2.6,2.3,18.7,22.7,8.8,4.7,3.6,0
4,LaMarcus Aldridge,26,0.319444,0.3194,23,25.9,5.4,11.4,0.473,1.2,...,0.4,1.1,1.0,1.8,13.5,15.7,1.1,-0.6,0.2,0


Look at the list of all columns (the features plus the `All-NBA?` target).

In [22]:
# Show the column labels currently present in dataSet.
dataSet.columns

Index(['Player', 'G', 'GPnS%', 'GPnSround%', 'GS', 'MP', 'FG', 'FGA', 'FG%',
       '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'TRB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS', 'PER', 'WS', 'BPM', 'VORP', 'All-NBA?'],
      dtype='object')

Remove the `Player` column from the dataset and store it in a NumPy array.

In [23]:
# Split the player names off from the remaining columns.
#
# DataFrame.pop removes the column from dataSet in place, so an unguarded
# call raises KeyError if this cell is executed a second time. The guard
# makes the cell safe to re-run: when 'Player' has already been removed,
# dataSet and the previously extracted `players` array are left untouched.
if 'Player' in dataSet.columns:
    players = np.array(dataSet.pop('Player'))

Examine the `players` NumPy array.

In [24]:
# Display the array of player names.
players

array(['Precious Achiuwa', 'Jaylen Adams', 'Steven Adams', ...,
       'Andre Drummond', 'Klay Thompson', 'Kyle Lowry'], dtype=object)

## Find correlation between features and target variable

Use a correlation matrix to determine which features correlate most strongly with the target variable (`All-NBA?`). Read the rightmost column (`All-NBA?`) against the feature names in the leftmost column to identify the strongest features.

In [26]:
# Pairwise Pearson correlation between every pair of columns in dataSet;
# the 'All-NBA?' column of the result gives each feature's correlation
# with the target.
dataSet.corr(method='pearson')

Unnamed: 0,G,GPnS%,GPnSround%,GS,MP,FG,FGA,FG%,3P,3PA,...,STL,BLK,TOV,PF,PTS,PER,WS,BPM,VORP,All-NBA?
G,1.0,0.644048,0.64405,0.6518,0.677836,0.563086,0.545842,0.312437,0.426092,0.407417,...,0.501245,0.31506,0.464435,0.537767,0.556157,0.373455,0.649935,0.418482,0.445407,0.27589
GPnS%,0.644048,1.0,1.0,0.996531,0.829198,0.773304,0.757392,0.232433,0.520975,0.517485,...,0.647228,0.424666,0.681339,0.592467,0.768619,0.431505,0.749425,0.425806,0.645637,0.454324
GPnSround%,0.64405,1.0,1.0,0.996531,0.8292,0.773305,0.757393,0.232436,0.520978,0.517487,...,0.64723,0.424663,0.681341,0.59247,0.768621,0.431507,0.749424,0.425807,0.645635,0.454319
GS,0.6518,0.996531,0.996531,1.0,0.823447,0.770902,0.755349,0.229701,0.51247,0.509921,...,0.649246,0.426319,0.68198,0.591923,0.766773,0.434229,0.761546,0.428671,0.660347,0.47153
MP,0.677836,0.829198,0.8292,0.823447,1.0,0.885644,0.8935,0.266607,0.66582,0.68045,...,0.764768,0.424174,0.769628,0.733838,0.881998,0.477032,0.698219,0.485237,0.591943,0.402598
FG,0.563086,0.773304,0.773305,0.770902,0.885644,1.0,0.978258,0.320324,0.630389,0.636975,...,0.680876,0.415161,0.833224,0.608996,0.990561,0.626358,0.766775,0.546805,0.745419,0.551768
FGA,0.545842,0.757392,0.757393,0.755349,0.8935,0.978258,1.0,0.187148,0.714527,0.737432,...,0.699897,0.320167,0.844348,0.580166,0.983143,0.540082,0.693233,0.483639,0.691801,0.524672
FG%,0.312437,0.232433,0.232436,0.229701,0.266607,0.320324,0.187148,1.0,-0.030192,-0.091338,...,0.164176,0.419553,0.1566,0.353747,0.274616,0.739823,0.36468,0.674722,0.256115,0.127382
3P,0.426092,0.520975,0.520978,0.51247,0.66582,0.630389,0.714527,-0.030192,1.0,0.982699,...,0.49332,-0.013838,0.509614,0.326612,0.686357,0.263852,0.411678,0.348323,0.427656,0.297344
3PA,0.407417,0.517485,0.517487,0.509921,0.68045,0.636975,0.737432,-0.091338,0.982699,1.0,...,0.510657,-0.015015,0.543764,0.339993,0.693867,0.223661,0.383742,0.305194,0.411805,0.297626
