In [777]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import numpy as np
import seaborn as sns
import math
import pickle

In [778]:
'''
PTS: Points per game
FP: Fantasy points per game
FGA: Field goals attempted per game
FTM: Free throw makes per game
MIN: Minutes per game
rankings: NBA 2K rating
'''
# Importing CSV data into pandas dataframe.
# Forward slashes are used so the relative path resolves on every OS and the
# literal contains no invalid backslash escape ("\d").
df = pd.read_csv("../data/data.csv")
# Chose features that show linear behaviour when graphed against rankings
df = df[['PTS', 'FP', 'FGA', 'FTM', 'MIN', 'rankings']]
# dropna() returns a new frame; rebind it so rows with missing values are
# actually excluded from the arrays built in the following cells.
df = df.dropna()
# Bare expression so the cleaned frame is rendered as the cell output.
df

Unnamed: 0,PTS,FP,FGA,FTM,MIN,rankings
0,14.4,31.9,12.4,2.4,32.5,80.0
1,9.5,19.3,8.5,1.1,24.5,76.0
2,6.3,11.1,4.8,0.9,15.8,71.0
3,2.9,5.5,2.5,0.2,10.2,68.0
4,3.0,6.3,2.8,0.3,11.2,71.0
...,...,...,...,...,...,...
2407,6.8,14.7,6.1,1.3,17.0,73.0
2408,13.9,25.7,12.5,1.4,31.7,77.0
2409,10.1,18.8,8.8,1.9,24.7,73.0
2410,16.1,33.3,13.1,3.2,32.4,84.0


In [779]:
# Split the frame into the feature matrix (every column except the target)
# and the target vector ('rankings'), both as NumPy arrays.
feature_frame = df.drop(columns=['rankings'])
X = np.array(feature_frame)
y = np.array(df['rankings'])

In [780]:
# Standardise every feature column to zero mean and unit variance
# (StandardScaler does not clamp values to a fixed [-1, 1] range).
# NOTE(review): the scaler is fit on all rows, before the train/test split
# performed in the next cell, so test-set statistics influence the scaling.
scaler = preprocessing.StandardScaler().fit(X)
scaled_X = scaler.transform(X)

In [781]:
# Hold out half of the rows for evaluation. A fixed random_state makes the
# shuffle deterministic, so the split (and the metrics computed from it) is
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_X, y, test_size=0.5, random_state=42
)

In [782]:
# Fit a linear regression of the 2K rating on the scaled features, using the
# training half only. The bare fit() call is left as the last expression so
# the fitted estimator is rendered as the cell output.
clf = LinearRegression()
clf.fit(X=X_train, y=y_train)

LinearRegression()

In [783]:
# Compute various success metrics for regression on the held-out half.
# Predict once and reuse the result instead of running inference per metric.
y_pred = clf.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
# RMSE is reported in the same units as the target (rating points).
rmse = math.sqrt(mse)
print(f"R2: {r2}, MSE: {mse}, RMSE: {rmse}")

R2: 0.881547999455247, MSE: 3.959240909539362, RMSE: 1.9897841364176572


In [784]:
# Persist the fitted regressor and the scaler it was trained with, each to
# its own pickle file in the current working directory. Both are needed at
# inference time: new inputs must go through the same scaler first.
for artifact_path, artifact in (('model.pkl', clf), ('scaler.pkl', scaler)):
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)