In [117]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

data = pd.read_csv("mvps.csv", index_col=0)

In [118]:
data

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,TOV,PF,PTS,Year,W,L,W/L%,GB,SRS,MVP Votes
0,A.C. Green,PF,27,LAL,82,21,26.4,3.1,6.6,0.476,...,1.2,1.4,9.1,1991,58,24,0.707,5.0,6.73,0.0
1,Byron Scott,SG,29,LAL,82,82,32.1,6.1,12.8,0.477,...,1.0,1.8,14.5,1991,58,24,0.707,5.0,6.73,0.0
2,Elden Campbell,PF,22,LAL,52,0,7.3,1.1,2.4,0.455,...,0.3,1.4,2.8,1991,58,24,0.707,5.0,6.73,0.0
3,Irving Thomas,PF,25,LAL,26,0,4.2,0.7,1.9,0.340,...,0.5,0.9,1.8,1991,58,24,0.707,5.0,6.73,0.0
4,James Worthy,SF,29,LAL,78,74,38.6,9.2,18.7,0.492,...,1.6,1.5,21.4,1991,58,24,0.707,5.0,6.73,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14087,Spencer Hawes,PF,28,MIL,54,1,14.8,2.5,5.1,0.484,...,0.9,1.4,6.2,2017,42,40,0.512,9.0,-0.45,0.0
14088,Steve Novak,PF,33,MIL,8,0,2.8,0.3,0.9,0.286,...,0.0,0.1,0.6,2017,42,40,0.512,9.0,-0.45,0.0
14089,Terrence Jones,PF,25,MIL,54,12,23.5,4.3,9.1,0.470,...,0.9,1.2,10.8,2017,42,40,0.512,9.0,-0.45,0.0
14090,Thon Maker,C,19,MIL,57,34,9.9,1.5,3.2,0.459,...,0.3,1.5,4.0,2017,42,40,0.512,9.0,-0.45,0.0


In [119]:
PREDICTORS = [p for p in data.columns if p not in ["Player", "Tm", "Pos", "MVP Votes", "Year"]]
TARGET = "MVP Votes"

scaler = StandardScaler()
data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])

split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]

In [127]:
len(PREDICTORS)

31

In [120]:
def mse(actual, predicted):
    return np.mean((actual-predicted)**2)

def mse_grad(actual, predicted):
    return (predicted - actual)

In [121]:
def init_layers(inputs):
    layers = []
    for i in range(1, len(inputs)):
        layers.append([
            np.random.rand(inputs[i-1], inputs[i]) / 5 - .1,
            np.ones((1,inputs[i]))
        ])
    return layers

In [122]:
def forward(batch, layers):
    hidden = [batch.copy()]
    for i in range(len(layers)):
        batch = np.matmul(batch, layers[i][0]) + layers[i][1]
        hidden.append(batch.copy())
        if i < len(layers) - 1:
            batch = np.maximum(batch, 0)
        
    return layers, batch, hidden

In [125]:
def backward(layers, hidden, grad, lr):
    for i in range(len(layers)-1, -1, -1):
        print(f"Layer {i}")
        if i != len(layers) - 1:
            grad = np.multiply(grad, np.heaviside(hidden[i+1], 1))
        
        grad = grad.T
        w_grad = np.matmul(grad, hidden[i]).T
        b_grad = grad.T
        
        layers[i][0] -= (w_grad + layers[i][0] * .01) * lr
        layers[i][1] -= b_grad * lr
        
        grad = np.matmul(layers[i][0], grad).T
    return layers

## Input

1,31

## Outputs

1,25
1,25
1,10
1,1

## Weights

31,25
25,25
25,10
10,1

In [124]:
layer_conf = [len(PREDICTORS),25,25,10,1]
lr = 2e-3
epochs=150

layers = init_layers(layer_conf)

for epoch in range(epochs+1):
    epoch_loss = 0

    for i, row in enumerate(train_x):
        row = row.copy().reshape((1,train_x.shape[1]))
        layers, pred, hidden = forward(row, layers)

        loss = mse_grad(train_y[i,0], pred[0,0])
        epoch_loss += loss ** 2

        layers = backward(layers, hidden, np.array(loss).reshape(1,1), lr/train_x.shape[0])
    
    if epoch % 10 == 0:
        _, valid_preds, _ = forward(valid_x, layers)
        
        print(f"{epoch} Train: {epoch_loss/train_x.shape[0]} Valid: {mse(valid_preds,valid_y)}")

0 Train: 4128.774670998684 Valid: 4249.708262993707
10 Train: 4117.575207061994 Valid: 4237.858833145232
20 Train: 4106.327294795145 Valid: 4225.761806960189
30 Train: 4092.155593829098 Valid: 4209.827975231823
40 Train: 4013.307422126987 Valid: 4115.69234151883
50 Train: 3643.752076622798 Valid: 3710.7376805342255
60 Train: 2747.351484633631 Valid: 2762.8483198174517
70 Train: 2135.0136452069582 Valid: 2018.1166520829229
80 Train: 1897.7670490074024 Valid: 1605.8168320528596
90 Train: 1766.3985398259247 Valid: 1403.487528818823
100 Train: 1686.8170039463964 Valid: 1355.6076306771417
110 Train: 1610.1019969181593 Valid: 1393.9442127672996
120 Train: 1564.1540836417548 Valid: 1409.47644696942
130 Train: 1513.679212457258 Valid: 1359.8473807505782
140 Train: 1469.0184524561232 Valid: 1341.8936802452797
150 Train: 1425.6213487045584 Valid: 1285.1308786375898


In [113]:
_, test_preds, _ = forward(test_x, layers)
mse(test_preds,test_y)

2634.684486894264

In [116]:
test = data.iloc[-len(test_x):].reset_index()
combined = pd.concat([test, pd.DataFrame(test_preds)], axis=1)
combined.columns = list(combined.columns[:-1]) + ["Pred Pts Won"]
combined.sort_values("Pts Won", ascending=False)

Unnamed: 0,index,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,PF,PTS,Year,W,L,W/L%,GB,SRS,Pts Won,Pred Pts Won
1805,13783,Kevin Garnett,PF,0.058795,MIN,1.171440,1.939334,1.880308,3.041269,2.741043,...,0.751097,2.662314,2004,1.428952,-1.250866,1.352002,-1.191189,1.305799,1219.0,454.762606
996,12974,LeBron James,PF,0.294511,MIA,0.932406,1.732928,1.732020,3.177385,2.352990,...,-0.577577,3.096417,2013,2.049151,-1.873114,1.981822,-1.191189,1.561916,1207.0,899.846397
202,12180,Derrick Rose,PG,-1.119785,CHI,1.131601,1.904933,1.682591,2.587551,2.762601,...,-0.215211,2.795884,2011,1.739052,-1.561990,1.666912,-1.191189,1.452464,1182.0,501.325824
1044,13022,LeBron James,SF,0.058795,MIA,0.374662,1.251312,1.692477,3.132013,2.590133,...,-0.456789,3.146506,2012,0.498653,-1.561990,1.287735,-1.191189,1.275153,1074.0,536.076789
356,12334,Tim Duncan,PF,-0.176921,SAS,1.131601,1.904933,1.870422,2.587551,2.223640,...,1.234252,2.512047,2003,1.584002,-1.406428,1.512671,-1.191189,1.259830,962.0,450.183500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,12692,DeSagana Diop,C,-1.119785,CLE,0.135628,-0.778353,-0.729553,-0.951449,-0.923894,...,0.267943,-0.994172,2004,-0.354121,0.538098,-0.447482,0.785303,-0.649002,0.0,0.198213
713,12691,Dajuan Wagner,SG,-1.591217,CLE,-0.342439,-0.743952,-0.423092,-0.270872,0.024678,...,0.147154,-0.292928,2004,-0.354121,0.538098,-0.447482,0.785303,-0.649002,0.0,0.090225
712,12690,Carlos Boozer,PF,-1.119785,CLE,0.892568,1.698526,1.405788,1.453256,1.102600,...,0.992675,1.209737,2004,-0.354121,0.538098,-0.447482,0.785303,-0.649002,0.0,-0.210942
711,12689,Zydrunas Ilgauskas,C,0.058795,CLE,1.131601,1.904933,0.951039,1.362513,1.512211,...,1.838195,1.493574,2003,-1.749570,1.938156,-1.861363,1.317436,-2.076248,0.0,-0.269048
