# Basic Games Regression
<a id="top"></a>

Treating games as the sum of each team's players stats.
0. [Helper Methods](#helpers)
1. [Score Differential Regressor](#score-diff)
    * [SVD regression](#score-diff-lr), [RFT regression](#score-diff-logr)
    * Only take raw game scores and betting lines
2. [Past Win% Differential Regressor](#winper)
    * [SVD regression](#winper-lr), [RFT regression](#winper-logr)
    * Take past win% over 10, 25, 50, 75, 100 games for each team
3. [Box Score Regressor](#boxscore)
    * Takes in overall box score for either team 

In [11]:
import pandas as pd
import numpy as np
import plotly.express as px

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR

In [12]:
pd.set_option('display.max_columns', 50)

## Helper Methods
[to top](#top)
<a id="helpers">

In [20]:
def SVGRegression(X_input,y_input):
    X_train, X_test, y_train, y_test = train_test_split(X_input, y_input, test_size=.1)

    print(f"X: {X_input.columns.tolist()}")
    print(f"y: {y_input.columns.tolist()}")
    
    reg = SVR().fit(X_train, np.ravel(y_train))
    y_pred = reg.predict(X_test)

    # The coefficients
    print("Coefficients: \n", reg.dual_coef_)
    # The mean squared error
    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
    # The coefficient of determination: 1 is perfect prediction
    print("Coefficient of determination: %.2f" % r2_score(y_test, y_pred))
    print()
    return 

## Win% Regressors
[to top](#top)

-  [linear regression](#winper-lr)
-  [logistic regression](#winper-logr)
<a id="winper">

In [30]:
df = pd.read_csv("data_proc/games_wp_master.csv")
df["SCORE_DIFF"] = df["TEAM1_SCORE"] - df["TEAM2_SCORE"]
df["T1_WON"] = -1* (df["WINNER"] - 2)
df["T2_WON"] = -1 * (df["T1_WON"] - 1)
# df["LOSER"] = ~ df["WINNER"]
df.dropna(inplace=True)
# display(df)
# display(df.columns)

df2 = pd.read_csv("data_proc/games_boxscores_master.csv")
df = df.merge(df2, on="GAME_ID").reset_index()
df2

Unnamed: 0,GAME_ID,TEAM1_ID,TEAM2_ID,TEAM1_SCORE,TEAM2_SCORE,WINNER,TEAM1_FGM,TEAM1_FGA,TEAM1_FG_PCT,TEAM1_FG3M,TEAM1_FG3A,TEAM1_FG3_PCT,TEAM1_FTM,TEAM1_FTA,TEAM1_FT_PCT,TEAM1_OREB,TEAM1_DREB,TEAM1_REB,TEAM1_AST,TEAM1_STL,TEAM1_BLK,TEAM1_TO,TEAM1_PF,TEAM1_PTS,TEAM1_PLUS_MINUS,TEAM2_FGM,TEAM2_FGA,TEAM2_FG_PCT,TEAM2_FG3M,TEAM2_FG3A,TEAM2_FG3_PCT,TEAM2_FTM,TEAM2_FTA,TEAM2_FT_PCT,TEAM2_OREB,TEAM2_DREB,TEAM2_REB,TEAM2_AST,TEAM2_STL,TEAM2_BLK,TEAM2_TO,TEAM2_PF,TEAM2_PTS,TEAM2_PLUS_MINUS
0,21600885,1610612737,1610612738,114.0,98.0,1,46.0,95.0,5.431,6.0,25.0,1.819,16.0,20.0,5.556,13.0,42.0,55.0,22.0,10.0,10.0,14.0,19.0,114.0,80.0,34.0,87.0,6.174,10.0,34.0,3.320,20.0,25.0,5.166,9.0,31.0,40.0,21.0,9.0,7.0,18.0,20.0,98.0,-80.0
1,21900278,1610612741,1610612757,103.0,107.0,2,36.0,94.0,4.096,14.0,38.0,2.860,17.0,25.0,4.950,11.0,36.0,47.0,24.0,8.0,2.0,11.0,22.0,103.0,-20.0,38.0,84.0,3.530,10.0,30.0,1.706,21.0,28.0,5.135,8.0,41.0,49.0,20.0,6.0,11.0,13.0,21.0,107.0,20.0
2,11200117,1610612756,1610612743,88.0,72.0,1,36.0,87.0,3.739,9.0,23.0,2.916,7.0,15.0,3.000,8.0,33.0,41.0,23.0,9.0,10.0,15.0,18.0,88.0,0.0,31.0,85.0,3.619,2.0,11.0,0.500,8.0,12.0,3.334,15.0,39.0,54.0,19.0,9.0,6.0,26.0,18.0,72.0,0.0
3,21400829,1610612738,1610612747,111.0,118.0,2,42.0,98.0,4.165,8.0,33.0,1.029,19.0,27.0,3.738,13.0,35.0,48.0,27.0,9.0,5.0,16.0,31.0,111.0,-35.0,43.0,84.0,4.660,7.0,18.0,2.100,25.0,38.0,7.084,6.0,40.0,46.0,22.0,8.0,4.0,17.0,27.0,118.0,35.0
4,41900313,1610612747,1610612743,106.0,114.0,2,,83.0,5.294,6.0,26.0,2.483,14.0,22.0,2.900,4.0,21.0,25.0,27.0,11.0,3.0,16.0,26.0,106.0,-40.0,,73.0,4.148,11.0,29.0,2.733,23.0,29.0,5.154,9.0,35.0,44.0,26.0,8.0,2.0,18.0,21.0,114.0,40.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15358,21800513,1610612752,1610612749,96.0,112.0,2,36.0,97.0,3.364,13.0,31.0,2.970,11.0,16.0,4.500,10.0,33.0,43.0,21.0,8.0,6.0,10.0,20.0,96.0,-80.0,42.0,93.0,3.538,11.0,32.0,3.150,17.0,21.0,4.714,11.0,46.0,57.0,20.0,4.0,11.0,15.0,14.0,112.0,80.0
15359,21700136,1610612737,1610612739,117.0,115.0,1,41.0,86.0,3.480,11.0,25.0,2.000,24.0,34.0,5.150,11.0,33.0,44.0,24.0,4.0,6.0,17.0,20.0,117.0,10.0,42.0,96.0,3.757,10.0,36.0,1.905,21.0,23.0,5.167,15.0,32.0,47.0,27.0,6.0,6.0,13.0,29.0,115.0,-10.0
15360,21400830,1610612755,1610612748,108.0,119.0,2,40.0,84.0,5.429,13.0,31.0,1.804,15.0,28.0,3.550,9.0,24.0,33.0,27.0,10.0,8.0,12.0,18.0,108.0,-55.0,46.0,83.0,5.350,11.0,21.0,2.800,16.0,22.0,3.800,11.0,34.0,45.0,27.0,6.0,3.0,16.0,20.0,119.0,55.0
15361,21600644,1610612758,1610612763,91.0,107.0,2,31.0,76.0,5.252,6.0,29.0,1.484,23.0,32.0,7.581,5.0,30.0,35.0,18.0,9.0,6.0,10.0,21.0,91.0,-80.0,38.0,83.0,3.802,11.0,28.0,2.900,20.0,25.0,5.550,15.0,43.0,58.0,21.0,6.0,4.0,15.0,24.0,107.0,80.0


In [22]:
SVGRegression(df[['TEAM1_ID', 'TEAM2_ID', 'DATE', 'TEAM1_10wp', 'TEAM1_25wp', 'TEAM1_50wp',
       'TEAM1_75wp', 'TEAM1_100wp', 'TEAM2_10wp', 'TEAM2_25wp', 'TEAM2_50wp',
       'TEAM2_75wp', 'TEAM2_100wp']],
                df[["T1_WON"]])
                
SVGRegression(df[['TEAM1_ID', 'TEAM2_ID', 'DATE', 'TEAM1_10wp', 'TEAM1_25wp', 'TEAM1_50wp', 'TEAM1_75wp', 'TEAM1_100wp', 'TEAM2_10wp', 'TEAM2_25wp', 'TEAM2_50wp', 'TEAM2_75wp', 'TEAM2_100wp']],df[["T1_WON"]])

# fig = px.scatter(df, x="away_spread", y="SCORE_DIFF", hover_data=["SBR_GAME_ID"], trendline="ols")
# fig.show()


X: ['TEAM1_ID', 'TEAM2_ID', 'DATE', 'TEAM1_10wp', 'TEAM1_25wp', 'TEAM1_50wp', 'TEAM1_75wp', 'TEAM1_100wp', 'TEAM2_10wp', 'TEAM2_25wp', 'TEAM2_50wp', 'TEAM2_75wp', 'TEAM2_100wp']
y: ['T1_WON']
Coefficients: 
 [[-1.  1. -1. ... -1.  1. -1.]]
Mean squared error: 0.35
Coefficient of determination: -0.44

X: ['TEAM1_ID', 'TEAM2_ID', 'DATE', 'TEAM1_10wp', 'TEAM1_25wp', 'TEAM1_50wp', 'TEAM1_75wp', 'TEAM1_100wp', 'TEAM2_10wp', 'TEAM2_25wp', 'TEAM2_50wp', 'TEAM2_75wp', 'TEAM2_100wp']
y: ['T1_WON']
Coefficients: 
 [[ 1. -1. -1. ...  1.  1. -1.]]
Mean squared error: 0.36
Coefficient of determination: -0.46

