# Basic Games Regression
<a id="top"></a>

Treating games as the sum of each team's players stats.
0. [Helper Methods](#helpers)
1. [Score Differential Regressor](#score-diff)
    * [SVD regression](#score-diff-lr), [RFT regression](#score-diff-logr)
    * Only take raw game scores and betting lines
2. [Past Win% Differential Regressor](#winper)
    * [SVD regression](#winper-lr), [RFT regression](#winper-logr)
    * Take past win% over 10, 25, 50, 75, 100 games for each team
3. [Box Score Regressor](#boxscore)
    * Takes in overall box score for either team 

In [11]:
import pandas as pd
import numpy as np
import plotly.express as px

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR

In [12]:
pd.set_option('display.max_columns', 50)

## Helper Methods
[to top](#top)
<a id="helpers">

In [20]:
def SVGRegression(X_input,y_input):
    X_train, X_test, y_train, y_test = train_test_split(X_input, y_input, test_size=.1)

    print(f"X: {X_input.columns.tolist()}")
    print(f"y: {y_input.columns.tolist()}")
    
    reg = SVR().fit(X_train, np.ravel(y_train))
    y_pred = reg.predict(X_test)

    # The coefficients
    print("Coefficients: \n", reg.dual_coef_)
    # The mean squared error
    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
    # The coefficient of determination: 1 is perfect prediction
    print("Coefficient of determination: %.2f" % r2_score(y_test, y_pred))
    print()
    return 

## Win% Regressors
[to top](#top)

-  [linear regression](#winper-lr)
-  [logistic regression](#winper-logr)
<a id="winper">

In [36]:
df = pd.read_csv("data_proc/games_wp_master.csv")
df["SCORE_DIFF"] = df["TEAM1_SCORE"] - df["TEAM2_SCORE"]
df["T1_WON"] = -1* (df["WINNER"] - 2)
df["T2_WON"] = -1 * (df["T1_WON"] - 1)
# df["LOSER"] = ~ df["WINNER"]
df.dropna(inplace=True)
# display(df)
# display(df.columns)

df2 = pd.read_csv("data_proc/games_boxscores_master.csv")
df = df.merge(df2.reset_index(), on="GAME_ID")
df

Unnamed: 0.1,Unnamed: 0,GAME_ID,SBR_GAME_ID,DATE,TEAM1_ID_x,TEAM2_ID_x,TEAM1_SCORE_x,TEAM2_SCORE_x,WINNER_x,away_prob,away_spread,away_total,TEAM1_10wp,TEAM1_25wp,TEAM1_50wp,TEAM1_75wp,TEAM1_100wp,TEAM2_10wp,TEAM2_25wp,TEAM2_50wp,TEAM2_75wp,TEAM2_100wp,SCORE_DIFF,T1_WON,T2_WON,...,TEAM1_STL,TEAM1_BLK,TEAM1_TO,TEAM1_PF,TEAM1_PTS,TEAM1_PLUS_MINUS,TEAM2_FGM,TEAM2_FGA,TEAM2_FG_PCT,TEAM2_FG3M,TEAM2_FG3A,TEAM2_FG3_PCT,TEAM2_FTM,TEAM2_FTA,TEAM2_FT_PCT,TEAM2_OREB,TEAM2_DREB,TEAM2_REB,TEAM2_AST,TEAM2_STL,TEAM2_BLK,TEAM2_TO,TEAM2_PF,TEAM2_PTS,TEAM2_PLUS_MINUS
0,1399,21500895,20160301_nba_Atlanta_Golden State,20160301.0,1610612737,1610612744,105.0,109.0,2,0.338983,5.50,214.00,0.5,0.52,0.60,0.586667,0.61,0.9,0.88,0.92,0.880000,0.87,-4.0,0,1,...,9.0,5.0,17.0,17.0,105.0,-20.0,41.0,103.0,4.364,12.0,36.0,2.374,15.0,17.0,5.500,18.0,33.0,51.0,28.0,11.0,3.0,11.0,22.0,109.0,20.0
1,1425,21500929,20160305_nba_Atlanta_L.A. Clippers,20160305.0,1610612737,1610612746,107.0,97.0,1,0.338983,5.50,202.50,0.4,0.48,0.58,0.573333,0.62,0.7,0.72,0.66,0.720000,0.69,10.0,1,0,...,9.0,3.0,18.0,26.0,107.0,50.0,34.0,79.0,4.584,12.0,32.0,2.653,17.0,28.0,5.412,6.0,35.0,41.0,22.0,10.0,10.0,18.0,20.0,97.0,-50.0
2,1444,21500938,20160307_nba_Memphis_Cleveland,20160307.0,1610612763,1610612739,106.0,103.0,1,0.083333,15.00,198.50,0.7,0.76,0.60,0.560000,0.60,0.7,0.72,0.72,0.720000,0.73,3.0,1,0,...,16.0,8.0,16.0,26.0,106.0,15.0,36.0,80.0,4.312,7.0,29.0,1.554,24.0,29.0,4.490,13.0,36.0,49.0,23.0,9.0,7.0,25.0,22.0,103.0,-15.0
3,1477,21500974,20160312_nba_Memphis_Atlanta,20160312.0,1610612763,1610612737,83.0,95.0,2,0.090909,14.50,193.50,0.7,0.76,0.64,0.560000,0.59,0.5,0.52,0.56,0.600000,0.61,-12.0,0,1,...,10.0,5.0,16.0,14.0,83.0,-60.0,36.0,84.0,3.625,11.0,34.0,3.071,12.0,17.0,3.500,8.0,39.0,47.0,27.0,9.0,12.0,11.0,14.0,95.0,60.0
4,1485,21500985,20160313_nba_Milwaukee_Brooklyn,20160313.0,1610612749,1610612751,109.0,100.0,1,0.555556,-1.50,208.50,0.5,0.52,0.44,0.440000,0.49,0.3,0.24,0.28,0.320000,0.36,9.0,1,0,...,13.0,3.0,9.0,12.0,109.0,45.0,39.0,70.0,6.212,12.0,27.0,3.629,10.0,13.0,4.000,3.0,28.0,31.0,25.0,3.0,6.0,20.0,18.0,100.0,-45.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8103,9640,22300008,20231110_nba_Philadelphia_Detroit,20231110.0,1610612755,1610612765,114.0,106.0,1,0.794872,-8.50,224.50,0.7,0.72,0.68,0.720000,0.67,0.2,0.16,0.16,0.213333,0.22,8.0,1,0,...,13.0,5.0,8.0,23.0,114.0,40.0,40.0,86.0,5.408,8.0,28.0,2.076,18.0,22.0,3.589,11.0,37.0,48.0,28.0,3.0,5.0,16.0,26.0,106.0,-40.0
8104,9646,22300011,20231110_nba_New Orleans_Houston,20231110.0,1610612740,1610612745,101.0,104.0,2,0.439560,3.00,221.00,0.4,0.56,0.42,0.493333,0.49,0.7,0.44,0.32,0.320000,0.29,-3.0,0,1,...,6.0,6.0,18.0,26.0,101.0,-15.0,36.0,88.0,3.355,10.0,35.0,2.527,22.0,28.0,5.900,9.0,33.0,42.0,20.0,9.0,5.0,9.0,22.0,104.0,15.0
8105,9645,22300012,20231110_nba_Utah_Memphis,20231110.0,1610612762,1610612763,127.0,121.0,1,0.378072,5.00,230.25,0.2,0.32,0.38,0.386667,0.43,0.2,0.40,0.46,0.560000,0.55,6.0,1,0,...,10.0,12.0,19.0,17.0,127.0,30.0,46.0,101.0,4.159,19.0,48.0,3.126,10.0,13.0,3.000,14.0,23.0,37.0,23.0,5.0,5.0,11.0,23.0,121.0,-30.0
8106,9641,22300013,20231110_nba_Minnesota_San Antonio,20231110.0,1610612750,1610612759,117.0,110.0,1,0.749355,-7.25,225.00,0.6,0.56,0.54,0.520000,0.50,0.4,0.36,0.24,0.253333,0.31,7.0,1,0,...,6.0,4.0,18.0,19.0,117.0,35.0,43.0,92.0,4.505,12.0,34.0,2.807,12.0,19.0,2.278,7.0,31.0,38.0,29.0,9.0,8.0,14.0,21.0,110.0,-35.0


In [22]:
SVGRegression(df[['TEAM1_ID', 'TEAM2_ID', 'DATE', 'TEAM1_10wp', 'TEAM1_25wp', 'TEAM1_50wp',
       'TEAM1_75wp', 'TEAM1_100wp', 'TEAM2_10wp', 'TEAM2_25wp', 'TEAM2_50wp',
       'TEAM2_75wp', 'TEAM2_100wp']],
                df[["T1_WON"]])
                
SVGRegression(df[['TEAM1_ID', 'TEAM2_ID', 'DATE', 'TEAM1_10wp', 'TEAM1_25wp', 'TEAM1_50wp', 'TEAM1_75wp', 'TEAM1_100wp', 'TEAM2_10wp', 'TEAM2_25wp', 'TEAM2_50wp', 'TEAM2_75wp', 'TEAM2_100wp']],df[["T1_WON"]])

# fig = px.scatter(df, x="away_spread", y="SCORE_DIFF", hover_data=["SBR_GAME_ID"], trendline="ols")
# fig.show()


X: ['TEAM1_ID', 'TEAM2_ID', 'DATE', 'TEAM1_10wp', 'TEAM1_25wp', 'TEAM1_50wp', 'TEAM1_75wp', 'TEAM1_100wp', 'TEAM2_10wp', 'TEAM2_25wp', 'TEAM2_50wp', 'TEAM2_75wp', 'TEAM2_100wp']
y: ['T1_WON']
Coefficients: 
 [[-1.  1. -1. ... -1.  1. -1.]]
Mean squared error: 0.35
Coefficient of determination: -0.44

X: ['TEAM1_ID', 'TEAM2_ID', 'DATE', 'TEAM1_10wp', 'TEAM1_25wp', 'TEAM1_50wp', 'TEAM1_75wp', 'TEAM1_100wp', 'TEAM2_10wp', 'TEAM2_25wp', 'TEAM2_50wp', 'TEAM2_75wp', 'TEAM2_100wp']
y: ['T1_WON']
Coefficients: 
 [[ 1. -1. -1. ...  1.  1. -1.]]
Mean squared error: 0.36
Coefficient of determination: -0.46

