In [1]:
import logging
from nba_betting.logging.tools import logger
logger.setLevel(logging.INFO)

In [2]:
from nba_betting.model.training import preprocess_games
from nba_betting.model.classes import NBA_MODELS

TRAIN_YEAR = 2018
TEST_YEAR = 2019

X_train, y_train = preprocess_games(TRAIN_YEAR)
X_test, y_test = preprocess_games(TEST_YEAR)

INFO:nba_betting:Processing data for year 2018
INFO:nba_betting:Retrieved data for year 2018.
Columns: Index(['PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PLUS_MINUS', 'GAMES_PLAYED', 'OPP_PTS', 'OPP_FGM', 'OPP_FGA',
       'OPP_FG_PCT', 'OPP_FG3M', 'OPP_FG3A', 'OPP_FG3_PCT', 'OPP_FTM',
       'OPP_FTA', 'OPP_FT_PCT', 'OPP_OREB', 'OPP_DREB', 'OPP_REB', 'OPP_AST',
       'OPP_STL', 'OPP_BLK', 'OPP_TOV', 'OPP_PF', 'OPP_PLUS_MINUS',
       'OPP_GAMES_PLAYED'],
      dtype='object')
Target: 0    L
1    L
2    W
3    W
4    L
Name: WL, dtype: object
INFO:nba_betting:Processing data for year 2019
INFO:nba_betting:Retrieved data for year 2019.
Columns: Index(['PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PLUS_MINUS', 'GAMES_PLAYED', 'OPP_PTS', 'OPP_FGM', 'OPP_FGA',
       'OPP_FG

## Train a plain sklearn VotingClassifier

In [3]:
from itertools import combinations_with_replacement
from typing import Any, Dict, List

import numpy as np
import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import GridSearchCV


models = [model() for model in NBA_MODELS.values()]
logger.info("Training ensemble with: %r", [model.model_name for model in models])

voting_cls = VotingClassifier(
    estimators=[(model.model_name, model) for model in models],
    voting="soft",
)

param_grid: Dict[str, List[Any]] = {
    "weights": list(
        set(
            tuple(np.divide(weight_combination, sum(weight_combination)))
            for weight_combination in combinations_with_replacement(
                [i for i in range(4)], len(models)
            )
            if sum(weight_combination) != 0
        )
    )
}
logger.info("Using grid search wih params: %r", param_grid)
grid_search: GridSearchCV = GridSearchCV(
    estimator=voting_cls, param_grid=param_grid, cv=5
)
grid_search = grid_search.fit(X_train, y_train)

logger.info(
    "Grid search complete. GridSearchCV best params: %r", grid_search.best_params_
)

logger.info("Training set score: %s", grid_search.score(X_train, y_train))
logger.info("Testing set score: %s", grid_search.score(X_test, y_test))


INFO:nba_betting:Training ensemble with: ['defence_logistic_regression', 'offence_logistic_regression', 'efficiency_logistic_regression']
INFO:nba_betting:Using grid search wih params: {'weights': [(0.14285714285714285, 0.42857142857142855, 0.42857142857142855), (0.2, 0.2, 0.6), (0.2, 0.4, 0.4), (0.0, 0.25, 0.75), (0.0, 0.4, 0.6), (0.25, 0.25, 0.5), (0.2857142857142857, 0.2857142857142857, 0.42857142857142855), (0.25, 0.375, 0.375), (0.3333333333333333, 0.3333333333333333, 0.3333333333333333), (0.16666666666666666, 0.3333333333333333, 0.5), (0.0, 0.3333333333333333, 0.6666666666666666), (0.0, 0.0, 1.0), (0.0, 0.5, 0.5)]}
INFO:nba_betting:Grid search complete. GridSearchCV best params: {'weights': (0.0, 0.4, 0.6)}
INFO:nba_betting:Training set score: 0.5545529122231337
INFO:nba_betting:Testing set score: 0.533470648815654


In [4]:
from itertools import combinations_with_replacement
from typing import Any, Dict, List

import numpy as np
import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import Ridge
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures, RobustScaler
from sklearn.pipeline import Pipeline

from nba_betting.model.unsupervised_ensemble import UnsupervisedEnsemble


models = [model() for model in NBA_MODELS.values()]
logger.info("Training ensemble with: %r", [model.model_name for model in models])

correction_model = Pipeline(
    [
        ('scale', RobustScaler()),
        ('poly', PolynomialFeatures(degree=3)),
        ('linear', Ridge(fit_intercept=False))
    ]
)
weight_learner = Pipeline(
    [
        ('scale', RobustScaler()),
        ('poly', PolynomialFeatures(degree=3)),
        ('linear', Ridge(fit_intercept=False))
    ]
)

unsupervised_ensemble = UnsupervisedEnsemble(
    models=[_() for _ in NBA_MODELS.values()],
    correction_learner=correction_model,
    weight_learner=weight_learner,
)

unsupervised_ensemble.fit(X_train, y_train)

logger.info("Training set score: %s", accuracy_score(y_train, unsupervised_ensemble.predict(X_train)))
logger.info("Testing set score: %s", accuracy_score(y_test, unsupervised_ensemble.predict(X_test)))


INFO:nba_betting:Training ensemble with: ['defence_logistic_regression', 'offence_logistic_regression', 'efficiency_logistic_regression']
INFO:nba_betting:Training set score: 0.9942575881870386
INFO:nba_betting:Testing set score: 0.5010298661174047


In [8]:
from itertools import combinations_with_replacement
from typing import Any, Dict, List

import numpy as np
import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import Ridge
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures, RobustScaler
from sklearn.pipeline import Pipeline

from nba_betting.model.unsupervised_ensemble import UnsupervisedEnsemble


models = [model() for model in NBA_MODELS.values()]
logger.info("Training ensemble with: %r", [model.model_name for model in models])

correction_model = Pipeline(
    [
        ('scale', RobustScaler()),
        ('linear', Ridge(fit_intercept=False))
    ]
)
weight_learner = Pipeline(
    [
        ('scale', RobustScaler()),
        ('linear', Ridge(fit_intercept=False))
    ]
)

unsupervised_ensemble = UnsupervisedEnsemble(
    models=[_() for _ in NBA_MODELS.values()],
    correction_learner=correction_model,
    weight_learner=weight_learner,
)

unsupervised_ensemble.fit(X_train, y_train)

logger.info("Training set score: %s", accuracy_score(y_train, unsupervised_ensemble.predict(X_train)))
logger.info("Testing set score: %s", accuracy_score(y_test, unsupervised_ensemble.predict(X_test)))


INFO:nba_betting:Training ensemble with: ['defence_logistic_regression', 'offence_logistic_regression', 'efficiency_logistic_regression']
INFO:nba_betting:Training set score: 0.610746513535685
INFO:nba_betting:Testing set score: 0.5386199794026777


In [5]:
from itertools import combinations_with_replacement
from typing import Any, Dict, List

import numpy as np
import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import Ridge
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures, RobustScaler
from sklearn.pipeline import Pipeline

from nba_betting.model.unsupervised_ensemble import UnsupervisedEnsemble


models = [model() for model in NBA_MODELS.values()]
logger.info("Training ensemble with: %r", [model.model_name for model in models])

correction_model = Pipeline(
    [
        ('scale', RobustScaler()),
        ('poly', PolynomialFeatures(degree=2)),
        ('linear', Ridge(fit_intercept=False))
    ]
)
weight_learner = Pipeline(
    [
        ('scale', RobustScaler()),
        ('poly', PolynomialFeatures(degree=2)),
        ('linear', Ridge(fit_intercept=False))
    ]
)

unsupervised_ensemble = UnsupervisedEnsemble(
    models=[_() for _ in NBA_MODELS.values()],
    correction_learner=correction_model,
    weight_learner=weight_learner,
)

unsupervised_ensemble.fit(X_train, y_train)

logger.info("Training set score: %s", accuracy_score(y_train, unsupervised_ensemble.predict(X_train)))
logger.info("Testing set score: %s", accuracy_score(y_test, unsupervised_ensemble.predict(X_test)))


INFO:nba_betting:Training ensemble with: ['defence_logistic_regression', 'offence_logistic_regression', 'efficiency_logistic_regression']
INFO:nba_betting:Training set score: 0.77850697292863
INFO:nba_betting:Testing set score: 0.5597322348094748


In [6]:
from itertools import combinations_with_replacement
from typing import Any, Dict, List

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures, RobustScaler
from sklearn.pipeline import Pipeline

from nba_betting.model.unsupervised_ensemble import UnsupervisedEnsemble


models = [model() for model in NBA_MODELS.values()]
logger.info("Training ensemble with: %r", [model.model_name for model in models])

correction_model = Pipeline(
    [
        ('scale', RobustScaler()),
        ('forest', RandomForestRegressor())
    ]
)
weight_learner = Pipeline(
    [
        ('scale', RobustScaler()),
        ('forest', RandomForestRegressor())
    ]
)

unsupervised_ensemble = UnsupervisedEnsemble(
    models=[_() for _ in NBA_MODELS.values()],
    correction_learner=correction_model,
    weight_learner=weight_learner,
)

unsupervised_ensemble.fit(X_train, y_train)

logger.info("Training set score: %s", accuracy_score(y_train, unsupervised_ensemble.predict(X_train)))
logger.info("Testing set score: %s", accuracy_score(y_test, unsupervised_ensemble.predict(X_test)))


INFO:nba_betting:Training ensemble with: ['defence_logistic_regression', 'offence_logistic_regression', 'efficiency_logistic_regression']
INFO:nba_betting:Training set score: 0.9942575881870386
INFO:nba_betting:Testing set score: 0.5659114315139032


In [10]:
from itertools import combinations_with_replacement
from typing import Any, Dict, List

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures, RobustScaler
from sklearn.pipeline import Pipeline

from nba_betting.model.unsupervised_ensemble import UnsupervisedEnsemble


models = [model() for model in NBA_MODELS.values()]
logger.info("Training ensemble with: %r", [model.model_name for model in models])

correction_model = GridSearchCV(
    estimator=RandomForestRegressor(),
    cv=3,
    param_grid={
        "n_estimators": [10, 50, 100, 200],
        "max_depth": [1, 2, 3],
        "ccp_alpha": [0, 0.05, 0.1],
    },
    n_jobs=4,
)
weight_learner = GridSearchCV(
    estimator=RandomForestRegressor(),
    cv=3,
    param_grid={
        "n_estimators": [10, 50, 100, 200],
        "max_depth": [1, 2, 3],
        "ccp_alpha": [0, 0.05, 0.1],
    },
    n_jobs=4,
)

unsupervised_ensemble = UnsupervisedEnsemble(
    models=[_() for _ in NBA_MODELS.values()],
    correction_learner=correction_model,
    weight_learner=weight_learner,
)

unsupervised_ensemble.fit(X_train, y_train)

logger.info("Training set score: %s", accuracy_score(y_train, unsupervised_ensemble.predict(X_train)))
logger.info("Testing set score: %s", accuracy_score(y_test, unsupervised_ensemble.predict(X_test)))


INFO:nba_betting:Training ensemble with: ['defence_logistic_regression', 'offence_logistic_regression', 'efficiency_logistic_regression']
INFO:nba_betting:Training set score: 0.6312551271534045
INFO:nba_betting:Testing set score: 0.5396498455200824
