In [None]:
# Notebook-wide switches, read by the option and hyperparameter cells further down.
GPU = False     # True -> add the GPU device settings to the LightGBM parameters
OPTUNA = False  # True -> run the Optuna search first; False -> reuse the saved hyperparameters

## LightGBM Model Testing

In [None]:
import os

import pandas as pd
import numpy as np

pd.set_option('display.max_columns', 500)

from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    roc_curve,
    confusion_matrix,
    classification_report,
)

from sklearn.model_selection import (
    StratifiedKFold, 
    TimeSeriesSplit,
)

import lightgbm as lgb
from lightgbm import (
    early_stopping,
    log_evaluation,
)
print('LGB version:', lgb.__version__)

import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances

import neptune.new as neptune
from neptune.new.integrations.lightgbm import (
    NeptuneCallback, 
    create_booster_summary,
)
import neptune.new.integrations.optuna as optuna_utils
from neptune.new.types import File

from tqdm import tqdm

import shap

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from src.common_functions import plot_confusion_matrix

from pathlib import Path  #for Windows/Linux compatibility
DATAPATH = Path(r'data')

import ipynbname

import warnings
warnings.simplefilter('ignore')


In [None]:
# File names are kept as constants because they are also logged with the Neptune run.
TRAIN_NAME = "train_selected.csv"
TEST_NAME = "test_selected.csv"

# Read both CSV files from DATAPATH.
train, test = (pd.read_csv(DATAPATH / file_name) for file_name in (TRAIN_NAME, TEST_NAME))


**Set up Neptune.ai experiment tracking**

In [None]:
LOGGING_NOTE = ""

# The API token is read from the environment so it never lives in the notebook.
try:
    NEPTUNE_API_TOKEN = os.environ['NEPTUNE_API_TOKEN']
except KeyError as err:
    # Catch only the "variable is missing" case (a bare `except` would also trap
    # KeyboardInterrupt and friends) and keep the original error as the cause.
    raise Exception('Set environment variable NEPTUNE_API_TOKEN') from err

PROJECT = "cmunch1/nba-prediction"
PROJECT_OPTUNA = "cmunch1/nba-optuna" #for 2nd run if hyperparameters are tuned

# ipynbname.name() returns the notebook name WITHOUT its extension; add it back so
# the entry matches the notebook file the same way the hard-coded names below do.
SOURCE = ipynbname.name() + ".ipynb"
SOURCE_SPLIT = "03_train_test_split.ipynb"
SOURCE_ENG = "05_feature_engineering.ipynb"
SOURCE_SEL = "06_feature_selection.ipynb"

# Main experiment run; the notebooks listed in source_files are attached to it.
run = neptune.init(
    project=PROJECT,
    source_files=[SOURCE,SOURCE_SPLIT,SOURCE_ENG,SOURCE_SEL],
    api_token=NEPTUNE_API_TOKEN,
)
# Callback handed to lgb.train in the training loop so it can log to `run`.
neptune_callback = NeptuneCallback(run=run)

**Logging Note**

In [None]:
# Free-text note and tag for this run.
run["note"] = LOGGING_NOTE
run["sys/tags"].add(["lightgbm"])

# Record which files the train / test frames were read from.
for split_name, file_name in (("train", TRAIN_NAME), ("test", TEST_NAME)):
    run["dataset/" + split_name] = file_name

**Options**

In [None]:
# Record the notebook-wide switches with the run.
run["model/parameters/GPU"] = GPU
run["model/parameters/OPTUNA"] = OPTUNA

# Search settings are defined (and logged) only when a search will actually run.
if OPTUNA:
    OPTUNA_CV = "TimeSeriesSplit"  # the objective also accepts "StratifiedKFold"
    run["model/optuna/optuna_cv"] = OPTUNA_CV
    OPTUNA_FOLDS = 5
    run["model/optuna/optuna_folds"] = OPTUNA_FOLDS
    OPTUNA_TRIALS = 50
    run["model/optuna/optuna_trials"] = OPTUNA_TRIALS

# Settings of the final cross-validated training.
K_FOLDS = 5
run["model/parameters/k_folds"] = K_FOLDS
SEED = 13
run["model/parameters/seed"] = SEED
EARLY_STOPPING = 200
run["model/parameters/early_stopping"] = EARLY_STOPPING

LOG_EVALUATION = 100  # period passed to LightGBM's log_evaluation callback
VERBOSITY = 0

# LightGBM parameters that are never tuned.
STATIC_PARAMS = dict(
    seed=SEED,
    verbosity=-1,
    boosting_type='gbdt',
    objective='binary',
    metric='auc',
)

# Device settings merged in only when GPU is switched on.
GPU_PARAMS = dict(
    device='gpu',
    gpu_platform_id=0,
    gpu_device_id=0,
)

if GPU:
    STATIC_PARAMS = {**STATIC_PARAMS, **GPU_PARAMS}


**Fix Datatypes for smaller memory footprint**

In [None]:
def fix_datatypes(df):
    """Shrink the memory footprint of a games DataFrame by downcasting its columns.

    The frame is modified in place and also returned.

    - ``GAME_DATE_EST`` is parsed to datetime.
    - ``GAME_ID``, ``HOME_TEAM_ID``, ``VISITOR_TEAM_ID`` and ``SEASON`` become int32.
    - Every remaining int64 column is downcast to the smallest signed integer
      type that holds all of its values (int8 when they fit). A blanket
      ``astype('int8')`` would silently wrap anything outside [-128, 127].
    - Every float64 column becomes float16, unless its largest magnitude exceeds
      the float16 range, in which case float32 is used so values do not turn
      into ``inf``. float16 keeps only about three significant decimal digits.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the five columns named above.

    Returns
    -------
    pandas.DataFrame
        The same object, with the new dtypes.
    """
    df['GAME_DATE_EST'] = pd.to_datetime(df['GAME_DATE_EST'])

    long_integer_fields = ['GAME_ID', 'HOME_TEAM_ID', 'VISITOR_TEAM_ID', 'SEASON']

    #convert long integer fields to int32 from int64
    for field in long_integer_fields:
        df[field] = df[field].astype('int32')

    #downcast the remaining int64s to the smallest integer type that is safe
    for field in df.select_dtypes(include=['int64']).columns.tolist():
        df[field] = pd.to_numeric(df[field], downcast='integer')

    #convert float64s to float16s (float32 when float16 would overflow)
    float16_limit = np.finfo(np.float16).max
    for field in df.select_dtypes(include=['float64']).columns.tolist():
        # An all-NaN or empty column compares False here and stays float16.
        overflows = df[field].abs().max() > float16_limit
        df[field] = df[field].astype('float32' if overflows else 'float16')

    return df

# Downcast both frames (train first, then test).
train, test = fix_datatypes(train), fix_datatypes(test)


**Encode categoricals**

In [None]:
# To use the special categorical-feature capabilities in XGB and LGB, categoricals must be ints from 0 to N-1.
# For several features the conversion is a simple subtraction of the smallest value.
# (these capabilities may or may not be used, but encoding does not hurt anything)

def encode_categoricals(df, first_team_ID=None, first_season=None):
    """Re-base the team ID and season columns so that they start at zero.

    ``HOME_TEAM_ID`` and ``VISITOR_TEAM_ID`` are shifted by ``first_team_ID`` and
    ``SEASON`` by ``first_season``; the results are stored as int8. The frame is
    modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``HOME_TEAM_ID``, ``VISITOR_TEAM_ID`` and ``SEASON``.
    first_team_ID, first_season : optional
        Offsets to subtract. When omitted, each falls back to the minimum of
        ``HOME_TEAM_ID`` / ``SEASON`` in ``df`` itself (the original behavior).
        Pass the values taken from the training frame when encoding any other
        frame; otherwise two frames with different minima get different codes
        for the same team or season.

    Returns
    -------
    pandas.DataFrame
        The same object, with the three columns encoded.

    Raises
    ------
    ValueError
        If a shifted value does not fit in int8. The cast would otherwise wrap
        around silently.
    """
    if first_team_ID is None:
        first_team_ID = df['HOME_TEAM_ID'].min()
    if first_season is None:
        first_season = df['SEASON'].min()

    int8_range = np.iinfo(np.int8)
    offsets = (
        ('HOME_TEAM_ID', first_team_ID),
        ('VISITOR_TEAM_ID', first_team_ID),
        ('SEASON', first_season),
    )
    for field, offset in offsets:
        shifted = df[field] - offset
        if len(shifted) and (shifted.min() < int8_range.min or shifted.max() > int8_range.max):
            raise ValueError(field + " does not fit in int8 after subtracting " + str(offset))
        df[field] = shifted.astype('int8')

    return df

# NOTE(review): each call derives its offsets from the frame it is given, so the
# train and test codes only line up if both frames have the same smallest
# HOME_TEAM_ID and SEASON - confirm against the split used upstream.
train, test = encode_categoricals(train), encode_categoricals(test)



**Select Features**

In [None]:
# Pull the labels out before the frames are reduced to feature columns.
target, test_target = train['TARGET'], test['TARGET']

# Columns handed to LightGBM as categorical features.
category_columns = [
    'HOME_TEAM_ID',
    'VISITOR_TEAM_ID',
    'SEASON',
    'HOME_TEAM_WINS',
    'PLAYOFF',
    'CONFERENCE_x',
    'CONFERENCE_y',
]

# Everything except the label and the date / game identifier is a feature.
drop_columns = ['TARGET', 'GAME_DATE_EST', 'GAME_ID']
all_columns = train.columns.tolist()
use_columns = list(filter(lambda column: column not in drop_columns, all_columns))

# From here on `train` and `test` hold feature columns only.
train = train[use_columns]
test = test[use_columns]

run["model/features"].log(use_columns)



### Optuna Hyperparameter Tuning

In [None]:
def objective(trial):
    """Optuna objective: out-of-fold AUC of a LightGBM model for one trial.

    Samples one set of hyperparameters, trains one booster per fold of the
    splitter selected by ``OPTUNA_CV`` (with early stopping on the fold's
    validation set) and scores the collected out-of-fold predictions.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    float
        ROC AUC over the rows that were part of a validation fold.

    Raises
    ------
    ValueError
        If ``OPTUNA_CV`` is neither "StratifiedKFold" nor "TimeSeriesSplit".
    """
    # Every trial parameter is named after the LightGBM key it feeds, so that
    # study.best_params can be passed to LightGBM unchanged.
    lgb_params = {
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 0.5, log=True),
        "max_depth": trial.suggest_categorical("max_depth", [5, 10, 20, 40, 100, -1]),
        "n_estimators": trial.suggest_int("n_estimators", 50, 200000),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "num_leaves": trial.suggest_int("num_leaves", 2, 1000),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 300),
        "cat_smooth": trial.suggest_int("cat_smooth", 1, 100),
    }

    lgb_params = lgb_params | STATIC_PARAMS

    if OPTUNA_CV == "StratifiedKFold":
        kf = StratifiedKFold(n_splits=OPTUNA_FOLDS, shuffle=True, random_state=SEED)
    elif OPTUNA_CV == "TimeSeriesSplit":
        kf = TimeSeriesSplit(n_splits=OPTUNA_FOLDS)
    else:
        raise ValueError("Unsupported OPTUNA_CV: " + repr(OPTUNA_CV))

    train_oof = np.zeros((train.shape[0],))
    # TimeSeriesSplit never puts the earliest rows into a validation fold; track
    # which rows received a prediction so the untouched zeros are not scored.
    was_validated = np.zeros(train.shape[0], dtype=bool)

    for train_ind, val_ind in kf.split(train, target):

        train_df, val_df = train.iloc[train_ind], train.iloc[val_ind]
        # The splitter yields positions, so index the labels by position as well.
        train_target, val_target = target.iloc[train_ind], target.iloc[val_ind]

        train_lgbdataset = lgb.Dataset(train_df, label=train_target, categorical_feature=category_columns)
        val_lgbdataset = lgb.Dataset(val_df, label=val_target, reference=train_lgbdataset, categorical_feature=category_columns)

        model = lgb.train(
            lgb_params,
            train_lgbdataset,
            valid_sets=[val_lgbdataset],
            callbacks=[early_stopping(EARLY_STOPPING, verbose=False)],
        )

        train_oof[val_ind] = model.predict(val_df)
        was_validated[val_ind] = True

    return roc_auc_score(target.to_numpy()[was_validated], train_oof[was_validated])

In [None]:
def run_optuna():
    """Run the Optuna study and log it to a separate Neptune run.

    Opens a run in ``PROJECT_OPTUNA``, records the search settings, optimizes
    ``objective`` for ``OPTUNA_TRIALS`` trials, logs the study metadata and the
    best result, and shows the optimization-history and parameter-importance
    plots.

    Returns
    -------
    dict
        ``study.best_params``, keyed by the trial parameter names.
    """
    #log separate Neptune run for optuna hyperparameter tuning
    run2 = neptune.init(
        project=PROJECT_OPTUNA,
        source_files=[SOURCE,SOURCE_SPLIT,SOURCE_ENG,SOURCE_SEL],
        api_token=NEPTUNE_API_TOKEN,
    )
    # try/finally so the run is stopped even when a trial raises.
    try:
        run2["options/optuna_cv"] = OPTUNA_CV
        run2["options/optuna_folds"] = OPTUNA_FOLDS
        run2["options/optuna_trials"] = OPTUNA_TRIALS
        run2["options/GPU"] = GPU
        run2["features"].log(use_columns)
        run2["sys/tags"].add(["lightgbm",])

        study = optuna.create_study(direction='maximize')
        study.optimize(objective, n_trials=OPTUNA_TRIALS)

        optuna_utils.log_study_metadata(study, run2)

        print("Study Best Value:",study.best_value)
        print("Study Best Params:",study.best_params)

        # The plot helpers only build a figure; inside a function nothing is
        # rendered unless it is shown explicitly.
        plot_optimization_history(study).show()
        plot_param_importances(study).show()

        run2["best_value"] = study.best_value
        run2["best_params"] = study.best_params
        run2["static_params"] = STATIC_PARAMS

        return study.best_params
    finally:
        run2.stop()
    
    

**Set Hyperparameters**

In [None]:
if OPTUNA:
    tuned_params = run_optuna()
else:
    tuned_params = {
                #current best hyperparameters from previous tuning:
                'lambda_l1': 1.795338637297326e-08,
                'lambda_l2': 0.004705909102689521,
                'learning_rate': 0.0514056673966814,
                'max_depth': 100,
                'n_estimators': 136307,
                'feature_fraction': 0.707667657054092,
                'bagging_fraction': 0.5528109129804049,
                'bagging_freq': 1,
                'num_leaves': 316,
                'min_child_samples': 300,
                # The search sampled this value under the trial name
                # 'min_data_per_groups' but passed it to LightGBM as `cat_smooth`,
                # so it has to be stored under the key LightGBM actually reads.
                'cat_smooth': 88,
                }

# Results of a study that still uses the old trial name carry the same mismatch;
# translate the key so the tuned value is not dropped as an unknown parameter.
if 'min_data_per_groups' in tuned_params:
    tuned_params = dict(tuned_params)
    tuned_params['cat_smooth'] = tuned_params.pop('min_data_per_groups')

# Tuned values are layered on top of the fixed LightGBM settings.
lgb_params = STATIC_PARAMS | tuned_params

run["model/params"] = lgb_params


**Setup Results table**

Store key metrics for easy review later

In [None]:
#Load Simple Model results for later comparison
def SimpleModel(test, true):
    """Score the baseline that predicts 1 unless ``HOME_W_PCT_x`` is below 0.50.

    Parameters
    ----------
    test : pandas.DataFrame
        Frame containing the ``HOME_W_PCT_x`` column.
    true : array-like
        True binary labels, aligned with ``test``.

    Returns
    -------
    tuple
        ``(accuracy, auc)`` of the hard 0/1 baseline predictions.
    """
    # Vectorized form of "0 if x < 0.50 else 1"; a missing value still maps to 1.
    predict = (~(test['HOME_W_PCT_x'] < 0.50)).astype(int)
    acc_score = accuracy_score(true, predict)
    auc_score = roc_auc_score(true, predict)

    return acc_score , auc_score

acc_score , auc_score = SimpleModel(test, test_target)

#record metrics for easy comparison at the end
# (DataFrame.append was removed in pandas 2.0, so the table is started from its
# first row here and later cells add their rows to it.)
df = {'Label': 'Simple Model', 'Accuracy': acc_score, 'AUC': auc_score, 'Threshold':"N/A"}
results = pd.DataFrame([df])
 

### Train

In [None]:
def get_scores(target,preds):
    """Turn predicted probabilities into 0/1 predictions and score them.

    The cut-off is the ROC threshold that maximizes ``tpr - fpr`` for the labels
    passed in, so the threshold is fitted to this very data. Generally 0.5 works
    for balanced data. The scores are printed as well as returned.

    Parameters
    ----------
    target : array-like
        True binary labels.
    preds : numpy.ndarray
        Predicted probabilities of the positive class.

    Returns
    -------
    tuple
        ``(preds_binary, acc_score, auc_score, optimal_threshold)``.
    """
    #fpr = false positive rate, tpr = true positive rate
    fpr, tpr, thresholds = roc_curve(target,preds)
    optimal_idx = np.argmax(tpr - fpr)
    optimal_threshold = thresholds[optimal_idx]
    # roc_curve counts a sample as positive when score >= threshold; a strict `>`
    # would drop the sample that defines the threshold and leave the chosen
    # operating point.
    preds_binary = (preds >= optimal_threshold).astype(int)

    acc_score = accuracy_score(target, preds_binary)
    auc_score = roc_auc_score(target, preds)

    print()
    print("Scores:")
    print()
    print("Accuracy Score:", acc_score)
    print("AUC Score:", auc_score)
    print("Optimal Threshold:", optimal_threshold)

    return preds_binary, acc_score, auc_score, optimal_threshold

In [None]:
%%time

#initialize oof arrays including Shapley values
# (pred_contrib returns one column per feature plus one extra, hence the +1)
train_oof = np.zeros((train.shape[0],))
train_oof_shap = np.zeros((train.shape[0],train.shape[1]+1))


# K-fold cross validation

kf = StratifiedKFold(n_splits=K_FOLDS, shuffle=True, random_state=SEED)

for f, (train_ind, val_ind) in tqdm(enumerate(kf.split(train, target)), total=K_FOLDS):

    train_df, val_df = train.iloc[train_ind], train.iloc[val_ind]
    # The splitter yields positions, so index the labels by position as well.
    train_target, val_target = target.iloc[train_ind], target.iloc[val_ind]

    train_lgbdataset = lgb.Dataset(train_df, label=train_target, categorical_feature=category_columns)
    val_lgbdataset = lgb.Dataset(val_df, label=val_target, reference = train_lgbdataset, categorical_feature=category_columns)

    model =  lgb.train(lgb_params,
                       train_lgbdataset,
                       valid_sets=[val_lgbdataset],
                       callbacks=[log_evaluation(LOG_EVALUATION),
                                  early_stopping(EARLY_STOPPING,verbose=False),
                                  neptune_callback],
                      )

    temp_oof = model.predict(val_df)
    temp_oof_shap = model.predict(val_df, pred_contrib=True)

    train_oof[val_ind] = temp_oof
    train_oof_shap[val_ind, :] = temp_oof_shap

    # Per-fold scores are only printed; the composite below is what gets logged.
    temp_oof_binary, acc_score, auc_score, optimal_threshold = get_scores(val_target, temp_oof)


# Out-of-Fold composite for train data

train_oof_binary, acc_score, auc_score, optimal_threshold = get_scores(target,train_oof)

#neptune.ai logging

run["train/accuracy"] = acc_score
run["train/AUC"] = auc_score
run["train/optimal_threshold"] = optimal_threshold

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
df = {'Label': 'Train', 'Accuracy': acc_score, 'AUC': auc_score, 'Threshold':optimal_threshold}
results = pd.concat([results, pd.DataFrame([df])], ignore_index=True)

**OOF Confusion Matrix**

In [None]:
# Confusion matrix of the thresholded out-of-fold predictions.
# NOTE(review): confirm that ['win','lose'] is in the label order that
# plot_confusion_matrix expects for this matrix.
cm = confusion_matrix(target, train_oof_binary)
print(cm)

class_names = ['win','lose']
fig = plot_confusion_matrix(cm, class_names)
run["train/confusion_matrix"].upload(fig)


**OOF Classification report**

In [None]:
# Build the text report once, then log it and show it.
train_report = classification_report(target, train_oof_binary)
run["train/classification_report"] = train_report
print(train_report)

**Train Feature Importance via Split - the number of times a feature is used in the model**

**Train Feature Importance via Gain - the average gain of splits which use the feature**

In [None]:
# Gain-based importance of `model`, which at this point is the booster trained
# in the last cross-validation fold.
max_features = 25
max_title = f'Top {max_features} Feature importance - Gain'

fig, ax = plt.subplots(1, 1, figsize=(10, 10))
lgb.plot_importance(
    model,
    importance_type='gain',
    max_num_features=max_features,
    title=max_title,
    ax=ax,
)
run["train/feature_importance_gain"].upload(fig)

**OOF Feature Importance via Shapley values**

In [None]:
# summarize the effects of all the features
# summarize the effects of all the features
# (the last column of the contribution matrix is dropped so that its width
# matches the feature frame)
train_shap_features = train_oof_shap[:, :-1]
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
shap.summary_plot(train_shap_features, train)
run["train/shapley_summary"].upload(fig)

In [None]:
# Same out-of-fold contributions as a bar chart.
train_shap_features = train_oof_shap[:, :-1]
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
shap.summary_plot(train_shap_features, train[use_columns], plot_type="bar")
run["train/shapley_summary_bar"].upload(fig)

### Test Data Evaluation

In [None]:

# Predictions and contributions come from `model`, i.e. the booster left over
# from the last cross-validation fold.
test_preds = model.predict(test)
test_preds_shap = model.predict(test, pred_contrib=True)

# NOTE(review): get_scores derives the threshold from the labels it is given, so
# the accuracy below uses a cut-off fitted to the test labels themselves.
test_preds_binary, acc_score, auc_score, optimal_threshold = get_scores(test_target, test_preds)

run["test/accuracy"] = acc_score
run["test/AUC"] = auc_score
run["test/optimal_threshold"] = optimal_threshold

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
df = {'Label': 'Test', 'Accuracy': acc_score, 'AUC': auc_score, 'Threshold':optimal_threshold}
results = pd.concat([results, pd.DataFrame([df])], ignore_index=True)

**Test Confusion Matrix**

In [None]:
# Confusion matrix of the thresholded test predictions.
# NOTE(review): confirm that ['win','lose'] is in the label order that
# plot_confusion_matrix expects for this matrix.
cm = confusion_matrix(test_target, test_preds_binary)
print(cm)

class_names = ['win','lose']
fig = plot_confusion_matrix(cm, class_names)
run["test/confusion_matrix"].upload(fig)


**Test Classification report**

In [None]:
# Build the text report once, then log it and show it.
test_report = classification_report(test_target, test_preds_binary)
run["test/classification_report"] = test_report
print(test_report)

**Test Feature Importance via Shapley values**

For comparison with the cross-validation OOF Shapley values, to check that the model behaves on the test data in a similar manner as on the train data.

In [None]:
# summarize the effects of all the features
# summarize the effects of all the features
# (the last column of the contribution matrix is dropped so that its width
# matches the feature frame)
test_shap_features = test_preds_shap[:, :-1]
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
shap.summary_plot(test_shap_features, test)
run["test/shapley_summary"].upload(fig)

In [None]:
# Same test-set contributions as a bar chart.
test_shap_features = test_preds_shap[:, :-1]
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
shap.summary_plot(test_shap_features, test[use_columns], plot_type="bar")
run["test/shapley_summary_bar"].upload(fig)

## Model Evaluation

 - Compare Simple model predictions vs ML Test data predictions
 - Compare OOF/Train data vs Test/Validation data
 - Compare early season Test data vs later season Test data
 
 Feature importances via Shapley values are *local* to the given dataset and can assist in adversarial validation

**Split Test data**

In [None]:
# Reload the test file: the in-memory `test` no longer has the date column that
# the split needs.
test = encode_categoricals(fix_datatypes(pd.read_csv(DATAPATH / TEST_NAME)))

# Games before SPLIT go to test1, games on or after it to test2.
SPLIT = pd.to_datetime("2022-01-01")

# NOTE(review): these keys use "test_split_1" / "test_split_2", while the metrics
# written by process_splits are keyed by its label argument.
run["test_split_1/end_date"] = SPLIT
run["test_split_2/start_date"] = SPLIT

before_split = test['GAME_DATE_EST'] < SPLIT
from_split_on = test['GAME_DATE_EST'] >= SPLIT
test1, test2 = test[before_split], test[from_split_on]

# Labels first, then reduce each part to the model's feature columns.
test1_target, test2_target = test1['TARGET'], test2['TARGET']
test1, test2 = test1[use_columns], test2[use_columns]

**Early season results**

In [None]:
def process_splits(label, test, test_target, results):
    """Evaluate the global ``model`` on one part of the test data and log it.

    Logs accuracy, AUC, threshold, classification report and a Shapley bar plot
    to Neptune under ``"test_split_" + label``, and adds two rows to the results
    table: one for the model and one for ``SimpleModel`` on the same rows.

    Parameters
    ----------
    label : str
        Name of the split; used in the Neptune keys and the table labels.
    test : pandas.DataFrame
        Feature frame of the split.
    test_target : array-like
        True labels of the split.
    results : pandas.DataFrame
        Metrics table to extend. It is not modified; the extended table is
        returned.

    Returns
    -------
    tuple
        ``(test_preds_shap, results)``: the contribution matrix of the split
        and the extended metrics table.
    """
    test_preds = model.predict(test)
    test_preds_shap = model.predict(test, pred_contrib=True)

    test_preds_binary, acc_score, auc_score, optimal_threshold = get_scores(test_target, test_preds)

    run["test_split_" + label + "/accuracy"] = acc_score
    run["test_split_" + label + "/AUC"] = auc_score
    run["test_split_" + label + "/optimal_threshold"] = optimal_threshold

    model_row = {'Label': label, 'Accuracy': acc_score, 'AUC': auc_score, 'Threshold':optimal_threshold}

    report = classification_report(test_target, test_preds_binary)
    run["test_split_" + label + "/classification_report"] = report
    print(report)

    fig, ax = plt.subplots(1,1,figsize=(10,10))
    shap.summary_plot(test_preds_shap[:,:-1], test, plot_type="bar")
    run["test_split_" + label + "/shapley_summary_bar"].upload(fig)

    #Simple model applied to split
    acc_score, auc_score = SimpleModel(test, test_target)
    simple_row = {'Label': 'Simple-' + label, 'Accuracy': acc_score, 'AUC': auc_score, 'Threshold':"N/A"}

    # DataFrame.append was removed in pandas 2.0; add both rows with one concat.
    results = pd.concat([results, pd.DataFrame([model_row, simple_row])], ignore_index=True)

    return test_preds_shap, results

print("TEST1")
test_preds_shap1, results = process_splits('Test1',test1, test1_target, results)
print("TEST2")
test_preds_shap2, results = process_splits('Test2',test2, test2_target, results)


**Summary Table**

Key metrics from Simple Model, Train, Test, and Test-split

In [None]:
# Upload the metrics table as HTML, then display it as the cell output.
summary_html = File.as_html(results)
run["evaluation/summary_table"].upload(summary_html)
results

**Train vs Test Feature Importances via Shapley Values**

In [None]:
# Left: out-of-fold contributions on train. Right: contributions on the full test set.
fig = plt.figure(figsize=(10,5))
plt.subplot(1,2,1)
shap.summary_plot(train_oof_shap[:,:-1], train[use_columns], plot_type="bar", plot_size=None, show=False)
plt.subplot(1,2,2)
# test_preds_shap was computed on the whole test set, so it is paired with the
# whole test frame rather than with the test1 subset.
shap.summary_plot(test_preds_shap[:,:-1], test[use_columns], plot_type="bar", plot_size=None, show=False)
plt.tight_layout()
fig.show()
run["evaluation/test_train_shapley_summary_bar"].upload(fig)

**Test1 vs Test2 Feature Importances via Shapley Values**

In [None]:
# Left: contributions on test1. Right: contributions on test2.
fig = plt.figure(figsize=(10,5))
plt.subplot(1,2,1)
shap.summary_plot(test_preds_shap1[:,:-1], test1[use_columns], plot_type="bar", plot_size=None, show=False)
plt.subplot(1,2,2)
# The test2 contributions belong with the test2 features (was test1).
shap.summary_plot(test_preds_shap2[:,:-1], test2[use_columns], plot_type="bar", plot_size=None, show=False)
plt.tight_layout()
fig.show()
run["evaluation/test1_test2_shapley_summary_bar"].upload(fig)

**End experiment tracking**

In [None]:
#end experiment tracking: stop the main Neptune run opened in the setup cell
run.stop()