In [31]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix, precision_score
from sklearn.metrics import roc_curve, roc_auc_score

nba_ = pd.read_csv("/Users/ncapofari/Documents/CUNY_SPS/Capstone/%s" 
                  % "nba_scraper/data_sets/4factor_model_data.csv")

train_set, test_set = train_test_split(nba_, 
                                       test_size=0.2, 
                                       random_state=23)
train_setG1 = train_set[train_set['Seed_Diff'] > 1]

nba_train = train_setG1["Pyt_Win_Pct_Diff"].copy().values.reshape(-1, 1)
nba_labels = train_setG1["Won_Series"].copy()
nba_labels = (nba_labels == 1)

pipeline = Pipeline([
    ('std_scaler', StandardScaler())
])

nba_prepared = pipeline.fit_transform(nba_train, nba_labels)
svc_clf = SVC(C=5000.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
svc_clf.fit(nba_prepared, nba_labels)

test_setG1 = test_set[test_set['Seed_Diff'] > 1]
nba_test = test_setG1["Pyt_Win_Pct_Diff"].copy().values.reshape(-1, 1)

nba_test_labels = test_setG1["Won_Series"].copy()
nba_test_labels = (nba_test_labels == 1)

nba_test_prepared = pipeline.transform(nba_test)
y_test_pred = svc_clf.predict(nba_test_prepared)

print(confusion_matrix(nba_test_labels, y_test_pred))
print("precision score:", precision_score(nba_test_labels, y_test_pred))
fpr, tpr, thresholds = roc_curve(nba_test_labels, y_test_pred)
print("roc_auc:", roc_auc_score(nba_test_labels, y_test_pred))

[[ 5  1]
 [14 43]]
precision score: 0.9772727272727273
roc_auc: 0.7938596491228072


In [37]:
from sklearn.ensemble import RandomForestClassifier

train_setE = train_set[train_set['Seed_Diff'] == 1]
nba_train = train_setE[['eFG..Misc', 'TOV..Misc', 'FT.FGA.Misc', 
                        'DRB..Misc', 'ORB..Misc_._Opponent']]

nba_labels = train_setE["Won_Series"].copy()
nba_labels = (nba_labels == 1)

pipeline = Pipeline([
    ('std_scaler', StandardScaler()),
])

nba_prepared = pipeline.fit_transform(nba_train, nba_labels)
rfc_clf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
            oob_score=False, random_state=23, verbose=0, warm_start=False)
rfc_clf.fit(nba_prepared, nba_labels)

test_setE = test_set[test_set['Seed_Diff'] == 1]
nba_test = test_setE[['eFG..Misc', 'TOV..Misc', 'FT.FGA.Misc', 
                      'DRB..Misc', 'ORB..Misc_._Opponent']]

nba_test_labels = test_setE["Won_Series"].copy()
nba_test_labels = (nba_test_labels == 1)

nba_test_prepared = pipeline.transform(nba_test)
y_test_pred = rfc_clf.predict(nba_test_prepared)

print(confusion_matrix(nba_test_labels, y_test_pred))
print("precision score:", precision_score(nba_test_labels, y_test_pred))
fpr, tpr, thresholds = roc_curve(nba_test_labels, y_test_pred)
print("roc_auc:", roc_auc_score(nba_test_labels, y_test_pred))

[[ 7  9]
 [13  4]]
precision score: 0.3076923076923077
roc_auc: 0.33639705882352944
