In [2]:
from sklearn import model_selection
from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from mlxtend.classifier import StackingCVClassifier
import warnings
import pickle
import pandas as pd
warnings.simplefilter('ignore')
RANDOM_SEED = 42

# Import your models first

In [3]:
# Logistic Regression
f = open('logreg.pckl', 'rb')
logreg = pickle.load(f)
f.close()

In [4]:
# Decision Tree
f = open('decisiontree.pckl', 'rb')
decisiontree = pickle.load(f)
f.close()

In [5]:
# Random Forest
f = open('randomforest.pckl', 'rb')
randomforest = pickle.load(f)
f.close()

In [6]:
# SVM
f = open('svm.pckl', 'rb')
svm = pickle.load(f)
f.close()

In [7]:
# Inspect models
print(logreg)
print(decisiontree)
print(randomforest)
print(svm)

LogisticRegression(random_state=42)
DecisionTreeClassifier(criterion='entropy', max_depth=6, max_features=45,
                       min_samples_leaf=75, min_samples_split=45, presort=None,
                       random_state=40)
RandomForestClassifier(criterion='entropy', min_samples_split=12,
                       n_estimators=200)
SVC(gamma='auto', kernel='linear')


## Reintroduce X and y

In [8]:
df = pd.read_csv("./Output/data_clean&processed_addedsentiment.csv")
X = df['goal'].values.reshape(-1,1)
# X = df.drop('state', axis=1)
y = df['state']

In [9]:
models = [('lr',logreg),('svm',svm),('decisiontree',decisiontree),('randomforest',randomforest)]
stacking = StackingClassifier(estimators=models)

# Create stacking model

In [10]:
sclf = StackingCVClassifier(classifiers=[decisiontree, logreg, randomforest, svm], 
                          meta_classifier=svm)

print('10-fold cross validation:\n')

for clf, label in zip([decisiontree, logreg, randomforest, sclf], 
                      ['Decision Tree',
                       'Log Reg', 
                        'Random Forest','Stacked Model']):

    sclf_scores = model_selection.cross_val_score(clf, X, y,
                                              cv=5, scoring='roc_auc')
    print("AUC: %0.4f (+/- %0.4f) [%s]" % (sclf_scores.mean(), sclf_scores.std(), label))

10-fold cross validation:

AUC: nan (+/- nan) [Decision Tree]
AUC: 0.6538 (+/- 0.0188) [Log Reg]
AUC: 0.6039 (+/- 0.0124) [Random Forest]
AUC: nan (+/- nan) [Stacked Model]
