In [42]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split,\
    GridSearchCV, cross_val_score
from sklearn.dummy import DummyClassifier 
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder,\
    FunctionTransformer, OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier,\
    GradientBoostingClassifier, AdaBoostClassifier,\
    BaggingClassifier, ExtraTreesClassifier,\
    VotingClassifier, StackingClassifier
from sklearn.metrics import plot_confusion_matrix, recall_score,\
    accuracy_score, precision_score, f1_score

import xgboost

import pickle

from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImPipeline

#https://machinelearningmastery.com/threshold-moving-for-imbalanced-classification/
from sklearn.metrics import roc_curve
from numpy import sqrt, argmax

from model import *

In [43]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

In [44]:
model

<module 'model' from '/Users/zachrauch/Documents/Flatiron/Projects/Flu-Shot-Learning/Workspace/model.py'>

In [45]:
class ModelWithCV():
    '''Structure to save the model and more easily see its crossvalidation'''
    
    def __init__(self, model, model_name, X, y, cv_now=True):
        self.model = model
        self.name = model_name
        self.X = X
        self.y = y
        # For CV results
        self.cv_results = None
        self.cv_mean = None
        self.cv_median = None
        self.cv_std = None
        #
        if cv_now:
            self.cross_validate()
        
    def cross_validate(self, X=None, y=None, kfolds=10):
        '''
        Perform cross-validation and return results.
        
        Args: 
          X:
            Optional; Training data to perform CV on. Otherwise use X from object
          y:
            Optional; Training data to perform CV on. Otherwise use y from object
          kfolds:
            Optional; Number of folds for CV (default is 10)  
        '''
        
        cv_X = X if X else self.X
        cv_y = y if y else self.y

        self.cv_results = cross_val_score(self.model, cv_X, cv_y, cv=kfolds)
        self.cv_mean = np.mean(self.cv_results)
        self.cv_median = np.median(self.cv_results)
        self.cv_std = np.std(self.cv_results)

        
#     def print_cv_summary(self):
#         cv_summary = (
#         f'''CV Results for `{self.name}` model:
#             {self.cv_mean:.5f} ± {self.cv_std:.5f} accuracy
#             recall_score: {recall_score(self.y, self.model.predict(self.X))}
#             precision_score: {precision_score(self.y, self.model.predict(self.X))}
#             f1_score: {f1_score(self.y, self.model.predict(self.X))}    
#         ''')
#         print(cv_summary)
    def print_cv_summary(self):
        cv_summary = (
        f'''CV Results for `{self.name}` model:
            {self.cv_mean:.5f} ± {self.cv_std:.5f} accuracy
            recall_score: {cross_val_score(self.model, self.X, self.y, cv=10, scoring='recall').mean()}
            precision_score: {cross_val_score(self.model, self.X, self.y, cv=10, scoring='average_precision').mean()}
            f1_score: {cross_val_score(self.model, self.X, self.y, cv=10, scoring='f1').mean()}    
        ''')
        print(cv_summary)
        
    def plot_cv(self, ax):
        '''
        Plot the cross-validation values using the array of results and given 
        Axis for plotting.
        '''
        ax.set_title(f'CV Results for `{self.name}` Model')
        # Thinner violinplot with higher bw
        sns.violinplot(y=self.cv_results, ax=ax, bw=.4)
        sns.swarmplot(
                y=self.cv_results,
                color='orange',
                size=10,
                alpha= 0.8,
                ax=ax
        )

        return ax

In [46]:
import pickle
with open('tuned_logreg.sav', 'rb') as f:
    tlog = pickle.load(f)
tlog

<__main__.ModelWithCV at 0x7fde60077880>

In [47]:
tlog.print_cv_summary()

CV Results for `tlog_pipe` model:
            0.77751 ± 0.01195 accuracy
            recall_score: 0.7461571315057609
            precision_score: 0.8288517338281706
            f1_score: 0.7587960505203576    
        


In [48]:
tlog.model

Pipeline(steps=[('ct',
                 ColumnTransformer(transformers=[('subpipe_cat',
                                                  Pipeline(steps=[('cat_impute',
                                                                   SimpleImputer(strategy='most_frequent')),
                                                                  ('ohe',
                                                                   OneHotEncoder(drop='first',
                                                                                 handle_unknown='ignore',
                                                                                 sparse=False))]),
                                                  Index(['behavioral_antiviral_meds', 'behavioral_avoidance',
       'behavioral_face_mask', 'behavioral_wash_hands',
       'behavioral_large_gatherings',...
                                                  Pipeline(steps=[('num_impute',
                                                            

In [None]:
import pickle
with open('tuned_knn.sav', 'rb') as f:
    tknn = pickle.load(f)
tknn

In [None]:
import pickle
with open('tuned_forest.sav', 'rb') as f:
    tforest = pickle.load(f)
tforest

In [None]:
import pickle
with open('tuned_gboost.sav', 'rb') as f:
    tgboost = pickle.load(f)
tgboost

In [None]:
import pickle
with open('tuned_ada.sav', 'rb') as f:
    tada = pickle.load(f)
tada

In [None]:
import pickle
with open('tuned_xgb.sav', 'rb') as f:
    txboost = pickle.load(f)
txboost

In [None]:
import pickle
with open('tuned_etc.sav', 'rb') as f:
    tetc = pickle.load(f)
tetc

In [None]:
import pickle
with open('stack_1.sav', 'rb') as f:
    stack_1 = pickle.load(f)
stack_1

In [None]:
import pickle
with open('stack_1.sav', 'rb') as f:
    stack_1 = pickle.load(f)
stack_1

In [None]:
import pickle
with open('stack_2.sav', 'rb') as f:
    stack_2 = pickle.load(f)
stack_2

In [None]:
import pickle
with open('stack_3.sav', 'rb') as f:
    stack_3 = pickle.load(f)
stack_3