In [1]:
from model_evaluation import *
from model_train import *
from data_graphs import *
import matplotlib.pyplot as plt
from sklearn.model_selection import validation_curve
from sklearn.metrics import confusion_matrix
%matplotlib inline
import numpy as np

# Seed for cross-validation sampling.
seed = 0

# Scoring function used when evaluating models.
scorer = make_scorer(balanced_accuracy)

# Dataset names, the CSV files behind them, and a name -> DataFrame lookup.
dnames = ['wine', 'seismic']
p_wine = get_abspath('winequality.csv', 'data/experiments')
p_seismic = get_abspath('seismic-bumps.csv', 'data/experiments')
df_wine = pd.read_csv(p_wine)
df_seismic = pd.read_csv(p_seismic)
dfs = dict(zip(dnames, (df_wine, df_seismic)))

# Model names, plus a dict holding one (initially empty) slot per model;
# the slots are filled in once the pickled models are loaded.
mnames = ['KNN', 'DT', 'ANN', 'SVM_RBF', 'SVM_PLY', 'Boosting']
estimators = dict.fromkeys(mnames)

# For every dataset: split it, load each model's pickled grid-search result
# into `estimators`, then write the decision tree's test-set confusion matrix
# to a comma-delimited text file named '<dataset>_cm.txt' under 'results/DT'.
for df in dnames:  # NOTE: `df` is the dataset *name* (a key of `dfs`), not a DataFrame
    X_train, X_test, y_train, y_test = split_data(dfs[df])

    # load pickled models into estimators dict
    # NOTE(review): load_pickled_model presumably unpickles the file -- only
    # point it at model files you created yourself.
    for m in mnames:
        mfile = '{}/{}_grid.pkl'.format(m, df)
        model = load_pickled_model(get_abspath(mfile, filepath='models'))
        estimators[m] = model

    # dict.items() works on both Python 2 and Python 3; the previous
    # dict.iteritems() only exists on Python 2 and raises AttributeError on 3.
    for name, estimator in estimators.items():
        # Only the decision tree's confusion matrix is exported.
        if name != 'DT':
            continue

        # `best_estimator_` suggests a fitted grid-search object -- TODO confirm.
        y_pred = estimator.best_estimator_.predict(X_test)
        cm = confusion_matrix(y_test, y_pred).astype('int')
        save_path = get_abspath('{}_cm.txt'.format(df), 'results/{}'.format(name))
        np.savetxt(save_path, cm, fmt='%d', delimiter=',')