In [1]:
import pandas as pd
import numpy as np
from sklearn import metrics 
import matplotlib.pyplot as plt
from random import gauss
from random import seed
from scipy.signal import savgol_filter
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from collections import Counter
import time
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

from sklearn.ensemble import GradientBoostingClassifier

from scipy.optimize import fmin
from sklearn.metrics import roc_curve, roc_auc_score, auc
from scipy import interp
from scipy.stats import entropy
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.calibration import CalibratedClassifierCV

import signatory
import torch

# Helper Functions
def get_auc(df):
    auc_values = []
    for i in range(0, len(df.index)):
        row = df.iloc[i]
        x = df.columns.values.astype(float)
        y = row.values
        auc_values.append(metrics.auc(x, y))
    return pd.Series(auc_values, index=df.index)

def get_roc(labels, scores):    
    fpr, tpr, thresholds = roc_curve(labels, scores)  
    threshold = thresholds[np.argmax(tpr - fpr)]
    return fpr, tpr, thresholds, auc(fpr, tpr), threshold

def get_thresh(tpr,fpr,thresholds):
    youden = np.argmax(tpr - fpr)
    thresh = thresholds[youden]
    fpr_thresh = fpr[youden]
    tpr_thresh = tpr[youden]
    return thresh, fpr_thresh, tpr_thresh 

# Used to find nearest threshold
def find_nearest(array, value):
    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    print('nearest difference:', min(np.abs(array - value))) # to make sure differences aren't too big
    return idx

# Ignore warnings
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

# Folder to save data

In [5]:
labels = ['S. agalactiae', 'S. gallolyticus', 'C. koseri', 'L. monocytogenes', 'H. influenzae', 'S. sanguinis', 'S. aureus', 'S. pneumoniae', 'E. faecium', 'E. coli'] 

In [14]:
# Convert time-series to signature features.

# These are background subtracted aligned to 0
all_dfs = pd.read_csv('./Data/all_dfs.csv', index_col=0)

def add_time(data):
    t = torch.broadcast_to(
        torch.linspace(0, 1, data.shape[1])[None, :, None],
        (data.shape[0], data.shape[1], 1)
    )

    out = torch.cat((t, data), axis=2)
    return out


BATCH = 8
df = torch.from_numpy(all_dfs.values)
short_df = df[:, 240:]

# Information-rich portion of melt curve
short_time_series = (
    short_df
    .reshape(short_df.shape[0], short_df.shape[1], 1)
)

melt_sigs = []

for i in range(0, short_time_series.shape[1], BATCH):
    ts = short_time_series[:, i:i+20, :]
    ts = add_time(ts)
    winsig = signatory.signature(
        ts, 
        3,
        basepoint=True
    )
    melt_sigs.append(winsig)

melt_sigs = torch.cat((melt_sigs), axis=1)
all_dfs_short = pd.DataFrame(data=melt_sigs.numpy(), index=all_dfs.index)


# Full melt curve
melt_sigs = []
time_series = (
    df
    .reshape(df.shape[0], df.shape[1], 1)
)

for i in range(0, time_series.shape[1], BATCH):
    ts = time_series[:, i:i+20, :]
    ts = add_time(ts)
    winsig = signatory.signature(
        ts, 
        3,
        basepoint=True
    )
    melt_sigs.append(winsig)

melt_sigs = torch.cat((melt_sigs), axis=1)
all_dfs = pd.DataFrame(data=melt_sigs.numpy(), index=all_dfs.index)

# Define Classifiers

In [15]:
# Logistic Regression
class_weights = [None, 'balanced']
class_weights = [None] # Don't use extra options for now
LR_classifiers = [LogisticRegression(random_state=42, multi_class="multinomial",solver='newton-cg', penalty='l2', class_weight=cw, n_jobs=-1) for cw in class_weights]  
LR_names = ['LogisticRegression, class_weights=%s' %cw for cw in class_weights] 
LR_names = ['LogisticRegression'] 

# Support Vector Machine (SVM)
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
kernels = ['rbf', 'linear'] # These are the best performing ones
SVM_classifiers = [SVC(kernel=kernel, probability=True, random_state=42) for kernel in kernels]  
SVM_names = ['SVM kernel=%s' %kernel for kernel in kernels] 

# Neural Network, Multi-layer Perceptron (MLP)
activation = ['identity', 'logistic', 'tanh', 'relu']
MLP_classifiers = [MLPClassifier(activation=act, random_state=42) for act in activation]  
MLP_names = ['Neural net activation=%s' %act for act in activation]  

# Random Forest
RandF_classifiers = [RandomForestClassifier(n_estimators=n, random_state=42, n_jobs=-1) for n in [50,100,500]]
RandF_names = ['RandF n=%s' %str(n) for n in [50,100,500]]

# Naive Bayes
NB_classifiers = [GaussianNB(), CalibratedClassifierCV(base_estimator=GaussianNB(), cv=5)]
NB_names = ['Gaussian NB', 'Gaussian NB, calibrated']

#kNN ED and DTW
# kNN_classifiers = [KNeighborsTimeSeriesClassifier(n_neighbors=1, metric='euclidean', n_jobs=-1), 
#                    KNeighborsTimeSeriesClassifier(n_neighbors=1, metric='euclidean', weights='distance', n_jobs=-1),
#                    KNeighborsTimeSeriesClassifier(n_neighbors=1, metric='softdtw', n_jobs=-1), 
#                    KNeighborsTimeSeriesClassifier(n_neighbors=1, metric='softdtw', weights='distance', n_jobs=-1)]
# kNN_names = ['1NN-ED', '1NN-ED weighted','1NN-softDTW', '1NN-softDTW weighted']

# Classifiers
classifiers = LR_classifiers + NB_classifiers + SVM_classifiers + MLP_classifiers + RandF_classifiers  #+ kNN_classifiers
classifiers_names = LR_names + NB_names + SVM_names + MLP_names + RandF_names  #+ kNN_names

# Main Method
1. 10x Leave-One-Out (10x LOO)
    1. probability 
    2. entropy 
2. Accumulate those 10 experiments to get novelty threshold 
    1. weights = None 
    2. each experiment is weighted to balance 0's and 1's 
    3. the accumulated experiment is weighted to balance 0's and 1's 
    4. each experiment is weighted by #novel curves (= size of leave-one-out species) 
3. Use novelty threshold on uMelt validation dataset
    1. Compare to optimal threshold

# 1) 10x LOO

In [25]:
save_folder = './outputs/'

In [26]:
# labels = list(np.unique(df_all_alignedbyOrg_1_auc_norm.index.values))

# These are background subtracted aligned to 0
# all_dfs = pd.read_csv('./Data/all_dfs.csv', index_col=0).iloc[:, 240:]
# classifiers = RandF_classifiers
# classifiers_names = RandF_names
save = []
accumulate = []
roc_curves = []
test_size = []
fig_save = []
aucs = []

for z,df in enumerate([all_dfs, all_dfs_short]):
    for label in labels:
        label_time = time.time()
        print(label)

        df_LOO = df.drop(label)
        X_train, X_test, y_train, y_test = train_test_split(df_LOO, df_LOO.index, test_size=0.2, random_state=42)
        known_length = len(X_test)
        unknown_length = len(df.loc[label])
        test_size.append([label,known_length,unknown_length])

        X_test_known = X_test.copy()
        y_test_known = y_test.copy()
        X_test = X_test.append(df.loc[label])
        y_test = y_test.append(df.loc[label].index)
        print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

        standardizer = StandardScaler()
        standardizer.fit(X_train)
        X_train_std = standardizer.transform(X_train)
        X_test_std = standardizer.transform(X_test)
        X_test_std_known = standardizer.transform(X_test_known)

        X_train_std = np.nan_to_num(X_train_std)
        X_test_std = np.nan_to_num(X_test_std)


        for clf, clf_name in zip(classifiers,classifiers_names): # Classifier loop
            
            # if z in [2,3]:
            #     clf = CalibratedClassifierCV(base_estimator=clf, method='sigmoid') 
            # if z in [4,5]:
            #     clf = CalibratedClassifierCV(base_estimator=clf, method='isotonic')

            start_time = time.time()
            model_fit = clf.fit(X_train_std, y_train)
            fit_time = time.time()-start_time

            start_time = time.time()
            pred=model_fit.predict(X_test_std)
            predict_time = time.time()-start_time

            df_out = pd.DataFrame(data=None, index=X_test.index)
            df_out['Prediction'] = pred
            prob = model_fit.predict_proba(X_test_std)
            df_out['Probability'] = np.max(prob, axis=1)
            df_out['Entropy']= entropy(prob.T)
            df_out['Correct'] = (df_out.index == df_out['Prediction'])
            accuracy = sum(df_out['Correct'])/known_length

            df_out['Known'] = False
            df_out['Known'].iloc[:known_length] = True

            aucs.append(
                roc_auc_score(
                    y_test_known,
                    model_fit.predict_proba(X_test_std_known),
                    multi_class='ovr'
                )
            )

            # Only correctly classified ones
            df_out2 = df_out.loc[df_out['Correct'] == True].append(df_out.iloc[known_length:])

            fpr1, tpr1, thresholds1, auc1, threshold1 = get_roc(df_out['Known'].values, df_out['Probability'].values)
            fpr2, tpr2, thresholds2, auc2, threshold2 = get_roc(df_out['Known'].values, -df_out['Entropy'].values)
            fpr3, tpr3, thresholds3, auc3, threshold3 = get_roc(df_out2['Known'].values, df_out2['Probability'].values)
            fpr4, tpr4, thresholds4, auc4, threshold4 = get_roc(df_out2['Known'].values, -df_out2['Entropy'].values)

            # Save data (updated)
            save.append([label, clf_name, z, fit_time, predict_time, accuracy, auc1, auc2, auc3, auc4])

            accumulate.append([label, clf_name, z, 'prob_all', df_out['Known'].values, df_out['Probability'].values, compute_sample_weight(class_weight='balanced', y=df_out['Known'])])
            accumulate.append([label, clf_name, z, 'entr_all', df_out['Known'].values, -df_out['Entropy'].values, compute_sample_weight(class_weight='balanced', y=df_out['Known'])])
            accumulate.append([label, clf_name, z, 'prob_correct', df_out2['Known'].values, df_out2['Probability'].values, compute_sample_weight(class_weight='balanced', y=df_out2['Known'])])
            accumulate.append([label, clf_name, z, 'entr_correct', df_out2['Known'].values, -df_out2['Entropy'].values, compute_sample_weight(class_weight='balanced', y=df_out2['Known'])])

            roc_curves.append([label, clf_name, z, 'prob_all', fpr1, tpr1, thresholds1, auc1, threshold1])
            roc_curves.append([label, clf_name, z, 'entr_all', fpr2, tpr2, thresholds2, auc2, -threshold2])
            roc_curves.append([label, clf_name, z, 'prob_correct', fpr3, tpr3, thresholds3, auc3, threshold3])
            roc_curves.append([label, clf_name, z, 'entr_correct', fpr4, tpr4, thresholds4, auc4, -threshold4])       

            fig_save.append([label, clf_name, z, 'prob_all', df_out.index, df_out['Probability'], auc1, threshold1])
            fig_save.append([label, clf_name, z, 'entr_all', df_out.index, df_out['Entropy'], auc2, -threshold2])
            fig_save.append([label, clf_name, z, 'prob_correct', df_out2.index, df_out2['Probability'], auc3, threshold3])
            fig_save.append([label, clf_name, z, 'entr_correct', df_out2.index, df_out2['Entropy'], auc4, -threshold4])

        print(time.time()-label_time)

       
# Save all data as pickle and csv's
results = pd.DataFrame(save)
results.columns = ("species", "method", "short","fit_time", "predict_time", "accuracy", "roc_auc prob", "roc_auc entropy", "roc_auc3", "roc_auc4")
results.to_csv(save_folder+'results.csv')

df_accumulate = pd.DataFrame(accumulate)
df_accumulate.columns = ("species", "method", "short", 'novelty_method','Known','Probability', 'weights')
df_accumulate.to_pickle(save_folder+'accumulate.pkl')

df_roc_curves = pd.DataFrame(roc_curves)
df_roc_curves.columns = ("species", "method", "short", 'novelty_method', 'fpr', 'tpr', 'thresholds', 'auc', 'threshold')
df_roc_curves.to_pickle(save_folder+'roc_curves.pkl')

df_fig_save = pd.DataFrame(fig_save)
df_fig_save.columns = ("species", "method", "short",'novelty_method', 'index', 'score', 'auc', 'threshold')
df_fig_save.to_pickle(save_folder+'fig_save.pkl')

S. agalactiae
(12825, 728) (5456, 728) (12825,) (5456,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

410.11573481559753
S. gallolyticus
(13020, 728) (5261, 728) (13020,) (5261,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

363.7476828098297
C. koseri
(12983, 728) (5298, 728) (12983,) (5298,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

267.38973569869995
L. monocytogenes
(12829, 728) (5452, 728) (12829,) (5452,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

264.90873980522156
H. influenzae
(12812, 728) (5469, 728) (12812,) (5469,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

254.69951581954956
S. sanguinis
(12863, 728) (5418, 728) (12863,) (5418,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

258.7691447734833
S. aureus
(13002, 728) (5279, 728) (13002,) (5279,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

255.36034893989563
S. pneumoniae
(13620, 728) (4661, 728) (13620,) (4661,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

280.77542781829834
E. faecium
(13715, 728) (4566, 728) (13715,) (4566,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

302.646821975708
E. coli
(13950, 728) (4331, 728) (13950,) (4331,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

265.8084809780121
S. agalactiae
(12825, 308) (5456, 308) (12825,) (5456,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

132.72303652763367
S. gallolyticus
(13020, 308) (5261, 308) (13020,) (5261,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

144.87520051002502
C. koseri
(12983, 308) (5298, 308) (12983,) (5298,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

129.23128175735474
L. monocytogenes
(12829, 308) (5452, 308) (12829,) (5452,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

132.3588764667511
H. influenzae
(12812, 308) (5469, 308) (12812,) (5469,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

127.68092274665833
S. sanguinis
(12863, 308) (5418, 308) (12863,) (5418,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

183.68819904327393
S. aureus
(13002, 308) (5279, 308) (13002,) (5279,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

188.75330567359924
S. pneumoniae
(13620, 308) (4661, 308) (13620,) (4661,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

224.40179562568665
E. faecium
(13715, 308) (4566, 308) (13715,) (4566,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

201.52129244804382
E. coli
(13950, 308) (4331, 308) (13950,) (4331,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

264.2058012485504


In [28]:

save = []
accumulate = []
roc_curves = []
test_size = []
fig_save = []
aucs = []
save_folder = './outputs_no_ecoli/'
for z,df in enumerate([all_dfs.drop('E. coli'), all_dfs_short.drop('E. coli')]):
    for label in labels[:-1]:
        label_time = time.time()
        print(label)

        df_LOO = df.drop(label)
        X_train, X_test, y_train, y_test = train_test_split(df_LOO, df_LOO.index, test_size=0.2, random_state=42)
        known_length = len(X_test)
        unknown_length = len(df.loc[label])
        test_size.append([label,known_length,unknown_length])

        X_test_known = X_test.copy()
        y_test_known = y_test.copy()
        X_test = X_test.append(df.loc[label])
        y_test = y_test.append(df.loc[label].index)
        print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

        standardizer = StandardScaler()
        standardizer.fit(X_train)
        X_train_std = standardizer.transform(X_train)
        X_test_std = standardizer.transform(X_test)
        X_test_std_known = standardizer.transform(X_test_known)

        X_train_std = np.nan_to_num(X_train_std)
        X_test_std = np.nan_to_num(X_test_std)

        # X_train_std = X_train.copy()
        # X_test_std = X_test.copy()
        # X_train_std = X_train_std.copy()
        # X_test_std = X_test_std.copy()

        for clf, clf_name in zip(classifiers,classifiers_names): # Classifier loop
            
            # if z in [2,3]:
            #     clf = CalibratedClassifierCV(base_estimator=clf, method='sigmoid') 
            # if z in [4,5]:
            #     clf = CalibratedClassifierCV(base_estimator=clf, method='isotonic')

            start_time = time.time()
            model_fit = clf.fit(X_train_std, y_train)
            fit_time = time.time()-start_time

            start_time = time.time()
            pred=model_fit.predict(X_test_std)
            predict_time = time.time()-start_time

            df_out = pd.DataFrame(data=None, index=X_test.index)
            df_out['Prediction'] = pred
            prob = model_fit.predict_proba(X_test_std)
            df_out['Probability'] = np.max(prob, axis=1)
            df_out['Entropy']= entropy(prob.T)
            df_out['Correct'] = (df_out.index == df_out['Prediction'])
            accuracy = sum(df_out['Correct'])/known_length

            df_out['Known'] = False
            df_out['Known'].iloc[:known_length] = True

            aucs.append(
                roc_auc_score(
                    y_test_known,
                    model_fit.predict_proba(X_test_std_known),
                    multi_class='ovr'
                )
            )

            # Only correctly classified ones
            df_out2 = df_out.loc[df_out['Correct'] == True].append(df_out.iloc[known_length:])

            fpr1, tpr1, thresholds1, auc1, threshold1 = get_roc(df_out['Known'].values, df_out['Probability'].values)
            fpr2, tpr2, thresholds2, auc2, threshold2 = get_roc(df_out['Known'].values, -df_out['Entropy'].values)
            fpr3, tpr3, thresholds3, auc3, threshold3 = get_roc(df_out2['Known'].values, df_out2['Probability'].values)
            fpr4, tpr4, thresholds4, auc4, threshold4 = get_roc(df_out2['Known'].values, -df_out2['Entropy'].values)

            # Save data (updated)
            save.append([label, clf_name, z, fit_time, predict_time, accuracy, auc1, auc2, auc3, auc4])

            accumulate.append([label, clf_name, z, 'prob_all', df_out['Known'].values, df_out['Probability'].values, compute_sample_weight(class_weight='balanced', y=df_out['Known'])])
            accumulate.append([label, clf_name, z, 'entr_all', df_out['Known'].values, -df_out['Entropy'].values, compute_sample_weight(class_weight='balanced', y=df_out['Known'])])
            accumulate.append([label, clf_name, z, 'prob_correct', df_out2['Known'].values, df_out2['Probability'].values, compute_sample_weight(class_weight='balanced', y=df_out2['Known'])])
            accumulate.append([label, clf_name, z, 'entr_correct', df_out2['Known'].values, -df_out2['Entropy'].values, compute_sample_weight(class_weight='balanced', y=df_out2['Known'])])

            roc_curves.append([label, clf_name, z, 'prob_all', fpr1, tpr1, thresholds1, auc1, threshold1])
            roc_curves.append([label, clf_name, z, 'entr_all', fpr2, tpr2, thresholds2, auc2, -threshold2])
            roc_curves.append([label, clf_name, z, 'prob_correct', fpr3, tpr3, thresholds3, auc3, threshold3])
            roc_curves.append([label, clf_name, z, 'entr_correct', fpr4, tpr4, thresholds4, auc4, -threshold4])       

            fig_save.append([label, clf_name, z, 'prob_all', df_out.index, df_out['Probability'], auc1, threshold1])
            fig_save.append([label, clf_name, z, 'entr_all', df_out.index, df_out['Entropy'], auc2, -threshold2])
            fig_save.append([label, clf_name, z, 'prob_correct', df_out2.index, df_out2['Probability'], auc3, threshold3])
            fig_save.append([label, clf_name, z, 'entr_correct', df_out2.index, df_out2['Entropy'], auc4, -threshold4])

        print(time.time()-label_time)

       
# Save all data as pickle and csv's
results = pd.DataFrame(save)
results.columns = ("species", "method", "short","fit_time", "predict_time", "accuracy", "roc_auc prob", "roc_auc entropy", "roc_auc3", "roc_auc4")
results.to_csv(save_folder+'results.csv')

# df_summary=pd.DataFrame()
# for i,method in enumerate(classifiers_names):
#     df_summary=df_summary.append(results[results['method']==method].mean(),ignore_index=True)
# df_summary.index = classifiers_names
# df_summary.to_csv(save_folder+'results_summary.csv')

df_accumulate = pd.DataFrame(accumulate)
df_accumulate.columns = ("species", "method", "short", 'novelty_method','Known','Probability', 'weights')
df_accumulate.to_pickle(save_folder+'accumulate.pkl')

df_roc_curves = pd.DataFrame(roc_curves)
df_roc_curves.columns = ("species", "method", "short", 'novelty_method', 'fpr', 'tpr', 'thresholds', 'auc', 'threshold')
df_roc_curves.to_pickle(save_folder+'roc_curves.pkl')

df_fig_save = pd.DataFrame(fig_save)
df_fig_save.columns = ("species", "method", "short",'novelty_method', 'index', 'score', 'auc', 'threshold')
df_fig_save.to_pickle(save_folder+'fig_save.pkl')

S. agalactiae
(12151, 728) (5287, 728) (12151,) (5287,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

224.35124826431274
S. gallolyticus
(12345, 728) (5093, 728) (12345,) (5093,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

219.9159038066864
C. koseri
(12308, 728) (5130, 728) (12308,) (5130,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

203.77691507339478
L. monocytogenes
(12155, 728) (5283, 728) (12155,) (5283,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

199.45091247558594
H. influenzae
(12138, 728) (5300, 728) (12138,) (5300,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

206.49297523498535
S. sanguinis
(12188, 728) (5250, 728) (12188,) (5250,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

199.72303414344788
S. aureus
(12328, 728) (5110, 728) (12328,) (5110,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

204.0590763092041
S. pneumoniae
(12946, 728) (4492, 728) (12946,) (4492,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

250.84927654266357
E. faecium
(13040, 728) (4398, 728) (13040,) (4398,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

413.68978357315063
S. agalactiae
(12151, 308) (5287, 308) (12151,) (5287,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

234.78575825691223
S. gallolyticus
(12345, 308) (5093, 308) (12345,) (5093,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

315.152672290802
C. koseri
(12308, 308) (5130, 308) (12308,) (5130,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

302.5925934314728
L. monocytogenes
(12155, 308) (5283, 308) (12155,) (5283,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

283.2656979560852
H. influenzae
(12138, 308) (5300, 308) (12138,) (5300,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

265.9683458805084
S. sanguinis
(12188, 308) (5250, 308) (12188,) (5250,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

124.7354679107666
S. aureus
(12328, 308) (5110, 308) (12328,) (5110,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

112.85595083236694
S. pneumoniae
(12946, 308) (4492, 308) (12946,) (4492,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

114.66013956069946
E. faecium
(13040, 308) (4398, 308) (13040,) (4398,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

113.17212557792664


# 3) uMelts for extra validation

In [7]:
df_50_norm_noisy = pd.read_csv('./Data/df_50_norm_noisy(noise_models).csv', index_col=0)

In [9]:
# df_all_alignedbyOrg_1_auc_norm.columns = df_all_alignedbyOrg_1_auc_norm.columns.map(float)
df_50_norm_noisy.columns = df_50_norm_noisy.columns.map(float)
# df_50_norm.columns = df_50_norm.columns.map(float)
# all_dfs.columns = all_dfs.columns.map(float)

In [17]:
df_50_norm_noisy = pd.read_csv('./Data/df_50_norm_noisy(noise_models).csv', index_col=0)
df = torch.from_numpy(df_50_norm_noisy.values)
short_df = df[:, 240:]

# Information-rich portion of melt curve
short_time_series = (
    short_df
    .reshape(short_df.shape[0], short_df.shape[1], 1)
)

melt_sigs = []

for i in range(0, short_time_series.shape[1], BATCH):
    ts = short_time_series[:, i:i+20, :]
    ts = add_time(ts)
    winsig = signatory.signature(
        ts, 
        3,
        basepoint=True
    )
    melt_sigs.append(winsig)

melt_sigs = torch.cat((melt_sigs), axis=1)
df_50_norm_noisy_short = pd.DataFrame(data=melt_sigs.numpy(), index=df_50_norm_noisy.index)


# Full melt curve
melt_sigs = []
time_series = (
    df
    .reshape(df.shape[0], df.shape[1], 1)
)

for i in range(0, time_series.shape[1], BATCH):
    ts = time_series[:, i:i+20, :]
    ts = add_time(ts)
    winsig = signatory.signature(
        ts, 
        3,
        basepoint=True
    )
    melt_sigs.append(winsig)

melt_sigs = torch.cat((melt_sigs), axis=1)
df_50_norm_noisy = pd.DataFrame(data=melt_sigs.numpy(), index=df_50_norm_noisy.index)

In [19]:
save_folder = './outputs/'

In [20]:
settings = 0
plot=False
for i in [0, 1, 0, 1, 0, 1]:
    if i == 0:
        df = all_dfs
        noisy_df = df_50_norm_noisy
    else:
        df = all_dfs_short
        noisy_df = df_50_norm_noisy_short
    # X_train, X_test, y_train, y_test = train_test_split(all_dfs.drop('E. coli').iloc[:,i:], all_dfs.drop('E. coli').index, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(df, df.index, test_size=0.2, random_state=42)

    known_length = len(X_test)
    print(X_test.shape)
    print(noisy_df.iloc[:,i:].shape)
    X_test = X_test.append(noisy_df.iloc[:,i:])
    y_test = y_test.append(noisy_df.index)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    standardizer = StandardScaler()
    standardizer.fit(X_train)
    X_train_std = standardizer.transform(X_train)
    X_test_std = standardizer.transform(X_test)
    X_train_std = np.nan_to_num(X_train_std)
    X_test_std = np.nan_to_num(X_test_std)

    roc_curves = []
    fig_save = []
    for clf, clf_name in zip(classifiers,classifiers_names):
        
        if settings in [2,3]:
            clf = CalibratedClassifierCV(base_estimator=clf, method='sigmoid') 
        if settings in [4,5]:
            clf = CalibratedClassifierCV(base_estimator=clf, method='isotonic')

        model_fit = clf.fit(X_train_std, y_train)

        df_out = pd.DataFrame(data=None, index=X_test.index)
        pred=model_fit.predict(X_test_std)
        df_out['Prediction'] = pred
        prob = model_fit.predict_proba(X_test_std)
        df_out['Probability'] = np.max(prob, axis=1)
        df_out['Entropy']= entropy(prob.T)
        df_out['Correct'] = (df_out.index == df_out['Prediction'])

        df_out['Known'] = False
        df_out['Known'].iloc[:known_length] = True

        accuracy = sum(df_out['Correct'])/(len(df_out)-50)

        # Get ROC curve and AUC values
        fpr1, tpr1, thresholds1, auc1, threshold1 = get_roc(df_out['Known'].values, df_out['Probability'].values)
        fpr2, tpr2, thresholds2, auc2, threshold2 = get_roc(df_out['Known'].values, -df_out['Entropy'].values)

        if(plot):
            plt.figure(figsize=(16,7))
            ax = sns.boxplot(x=df_out.index, y=df_out['Probability'], dodge=False, showfliers=False)

            plt.ylim(0.0, 1.1)
            plt.legend(loc='upper left')
            plt.ylabel('Probability', fontsize=12)
            plt.xticks((np.arange(60)), rotation='vertical')
            ax.xaxis.label.set_visible(False)
            plt.title('%s, Classification accuracy: %.2f %%' %(clf_name, accuracy*100))
            plt.show()

        roc_curves.append([clf_name, 'prob_all', fpr1, tpr1, thresholds1, auc1, threshold1])
        roc_curves.append([clf_name, 'entr_all', fpr2, tpr2, thresholds2, auc2, -threshold2])
        fig_save.append([clf_name, 'prob_all', df_out.index, df_out['Probability'], auc1, threshold1, accuracy])
        fig_save.append([clf_name, 'entropy_all', df_out.index, df_out['Entropy'], auc2, -threshold2, accuracy])


    df_fig_save = pd.DataFrame(fig_save)
    df_fig_save.columns = ('method','novelty_method', 'index', 'score','auc','threshold','accuracy')
    df_fig_save.to_pickle(save_folder+'uMelt_fig_save'+str(settings)+'.pkl')

    df_roc_curves = pd.DataFrame(roc_curves)
    df_roc_curves.columns = ('method', 'novelty_method', 'fpr', 'tpr', 'thresholds', 'auc', 'threshold')
    df_roc_curves.to_pickle(save_folder+'uMelt_roc_curves'+str(settings)+'.pkl')
    
    settings += 1

(3657, 728)
(5000, 728)
(14624, 728) (8657, 728) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

(3657, 308)
(5000, 307)
(14624, 308) (8657, 308) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

(3657, 728)
(5000, 728)
(14624, 728) (8657, 728) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

(3657, 308)
(5000, 307)
(14624, 308) (8657, 308) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

(3657, 728)
(5000, 728)
(14624, 728) (8657, 728) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

(3657, 308)
(5000, 307)
(14624, 308) (8657, 308) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

In [21]:
save_folder = './outputs_no_ecoli/'

In [22]:
settings = 0
plot=False
for i in [0, 1, 0, 1, 0, 1]:
    if i == 0:
        df = all_dfs
        noisy_df = df_50_norm_noisy
    else:
        df = all_dfs_short
        noisy_df = df_50_norm_noisy_short
    # X_train, X_test, y_train, y_test = train_test_split(all_dfs.drop('E. coli').iloc[:,i:], all_dfs.drop('E. coli').index, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(df, df.index, test_size=0.2, random_state=42)

    known_length = len(X_test)
    print(X_test.shape)
    print(noisy_df.iloc[:,i:].shape)
    X_test = X_test.append(noisy_df.iloc[:,i:])
    y_test = y_test.append(noisy_df.index)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    standardizer = StandardScaler()
    standardizer.fit(X_train)
    X_train_std = standardizer.transform(X_train)
    X_test_std = standardizer.transform(X_test)
    X_train_std = np.nan_to_num(X_train_std)
    X_test_std = np.nan_to_num(X_test_std)

    roc_curves = []
    fig_save = []
    for clf, clf_name in zip(classifiers,classifiers_names):
        
        if settings in [2,3]:
            clf = CalibratedClassifierCV(base_estimator=clf, method='sigmoid') 
        if settings in [4,5]:
            clf = CalibratedClassifierCV(base_estimator=clf, method='isotonic')

        model_fit = clf.fit(X_train_std, y_train)

        df_out = pd.DataFrame(data=None, index=X_test.index)
        pred=model_fit.predict(X_test_std)
        df_out['Prediction'] = pred
        prob = model_fit.predict_proba(X_test_std)
        df_out['Probability'] = np.max(prob, axis=1)
        df_out['Entropy']= entropy(prob.T)
        df_out['Correct'] = (df_out.index == df_out['Prediction'])

        df_out['Known'] = False
        df_out['Known'].iloc[:known_length] = True

        accuracy = sum(df_out['Correct'])/(len(df_out)-50)

        # Get ROC curve and AUC values
        fpr1, tpr1, thresholds1, auc1, threshold1 = get_roc(df_out['Known'].values, df_out['Probability'].values)
        fpr2, tpr2, thresholds2, auc2, threshold2 = get_roc(df_out['Known'].values, -df_out['Entropy'].values)

        if(plot):
            plt.figure(figsize=(16,7))
            ax = sns.boxplot(x=df_out.index, y=df_out['Probability'], dodge=False, showfliers=False)

            plt.ylim(0.0, 1.1)
            plt.legend(loc='upper left')
            plt.ylabel('Probability', fontsize=12)
            plt.xticks((np.arange(60)), rotation='vertical')
            ax.xaxis.label.set_visible(False)
            plt.title('%s, Classification accuracy: %.2f %%' %(clf_name, accuracy*100))
            plt.show()

        roc_curves.append([clf_name, 'prob_all', fpr1, tpr1, thresholds1, auc1, threshold1])
        roc_curves.append([clf_name, 'entr_all', fpr2, tpr2, thresholds2, auc2, -threshold2])
        fig_save.append([clf_name, 'prob_all', df_out.index, df_out['Probability'], auc1, threshold1, accuracy])
        fig_save.append([clf_name, 'entropy_all', df_out.index, df_out['Entropy'], auc2, -threshold2, accuracy])


    df_fig_save = pd.DataFrame(fig_save)
    df_fig_save.columns = ('method','novelty_method', 'index', 'score','auc','threshold','accuracy')
    df_fig_save.to_pickle(save_folder+'uMelt_fig_save'+str(settings)+'.pkl')

    df_roc_curves = pd.DataFrame(roc_curves)
    df_roc_curves.columns = ('method', 'novelty_method', 'fpr', 'tpr', 'thresholds', 'auc', 'threshold')
    df_roc_curves.to_pickle(save_folder+'uMelt_roc_curves'+str(settings)+'.pkl')
    
    settings += 1

(3657, 728)
(5000, 728)
(14624, 728) (8657, 728) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

(3657, 308)
(5000, 307)
(14624, 308) (8657, 308) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

(3657, 728)
(5000, 728)
(14624, 728) (8657, 728) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

(3657, 308)
(5000, 307)
(14624, 308) (8657, 308) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

(3657, 728)
(5000, 728)
(14624, 728) (8657, 728) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

(3657, 308)
(5000, 307)
(14624, 308) (8657, 308) (14624,) (8657,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_