In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA, FastICA, KernelPCA
from sklearn.utils import shuffle
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from IPython.display import display, clear_output
from  itertools import combinations
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy.io import loadmat
from sklearn.model_selection import cross_val_score
from collections import defaultdict
from sklearn.metrics import roc_auc_score, accuracy_score
from jupyterthemes import jtplot


In [None]:
# Labels for the two recording states that can be encoded in a row name.
AWAKE = "awake"
ASLEEP = "asleep"


def get_state(name):
    """Classify a row name as AWAKE or ASLEEP.

    The check is a case-insensitive substring test for "awake"; every name
    that does not contain it (including names with neither marker) is
    reported as ASLEEP.
    """
    return AWAKE if "awake" in name.lower() else ASLEEP

In [None]:
def get_subject(name):
    """Return the part of ``name`` that precedes its state marker.

    The cut position is the larger of the first "awake" and the first
    "asleep" offsets (case-insensitive). Anything before that offset,
    including a trailing separator, is returned unchanged.

    NOTE(review): when neither marker occurs both offsets are -1, so the
    result is ``name[:-1]`` (the name minus its last character).
    """
    lowered = name.lower()
    marker_offsets = [lowered.find(marker) for marker in ("awake", "asleep")]
    cut = max(marker_offsets)
    return name[:cut]

In [None]:
def get_channel_names(powers, channels):
    """Expand a (power, channel-selection) specification into index lists.

    Parameters
    ----------
    powers : iterable
        Band identifiers; each must be a key of the module-level
        ``power_to_string`` mapping.
    channels : iterable
        One entry per power: either the string "all" (channels 0-8) or a
        list of zero-based channel indices.

    Returns
    -------
    tuple of three lists
        ``rows`` (channel indices), ``cols`` (the power repeated once per
        selected channel) and ``res_cols`` (labels of the form
        "<band name> <channel index + 1>").
    """
    rows, cols, res_cols = [], [], []
    for power, channel in zip(powers, channels):
        # "all" is shorthand for the nine channels with indices 0..8.
        picked = list(range(9)) if channel == "all" else channel
        rows += picked
        cols += [power] * len(picked)
        res_cols += [power_to_string[power] + " " + str(c + 1) for c in picked]
    return rows, cols, res_cols

In [None]:
# Integer identifiers of the EEG frequency bands, numbered 0-5.
delta, theta, alpha, beta, low_gamma, high_gamma = range(6)

# Human-readable name of each band identifier (used to build column labels).
power_to_string = {
    delta: "delta",
    theta: "theta",
    alpha: "alpha",
    beta: "beta",
    low_gamma: "low_gamma",
    high_gamma: "high_gamma",
}

def get_eeg_ratios(df, powers, channels):
    """Build asleep/awake EEG power ratios for the SUDEP, C1 and C2 cohorts.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per subject with the columns 'asleep', 'awake',
        'C1_asleep', 'C1_awake', 'C2_asleep' and 'C2_awake'. Each cell is an
        array indexed as ``cell[channel, power]``; a cell whose shape is
        (1, 0) is treated as "no recording".
    powers, channels : list
        Band / channel selection, forwarded to ``get_channel_names``.

    Returns
    -------
    tuple of three pandas.DataFrame
        (SUDEP, C1, C2) tables with one row per subject that has both an
        asleep and an awake recording, and one column per selected feature.
        Control rows are labelled ``subject + "C1"`` / ``subject + "C2"``.
    """
    rows, cols, res_cols = get_channel_names(powers, channels)

    def has_recording(column, subject):
        # A (1, 0)-shaped cell is the placeholder for a missing recording.
        return df[column][subject].shape != (1, 0)

    def selected_features(column, subject):
        return df[column][subject][rows, cols].flatten()

    # (column prefix, row-label suffix) per cohort, in return order.
    cohorts = (("", ""), ("C1_", "C1"), ("C2_", "C2"))
    tables = [{} for _ in cohorts]

    for subject in df.index:
        for (prefix, suffix), table in zip(cohorts, tables):
            asleep_col = prefix + "asleep"
            awake_col = prefix + "awake"
            if has_recording(asleep_col, subject) and has_recording(awake_col, subject):
                label = subject + suffix if suffix else subject
                table[label] = (selected_features(asleep_col, subject)
                                / selected_features(awake_col, subject))

    return tuple(pd.DataFrame(table, index=res_cols).transpose() for table in tables)

In [None]:
def get_eeg_ratios2(df, powers, channels):
    """Collect the asleep EEG features for the SUDEP, C1 and C2 cohorts.

    Despite the name, no ratio is computed here: the selected entries of the
    asleep recording are returned as-is.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per subject with the columns 'asleep', 'C1_asleep' and
        'C2_asleep'. Each cell is an array indexed as
        ``cell[channel, power]``; a cell whose shape is (1, 0) is treated
        as "no recording".
    powers, channels : list
        Band / channel selection, forwarded to ``get_channel_names``.

    Returns
    -------
    tuple of three pandas.DataFrame
        (SUDEP, C1, C2) tables with one row per subject that has an asleep
        recording and one column per selected feature. Control rows are
        labelled ``subject + "C1"`` / ``subject + "C2"``.
    """
    rows, cols, res_cols = get_channel_names(powers, channels)

    # (source column, row-label suffix) per cohort, in return order.
    cohorts = (("asleep", ""), ("C1_asleep", "C1"), ("C2_asleep", "C2"))
    tables = [{} for _ in cohorts]

    for subject in df.index:
        for (column, suffix), table in zip(cohorts, tables):
            # A (1, 0)-shaped cell is the placeholder for a missing recording.
            if df[column][subject].shape != (1, 0):
                label = subject + suffix if suffix else subject
                table[label] = df[column][subject][rows, cols].flatten()

    return tuple(pd.DataFrame(table, index=res_cols).transpose() for table in tables)

In [None]:
def get_ecg_ratios(df, add=""):
    """Compute per-subject asleep/awake ratios of every ECG feature column.

    Rows are paired by the subject name extracted with ``get_subject``. The
    first row seen for a subject is remembered; when a second row with the
    same subject name appears, the ratio is formed so that the row stored
    first is the numerator if the current row is AWAKE, and the denominator
    otherwise.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per (subject, state); the index label carries both.
    add : str, optional
        Suffix appended to the experimental-group name that keys each
        result row (callers in this notebook pass "C1" / "C2" for controls).

    Returns
    -------
    pandas.DataFrame
        One row per group (``experimental_group(name) + add``), columns as
        in ``df``. Subjects that appear only once produce no row.
    """
    first_seen = {}
    ratios = {}
    for position, label in enumerate(df.index):
        name = get_subject(label)
        state = get_state(label)
        group = experimental_group(name)
        row = df.iloc[position, :]

        if name not in first_seen:
            first_seen[name] = row
            continue

        stored = first_seen[name]
        # Orient the division by the state of the current row.
        ratios[group + add] = stored / row if state == AWAKE else row / stored
    return pd.DataFrame(ratios, index=df.columns).transpose()

In [None]:
def get_ecg_ratios2(df, add=""):
    """Collect the ASLEEP ECG feature rows, keyed by subject name.

    Despite the name, no ratio is computed: each row classified as ASLEEP by
    ``get_state`` is copied as-is. The key is ``get_subject(label)`` with its
    last character removed (presumably a separator before the state marker -
    confirm against the workbook labels).

    Parameters
    ----------
    df : pandas.DataFrame
        One row per (subject, state); the index label carries both.
    add : str, optional
        Unused; kept so the signature matches ``get_ecg_ratios``.

    Returns
    -------
    pandas.DataFrame
        One row per subject with an asleep recording, columns as in ``df``.
        A later row with the same key overwrites an earlier one.
    """
    asleep_rows = {}
    for position, label in enumerate(df.index):
        name = get_subject(label)[:-1]
        if get_state(label) == ASLEEP:
            asleep_rows[name] = df.iloc[position, :]
    return pd.DataFrame(asleep_rows, index=df.columns).transpose()

In [None]:
def experimental_group(subject):
    """Reduce a row label to the name of its experimental group.

    Steps:
    1. Cut the label at the last "awake" (if the label contains it,
       case-insensitively) or otherwise at the last "asleep".
    2. If a capital "C" occurs anywhere after the first character, cut at
       the last such "C" (this removes a trailing control tag like "C1").
    3. Strip surrounding whitespace.

    NOTE(review): if the label contains neither marker, step 1 slices with
    -1 and therefore drops the last character of the label.
    """
    lowered = subject.lower()
    marker = "awake" if "awake" in lowered else "asleep"
    group = subject[:lowered.rfind(marker)]

    # A "C" at position 0 belongs to the site name, not to a control tag.
    control_tag_at = group.rfind("C")
    if control_tag_at > 0:
        group = group[:control_tag_at]

    return group.strip()

In [None]:
def get_excluded(dfs):
    """List the experimental groups that lack either a SUDEP or a control row.

    Every index label of every frame in ``dfs`` is assigned to a group via
    ``experimental_group``. A label counts as a control when it contains a
    capital "C" after its first character, otherwise as SUDEP. Groups that do
    not have at least one label of each kind are returned, in the order the
    groups were first encountered.
    """
    members = {}
    for frame in dfs:
        for label in frame.index:
            members.setdefault(experimental_group(label), []).append(label)

    incomplete = []
    for group, labels in members.items():
        is_control = ['C' in label[1:] for label in labels]
        has_control = any(is_control)
        has_sudep = not all(is_control)
        if not (has_sudep and has_control):
            incomplete.append(group)

    return incomplete

In [None]:
def plot_pca(X, Y, pca=None):
    """Standardize ``X``, project it and scatter-plot the first two components.

    Parameters
    ----------
    X : array-like
        Feature matrix fed to a ``StandardScaler`` -> ``pca`` pipeline.
    Y : array-like
        Labels aligned with the rows of ``X``; rows where ``Y == 0`` are drawn
        as 'control' (red) and rows where ``Y == 1`` as 'SUDEP' (blue).
    pca : transformer, optional
        Projection step placed after the scaler. When omitted, a new
        ``PCA(n_components=2)`` is created for this call.

    Returns
    -------
    tuple
        ``(X_projected, Y)``.
    """
    # Create the default here rather than in the signature: a default built
    # in the signature is one estimator object shared and refit by all calls.
    if pca is None:
        pca = PCA(n_components=2)

    clf = make_pipeline(StandardScaler(), pca)
    X = clf.fit_transform(X)

    plt.scatter(X[Y==0, 0], X[Y==0, 1], label='control', color='red')
    plt.scatter(X[Y==1, 0], X[Y==1, 1], label='SUDEP', color='blue')
    plt.legend()
    plt.show()

    return X, Y

In [None]:
def split(X, y, names, test_size=0.33):
    """Randomly split the data into train/test parts with a class-balanced test set.

    ``int(len(y) * test_size) // 2`` samples are drawn (via the global NumPy
    RNG) from the rows with ``y == 1`` and the same number from the rows with
    ``y == 0``; everything else goes to the training part. In every returned
    array the ``y == 1`` rows come first, followed by the ``y == 0`` rows.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, test_names)``; names of the
        training rows are not returned.
    """
    per_class = int(y.shape[0] * test_size) // 2
    positives = np.where(y == 1)[0]
    negatives = np.where(y == 0)[0]
    # Positives are shuffled before negatives; keep this order so results
    # under a fixed seed do not change.
    np.random.shuffle(positives)
    np.random.shuffle(negatives)

    def gather(values, part):
        # Rows of ``values`` for the requested slice of each class, positives first.
        return np.concatenate([values[positives[part]], values[negatives[part]]])

    held_out = slice(None, per_class)
    remaining = slice(per_class, None)

    X_test = gather(X, held_out)
    y_test = gather(y, held_out)
    test_names = gather(names, held_out)
    X_train = gather(X, remaining)
    y_train = gather(y, remaining)

    return X_train, X_test, y_train, y_test, test_names

In [None]:
def get_common(eeg, ecg):
    """Return the index labels that are counted exactly twice across both frames.

    Labels of ``eeg.index`` are tallied first, then those of ``ecg.index``.
    With unique indices this is the set of labels present in both frames,
    listed in order of first appearance.
    """
    tally = {}
    for frame in (eeg, ecg):
        for label in frame.index:
            tally[label] = tally.get(label, 0) + 1

    return [label for label, seen in tally.items() if seen == 2]

In [None]:
def combine(sudep_eeg_ratios, c1_eeg_ratios, c2_eeg_ratios, sudep_ecg_ratios, c1_ecg_ratios, c2_ecg_ratios):
    """Join EEG and ECG features per subject and stack the three cohorts.

    For each cohort only the subjects returned by ``get_common`` for its
    EEG/ECG pair are kept. Their EEG values are followed by their ECG values
    in one row.

    Returns
    -------
    tuple of two pandas.DataFrame
        ``X`` - one row per kept subject (SUDEP first, then C1, then C2),
        columns are the SUDEP EEG columns followed by the SUDEP ECG columns.
        ``Y`` - single column 'class' with the same index: 1 for SUDEP rows,
        0 for C1 and C2 rows.
    """
    def helper(df1, df2, common):
        # An empty cohort must still be 2-D, otherwise np.concatenate below
        # rejects the mix of a (0,) array with (n, n_features) arrays.
        if not common:
            return np.empty((0, df1.shape[1] + df2.shape[1]))
        res = []
        for point in common:
            left_data = df1.loc[point].values
            right_data = df2.loc[point].values
            res.append(np.concatenate([left_data, right_data]))
        return np.asarray(res)

#     if sudep_ecg_ratios.shape[1] == 0:
#         Y = np.concatenate([np.ones(len(sudep_eeg_ratios)), np.zeros(len(c1_eeg_ratios)+len(c2_eeg_ratios))])
#         index = list(sudep_eeg_ratios.index) + list(c1_eeg_ratios.index) + list(c2_eeg_ratios.index)
#         return pd.concat([sudep_eeg_ratios, c1_eeg_ratios, c2_eeg_ratios]), pd.DataFrame(Y, columns=['class'], index=index)
    sudep_common = get_common(sudep_eeg_ratios, sudep_ecg_ratios)
    c1_common = get_common(c1_eeg_ratios, c1_ecg_ratios)
    c2_common = get_common(c2_eeg_ratios, c2_ecg_ratios)

    X = np.concatenate([
        helper(sudep_eeg_ratios, sudep_ecg_ratios, sudep_common),
        helper(c1_eeg_ratios, c1_ecg_ratios, c1_common),
        helper(c2_eeg_ratios, c2_ecg_ratios, c2_common),
    ])
    Y = np.concatenate([np.ones(len(sudep_common)), np.zeros(len(c1_common)+len(c2_common))])

    subjects = sudep_common + c1_common + c2_common
    feature_names = sudep_eeg_ratios.columns.append(sudep_ecg_ratios.columns)
    return (pd.DataFrame(X, columns=feature_names, index=subjects),
            pd.DataFrame(Y, columns=['class'], index=subjects))

In [None]:
def other_combine(X, Y, amps):
    """Append the columns of ``amps`` to ``X`` for the subjects both share.

    Parameters
    ----------
    X, Y : pandas.DataFrame
        Feature table and label table sharing the same index.
    amps : pandas.DataFrame
        Extra per-subject columns.

    Returns
    -------
    tuple
        ``(X, Y)`` restricted to the labels present in both ``X.index`` and
        ``amps.index``, with the ``amps`` columns added to ``X``. Rows keep
        the order they have in ``X``, so repeated runs give the same layout.
        The frames passed in are not modified.
    """
    amps_labels = set(amps.index)
    # dict.fromkeys de-duplicates while keeping the order of X.index; a plain
    # set intersection would make the row order depend on string hashing.
    common = [label for label in dict.fromkeys(X.index) if label in amps_labels]

    # Work on an explicit copy so the column assignment below never targets
    # a view of the caller's frame.
    X = X.loc[common].copy()
    Y = Y.loc[common]
    amps_common = amps.loc[common]
    for column in amps.columns:
        X[column] = amps_common[column]
    return X, Y

In [None]:
def run(X_ratios, y_ratios, n_neighbors=0, clf = None, resample=True):
    """Run one shuffled train/test trial and report the test accuracy.

    Parameters
    ----------
    X_ratios : pandas.DataFrame
        Feature table; its ``.values`` and ``.index`` are used.
    y_ratios : array-like
        Labels aligned with the rows of ``X_ratios`` (1 = SUDEP, 0 = control).
    n_neighbors : int, optional
        Forwarded to ``Smote.generate_synthetic_points`` when resampling.
    clf : estimator, optional
        Classifier with ``fit`` / ``score`` / ``predict``. Defaults to a
        ``RandomForestClassifier(n_estimators=35)``.
    resample : bool, optional
        When true, 200 synthetic positive samples are requested from ``Smote``
        and appended to the training set.

    Returns
    -------
    tuple
        ``(score, ranks, incorrect_names, test_names)`` where ``ranks`` is a
        placeholder vector of ones (one per feature), ``incorrect_names`` are
        the test names that were misclassified and ``test_names`` are all
        names in the test part.
    """
    X, y, names = shuffle(X_ratios.values, y_ratios, X_ratios.index)
    X_train, X_test, y_train, y_test, test_names = split(X, y, names, test_size=0.33)
    if resample:
        # NOTE(review): Smote is not defined in the visible part of this
        # notebook - presumably a project-local oversampler; confirm it is
        # imported before this function is called.
        sm = Smote(distance='euclidian')
        X_generated  = sm.generate_synthetic_points(X_train[y_train==1], 200, n_neighbors)
        X_train = np.concatenate([X_train, X_generated])
        y_train = np.concatenate([y_train, np.ones(X_generated.shape[0])])

    # The scaler is fitted on the training part only and reused for the test part.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    if clf is None:
        clf = RandomForestClassifier(n_estimators=35)

    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    pred = clf.predict(X_test)
    incorrect = np.where(pred != y_test)

    #feature_ranks = clf.coef_[0]

    # Real feature ranks are not computed (see the disabled line above);
    # every feature gets the same placeholder rank of 1.
    return score, np.ones(X_train.shape[1]), test_names[incorrect], test_names

In [None]:
def sampling(X, Y, trials=100, verbose=1, n_neighbors=5, clf = None):
    """Repeat ``run`` with resampling and aggregate scores and error counts.

    Parameters
    ----------
    X, Y :
        Features and labels, passed unchanged to ``run``.
    trials : int, optional
        Number of independent trials.
    verbose : int, optional
        When truthy, progress is displayed, a histogram of the scores is
        drawn and summary statistics are printed.
    n_neighbors : int, optional
        Forwarded to ``run``.
    clf : estimator, optional
        Forwarded to ``run``.

    Returns
    -------
    tuple
        ``(scores, incorrect_dict, test_dict)``: the array of per-trial
        scores, the number of times each name was misclassified and the
        number of times each name was part of a test set (keys are
        ``str(name)``).
    """
    scores = []
    incorrect_dict = {}
    test_dict = {}
    for i in range(trials):
        if verbose:
            display(f"trial {i+1} of {trials}")
        score, _, incorrect, tested = run(X, Y, n_neighbors, resample=True, clf=clf)
        scores.append(score)
        for name in incorrect:
            key = str(name)
            incorrect_dict[key] = incorrect_dict.get(key, 0) + 1
        for name in tested:
            key = str(name)
            test_dict[key] = test_dict.get(key, 0) + 1
        if verbose:
            clear_output(wait=True)

    scores = np.asarray(scores)
    if verbose:
        plt.hist(scores)
        over_50 = (scores > 0.5).sum()
        print("mean score", scores.mean(), "std score", scores.std())
        # The '%' format type scales the fraction by 100; the previous
        # message printed the raw fraction followed by a '%' sign.
        print(f"{over_50} out of {scores.shape[0]} ({over_50/scores.shape[0]:.1%}) trials were over 50%")
    return scores, incorrect_dict, test_dict

In [None]:
def no_sampling(features, trials=100, verbose=1, clf=None):
    """Repeat ``run`` without resampling on SUDEP rows plus randomly drawn controls.

    NOTE(review): this function reads the module-level tables
    ``sudep_ratios``, ``c1_ratios`` and ``c2_ratios`` and calls
    ``select_random_controls``; none of them is defined in the visible part
    of this notebook - confirm they exist before use.

    Parameters
    ----------
    features : list
        Column names selected from the three ratio tables.
    trials : int, optional
        Number of independent trials.
    verbose : int, optional
        When truthy, progress is displayed, a histogram of the scores is
        drawn and summary statistics are printed.
    clf : estimator, optional
        Forwarded to ``run``.

    Returns
    -------
    numpy.ndarray
        The per-trial scores.
    """
    s_selected = sudep_ratios[features].copy()
    c1_selected = c1_ratios[features].copy()
    c2_selected = c2_ratios[features].copy()

    scores = []
    average_rank = np.zeros(len(features))
    for i in range(trials):
        if verbose:
            display(f"trial {i+1} of {trials}")
        # assumes select_random_controls returns a 2-D array with one row per
        # drawn control and the same columns as s_selected - TODO confirm
        controls = select_random_controls(c1_selected.values, c2_selected.values, s_selected.shape[0])
        # run() reads .values and .index from its first argument, so it needs
        # a DataFrame rather than a bare array.
        X_ratios = pd.DataFrame(np.concatenate([s_selected.values, controls]),
                                columns=s_selected.columns)
        # One label per row: the number of zeros must match the number of
        # control rows actually drawn.
        y_ratios = np.concatenate([np.ones(s_selected.shape[0]), np.zeros(len(controls))])

        # run() returns (score, ranks, incorrect_names, test_names).
        score, ranks, _, _ = run(X_ratios, y_ratios, resample=False, clf=clf)
        scores.append(score)
        average_rank = average_rank + ranks / trials
        if verbose:
            clear_output(wait=True)

    scores = np.asarray(scores)
    if verbose:
        plt.hist(scores)
        print("Feature ranks in descending order: ")
        # NOTE(review): sorted() orders by ascending average rank although the
        # message says "descending" - confirm which one is intended.
        feature_rankings = [x for _,x in sorted(zip(average_rank, s_selected.columns))]
        print(feature_rankings)
        over_50 = (scores > 0.5).sum()
        print("mean score", scores.mean(), "std score", scores.std())
        # The '%' format type scales the fraction by 100; the previous
        # message printed the raw fraction followed by a '%' sign.
        print(f"{over_50} out of {scores.shape[0]} ({over_50/scores.shape[0]:.1%}) trials were over 50%")
    return scores

In [None]:
def display_incorrect(d, counts):
    """Tabulate, per experimental group, the fraction of tests that went wrong.

    Parameters
    ----------
    d : dict
        Maps a name to the number of times it was misclassified (for example
        the ``incorrect_dict`` returned by ``sampling``).
    counts : dict
        Maps a name to the number of times it was tested (for example the
        ``test_dict`` returned by ``sampling``).

    Returns
    -------
    pandas.DataFrame
        One row per group, sorted by group name, with the columns "SUDEP",
        "C1" and "C2". Each cell is ``d[name] / counts[name]``, 0 when the
        name was tested but never misclassified, and -1 when the group has
        no tested member of that kind.
    """
    table = {}
    for name, n_tested in counts.items():
        # A capital "C" after the first character marks a control whose
        # two-character tag ("C1"/"C2") follows the group name.
        group = name[:-2] if "C" in name[1:] else name

        if "C1" in name:
            column = 1
        elif "C2" in name:
            column = 2
        else:
            column = 0

        row = table.setdefault(group, [-1] * 3)
        # Look the name up in the mapping passed as ``d``; the body used to
        # read an undefined global named ``incorrect``.
        if name in d:
            row[column] = d[name] / n_tested
        else:
            row[column] = 0

    return pd.DataFrame(table, index=["SUDEP", "C1", "C2"]).transpose().sort_index()

In [None]:
def two_clf(X, Y, ecg_feats, eeg_feats, sample_size, clf1, clf2):
    """Average accuracy of an OR-ensemble of an ECG and an EEG classifier.

    In every trial the data is shuffled and split, 200 synthetic positive
    samples are requested from ``Smote`` for the training part, and the
    feature matrix is divided by column: the first ``len(eeg_feats)`` columns
    go to ``clf2`` (EEG), the remaining ones to ``clf1`` (ECG). Each part is
    standardized with its own scaler. A test sample is predicted positive if
    either classifier predicts positive.

    ``ecg_feats`` is not used by the body. Returns the mean accuracy over
    ``sample_size`` trials.
    """
    def one_trial():
        data, labels, names = shuffle(X.values, Y, X.index)
        X_train, X_test, y_train, y_test, test_names = split(data, labels, names, test_size=0.33)

        oversampler = Smote(distance='euclidian')
        synthetic = oversampler.generate_synthetic_points(X_train[y_train==1], 200, 5)
        X_train = np.concatenate([X_train, synthetic])
        y_train = np.concatenate([y_train, np.ones(synthetic.shape[0])])

        # EEG features occupy the leading columns, ECG features the rest.
        n_eeg = len(eeg_feats)
        ecg_train, ecg_test = X_train[:, n_eeg:], X_test[:, n_eeg:]
        eeg_train, eeg_test = X_train[:, :n_eeg], X_test[:, :n_eeg]

        ecg_scaler, eeg_scaler = StandardScaler(), StandardScaler()
        ecg_train = ecg_scaler.fit_transform(ecg_train)
        ecg_test = ecg_scaler.transform(ecg_test)
        eeg_train = eeg_scaler.fit_transform(eeg_train)
        eeg_test = eeg_scaler.transform(eeg_test)

        clf1.fit(ecg_train, y_train)
        clf2.fit(eeg_train, y_train)

        ecg_pred = clf1.predict(ecg_test)
        eeg_pred = clf2.predict(eeg_test)

        hits = np.logical_or(ecg_pred, eeg_pred) == y_test
        return hits.sum() / hits.shape[0]

    return np.asarray([one_trial() for _ in range(sample_size)]).mean()

In [None]:
# Directory with the per-cohort ECG feature workbooks. Built with os.path.join
# so the path also resolves on POSIX systems; the trailing '' keeps the
# trailing separator, so on Windows the value is unchanged.
file_dir = os.path.join('.', 'ekg_features', '')

# ECG feature columns removed from every cohort table.
drop = ['nni_50', 'pnni_50', 'nni_20', 'pnni_20', 'sampen']

# One table per cohort; the first workbook column becomes the row index.
sudep = pd.read_excel(os.path.join(file_dir, "SUDEP.xlsx"), index_col=[0]).drop(drop, axis=1)
c1 = pd.read_excel(os.path.join(file_dir, "Control_1.xlsx"), index_col=[0]).drop(drop, axis=1)
c2 = pd.read_excel(os.path.join(file_dir, "Control_2.xlsx"), index_col=[0]).drop(drop, axis=1)

In [None]:
def _load_mat_table(path, key):
    """Load a labelled table stored under ``key`` in a .mat file as a DataFrame.

    The stored array is expected to carry row labels in its first column and
    column labels in its first row, each label wrapped in a one-element
    array; the wrappers are removed in place before the frame is built. The
    top-left corner cell is ignored.
    """
    table = loadmat(path)[key]
    for row in range(1, table.shape[0]):
        table[row][0] = table[row][0][0]
    for col in range(1, table.shape[1]):
        table[0][col] = table[0][col][0]
    return pd.DataFrame(table[1:, 1:], columns=table[0, 1:], index=table[1:, 0])


# os.path.join keeps the paths valid on POSIX systems as well as on Windows.
mean_p_area = _load_mat_table(os.path.join('.', 'mat_files', 'mean_p_area.mat'), 'mean_p_area')
mean_p_all = _load_mat_table(os.path.join('.', 'mat_files', 'mean_p_all.mat'), 'mean_p_all')

In [None]:
# S = get_ecg_ratios(sudep)
# C1 = get_ecg_ratios(c1, "C1")
# C2 = get_ecg_ratios(c2, "C2")

# for col in S.columns:
#     d = {
#         "SUDEP":S[col],
#         "C1": C1[col],
#         "C2": C2[col]
#     }
#     fig, ax = plt.subplots()
#     plt.title(col)
#     ax.boxplot(d.values(), showfliers=False)
#     ax.set_xticklabels(d.keys())
#     plt.show()

# EEG Features
Frequency bands: delta (1-4 Hz), theta (4-8 Hz), alpha (8-12 Hz), beta (12-30 Hz), low-gamma (30-50 Hz), high-gamma (50-100 Hz)

# ECG Features
'mean_nni', 'sdnn', 'sdsd', 'rmssd', 'median_nni', 'range_nni', 'cvsd',
       'cvnni', 'mean_hr', 'max_hr', 'min_hr', 'std_hr', 'triangular_index',
       'lf', 'hf', 'lf_hf_ratio', 'lfnu', 'hfnu', 'total_power', 'vlf', 'csi',
       'cvi', 'Modified_csi', 'sd1', 'sd2', 'ratio_sd2_sd1'

In [None]:
# Per-subject numeric value keyed by SUDEP subject label.
# NOTE(review): the meaning and unit of the values are not stated in this
# notebook - confirm with the data owner before use.
EMU = {
    # COL
    'COL SUDEP 1': 0.5,
    'COL SUDEP 2': 4,
    'COL SUDEP 3': 0.5,
    'COL SUDEP 4': 2,
    'COL SUDEP 5': 4,
    'COL SUDEP 7': 2,
    # NYU
    'NYU SUDEP 1': 2,
    'NYU SUDEP 2': 5,
    # JH
    'JH SUDEP1': 4,
    # AUSTIN
    'AUSTIN S1': 2,
    'AUSTIN S12': 3,
    'AUSTIN S10': 4,
    'AUSTIN S11': 3,
    'AUSTIN S6': 8,
    'AUSTIN S9': 10,
    # RMH
    'RMH-SUDEP1': 6,
    'RMH-SUDEP11': 2,
    'RMH-SUDEP15': 10,
    'RMH-SUDEP6': 7,
    'RMH-SUDEP4': 6,
    # STV
    'STV_SUDEP5': 2,
    'STV_SUDEP9': 5,
    # CIN
    'CIN SUDEP 1': 3,
    'CIN SUDEP 2': 8,
    # YALE
    'YALE-SUDEP1': 4,
    'YALE-SUDEP2': 6,
    'YALE-SUDEP3': 6,
    'YALE-SUDEP4': 5,
    'YALE-SUDEP5': 4,
}

In [None]:
# Every EEG band identifier, each paired with the "all channels" selector.
all_powers = [delta, theta, alpha, beta, low_gamma, high_gamma]
all_channels = ['all'] * len(all_powers)

# Every ECG feature column used by the analysis.
all_ecg = [
    'mean_nni', 'sdnn', 'sdsd', 'rmssd', 'median_nni', 'range_nni',
    'cvsd', 'cvnni',
    'mean_hr', 'max_hr', 'min_hr', 'std_hr',
    'triangular_index',
    'lf', 'hf', 'lf_hf_ratio', 'lfnu', 'hfnu', 'total_power', 'vlf',
    'csi', 'cvi', 'Modified_csi',
    'sd1', 'sd2', 'ratio_sd2_sd1',
]

In [None]:
from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_classif, f_classif

In [None]:
# Feature configuration: every EEG band on every channel, every ECG feature.
powers = all_powers
channels = all_channels

# EEG: feature labels plus asleep/awake ratio tables for SUDEP, C1 and C2.
_, _, eeg_feats = get_channel_names(powers, channels)
sudep_eeg_ratios, c1_eeg_ratios, c2_eeg_ratios = get_eeg_ratios(mean_p_area, powers, channels)

ecg_feats = all_ecg

# ECG: asleep/awake ratio tables per cohort; the second argument is appended
# to the row labels of the control tables.
sudep_ecg_ratios = get_ecg_ratios(sudep)[ecg_feats]
c1_ecg_ratios = get_ecg_ratios(c1, "C1")[ecg_feats]
c2_ecg_ratios = get_ecg_ratios(c2, "C2")[ecg_feats]

# Disabled step: would drop every row whose experimental group lacks either a
# SUDEP or a control member (see get_excluded).
if False:
    all_outliers = get_excluded([sudep_ecg_ratios, c1_ecg_ratios, c2_ecg_ratios])
    print(all_outliers)
    
    for out in all_outliers:
        for i in sudep_ecg_ratios.index:
            if experimental_group(i) == out:
                sudep_ecg_ratios.drop(i, inplace=True)
        for i in c1_ecg_ratios.index:
            if experimental_group(i) == out:
                c1_ecg_ratios.drop(i, inplace=True)
        for i in c2_ecg_ratios.index:
            if experimental_group(i) == out:
                c2_ecg_ratios.drop(i, inplace=True)
# X: EEG columns followed by ECG columns, restricted to subjects present in
# both tables of their cohort. Y: single column 'class' (1 = SUDEP, 0 = control).
X, Y = combine(sudep_eeg_ratios, c1_eeg_ratios, c2_eeg_ratios, sudep_ecg_ratios, c1_ecg_ratios, c2_ecg_ratios)
# EEG-only alternative, kept for reference:
# X = pd.concat([sudep_eeg_ratios, c1_eeg_ratios, c2_eeg_ratios])
# Y = np.concatenate([np.ones(len(sudep_eeg_ratios)), np.zeros(len(c1_eeg_ratios)+len(c2_eeg_ratios))])
# Y = np.expand_dims(Y, axis=1)
# Remember the labels so the frame can be rebuilt after feature selection,
# which returns a bare array.
index = X.index
columns = X.columns

# Disabled step: would keep only the 3 features with the highest ANOVA F-score.
if False:
    anova_filter = SelectKBest(f_classif, k=3)
    X = anova_filter.fit_transform(X, Y)
    selected = anova_filter.get_support(indices=True)
    X = pd.DataFrame(X, index=index, columns = [columns[c] for c in selected])
    print(f"{X.shape[1]} features selected out of {len(columns)}")
print(X.columns)
print(len(X), "subjects")
# Disabled helper that prints a powers/channels specification matching the
# selected EEG columns:
# powers = defaultdict(list)
# for c in X.columns:
#     powers[c[:-2]].append(int(c[-1])-1)
# print("powers = [" + ", ".join(list(powers.keys())) + "]")
# print("channels = [" + ", ".join([str(asd) for asd in list(powers.values())]) + "]")

In [None]:
import random
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import scipy.stats as st
import scipy

def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and its two-sided Student-t confidence bounds.

    Parameters
    ----------
    data : array-like
        Numeric observations.
    confidence : float, optional
        Confidence level of the interval.

    Returns
    -------
    tuple
        ``(mean, lower, upper)`` where the half-width is the standard error
        of the mean times the t quantile at ``(1 + confidence) / 2`` with
        ``len(data) - 1`` degrees of freedom.
    """
    values = 1.0 * np.array(data)
    dof = len(values) - 1
    center = np.mean(values)
    std_error = scipy.stats.sem(values)
    half_width = std_error * scipy.stats.t.ppf((1 + confidence) / 2., dof)
    return center, center - half_width, center + half_width

def pick_balanced(Y):
    """Pick the positions of a class-balanced subset of ``Y``.

    All positions whose label equals 1 are kept; an equally large random
    sample (without replacement, via the ``random`` module) is drawn from the
    remaining positions.

    Returns a list: the positive positions in ascending order followed by
    the sampled other positions.
    """
    labels = np.array(Y)
    cases, controls = [], []
    for position in range(len(labels)):
        (cases if labels[position] == 1 else controls).append(position)

    return cases + random.sample(controls, k=len(cases))

from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Repeated balanced cross-validation experiment.
# Accumulators filled across repetitions:
#   a / b    - mean cross-validated ROC AUC / accuracy of each repetition
#   bob      - every test-fold index array produced by StratifiedKFold
#   info_arr - per-repetition record of test names, predicted probabilities
#              and true labels
#   coefs    - only filled by the disabled `coefs.append` line below
coefs = []
a = []
b = []
bob = []
info_arr = []
# NOTE(review): neither `random` (used by pick_balanced) nor StratifiedKFold
# is seeded here, so the reported numbers change from run to run.
for i in range(1000):
    clear_output(wait=True)
    cross_fold_scores = [[], []]
    # All SUDEP rows plus an equally large random draw of control rows.
    inds = pick_balanced(Y)
    x = X.iloc[inds].values
    y = Y.iloc[inds].values
    # The comprehension's `i` is local to it and does not disturb the loop counter.
    names = [X.index[i] for i in inds]
    info = {
        'names':[],
        'prediction':[],
        'actual':[]
    }
    for train_index, test_index in StratifiedKFold(5, shuffle=True).split(x, y):
        bob.append(test_index)
        clf = SVC(kernel='poly', degree=3, probability=True)
#         clf = RandomForestClassifier()
#         clf = LogisticRegression(max_iter=10000)
        x_train = x[train_index]
        x_test = x[test_index]
        # Both preprocessing variants below are disabled, so the classifier
        # is trained on the unscaled features.
#         scaler = FastICA(n_components=10)
#         x_train = scaler.fit_transform(x_train)
#         x_test = scaler.transform(x_test)
        
#         scaler = MinMaxScaler()
#         x_train = scaler.fit_transform(x_train)
#         x_test = scaler.transform(x_test)
        
        clf.fit(x_train, y[train_index].ravel())
        # Probability assigned to class 1 for each test row.
        Y_hat = clf.predict_proba(x_test)[:, 1]
        cross_fold_scores[0].append(roc_auc_score(y[test_index], Y_hat))
        cross_fold_scores[1].append(accuracy_score(y[test_index], Y_hat>0.5))
        info['names'].append([names[i] for i in test_index])
        info['prediction'].append(list(Y_hat))
        # y is 2-D with a single column, hence the explicit column 0.
        info['actual'].append(list(y[test_index, 0]))
#         coefs.append(clf.coef_)
    info_arr.append(info)
    a.append(np.mean(cross_fold_scores[0]))
    b.append(np.mean(cross_fold_scores[1]))
    print(i+1)
clear_output()
plt.hist(a)
# After sorting, positions 249 and 749 of the 1000 AUC values approximate the
# lower and upper quartile; the positions are tied to the repetition count above.
a.sort()
print(a[249], a[749])
print("median:", np.median(a), np.median(b))
print("mean:", np.mean(a))
print("std:", np.std(a))
# Disabled export of the per-repetition records:
# with open("ROC/Final Paper Results/Table 1/(i)+(ii) LR v2.txt", "w") as f:
#     print(info_arr, file=f)
# print(mean_confidence_interval(a))
# scores.append(np.median(a))
# stds.append(np.std(a))