In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay, mean_squared_error

import warnings

### Sampling iterations function

In [3]:
def log_reg_rep_samples(df, n_iter = 1000):
    df = df
    x = df.loc[:, df.columns != 'state'] #features
    y = df.loc[:, df.columns == 'state'] #supervisor

    eval_metrics = {} #empty dictionary to store metrics

    for i in range(n_iter): 

        x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.67)

        scaler = StandardScaler()
        scaler.fit(x_train)
        x_train = scaler.transform(x_train)
        x_test = scaler.transform(x_test)

        log_model = LogisticRegression(class_weight='balanced')
        log_model.fit(x_train, y_train.values.ravel())
        y_pred = log_model.predict(x_test)

        acc = classification_report(y_test, y_pred, output_dict=True)['accuracy']
        spec = classification_report(y_test, y_pred, output_dict=True)['0']['precision'] #specificity
        sens = classification_report(y_test, y_pred, output_dict=True)['0']['recall'] #sensitivity

        metrics = [acc, spec, sens] #store values in list
        eval_metrics[i]=list(metrics) #store list in dictionary

    eval_metrics = pd.DataFrame.from_dict(eval_metrics).T 
    eval_metrics.columns = ['accuracy', 'specificity', 'sensitivity']
    eval_metrics.head()

    means = eval_metrics.mean(axis=0) #mean of each column
    
    return means


Wavelet Packet / Wang, 4-3-02 data (wp4)

In [4]:
%store -r wp4 
log_reg_rep_samples(df = wp4, n_iter = 10000)

accuracy       0.861167
specificity    0.872762
sensitivity    0.850099
dtype: float64


Wavelet Packet / Wang, 8-7-02 data (wp8)

In [5]:
%store -r wp8 
log_reg_rep_samples(df = wp8, n_iter = 10000)

accuracy       0.960633
specificity    0.937398
sensitivity    0.955830
dtype: float64

Discrete Wavelet Transform, 4-3-02 data (dwt4)

In [6]:
%store -r dwt4
log_reg_rep_samples(df = dwt4, n_iter = 10000)

accuracy       0.880250
specificity    0.886321
sensitivity    0.875227
dtype: float64

Discrete Wavelet Transform, 8-7-02 data (dwt4)

In [7]:
%store -r dwt8
log_reg_rep_samples(df = dwt8, n_iter = 10000)

accuracy       0.955255
specificity    0.925950
sensitivity    0.953467
dtype: float64