In [1]:
from CNDE import Models
from CNDE import perform_CNDE, test_ensemble

import pandas as pd
import numpy as np

# Turn off user warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
path_train = 'df_train_sleep.csv'
path_test = 'df_test_sleep.csv'

c = 0.05

sleep1 = Models(path_train, k=3, contamination=c)
sleep1.instantiate_models()
sleep1 = perform_CNDE(sleep1)
sleep1 = test_ensemble(sleep1, path_test)

sleep2 = Models(path_train, k=3, contamination=c)
sleep2.instantiate_models()
sleep2 = perform_CNDE(sleep2)
sleep2 = test_ensemble(sleep2, path_test)

sleep3 = Models(path_train, k=5, contamination=c)
sleep3.instantiate_models()
sleep3 = perform_CNDE(sleep3)
sleep3 = test_ensemble(sleep3, path_test)

sleep4 = Models(path_train, k=5, contamination=c)
sleep4.instantiate_models()
sleep4 = perform_CNDE(sleep4)
sleep4 = test_ensemble(sleep4, path_test)


Training ensemble...
Training data point: 37/37
 --------------------- 
Updating weights
 ---------------------
Model IsolationForest performance: 34.0/37. Weight: 1 -> 0.9189189189189189
Model LocalOutlierFactor performance: 33.0/37. Weight: 1 -> 0.8918918918918919
Model OneClassSVM performance: 27.0/37. Weight: 1 -> 0.7297297297297297
Model EllipticEnvelope performance: 33.0/37. Weight: 1 -> 0.8918918918918919
Training complete. 
 ------------------
------------------ 

Training ensemble...y score: 22/22
Training data point: 37/37
 --------------------- 
Updating weights
 ---------------------
Model IsolationForest performance: 36.0/37. Weight: 1 -> 0.972972972972973
Model LocalOutlierFactor performance: 35.0/37. Weight: 1 -> 0.9459459459459459
Model OneClassSVM performance: 32.0/37. Weight: 1 -> 0.8648648648648649
Model EllipticEnvelope performance: 30.0/37. Weight: 1 -> 0.8108108108108107
Training complete. 
 ------------------
------------------ 

Training ensemble...y score: 22/2

In [3]:
import plotly.graph_objects as go
import numpy as np

scores_list = [sleep1.normality_scores, sleep2.normality_scores, sleep3.normality_scores, sleep4.normality_scores]
scores = np.zeros(len(scores_list[0]))
for score in scores_list:
    print(score)
    score = [s[0] for s in score]
    score = np.array(score)
    scores += score

scores = scores / len(scores_list)

# Normalize scores
scores = (scores - np.min(scores)) / (np.max(scores) - np.min(scores))

path_train = 'Unscaled/df_train_sleep.csv'
path_test = 'Unscaled/df_test_sleep.csv'

df_unscaled_train = pd.read_csv(path_train)
df_unscaled_test = pd.read_csv(path_test)

# Concatenate train and test
df_unscaled_both = pd.concat([df_unscaled_train, df_unscaled_test], axis=0)
df_unscaled_both['normality_score'] = scores

[array([1.]), array([1.]), array([-0.48228346]), array([1.]), array([-0.49015748]), array([1.]), array([0.49015748]), array([1.]), array([-0.48228346]), array([1.]), array([1.]), array([1.]), array([-0.49015748]), array([0.02755906]), array([1.]), array([1.]), array([0.02755906]), array([1.]), array([1.]), array([0.02755906]), array([0.48228346]), array([1.]), array([1.]), array([0.49015748]), array([0.53740157]), array([1.]), array([0.02755906]), array([0.02755906]), array([1.]), array([0.53740157]), array([1.]), array([1.]), array([0.49015748]), array([0.49015748]), array([1.]), array([1.]), array([1.]), array([1.]), array([0.49015748]), array([0.53740157]), array([0.02755906]), array([0.53740157]), array([1.]), array([1.]), array([1.]), array([0.53740157]), array([0.02755906]), array([-0.49015748]), array([0.02755906]), array([0.53740157]), array([1.]), array([1.]), array([0.53740157]), array([0.02755906]), array([-0.49015748]), array([1.]), array([0.02755906]), array([-1.]), array(

In [4]:
# Get new indices
df_unscaled_both = df_unscaled_both.reset_index(drop=True)

In [5]:
import plotly.express as px

# Mean and std
mean = np.mean(scores)
std = np.std(scores)

#
# # Plot normality scores
# fig = go.Figure()
# fig.add_trace(go.Scatter(x=list(range(len(scores))), y=scores, mode='markers', name='Normality Scores'))
# fig.add_trace(go.Scatter(x=list(range(len(scores))), y=[mean for i in range(len(scores))], mode='lines', name='Mean'))
# fig.add_trace(go.Scatter(x=list(range(len(scores))), y=[mean - std for i in range(len(scores))], mode='lines', name='-Sigma'))
# fig.add_trace(go.Scatter(x=list(range(len(scores))), y=[mean - 2* std for i in range(len(scores))], mode='lines', name='-2 Sigma'))
# fig.add_trace(go.Scatter(x=list(range(len(scores))), y=[mean - 3* std for i in range(len(scores))], mode='lines', name='-3 Sigma'))

# fig.update_layout(title='Normality Scores', xaxis_title='Time', yaxis_title='Normality Score')

# fig.show()

# Plot normality scores from df_unscaled_both against index. Hover data for each point: all columns in df_unscaled_both
fig = px.scatter(df_unscaled_both, x=df_unscaled_both.index, y='normality_score', hover_data=df_unscaled_both.columns)
fig.update_layout(title='Normality Scores', xaxis_title='Time', yaxis_title='Normality Score')

# Include mean, -1 std, -2 std, -3 std
fig.add_trace(go.Scatter(x=list(range(len(scores))), y=[mean for i in range(len(scores))], mode='lines', name='Mean'))
fig.add_trace(go.Scatter(x=list(range(len(scores))), y=[mean - std for i in range(len(scores))], mode='lines', name='-Sigma'))
fig.add_trace(go.Scatter(x=list(range(len(scores))), y=[mean - 2* std for i in range(len(scores))], mode='lines', name='-2 Sigma'))
fig.add_trace(go.Scatter(x=list(range(len(scores))), y=[mean - 3* std for i in range(len(scores))], mode='lines', name='-3 Sigma'))

fig.show()