In [10]:
from CNDE import Models
from CNDE import perform_CNDE, test_ensemble

import pandas as pd
import numpy as np

# Turn off user warnings
import warnings
warnings.filterwarnings('ignore')

path_train = 'df_train_sleep.csv'
path_test = 'df_test_sleep.csv'

c = 0.01


def train_and_test_ensemble(k, contamination=c, train_path=path_train, test_path=path_test):
    """Build, train and test one ensemble.

    Runs the same four steps for every configuration: construct ``Models``,
    call ``instantiate_models()``, pass the object through ``perform_CNDE``
    and then through ``test_ensemble``.

    Parameters
    ----------
    k : int
        Forwarded to ``Models`` as ``k``.
    contamination : float
        Forwarded to ``Models`` as ``contamination``.
    train_path, test_path : str
        CSV paths given to ``Models`` and ``test_ensemble`` respectively.

    Returns
    -------
    The object returned by ``test_ensemble``.
    """
    ensemble = Models(train_path, k=k, contamination=contamination)
    ensemble.instantiate_models()
    ensemble = perform_CNDE(ensemble)
    return test_ensemble(ensemble, test_path)


# NOTE(review): sleep1/sleep2 and sleep3/sleep4 use identical settings; presumably the
# repeats are there to average over run-to-run variation -- confirm. No random seed is
# set in this cell.
sleep1 = train_and_test_ensemble(k=3)
sleep2 = train_and_test_ensemble(k=3)
sleep3 = train_and_test_ensemble(k=5)
sleep4 = train_and_test_ensemble(k=5)


Training ensemble...
Training data point: 37/37
 --------------------- 
Updating weights
 ---------------------
Model IsolationForest performance: 34.0/37. Weight: 1 -> 0.9189189189189189
Model LocalOutlierFactor performance: 37.0/37. Weight: 1 -> 1.0
Model OneClassSVM performance: 26.0/37. Weight: 1 -> 0.7027027027027026
Model EllipticEnvelope performance: 37.0/37. Weight: 1 -> 1.0
Training complete. 
------------------
------------------ 

Model IsolationForest ECS: [0.85438529 0.94854753 0.19800737 0.72391718 0.24034724 0.6101739
 0.47047172 0.60179535 0.08218064 0.99590796 0.77661506 0.77581542
 0.29774517 0.22155599 0.64420582 0.58262038 0.64403085 0.88920253
 0.79957957 0.17264441 0.23036238 0.78747508 0.65535508 0.55703242
 0.57960704 0.6370928  0.65931422 0.         0.78437837 0.67235747
 0.74230685 0.90169553 0.66904276 0.71840194 0.96749266 1.
 0.89962909]
Model IsolationForest CICS: [0.85438529 0.94854753 0.19800737 0.72391718 0.24034724 0.6101739
 0.47047172 0.60179535 0.08

In [59]:
import plotly.graph_objects as go
import numpy as np

def compile_and_save(models, description,
                     path_train='Unscaled/df_train_sleep.csv',
                     path_test='Unscaled/df_test_sleep.csv',
                     results_dir='Results'):
    """Average the models' normality scores, attach them to the unscaled data and save.

    The per-model ``normality_scores`` are averaged element-wise, min-max
    normalised to [0, 1], and written as a ``normality_score`` column onto the
    row-wise concatenation of the unscaled train and test CSVs. The combined
    frame is indexed by the parsed ``Start time`` column and written to
    ``<results_dir>/df_sleep_<description>.csv``.

    Parameters
    ----------
    models : iterable
        Objects exposing a ``normality_scores`` sequence. All sequences must
        have the same length, equal to the number of train + test rows.
        NOTE(review): scores are assumed to be ordered train rows first, then
        test rows -- confirm against ``test_ensemble``.
    description : str
        Suffix used in the output file name.
    path_train, path_test : str
        Paths of the unscaled train / test CSVs; both must contain a
        ``Start time`` column.
    results_dir : str
        Directory for the output CSV; created if it does not exist.

    Returns
    -------
    (pandas.DataFrame, numpy.ndarray)
        The combined frame indexed by ``Start time``, and the normalised
        scores of the first ``len(train)`` rows.

    Raises
    ------
    ValueError
        If ``models`` is empty or the number of scores does not match the
        number of data rows.
    """
    import os  # local import so the function does not depend on another cell's imports

    scores_list = [model.normality_scores for model in models]
    if not scores_list:
        raise ValueError('compile_and_save needs at least one model')

    # One row per model; the mean over axis 0 is the element-wise ensemble average.
    scores = np.array(scores_list, dtype=float).mean(axis=0)

    # Min-max normalise. When every averaged score is identical the range is zero and
    # the division would produce NaN, so map that case to all zeros instead.
    lowest = np.min(scores)
    span = np.max(scores) - lowest
    if span == 0:
        scores = np.zeros_like(scores)
    else:
        scores = (scores - lowest) / span

    df_unscaled_train = pd.read_csv(path_train)
    df_unscaled_test = pd.read_csv(path_test)

    # Stack train on top of test with a fresh 0..n-1 index.
    df_unscaled_both = pd.concat([df_unscaled_train, df_unscaled_test], axis=0, ignore_index=True)
    if len(scores) != len(df_unscaled_both):
        raise ValueError(
            f'Got {len(scores)} normality scores for {len(df_unscaled_both)} data rows'
        )

    train_scores = scores[:len(df_unscaled_train)]
    df_unscaled_both['normality_score'] = scores

    df_unscaled_both['Start time'] = pd.to_datetime(df_unscaled_both['Start time'])
    df_unscaled_both = df_unscaled_both.set_index('Start time')

    # 'Start time' is now the index, so the index must be written out; saving with
    # index=False would silently drop the timestamps from the file.
    os.makedirs(results_dir, exist_ok=True)
    df_unscaled_both.to_csv(os.path.join(results_dir, f'df_sleep_{description}.csv'))

    return df_unscaled_both, train_scores


In [60]:
# Combine the four trained ensembles into one scored frame; 'c001' tags the output file.
df_unscaled_both, train_scores = compile_and_save(
    models=[sleep1, sleep2, sleep3, sleep4],
    description='c001',
)

In [51]:
# Calculate number of days as final start time minus start time
# day_count = df_unscaled_both['Start time'].iloc[-1] - df_unscaled_both['Start time'].iloc[0]
# day_count = day_count.days

In [61]:
import plotly.express as px

scores = df_unscaled_both['normality_score']

# Reference levels are derived from the training scores only.
mean = np.mean(train_scores)
std = np.std(train_scores)

# Scatter of normality score over time; hovering a point shows every column of the frame.
fig = px.scatter(df_unscaled_both, x=df_unscaled_both.index, y='normality_score', hover_data=df_unscaled_both.columns)
fig.update_layout(title='Normality Scores', xaxis_title='Time', yaxis_title='Normality Score')

# Visible x-range: one day of padding on either side of the data.
lower_x = df_unscaled_both.index.min() - pd.Timedelta(days=1)
upper_x = df_unscaled_both.index.max() + pd.Timedelta(days=1)

# Mean, -1, -2 and -3 sigma reference lines. The x-axis is a datetime axis, so the lines
# must be given datetime x values: integer positions would be read as timestamps close
# to the epoch and end up outside the range set below.
for label, n_sigma in (('Mean', 0), ('-Sigma', 1), ('-2 Sigma', 2), ('-3 Sigma', 3)):
    level = mean - n_sigma * std
    fig.add_trace(go.Scatter(x=[lower_x, upper_x], y=[level, level], mode='lines', name=label))

fig.update_xaxes(range=[lower_x, upper_x])

fig.show()

In [62]:
# Histogram of the combined (train + test) normality scores, 100 bins.
fig = px.histogram(data_frame=df_unscaled_both, x='normality_score', nbins=100)
fig.update_layout(
    title='Distribution of Normality Scores',
    xaxis_title='Normality Score',
    yaxis_title='Count',
)
fig.show()