In [10]:
from OCluDAL import OCluDAL
import numpy as np
import pandas as pd

# Experiment driver: for each repetition, draw one set of initial annotations and
# run every configured OCluDAL / baseline variant from that same starting set,
# writing one CSV per (variant, repetition) into `folder`.
# NOTE(review): results are written under 'OverallCA\\USC' with Windows-style
# separators; confirm this matches the folder the analysis code reads from.
folder = 'OverallCA\\USC'
path = 'PreProcessing\\USC\\CompiledData_7.csv'
df = pd.read_csv(path)

# Initial annotations may only be drawn from rows carrying one of these labels.
target_labels = ['Standing', 'Walking Forward']
candidate_indices = df[df['Label'].isin(target_labels)].index

annotations = 10
n = 5
sample_count = 1000

damping = 0.75
pref = -180

n_runs = 5
seed = 42
# Local seeded generator: a fresh "Restart & Run All" reproduces the same initial
# annotation sets, and NumPy's global random state is left untouched.
rng = np.random.default_rng(seed)

# Fail fast instead of looping forever in the rejection sampling below.
present_labels = set(df.loc[candidate_indices, 'Label'].unique())
missing_labels = [label for label in target_labels if label not in present_labels]
if missing_labels:
    raise ValueError(f'Labels not present in {path}: {missing_labels}')
if annotations < len(target_labels):
    raise ValueError('annotations must be at least the number of target labels')

# One entry per variant:
# (output file stem, extra initialise_data kwargs, step1 max_iter, step2 sampling_type)
# NOTE(review): the first entry passes sampling_type='BvSB' while the others pass
# 'Model_E' / 'Model_F'. The strings are kept exactly as found -- confirm which
# values OCluDAL.step2 actually accepts.
run_configs = [
    ('OCluDAL_SVM_Linear', {}, 1, 'BvSB'),
    ('OCluDAL_SVM_Linear_2iter', {}, 2, 'Model_E'),
    ('OCluDAL_SVM_RBF', {'model_type': 'SVM-rbf'}, 1, 'Model_E'),
    ('OCluDAL_KNN5', {'model_type': 'KNN5'}, 1, 'Model_E'),
    ('Random', {}, 0, 'Model_F'),
    ('BvSB', {}, 0, 'Model_E'),
]

for i in range(n_runs):
    # Redraw until the initial annotations contain every target label.
    # The draw is always taken from the full candidate pool; `indices` only holds
    # the current selection, so each repetition gets an independent starting set.
    labels = 0
    while labels != len(target_labels):
        indices = rng.choice(candidate_indices, annotations, replace=False)
        labels = len(np.unique(df.loc[indices, 'Label']))

    for name, init_kwargs, step1_iters, sampling_type in run_configs:
        OC = OCluDAL(path, annotations, damping=damping, preference=pref)
        # model_type is only passed when a variant sets it, so the other variants
        # keep whatever default initialise_data defines.
        OC.initialise_data(indices=indices, output_path=f'{folder}\\{name}_{i}.csv',
                           **init_kwargs)
        OC.preprocessing()
        OC.step1(max_iter=step1_iters)
        clf = OC.step2(max_iter=1000, n=n, max_samples=sample_count,
                       sampling_type=sampling_type)

        # Drop the instance before building the next one.
        del OC

Total data: 3667
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['Standing' 'Walking Forward']
Iteration 1
Novelty detected: 3628
Representative samples chosen for annotation: 133
Starting uncertainty sampling and model training
Total data: 36671000     |Labelled data size: 998  |Unlabelled data size: 2669
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['Standing' 'Walking Forward']
Iteration 1
Novelty detected: 3628
Representative samples chosen for annotation: 133
Iteration 2
Novelty detected: 3397
Representative samples chosen for annotation: 116
Starting uncertainty sampling and model training
Total data: 36671000     |Labelled data size: 999  |Unlabelled data size: 2668
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['Standing' 'Walking Forward']
Iteration 1
Novelty detected: 3628
Representative samples chosen for annotation: 133
Starting uncertainty sampling and model training
Total data: 36671

In [24]:
import os
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Folder holding one result CSV per (method, repetition).
folder = 'Results/OverallCA/USC'

# Get all csv files.
# Only entries ending in .csv are kept, because every listed file is later passed
# to pd.read_csv; sorting makes the processing order independent of the OS.
files = sorted(name for name in os.listdir(folder) if name.lower().endswith('.csv'))

import plotly.express as px
pastel = px.colors.qualitative.Pastel

# Colour used for each plotted series, keyed by series name.
color_key = {
    'Final_Model': pastel[3],
    'Model_B': pastel[1],
    'Model_C': pastel[2],
    'Model_D': pastel[0],
    'Model_F': pastel[4],
    'Model_E': pastel[5]
}

# Average each series over its repeated runs.
def _series_for(file):
    """Return the series name a result file belongs to, or None if it matches none.

    The first '_'-separated token of the file name selects the method; OCluDAL
    files are further split by the 'Linear' / '2iter' / 'RBF' / 'KNN5' substrings.
    """
    method = file.split('_')[0]
    if method == 'OCluDAL':
        if 'Linear' in file:
            return 'Model_B' if '2iter' in file else 'Final_Model'
        if 'RBF' in file:
            return 'Model_C'
        if 'KNN5' in file:
            return 'Model_D'
        return None
    if method == 'Random':
        return 'Model_F'
    if method == 'BvSB':
        return 'Model_E'
    return None


def _average_runs(frames):
    """Stack the per-run frames and average numeric columns per 'Number of Annotations'."""
    stacked = pd.concat(frames)
    return stacked.groupby('Number of Annotations').mean(numeric_only=True).reset_index()


runs = {key: [] for key in
        ('Final_Model', 'Model_B', 'Model_C', 'Model_D', 'Model_F', 'Model_E')}

for file in files:
    series = _series_for(file)
    if series is None:
        # Unrecognised files are skipped without being read.
        continue
    # os.path.join instead of a hard-coded '\\', so the path also resolves
    # on non-Windows systems.
    runs[series].append(pd.read_csv(os.path.join(folder, file)))

# pd.concat fails with an opaque message on an empty list; name the missing series.
empty_series = [key for key, frames in runs.items() if not frames]
if empty_series:
    raise ValueError(f'No result files found in {folder!r} for: {empty_series}')

Final_Model = _average_runs(runs['Final_Model'])
Model_B = _average_runs(runs['Model_B'])
Model_C = _average_runs(runs['Model_C'])
Model_D = _average_runs(runs['Model_D'])
Model_F = _average_runs(runs['Model_F'])
Model_E = _average_runs(runs['Model_E'])

# FIXME(review): the averaged Model_E accuracy is multiplied by 0.98 before it is
# plotted, so the figure does not show the measured values for this series.
# Behaviour is kept as found; document the justification for this factor or remove it.
MODEL_E_ACCURACY_SCALE = 0.98
Model_E['Accuracy'] = Model_E['Accuracy'] * MODEL_E_ACCURACY_SCALE

# Series to plot, paired by position with their colour-key names.
dfs = [Final_Model, Model_B, Model_C, Model_D, Model_E, Model_F]
color_keys = ['Final_Model', 'Model_B', 'Model_C', 'Model_D', 'Model_E', 'Model_F']

ACCURACY_THRESHOLD = 0.95   # a vertical marker is drawn where a series first reaches this
CLASSES_MAX_X = 350         # the "Classes" panel only shows annotations up to this value

# Layout: the left panel spans both rows (accuracy); the right column holds the
# classes panel (top) and the zoomed accuracy panel (bottom).
fig = make_subplots(rows=2, cols=2, specs=[[{"rowspan": 2}, {}],
                                           [None, {}]])

# Solid red rectangle on the main accuracy panel marking the zoomed-in region.
fig.add_shape(
    type="rect",
    xref="x",
    yref="y",
    x0=800,
    y0=0.85,
    x1=995,
    y1=0.99,
    line=dict(color="Red", width=1),
    col=1, row=1
)

for df, file in zip(dfs, color_keys):
    color = color_key[file]

    # Vertical dotted line at the first annotation count whose accuracy reaches the threshold.
    dff = df[df['Accuracy'] >= ACCURACY_THRESHOLD]
    if len(dff) > 0:
        threshold_x = dff['Number of Annotations'].iloc[0]
        print(threshold_x)
        fig.add_shape(type='line',
                      xref='x',
                      yref='y',
                      x0=threshold_x,
                      y0=0,
                      x1=threshold_x,
                      y1=1,
                      line=dict(color=color, width=3.5, dash='dot'),
                      row=1,
                      col=1)

    # Main accuracy curve (the only trace of the series shown in the legend).
    fig.add_trace(go.Scatter(
        x=df['Number of Annotations'],
        y=df['Accuracy'],
        name=file,
        line=dict(color=color, width=3)),
        row=1,
        col=1)

    # Same curve on the zoomed panel; the zoom itself comes from that panel's axis ranges.
    fig.add_trace(go.Scatter(
        x=df['Number of Annotations'],
        y=df['Accuracy'],
        showlegend=False,
        line=dict(color=color, width=3)),
        row=2,
        col=2)

    # Classes curve, restricted to the early part of the run.
    df = df[df['Number of Annotations'] <= CLASSES_MAX_X]
    fig.add_trace(go.Scatter(
        x=df['Number of Annotations'],
        y=df['Classes'],
        showlegend=False,
        line=dict(color=color, width=3)),
        row=1,
        col=2)


# Re-add the first series (Final_Model) last so its curves are drawn on top of the others.
final_color = color_key[color_keys[0]]
final_full = dfs[0]

df = final_full[final_full['Number of Annotations'] <= CLASSES_MAX_X]
fig.add_trace(go.Scatter(
    x=df['Number of Annotations'],
    y=df['Classes'],
    showlegend=False,
    line=dict(color=final_color, width=3)),
    row=1,
    col=2)

# The accuracy overlay uses the full frame: the <= CLASSES_MAX_X filter only belongs to
# the Classes panel, and applying it here left the overlay ending at x = 350.
fig.add_trace(go.Scatter(
    x=final_full['Number of Annotations'],
    y=final_full['Accuracy'],
    showlegend=False,
    line=dict(color=final_color, width=3)),
    row=1,
    col=1)




# Axis titles and ranges.
# Every x axis shares the same title; the zoomed panel (row 2, col 2) additionally
# gets fixed x/y ranges, and the main accuracy panel is pinned to [0, 1].
fig.update_xaxes(title_text='Number of Annotations')
fig.update_yaxes(range=[0, 1], title_text='Accuracy', row=1, col=1)
fig.update_yaxes(title_text='Classes discovered', row=1, col=2)
fig.update_yaxes(title_text='Accuracy', range=[0.85, 1], row=2, col=2)
fig.update_xaxes(range=[800, 1000], row=2, col=2)

# Horizontal legend above the plots, tight top/right margins, larger font.
legend_style = {
    'orientation': "h",
    'yanchor': "bottom",
    'y': 1.02,
    'xanchor': "right",
    'x': 1,
}
fig.update_layout(
    legend=legend_style,
    margin={'t': 20, 'r': 20},
    font_size=15,
)

# Render inline, then export the same figure as a PNG.
fig.show()
fig.write_image('Results/OverallCA/USC.png', width=1500, height=700, scale=3)

718
787
873
815
