In [6]:
from OCluDAL import OCluDAL
import numpy as np
import pandas as pd

folder = 'OverallCA\\SPHERE'
path = 'PreProcessing\\SPHERE\\features.csv'
df = pd.read_csv(path)
# Pool of candidate rows for the initial annotations: every row labelled
# 'a_walk' or 'p_stand'. This pool must stay intact across runs, so it is kept
# separate from the per-run sample `indices` drawn from it below.
candidate_indices = df[(df['Label'] == 'a_walk') | (df['Label'] == 'p_stand')].index

annotations = 10
n = 5
sample_count = 1000

damping = 0.75
pref = -180

# One entry per configuration that is run on every repetition:
# (output file prefix, extra initialise_data kwargs, step1 max_iter, step2 sampling_type)
# An empty kwargs dict leaves OCluDAL's default model_type in place
# (presumably a linear SVM, going by the output file names -- confirm in OCluDAL).
experiments = [
    ('OCluDAL_SVM_Linear', {}, 1, 'BvSB'),
    ('OCluDAL_SVM_Linear_2iter', {}, 2, 'BvSB'),
    ('OCluDAL_SVM_RBF', {'model_type': 'SVM-rbf'}, 1, 'BvSB'),
    ('OCluDAL_KNN5', {'model_type': 'KNN5'}, 1, 'BvSB'),
    ('Random', {}, 0, 'Random'),
    ('BvSB', {}, 0, 'BvSB'),
]

# NOTE(review): no random seed is set, so the initial annotations differ
# between kernel sessions; seed NumPy beforehand if reproducibility is required.
for i in range(5):
    labels = 0
    while labels != 2:
        # Draw `annotations` random rows from the full candidate pool and retry
        # until both labels are represented. Sampling from the pool (rather than
        # from the previous draw) gives every repetition a fresh starting set and
        # guarantees the retry loop can actually produce a different sample.
        indices = np.random.choice(candidate_indices, annotations, replace=False)
        labels = len(np.unique(df.loc[indices, 'Label']))

    # Every configuration of repetition `i` starts from the same initial sample.
    for name, init_kwargs, step1_iter, sampling_type in experiments:
        OC = OCluDAL(path, annotations, damping=damping, preference=pref)
        OC.initialise_data(indices=indices, output_path=f'{folder}\\{name}_{i}.csv', **init_kwargs)
        OC.preprocessing()
        OC.step1(max_iter=step1_iter)
        clf = OC.step2(max_iter=1000, n=n, max_samples=sample_count, sampling_type=sampling_type)

        # Drop the instance before building the next one.
        del OC

Total data: 2310
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['a_walk' 'p_stand']
Iteration 1
Novelty detected: 2088
Representative samples chosen for annotation: 70
Starting uncertainty sampling and model training
Total data: 23101000     |Labelled data size: 995  |Unlabelled data size: 1315
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['a_walk' 'p_stand']
Iteration 1
Novelty detected: 2088
Representative samples chosen for annotation: 70
Iteration 2
Novelty detected: 956
Representative samples chosen for annotation: 48
Starting uncertainty sampling and model training
Total data: 23101000     |Labelled data size: 998  |Unlabelled data size: 1312
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['a_walk' 'p_stand']
Iteration 1
Novelty detected: 2088
Representative samples chosen for annotation: 70
Starting uncertainty sampling and model training
Total data: 23101000     |Labelled data size: 995  |

In [8]:
import os
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

folder = 'Results/OverallCA/SPHERE'

# Only the per-run result CSVs are relevant; any other directory entry
# (images, sub-directories, ...) would make pd.read_csv fail.
files = sorted(f for f in os.listdir(folder) if f.lower().endswith('.csv'))

import plotly.express as px
pastel = px.colors.qualitative.Pastel

# color key
color_key = {
    'OCluDAL_SVM_Linear': pastel[0],
    'OCluDAL_SVM_Linear_2iter': pastel[1],
    'OCluDAL_SVM_RBF': pastel[2],
    'OCluDAL_KNN5': pastel[3],
    'Random': pastel[4],
    'BvSB': pastel[5]
}

# Method names in plotting/legend order
color_keys = ['OCluDAL_SVM_Linear', 'OCluDAL_SVM_Linear_2iter', 'OCluDAL_SVM_RBF', 'OCluDAL_KNN5', 'Random', 'BvSB']

# Collect the per-run result frames of every method, keyed by method name.
runs = {key: [] for key in color_keys}

for file in files:
    # File names look like '<method>_<run>.csv'; the first token separates the
    # OCluDAL variants from the two baselines.
    method = file.split('_')[0]
    if method == 'OCluDAL':
        if 'Linear' in file:
            key = 'OCluDAL_SVM_Linear_2iter' if '2iter' in file else 'OCluDAL_SVM_Linear'
        elif 'RBF' in file:
            key = 'OCluDAL_SVM_RBF'
        elif 'KNN5' in file:
            key = 'OCluDAL_KNN5'
        else:
            continue  # unrecognised OCluDAL variant
    elif method in ('Random', 'BvSB'):
        key = method
    else:
        continue  # not one of the plotted methods
    runs[key].append(pd.read_csv(os.path.join(folder, file)))

# Calculate the average across runs for each annotation count
averaged = {}
for key in color_keys:
    if not runs[key]:
        # pd.concat([]) would raise an unspecific ValueError; say what is missing.
        raise ValueError(f'No result files found for {key} in {folder}')
    averaged[key] = (
        pd.concat(runs[key])
        .groupby('Number of Annotations')
        .mean(numeric_only=True)
        .reset_index()
    )

# Map dfs to colors (same order as color_keys)
dfs = [averaged[key] for key in color_keys]
# Keep the individual names bound for any later cell that refers to them.
OCluDAL_SVM_Linear, OCluDAL_SVM_Linear_2iter, OCluDAL_SVM_RBF, OCluDAL_KNN5, Random, BvSB = dfs

fig = make_subplots(cols=2, rows=1, column_widths=[0.6, 0.4])

for method_df, key in zip(dfs, color_keys):
    color = color_key[key]

    # Left panel: accuracy against annotation count (carries the legend entry)
    fig.add_trace(go.Scatter(
        x=method_df['Number of Annotations'],
        y=method_df['Accuracy'],
        name=key,
        line=dict(color=color, width=3)),
        row=1,
        col=1)

    # Right panel: the 'Classes' column over the full annotation range
    fig.add_trace(go.Scatter(
        x=method_df['Number of Annotations'],
        y=method_df['Classes'],
        showlegend=False,
        line=dict(color=color, width=3)),
        row=1,
        col=2)


# Update ylim and set axis labels
fig.update_yaxes(range=[0, 1], row=1, col=1)
fig.update_xaxes(title_text='Number of Annotations')
fig.update_yaxes(title_text='Accuracy', row=1, col=1)
fig.update_yaxes(title_text='Classes/ Activities discovered', row=1, col=2)

# Horizontal legend
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

fig.update_layout(font_size=15)
fig.show()
fig.write_image('sampling_validation.png', width=1500, height=600, scale=3)

In [10]:
from OCluDAL import OCluDAL
import numpy as np
import pandas as pd

folder = 'OverallCA\\USC'
path = 'PreProcessing\\USC\\CompiledData_7.csv'
df = pd.read_csv(path)
# Pool of candidate rows for the initial annotations: every row labelled
# 'Standing' or 'Walking Forward'. This pool must stay intact across runs, so it
# is kept separate from the per-run sample `indices` drawn from it below.
candidate_indices = df[(df['Label'] == 'Standing') | (df['Label'] == 'Walking Forward')].index

annotations = 10
n = 5
sample_count = 1000

damping = 0.75
pref = -180

# One entry per configuration that is run on every repetition:
# (output file prefix, extra initialise_data kwargs, step1 max_iter, step2 sampling_type)
# An empty kwargs dict leaves OCluDAL's default model_type in place
# (presumably a linear SVM, going by the output file names -- confirm in OCluDAL).
experiments = [
    ('OCluDAL_SVM_Linear', {}, 1, 'BvSB'),
    ('OCluDAL_SVM_Linear_2iter', {}, 2, 'BvSB'),
    ('OCluDAL_SVM_RBF', {'model_type': 'SVM-rbf'}, 1, 'BvSB'),
    ('OCluDAL_KNN5', {'model_type': 'KNN5'}, 1, 'BvSB'),
    ('Random', {}, 0, 'Random'),
    ('BvSB', {}, 0, 'BvSB'),
]

# NOTE(review): no random seed is set, so the initial annotations differ
# between kernel sessions; seed NumPy beforehand if reproducibility is required.
for i in range(5):
    labels = 0
    while labels != 2:
        # Draw `annotations` random rows from the full candidate pool and retry
        # until both labels are represented. Sampling from the pool (rather than
        # from the previous draw) gives every repetition a fresh starting set and
        # guarantees the retry loop can actually produce a different sample.
        indices = np.random.choice(candidate_indices, annotations, replace=False)
        labels = len(np.unique(df.loc[indices, 'Label']))

    # Every configuration of repetition `i` starts from the same initial sample.
    for name, init_kwargs, step1_iter, sampling_type in experiments:
        OC = OCluDAL(path, annotations, damping=damping, preference=pref)
        OC.initialise_data(indices=indices, output_path=f'{folder}\\{name}_{i}.csv', **init_kwargs)
        OC.preprocessing()
        OC.step1(max_iter=step1_iter)
        clf = OC.step2(max_iter=1000, n=n, max_samples=sample_count, sampling_type=sampling_type)

        # Drop the instance before building the next one.
        del OC

Total data: 3667
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['Standing' 'Walking Forward']
Iteration 1
Novelty detected: 3628
Representative samples chosen for annotation: 133
Starting uncertainty sampling and model training
Total data: 36671000     |Labelled data size: 998  |Unlabelled data size: 2669
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['Standing' 'Walking Forward']
Iteration 1
Novelty detected: 3628
Representative samples chosen for annotation: 133
Iteration 2
Novelty detected: 3397
Representative samples chosen for annotation: 116
Starting uncertainty sampling and model training
Total data: 36671000     |Labelled data size: 999  |Unlabelled data size: 2668
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['Standing' 'Walking Forward']
Iteration 1
Novelty detected: 3628
Representative samples chosen for annotation: 133
Starting uncertainty sampling and model training
Total data: 36671

In [22]:
import os
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

folder = 'Results/OverallCA/USC'

# Only the per-run result CSVs are relevant; any other directory entry
# (images, sub-directories, ...) would make pd.read_csv fail.
files = sorted(f for f in os.listdir(folder) if f.lower().endswith('.csv'))

import plotly.express as px
pastel = px.colors.qualitative.Pastel

# color key
color_key = {
    'OCluDAL_SVM_Linear': pastel[3],
    'OCluDAL_SVM_Linear_2iter': pastel[1],
    'OCluDAL_SVM_RBF': pastel[2],
    'OCluDAL_KNN5': pastel[0],
    'Random': pastel[4],
    'BvSB': pastel[5]
}

# Method names in plotting/legend order
color_keys = ['OCluDAL_SVM_Linear', 'OCluDAL_SVM_Linear_2iter', 'OCluDAL_SVM_RBF', 'OCluDAL_KNN5', 'Random', 'BvSB']

# Collect the per-run result frames of every method, keyed by method name.
runs = {key: [] for key in color_keys}

for file in files:
    # File names look like '<method>_<run>.csv'; the first token separates the
    # OCluDAL variants from the two baselines.
    method = file.split('_')[0]
    if method == 'OCluDAL':
        if 'Linear' in file:
            key = 'OCluDAL_SVM_Linear_2iter' if '2iter' in file else 'OCluDAL_SVM_Linear'
        elif 'RBF' in file:
            key = 'OCluDAL_SVM_RBF'
        elif 'KNN5' in file:
            key = 'OCluDAL_KNN5'
        else:
            continue  # unrecognised OCluDAL variant
    elif method in ('Random', 'BvSB'):
        key = method
    else:
        continue  # not one of the plotted methods
    runs[key].append(pd.read_csv(os.path.join(folder, file)))

# Calculate the average across runs for each annotation count
averaged = {}
for key in color_keys:
    if not runs[key]:
        # pd.concat([]) would raise an unspecific ValueError; say what is missing.
        raise ValueError(f'No result files found for {key} in {folder}')
    averaged[key] = (
        pd.concat(runs[key])
        .groupby('Number of Annotations')
        .mean(numeric_only=True)
        .reset_index()
    )

# Map dfs to colors (same order as color_keys)
dfs = [averaged[key] for key in color_keys]
# Keep the individual names bound for any later cell that refers to them.
OCluDAL_SVM_Linear, OCluDAL_SVM_Linear_2iter, OCluDAL_SVM_RBF, OCluDAL_KNN5, Random, BvSB = dfs

fig = make_subplots(cols=2, rows=1, column_widths=[0.6, 0.4])

for method_df, key in zip(dfs, color_keys):
    color = color_key[key]

    # Left panel: accuracy against annotation count (carries the legend entry)
    fig.add_trace(go.Scatter(
        x=method_df['Number of Annotations'],
        y=method_df['Accuracy'],
        name=key,
        line=dict(color=color, width=3)),
        row=1,
        col=1)

    # Right panel: the 'Classes' column, restricted to the first 500 annotations
    early_df = method_df[method_df['Number of Annotations'] <= 500]
    fig.add_trace(go.Scatter(
        x=early_df['Number of Annotations'],
        y=early_df['Classes'],
        showlegend=False,
        line=dict(color=color, width=3)),
        row=1,
        col=2)

# Add the first method's 'Classes' curve once more after all the others.
# NOTE(review): presumably so that it is drawn on top of overlapping curves -- confirm.
df = dfs[0]
df = df[df['Number of Annotations'] <= 500]
fig.add_trace(go.Scatter(
    x=df['Number of Annotations'],
    y=df['Classes'],
    showlegend=False,
    line=dict(color=pastel[3], width=3)),
    row=1,
    col=2)

# Update ylim and set axis labels
fig.update_yaxes(range=[0, 1], row=1, col=1)
fig.update_xaxes(title_text='Number of Annotations')
fig.update_yaxes(title_text='Accuracy', row=1, col=1)
fig.update_yaxes(title_text='Classes/ Activities discovered', row=1, col=2)

# Horizontal legend
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

fig.update_layout(font_size=15)
fig.show()
fig.write_image('Results/OverallCA/USC.png', width=1500, height=600, scale=3)

In [9]:
import os
import pandas as pd  # imported here so the cell does not depend on earlier cells
import plotly.graph_objects as go

results_dir = 'Results'

# Plot only the CSV files that sit directly in the results directory;
# sub-directories and other file types cannot be read by pd.read_csv.
results = sorted(
    name for name in os.listdir(results_dir)
    if name.lower().endswith('.csv') and os.path.isfile(os.path.join(results_dir, name))
)

# One accuracy-vs-annotations line per result file, labelled with the file name.
fig = go.Figure()
for result in results:
    df = pd.read_csv(os.path.join(results_dir, result))

    test_acc = df['Accuracy']
    num_annotations = df['Number of Annotations']

    fig.add_trace(go.Scatter(x=num_annotations, y=test_acc,
                        mode='lines',
                        name=result))

fig.show()
