In [None]:
import numpy as np
from sklearn.svm import OneClassSVM
from OCluDAL import OCluDAL

from sklearn.svm import OneClassSVM
# Location of the compiled dataset; the OCluDAL object is created with an
# annotation count of 200.
path = 'PreProcessingClassification\\USC\\CompiledData.csv'
OC = OCluDAL(path, annotations=200)

OC.initialise_data()
OC.preprocessing()

# Novelty detection with one One-Class SVM per known label: each model is
# fitted on the labelled samples of a single label and then asked which of the
# unlabelled samples it rejects (prediction == -1).
masks = []
for label in OC.unique_labels:
    label_samples = OC.labelled_X_new[OC.labelled_y_new == label]
    svm = OneClassSVM()
    svm.fit(label_samples)
    novel_mask_i = svm.predict(OC.unlabelled_X_new) == -1
    masks.append(novel_mask_i)

# A sample is kept as novel only if every per-label model rejected it.
novel_mask = np.asarray(masks).all(axis=0)
novel_X = OC.unlabelled_X_new[novel_mask]
print(f"Novelty detected: {len(novel_X)}")

In [None]:
# import pandas as pd

# from sklearn.metrics import silhouette_score
# from sklearn.cluster import AffinityPropagation

# def fine_tune_affinity_propagation(OC_original, novel_X, damping_range, preference_range, max_annotations=600):
#     """Fine tune the affinity propagation model. Score is measured by the
#     silhouette score of the model on the novel data points.
    
#     Parameters
#     ----------
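#     OC_original : OCluDAL object
#         OCluDAL object; a fresh copy is used for each parameter combination.
#     novel_X : numpy array
#         Novel data points.
#     damping_range : list
#         Range of damping values to try.
#     preference_range : list
#         Range of preference values to try.
#     max_annotations : int, optional
#         Size of the labelled set at which the annotation loop stops.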

#     Returns
#     -------
#     scores_array : numpy array
#         Array of scores for each combination of damping and preference.
#     """

#     scores_array = np.zeros((len(damping_range), len(preference_range)))
#     annotations_array = np.zeros((len(damping_range), len(preference_range)))
#     df = pd.DataFrame(columns=['Damping', 'Preference', 'Original Score', 'Score', 'Weighted Score', 'Annotations', 'Avg Annotations'])

#     for di, damping in enumerate(damping_range):
#         for pi, preference in enumerate(preference_range):
#             OC = OC_original.copy()
            
#             print(f"Progress: {di * len(preference_range) + pi + 1}/{len(damping_range) * len(preference_range)}, Damping: {damping}  |Preference: {preference}")
            
#             sum_scores = 0
#             iter = 0
#             annotation_lengths = []
#             try:

#                 while len(OC.labelled_X_new) < max_annotations:
#                     masks = []
#                     iter += 1

#                     # Novelty detection using OCSVM
#                     for label in OC.unique_labels:
#                         # Fit OCSVM
#                         svm = OneClassSVM().fit(OC.labelled_X_new[OC.labelled_y_new == label])
#                         novel_mask_i = svm.predict(OC.unlabelled_X_new) == -1
                        
#                         masks.append(novel_mask_i)

#                     novel_mask = np.all(masks, axis=0)
#                     novel_X = OC.unlabelled_X_new[novel_mask]

#                     # Clustering to select representative samples for annotation using Affinity Propagation
#                     if len(novel_X) > 0:
#                         ap = AffinityPropagation(damping=damping, preference=preference)
#                         ap.fit(novel_X)
#                         representative_X = ap.cluster_centers_
#                         annotation_lengths.append(len(representative_X))
#                         # Get labels
#                         labels = ap.labels_

#                         # Calculate silhouette score
#                         score = silhouette_score(novel_X, labels)
#                         sum_scores += score

#                     else:
#                         break

#                     # Find row indices of representative samples
#                     representative_indices = []
#                     for sample in representative_X:
#                         representative_indices.append(np.where((OC.unlabelled_X_new == sample).all(axis=1))[0][0])


#                     # Update labelled and unlabelled sets
#                     OC.oracle_annotations(representative_indices)

#                 # Calculate average score and standardise it
#                 orig_score = sum_scores / iter
#                 score = orig_score * max_annotations / len(OC.labelled_X_new)
            
#                 # Calculate average number of annotations
#                 avg = np.mean(annotation_lengths)

#                 # Reward for lower average annotations
#                 weighted_score = score * (1 - avg / max_annotations)

#             except:
#                 orig_score = np.nan
#                 score = np.nan
#                 avg = np.nan
#                 weighted_score = np.nan

#             df = pd.concat([df, pd.DataFrame({'Damping': damping,
#                                                 'Preference': preference,
#                                                 'Original Score': orig_score,
#                                                 'Score': score,
#                                                 'Weighted Score': weighted_score,
#                                                 'Annotations': len(OC.labelled_X_new),
#                                                 'Avg Annotations': avg}, index=[0])], ignore_index=True)
#             df.to_csv('fine_tune_affinity_propagation.csv', index=False)
            
#             scores_array[di, pi] = score
#             annotations_array[di, pi] = avg

#             print(f"Annotations: {len(OC.labelled_X_new)}  |Score: {score}  |Avg annotations: {avg}")
#             print('____________________________________________________')

#     return scores_array

# damping_range = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
# preference_range = [-200, -190, -180, -170, -160, -150, -140, -130, -120, -110, -100, -90, -80, -70, -60, -50, -40, -30, -20, -10]

# scores_array = fine_tune_affinity_propagation(OC, novel_X, damping_range, preference_range)

In [5]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

df = pd.read_csv('fine_tune_affinity_propagation.csv')

# Keep only damping values up to 0.9 and runs whose average number of
# annotations is at most 200. Rows with a missing (NaN) average fail the
# comparison and are dropped as well. `.copy()` gives an independent frame so
# the column assignment below does not trigger SettingWithCopyWarning.
df = df[(df['Damping'] <= 0.9) & (df['Avg Annotations'] <= 200)].copy()

# Weighted score: squared score divided by the average number of annotations
df['Weighted Score'] = df['Score'] ** 2 / df['Avg Annotations']

# Arrange the scores on a damping x preference grid. Pivoting aligns every
# value by its (Damping, Preference) pair, so combinations removed by the
# filters become empty (NaN) cells instead of breaking or silently shifting a
# positional reshape.
grid = df.pivot(index='Damping', columns='Preference', values='Weighted Score')

# Plot heatmap of weighted scores
fig = go.Figure(data=go.Heatmap(
                        z=grid.to_numpy(),
                        x=grid.columns.to_numpy(),
                        y=grid.index.to_numpy(),
                        colorscale='Viridis'))

fig.update_layout(
    xaxis_nticks=36,
    yaxis={'title': 'Damping value'},
    xaxis={'title': 'Preference value'}
)

# Highlight the top 2 values with a red border. The half-widths (5 and 0.025)
# are hard-coded box sizes around each highlighted cell centre.
df = df.sort_values(by='Weighted Score', ascending=False)
for _, best in df.head(2).iterrows():
    fig.add_shape(
        type="rect",
        xref="x",
        yref="y",
        x0=best['Preference'] - 5,
        y0=best['Damping'] - 0.025,
        x1=best['Preference'] + 5,
        y1=best['Damping'] + 0.025,
        line=dict(
            color="Red",
            width=2
        )
    )

fig.show()

# Save png
# fig.write_image('AP_heatmap.png', width=1000, height=500, scale=8)

In [7]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

df = pd.read_csv('fine_tune_affinity_propagation.csv')

# Keep only damping values up to 0.9 and runs whose average number of
# annotations is at most 200. Rows with a missing (NaN) average fail the
# comparison and are dropped as well. `.copy()` gives an independent frame so
# the column assignment below does not trigger SettingWithCopyWarning.
df = df[(df['Damping'] <= 0.9) & (df['Avg Annotations'] <= 200)].copy()

# Weighted score (kept as a column for reference; this cell plots 'Score')
df['Weighted Score'] = df['Score'] ** 2 / df['Avg Annotations']

# Arrange the scores on a damping x preference grid. Pivoting aligns every
# value by its (Damping, Preference) pair, so combinations removed by the
# filters become empty (NaN) cells instead of breaking or silently shifting a
# positional reshape.
grid = df.pivot(index='Damping', columns='Preference', values='Score')

# Plot heatmap of the scores
fig = go.Figure(data=go.Heatmap(
                        z=grid.to_numpy(),
                        x=grid.columns.to_numpy(),
                        y=grid.index.to_numpy(),
                        colorscale='Viridis'))

fig.update_layout(
    xaxis_nticks=36,
    yaxis={'title': 'Damping value'},
    xaxis={'title': 'Preference value'}
)

# Highlight the top 2 values with a red border. The half-widths (5 and 0.025)
# are hard-coded box sizes around each highlighted cell centre.
df = df.sort_values(by='Score', ascending=False)
for _, best in df.head(2).iterrows():
    fig.add_shape(
        type="rect",
        xref="x",
        yref="y",
        x0=best['Preference'] - 5,
        y0=best['Damping'] - 0.025,
        x1=best['Preference'] + 5,
        y1=best['Damping'] + 0.025,
        line=dict(
            color="Red",
            width=2
        )
    )

fig.show()

In [7]:
from OCluDAL import OCluDAL
import numpy as np

# Dataset location, the fixed row indices handed to initialise_data, and the
# annotation count passed to OCluDAL.
# indices = np.random.choice(4000, 10, replace=False)
indices = np.arange(138, 148)
path = 'PreProcessing\\USC\\CompiledData_7.csv'
annotations = 10

# (damping, preference) pairs to verify, keyed by the name used as the prefix
# of each run's output file.
damping_pref_tuples = dict(
    combination1=(0.75, -190),
    combination2=(0.75, -180),
    combination3=(0.8, -40),
    combination4=(0.6, -40),
)

for key in damping_pref_tuples:
    damping, pref = damping_pref_tuples[key]

    # Alternative configuration (disabled):
    # OC = OCluDAL(path, annotations, damping=damping, preference=pref)
    # OC.initialise_data(indices=indices, output_path=f'{key}_1_iter.csv')
    # OC.preprocessing()
    # OC.step1(max_iter=1)
    # clf = OC.step2(max_iter=500, n=10, max_samples=500)

    # One full run per combination; results go to a per-combination CSV.
    OC = OCluDAL(path, annotations, damping=damping, preference=pref)
    OC.initialise_data(
        indices=indices,
        output_path=f'{key}_2_heatmap_verification.csv',
    )
    OC.preprocessing()
    OC.step1(max_iter=1)
    clf = OC.step2(max_iter=500, n=10, max_samples=800)

# OC = OCluDAL(path, annotations)
# OC.initialise_data(indices=indices, output_path='baseline_0_iter.csv')
# OC.preprocessing()
# OC.step1(max_iter=0)
# clf = OC.step2(max_iter=500, n=10, max_samples=500)


Total data: 3667
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['Standing' 'Walking Downstairs']
Iteration 1
Novelty detected: 3649
Representative samples chosen for annotation: 122
Starting uncertainty sampling and model training
Total data: 366700     |Labelled data size: 792  |Unlabelled data size: 2875
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['Standing' 'Walking Downstairs']
Iteration 1
Novelty detected: 3649
Representative samples chosen for annotation: 126
Starting uncertainty sampling and model training
Total data: 366700     |Labelled data size: 796  |Unlabelled data size: 2871
Annotations: 10
Preprocessing data: Applying StandardScaler
Unique labels:  ['Standing' 'Walking Downstairs']
Iteration 1
Novelty detected: 3649
Representative samples chosen for annotation: 483
Starting uncertainty sampling and model training
Total data: 366700     |Labelled data size: 793  |Unlabelled data size: 2874
Annotations: 10
Prep

In [6]:
import os
import pandas as pd
import plotly.graph_objects as go
import numpy as np

folder = 'Results/heatmap_validation'

import plotly.express as px
pastel = px.colors.qualitative.Pastel

# color key: one pastel colour per parameter combination (file name prefix)
color_key = {
    'combination1': pastel[0],
    'combination2': pastel[1],
    'combination3': pastel[2],
    'combination4': pastel[3],
}

# Line style key (currently unused: dash styling of the traces is disabled)
line_style_key = {
    '0': 'solid',
    '1': 'solid',
    '2': 'dash',
    '3': 'dot',
}

# Legend label for every result file that should be plotted
name_key = {
    'combination1_2_heatmap_verification.csv': 'Damping = 0.75, Preference = -190',
    'combination2_2_heatmap_verification.csv': 'Damping = 0.75, Preference = -180',
    'combination3_2_heatmap_verification.csv': 'Damping = 0.8, Preference = -40',
    'combination4_2_heatmap_verification.csv': 'Damping = 0.6, Preference = -40',
}

# Collect the result files. Only files with a legend entry are plotted, so
# stray files in the folder no longer raise a KeyError, and sorting makes the
# trace/legend order deterministic (os.listdir order is arbitrary).
files = sorted(name for name in os.listdir(folder) if name in name_key)

fig = go.Figure()

for file in files:
    # os.path.join instead of a hard-coded backslash keeps the path portable
    df = pd.read_csv(os.path.join(folder, file))
    color = color_key[file.split('_')[0]]

    fig.add_trace(go.Scatter(
        x=df['Number of Annotations'],
        y=df['Accuracy'],
        name=name_key[file],
        line=dict(color=color),
    ))

    # Add grey dotted vertical line at row 3, from the x axis up to the curve;
    # skipped when the results file has fewer than three rows.
    if len(df) > 2:
        marker_row = df.iloc[2]
        fig.add_shape(
            type="line",
            x0=marker_row['Number of Annotations'],
            y0=0,
            x1=marker_row['Number of Annotations'],
            y1=marker_row['Accuracy'],
            line=dict(
                color="grey",
                width=3,
                dash="dot",
            ),
        )

# Update ylim and set axis labels
fig.update_yaxes(range=[0, 1], title_text='Accuracy')
fig.update_xaxes(title_text='Number of Annotations')

# Horizontal legend
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

fig.show()