# Evaluation of results
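This notebook evaluates the experiment results: per-batch metrics for OnlineKMeans v2, and a comparison of the centroid-based approach against the full approach in terms of quality metrics (accuracy, F1, precision, recall) and run time.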

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import ast
from plotly.subplots import make_subplots
import warnings

# NOTE(review): this silences every warning for the rest of the notebook, so
# diagnostics raised by later cells (e.g. from pandas or numpy) are not shown.
warnings.filterwarnings("ignore")

# OnlineKMeans v2

In [2]:
# Read the OnlineKMeans v2 results. The 'metrics' column is stored as text, so
# each cell is parsed back into a Python object with ast.literal_eval.
df = pd.read_excel("./data/results/onlinekmeans_v2.xlsx")
df['metrics'] = [ast.literal_eval(raw_cell) for raw_cell in df['metrics']]

In [3]:
# Metric families to plot: one figure per family, one trace per metric key.
groups = {
    "Accuracy": ["doc_accuracy", "chunk_accuracy"],
    "F1 Score": ["doc_f1", "chunk_f1"],
    "Precision": ["doc_precision", "chunk_precision"],
    "Recall": ["doc_recall", "chunk_recall"],
}

for title, keys in groups.items():
    # Build every trace of this family first, then attach them in order.
    family_traces = [
        go.Scatter(
            x=df['batch'],
            y=df['metrics'].apply(lambda m: m.get(key)),
            mode='lines+markers',
            name=key,
        )
        for key in keys
    ]

    fig = go.Figure()
    for trace in family_traces:
        fig.add_trace(trace)

    layout_options = dict(
        title=title,
        xaxis_title='Batch',
        yaxis_title=title,
        template='plotly_white',
    )
    fig.update_layout(**layout_options)

    # Same fixed y-range on every figure so the families can be compared by eye.
    fig.update_yaxes(range=[0, 1.1])
    fig.show()

# Centroid vs full
Hyperparameter optimization on data v2 with tensors v2.

In [4]:
# Load the two result sheets of the centroid-vs-full hyperparameter sweep
# (files named *_kmeans180_v2).
centroid_df = pd.read_excel("./data/results/hyperparameter_for_centroid_vs_full/centroid_results_kmeans180_v2.xlsx")
full_df = pd.read_excel("./data/results/hyperparameter_for_centroid_vs_full/full_results_kmeans180_v2.xlsx")

# The metric columns are stored as text; parse each cell with ast.literal_eval
# so that later cells can call .get() on the resulting objects.
centroid_df['centroid_metrics'] = [ast.literal_eval(raw_cell) for raw_cell in centroid_df['centroid_metrics']]
full_df['full_metrics'] = [ast.literal_eval(raw_cell) for raw_cell in full_df['full_metrics']]

# Plot label "<top_k>_<top_n_clusters>" identifying each centroid configuration.
top_k_label = centroid_df['top_k'].astype(str)
cluster_label = centroid_df['top_n_clusters'].astype(str)
centroid_df['x_axis'] = top_k_label + "_" + cluster_label


# Disabled: optionally load a second run (kmeans500 v4, first 4000 rows) and
# append it to both frames.
# centroid_df2 = pd.read_excel("./data/results/hyperparameter_for_centroid_vs_full/centroid_results_kmeans500_v4_first4000row_1.xlsx")
# centroid_df2['centroid_metrics'] = centroid_df2['centroid_metrics'].apply(ast.literal_eval)
# centroid_df2['x_axis'] = centroid_df2['top_k'].astype(str) + "_" + centroid_df2['top_n_clusters'].astype(str)
# full_df2 = pd.read_excel("./data/results/hyperparameter_for_centroid_vs_full/full_results_kmeans500_v4_first4000row_1.xlsx")
# full_df2['full_metrics'] = full_df2['full_metrics'].apply(ast.literal_eval)
# centroid_df = pd.concat([centroid_df, centroid_df2], ignore_index=True)
# full_df = pd.concat([full_df, full_df2], ignore_index=True)

In [5]:
# Show the centroid results table (one row per top_k / top_n_clusters pair).
centroid_df

Unnamed: 0.1,Unnamed: 0,top_k,top_n_clusters,centroid_metrics,centroid_time,x_axis
0,0,3,5,"{'doc_accuracy': 0.7010125686366283, 'chunk_ac...",1890.154896,3_5
1,1,3,10,"{'doc_accuracy': 0.7373485998698616, 'chunk_ac...",2135.487215,3_10
2,2,3,20,"{'doc_accuracy': 0.762691354924143, 'chunk_acc...",2480.586709,3_20
3,3,3,35,"{'doc_accuracy': 0.7776344478818251, 'chunk_ac...",3006.753139,3_35
4,4,5,5,"{'doc_accuracy': 0.7431363371727988, 'chunk_ac...",1965.017514,5_5
5,5,5,10,"{'doc_accuracy': 0.7814016141736778, 'chunk_ac...",2163.159012,5_10
6,6,5,20,"{'doc_accuracy': 0.8082854827109899, 'chunk_ac...",2520.288513,5_20
7,7,5,35,"{'doc_accuracy': 0.8244842977659562, 'chunk_ac...",3031.381288,5_35
8,8,12,5,"{'doc_accuracy': 0.7952031415883744, 'chunk_ac...",2202.401286,12_5
9,9,12,10,"{'doc_accuracy': 0.8352720921471706, 'chunk_ac...",2405.623073,12_10


In [6]:
# Show the full results table (one row per top_k).
full_df

Unnamed: 0.1,Unnamed: 0,top_k,full_metrics,full_time
0,0,3,"{'doc_accuracy': 0.8036735579173279, 'chunk_ac...",10972.37997
1,1,5,"{'doc_accuracy': 0.8522357561159374, 'chunk_ac...",10506.523075
2,2,12,"{'doc_accuracy': 0.9135035788079773, 'chunk_ac...",11872.97477
3,3,25,"{'doc_accuracy': 0.9460039498167787, 'chunk_ac...",12497.922858


In [7]:
# Metric families compared between the two approaches: one figure per family,
# centroid results in the left panel and full results in the right panel.
groups = {
    "Accuracy": ["doc_accuracy", "chunk_accuracy"],
    "F1 Score": ["doc_f1", "chunk_f1"],
    "Precision": ["doc_precision", "chunk_precision"],
    "Recall": ["doc_recall", "chunk_recall"],
}

# Fixed colour per metric key, so a metric looks the same in both panels
# instead of receiving a fresh default colour for every added trace.
metric_palette = px.colors.qualitative.Plotly

for title, keys in groups.items():
    fig = make_subplots(rows=1, cols=2, subplot_titles=(f"Centroid - {title}", f"Full - {title}"))

    for position, key in enumerate(keys):
        color = metric_palette[position % len(metric_palette)]

        fig.add_trace(go.Scatter(
            x=centroid_df['x_axis'],
            y=centroid_df['centroid_metrics'].apply(lambda m: m.get(key)),
            mode='lines+markers',
            name=key,
            legendgroup=key,  # one legend entry toggles the metric in both panels
            line=dict(color=color),
            marker=dict(color=color),
        ), row=1, col=1)

        fig.add_trace(go.Scatter(
            x=full_df['top_k'],
            y=full_df['full_metrics'].apply(lambda m: m.get(key)),
            mode='lines+markers',
            name=key,
            legendgroup=key,
            showlegend=False,  # the left-panel trace already provides the legend entry
            line=dict(color=color),
            marker=dict(color=color),
        ), row=1, col=2)

    # Same fixed y-range in both panels so the approaches are directly comparable.
    fig.update_yaxes(range=[0, 1.1], row=1, col=1)
    fig.update_yaxes(range=[0, 1.1], row=1, col=2)

    fig.update_xaxes(title_text='top k with top n clusters', row=1, col=1)
    fig.update_xaxes(title_text='top k', row=1, col=2)

    # Match the template used by the other figures in this notebook.
    fig.update_layout(template='plotly_white')

    fig.show()

In [8]:
# Run time of the two approaches side by side, both drawn on a 0-15000 y-range.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Full Time", "Centroid Time"))

# One entry per panel: (column, x values, y values, trace name, x-axis title).
panels = [
    (1, full_df['top_k'], full_df['full_time'], "Full Time", 'top k'),
    (2, centroid_df['x_axis'], centroid_df['centroid_time'], "Centroid Time", 'top k with top n clusters'),
]

for panel_col, x_values, y_values, trace_name, _ in panels:
    fig.add_trace(
        go.Scatter(x=x_values, y=y_values, mode='lines+markers', name=trace_name),
        row=1, col=panel_col,
    )

fig.update_layout(template='plotly_white')

for panel_col, _, _, _, x_title in panels:
    fig.update_xaxes(title_text=x_title, row=1, col=panel_col)
    fig.update_yaxes(title_text='Time (s)', range=[0, 15000], row=1, col=panel_col)

fig.show()

# Average speed-up / accuracy decrease

In [9]:
# Divisor used to express the total run times per chunk.
# NOTE(review): hard-coded; presumably the number of evaluated chunks — confirm.
chunk_num = 84007

centroid_df['time_per_chunk'] = centroid_df['centroid_time'] / chunk_num
full_df['time_per_chunk'] = full_df['full_time'] / chunk_num

top_ks = [3, 5, 12, 25]

# For every top_k: the mean, over all centroid configurations with that top_k,
# of (full time per chunk / centroid time per chunk).
average_ratios = {}
for top_k in top_ks:
    top_k_centroid_df = centroid_df[centroid_df['top_k'] == top_k]
    top_k_full_df = full_df[full_df['top_k'] == top_k]

    # Fail with a clear message instead of an opaque ZeroDivisionError /
    # IndexError when one of the frames has no row for this top_k.
    if top_k_centroid_df.empty or top_k_full_df.empty:
        raise ValueError(f"No centroid and/or full results found for top_k={top_k}")

    full_time = top_k_full_df['time_per_chunk'].iloc[0]
    speed_ups = full_time / top_k_centroid_df['time_per_chunk']
    # skipna=False keeps a missing time visible as NaN rather than silently
    # averaging over the remaining rows.
    average_ratios[top_k] = speed_ups.mean(skipna=False)

# Unweighted mean of the per-top_k averages.
total_average_time = sum(average_ratios.values()) / len(average_ratios)
print(f"Total Average Time Ratio (Full / Centroid): {total_average_time:.4f}")

Total Average Time Ratio (Full / Centroid): 4.3205


In [10]:
def _centroid_metric_mean(rows, metric_key):
    """Return the arithmetic mean of `metric_key` over the 'centroid_metrics' dicts in `rows`."""
    running_total = 0
    for metric_dict in rows['centroid_metrics']:
        running_total += metric_dict.get(metric_key)
    return running_total / len(rows)


# One record per (top_k, metric): relative change of the mean centroid metric
# versus the full metric in percent, plus the average time ratio for that top_k.
results = []
for top_k in top_ks:
    top_k_centroid_df = centroid_df[centroid_df['top_k'] == top_k]
    full_rows_for_top_k = full_df[full_df['top_k'] == top_k]

    for family_keys in groups.values():
        for key in family_keys:
            centroid_mean = _centroid_metric_mean(top_k_centroid_df, key)
            full_value = full_rows_for_top_k['full_metrics'].apply(lambda m: m.get(key)).values[0]
            percent_change = (centroid_mean - full_value) / full_value * 100
            results.append((top_k, key, percent_change, average_ratios[top_k]))


result_columns = ['top_k', 'Metric', 'Ratio of metrics (%)', 'Average Time Speed Up']
results_df = pd.DataFrame(results, columns=result_columns)

In [11]:
# One figure per metric: log10 of the shifted metric change and log10 of the
# average time ratio, both plotted against top_k.
metrics = results_df['Metric'].unique()

for metric in metrics:
    # Take an explicit copy: the columns below are overwritten, and assigning
    # onto a boolean-filtered slice of results_df triggers pandas'
    # SettingWithCopyWarning (hidden here because warnings are ignored).
    metric_df = results_df[results_df['Metric'] == metric].copy()

    # Shift to avoid log(0). The shift is 10 + 1 = 11, so any change of -11 %
    # or lower still yields -inf/NaN and the point will be missing from the plot.
    metric_df['Ratio of metrics (%)'] = np.log10(metric_df['Ratio of metrics (%)'] + 10 + 1)
    metric_df['Average Time Speed Up'] = np.log10(metric_df['Average Time Speed Up'])

    fig = go.Figure()
    fig.update_layout(
        title=f"Metric: {metric}",
        xaxis_title='top k',
        yaxis_title='Log Scale',
        template='plotly_white'
    )
    fig.add_trace(go.Scatter(
        x=metric_df['top_k'],
        y=metric_df['Ratio of metrics (%)'],
        mode='lines+markers',
        name='Ratio of metrics (%)'
    ))
    fig.add_trace(go.Scatter(
        x=metric_df['top_k'],
        y=metric_df['Average Time Speed Up'],
        mode='lines+markers',
        name='Average Time Speed Up'
    ))
    fig.show()

# Centroid vs full
KMeans with 180 clusters on v2 data and v2 tensors.

In [13]:
# Load the kmeans180 v2 comparison sheet.
# NOTE(review): this rebinds `df`, which held the OnlineKMeans v2 results
# earlier in the notebook; re-running earlier cells afterwards needs a reload.
df = pd.read_excel("./data/results/kmeans180_v2_comparison_v2tensor.xlsx")

In [14]:
# Show the comparison table loaded from kmeans180_v2_comparison_v2tensor.xlsx.
df

Unnamed: 0,doc_accuracy,chunk_accuracy,doc_precision,doc_recall,doc_f1,chunk_precision,chunk_recall,chunk_f1
0,0.768308,0.698741,1,0.768308,0.868975,1,0.909454,0.95258
1,0.852247,0.784096,1,0.852247,0.920231,1,0.920033,0.958351
