In [3]:
import json
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import pandas as pd

# Testing for val_exps and non_zeros

In [8]:
def get_accuracy_top1(model1, model2, p=6, k=250):
    """Compute top-1 retrieval accuracy from a saved retrieval JSON file.

    Reads ``retrieval_{model1}_{model2}_p{p}_k{k}.json`` (path relative to the
    current working directory). The JSON object must provide the keys
    ``all_values``, ``rows_deleted``, ``retrieval_indices`` and
    ``keys_anchors``.

    The evaluated items are ``all_values[i]`` for every ``i`` in
    ``rows_deleted`` (presumably the held-out test rows -- confirm against the
    script that writes the file). Item number ``j`` counts as a hit when
    ``keys_anchors[retrieval_indices[j][0]]`` equals that item.

    Parameters
    ----------
    model1, model2 : str
        Model identifiers, used only to build the file name.
    p : int, default 6
        Value inserted after ``_p`` in the file name.
    k : int, default 250
        Value inserted after ``_k`` in the file name.

    Returns
    -------
    tuple
        ``(accuracy, words)``. ``accuracy`` is ``hits / number of evaluated
        items`` as a float, or ``0.0`` when there are no evaluated items.
        ``words`` is always an empty list; it is kept only so that existing
        callers that unpack or index the result keep working.

    Raises
    ------
    FileNotFoundError
        If the retrieval file does not exist.
    KeyError
        If one of the required keys is missing from the JSON object.
    """
    path = f'retrieval_{model1}_{model2}_p{p}_k{k}.json'
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        retrieval = json.load(f)

    all_values = retrieval['all_values']
    keys_anchors = retrieval['keys_anchors']
    retrieval_indices = retrieval['retrieval_indices']

    keys_test = [all_values[i] for i in retrieval['rows_deleted']]
    total = len(keys_test)

    words = []  # never populated; returned for backward compatibility
    if total == 0:
        # Nothing to evaluate: report 0.0 instead of dividing by zero.
        return 0.0, words

    # retrieval_indices is indexed by position within keys_test, not by the
    # original row index; only the first (top-ranked) candidate is checked.
    count_top1 = sum(
        1
        for position, word in enumerate(keys_test)
        if word == keys_anchors[retrieval_indices[position][0]]
    )
    return count_top1 / total, words

In [11]:
def get_df_asif_accuracy(model1, model2):
    """Tabulate top-1 accuracy over a set of (val_exp, non_zeros) settings.

    For every setting, ``get_accuracy_top1`` is called with ``p=val_exp`` and
    ``k=non_zeros`` and the first element of its result (the accuracy) is
    stored. The settings are a 4 x 3 grid (``val_exp`` 1..4, ``non_zeros``
    250/500/750) followed by a hand-picked list of extra points.

    The point ``(4, 250)`` belongs to both the grid and the extra list; it is
    evaluated once and appears once in the result (at its grid position), so
    the corresponding file is not read twice and the table has no duplicate
    row.

    Parameters
    ----------
    model1, model2 : str
        Model identifiers forwarded unchanged to ``get_accuracy_top1``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct setting with the columns ``val_exp``,
        ``non_zeros`` and ``get_accuracy_top1``; grid rows come first, then
        the extra points in the order listed.
    """
    val_exp_range = np.arange(1, 5)
    non_zeros_range = np.arange(250, 751, 250)

    # Plain ints so grid points compare/hash identically to the literals below.
    grid_points = [
        (int(val_exp), int(non_zeros))
        for val_exp in val_exp_range
        for non_zeros in non_zeros_range
    ]

    # Additional points
    additional_points = [
        (1, 100), (2, 100), (3, 100),
        (4, 10), (4, 50), (4, 100), (4, 150), (4, 200), (4, 250),
        (5, 250), (6, 250), (7, 250), (8, 250), (9, 250)
    ]

    # dict.fromkeys drops repeated points while keeping first-seen order.
    points = list(dict.fromkeys(grid_points + additional_points))

    rows = [
        {
            "val_exp": val_exp,
            "non_zeros": non_zeros,
            "get_accuracy_top1": get_accuracy_top1(
                model1=model1, model2=model2, p=val_exp, k=non_zeros
            )[0],
        }
        for val_exp, non_zeros in points
    ]

    return pd.DataFrame(rows)

## EncodecMAE (Encodec) and BERT

In [12]:
# Accuracy table for model1='base_layer9' and model2='bert-base-uncased_layer3';
# the returned DataFrame is the cell's displayed result.
get_df_asif_accuracy('base_layer9', 'bert-base-uncased_layer3')

Unnamed: 0,val_exp,non_zeros,get_accuracy_top1
0,1,250,0.282445
1,1,500,0.267377
2,1,750,0.255579
3,2,250,0.296944
4,2,500,0.282018
5,2,750,0.270505
6,3,250,0.300213
7,3,500,0.287278
8,3,750,0.274627
9,4,250,0.301919


## EncodecMAE (Encodec) and GloVe

In [13]:
# Accuracy table for model1='base_layer9' and model2='glove_layer0';
# the returned DataFrame is the cell's displayed result.
get_df_asif_accuracy('base_layer9', 'glove_layer0')

Unnamed: 0,val_exp,non_zeros,get_accuracy_top1
0,1,250,0.16091
1,1,500,0.121962
2,1,750,0.04307
3,2,250,0.194314
4,2,500,0.150249
5,2,750,0.0742
6,3,250,0.196446
7,3,500,0.166169
8,3,750,0.104478
9,4,250,0.226297


## EncodecMAE (MEL) and BERT

In [14]:
# Accuracy table for model1='mel256-ec-base_layer9' and
# model2='bert-base-uncased_layer0'; the returned DataFrame is the cell's
# displayed result.
get_df_asif_accuracy('mel256-ec-base_layer9', 'bert-base-uncased_layer0')

Unnamed: 0,val_exp,non_zeros,get_accuracy_top1
0,1,250,0.398436
1,1,500,0.372992
2,1,750,0.354371
3,2,250,0.412651
4,2,500,0.390618
5,2,750,0.375124
6,3,250,0.416773
7,3,500,0.392182
8,3,750,0.373561
9,4,250,0.420327


## EncodecMAE (MEL) and GloVe

In [15]:
# Accuracy table for model1='mel256-ec-base_layer9' and model2='glove_layer0';
# the returned DataFrame is the cell's displayed result.
get_df_asif_accuracy('mel256-ec-base_layer9', 'glove_layer0')

Unnamed: 0,val_exp,non_zeros,get_accuracy_top1
0,1,250,0.213362
1,1,500,0.148827
2,1,750,0.072921
3,2,250,0.26624
4,2,500,0.202416
5,2,750,0.119545
6,3,250,0.282303
7,3,500,0.23113
8,3,750,0.175124
9,4,250,0.325515


## BEATs and BERT

In [16]:
# Accuracy table for model1='iter3_layer5' and
# model2='bert-base-uncased_layer10'; the returned DataFrame is the cell's
# displayed result.
get_df_asif_accuracy('iter3_layer5', 'bert-base-uncased_layer10')

Unnamed: 0,val_exp,non_zeros,get_accuracy_top1
0,1,250,0.002559
1,1,500,0.001848
2,1,750,0.00199
3,2,250,0.002985
4,2,500,0.001706
5,2,750,0.002274
6,3,250,0.003269
7,3,500,0.001421
8,3,750,0.002701
9,4,250,0.003412


## BEATs and GloVe

In [17]:
# Accuracy table for model1='iter3_layer3' and model2='glove_layer0';
# the returned DataFrame is the cell's displayed result.
get_df_asif_accuracy('iter3_layer3', 'glove_layer0')

Unnamed: 0,val_exp,non_zeros,get_accuracy_top1
0,1,250,0.000142
1,1,500,0.0
2,1,750,0.0
3,2,250,0.000284
4,2,500,0.000284
5,2,750,0.000142
6,3,250,0.000426
7,3,500,0.000284
8,3,750,0.000284
9,4,250,0.000284
