In [1]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
import ast
import matplotlib.pyplot as plt
import numpy as np
import altair as alt
import pickle


In [None]:
# Read the RADio results CSV, decode every cell from its literal text into a
# Python list, and cache the decoded frame as a pickle.
def _parse_cell(cell):
    """Evaluate the Python literal stored as text in one cell and return it as a list."""
    return list(ast.literal_eval(cell))


raw_results = pd.read_csv('results/mind_results_radio.csv', index_col=0)
results = raw_results.map(_parse_cell)

with open('radio_results.pkl', 'wb') as pickle_file:
    pickle.dump(results, pickle_file)

In [6]:
# Reduce each cell's list of values to its mean, leaving a single number per
# cell of the results frame, then render the table.
def _cell_mean(values):
    """Return the arithmetic mean of the values held in one cell."""
    return np.mean(values)


mean_results = results.map(_cell_mean)
display(mean_results)

Unnamed: 0,topic_calibrations,complexity_calibrations,fragmentations,activations,representations,alternative_voices,ndcg_values
pop,0.678212,0.453856,0.684298,0.258861,0.278886,0.176424,0.293245
nrms,0.674027,0.456337,0.673768,0.259612,0.279101,0.176212,0.430544
lstur,0.674693,0.455605,0.67692,0.259659,0.278891,0.176246,0.410928
naml,0.674129,0.45627,0.673145,0.25944,0.279184,0.176156,0.445023
random,0.678419,0.455758,0.677979,0.259804,0.278692,0.176202,0.285954
incorrect_random,0.700298,0.486237,0.697724,0.323532,0.332341,0.193912,0.286537
npa,0.675065,0.455072,0.676701,0.259824,0.279017,0.176211,0.402473


In [8]:
metrics = [
    'topic_calibrations',
    'complexity_calibrations',
    'fragmentations',
    'activations',
    'representations',
    'alternative_voices',
]

# Significance test between two methods (here: LSTUR against random).
# For every metric an independent two-sample t-test is run on the two methods'
# value lists, and the result is reported against a 0.01 threshold.
for metric in metrics:
    per_method = results[metric]
    lstur, random = per_method['lstur'], per_method['random']
    t, p = ttest_ind(lstur, random)
    print(f'{metric} t: {t}, p: {p}')
    print(f'{metric} is significant' if p < 0.01 else f'{metric} is not significant')
    print()


topic_calibrations t: -10.976078027656126, p: 5.006029954751262e-28
topic_calibrations is significant

complexity_calibrations t: -0.3916287129049828, p: 0.6953326836337776
complexity_calibrations is not significant

fragmentations t: -4.771317901243979, p: 1.8305968092435278e-06
fragmentations is significant

activations t: -0.6637008710137836, p: 0.5068819912764353
activations is not significant

representations t: 0.2447305044120423, p: 0.8066652039927888
representations is not significant

alternative_voices t: 0.09233293433249713, p: 0.9264335509739892
alternative_voices is not significant



In [12]:
# Plot how each recommender's gap to the random baseline behaves as more users
# are averaged. For each of N_WINDOWS randomly placed windows of consecutive
# entries, the metric is averaged over the first n entries of the window
# (n = 1..MAX_SAMPLES) for every method; each curve is the absolute difference
# to the baseline's average over the same entries, averaged over all windows.

metric_to_plot = 'alternative_voices'

BASELINE = 'random'
# Row key in `results` -> legend label of the corresponding curve.
METHOD_LABELS = {
    'nrms': 'NRMS - Random',
    'naml': 'NAML - Random',
    'lstur': 'LSTUR - Random',
    'npa': 'NPA - Random',
}
N_WINDOWS = 100    # number of random windows averaged per curve
MAX_SAMPLES = 50   # largest number of entries averaged within one window
MAX_START = 1000   # window starts are drawn from [0, MAX_START)
SEED = 0           # fixed so that re-running the cell reproduces the figure


def _running_means(values, starts, max_n):
    """Return running means of `values` over windows beginning at `starts`.

    Entry [i, j] of the result is the mean of
    values[starts[i] : starts[i] + j + 1] for j = 0..max_n - 1.
    Every window must lie fully inside `values`.
    """
    windows = np.stack([values[s:s + max_n] for s in starts])
    return np.cumsum(windows, axis=1) / np.arange(1, max_n + 1)


# Sample sizes shown on the x-axis: 1..MAX_SAMPLES (not 0-based positions).
x = np.arange(1, MAX_SAMPLES + 1)

per_user = {
    name: np.asarray(results[metric_to_plot][name], dtype=float)
    for name in (BASELINE, *METHOD_LABELS)
}

# Keep every window inside the data: a slice running past the end would
# silently average fewer entries than the x-axis claims (or yield NaN if empty).
n_users = min(len(values) for values in per_user.values())
if n_users < MAX_SAMPLES:
    raise ValueError(
        f'need at least {MAX_SAMPLES} values per method, found {n_users}'
    )
start_limit = min(MAX_START, n_users - MAX_SAMPLES + 1)

# One seeded generator makes the window positions, and so the figure, reproducible.
rng = np.random.default_rng(SEED)
starts = rng.integers(0, start_limit, size=N_WINDOWS)

baseline_means = _running_means(per_user[BASELINE], starts, MAX_SAMPLES)

# One long-format frame per method, stacked into a single frame for Altair.
frames = []
for name, label in METHOD_LABELS.items():
    method_means = _running_means(per_user[name], starts, MAX_SAMPLES)
    mean_abs_gap = np.abs(method_means - baseline_means).mean(axis=0)
    frames.append(pd.DataFrame({'x': x, 'mean': mean_abs_gap, 'metric': label}))

df = pd.concat(frames, ignore_index=True)

# Create the plot
base = alt.Chart(df).mark_line().encode(
    x=alt.X('x', title='Number of samples', axis=alt.Axis(labelFontSize=14, titleFontSize=16)),
    y=alt.Y('mean', title='Mean of differences', axis=alt.Axis(labelFontSize=14, titleFontSize=16)),
    color=alt.Color('metric:N', scale=alt.Scale(scheme='category10'), legend=alt.Legend(title=None, labelFontSize=14, titleFontSize=16, orient='top-right')),
).properties(
    width=500,
    height=200,
    title=alt.TitleParams(
        text='Converging Divergences',
        fontSize=20,
        fontWeight='bold',
        anchor='middle',
        offset=10
    )
)

# Display the plot
base