In [12]:
# Import the required libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly as pl
import plotly.express as px
import plotly.graph_objs as go
import plotly.subplots as sp
import seaborn as sns

In [13]:
# Read the data
# The location of results.csv is machine-specific. Set the RESULTS_CSV environment
# variable to point at your own copy; when it is unset, the original author's
# local path is used so existing behaviour is unchanged.
results_csv_path = os.environ.get(
    'RESULTS_CSV',
    '/Users/albertcortbanke/personalisation-for-public-media/results.csv',
)

df = pd.read_csv(results_csv_path)

# Preview the first rows as the cell output
df.head()

Unnamed: 0,user_id,low_intralist_diversity,med_intralist_diversity,high_intralist_diversity,random_intralist_diversity,low_relevance,med_relevance,high_relevance,random_relevance,low_diversity*relevance,med_diversity*relevance,high_diversity*relevance,random_diversity*relevance,low_serendipity,med_serendipity,high_serendipity,random_serendipity
0,8,3.27727,4.925877,4.882163,3.736485,0.8,0.6,0.333333,0.666667,2.621816,2.955526,1.627388,2.49099,0.533333,0.52,0.266667,0.355556
1,877,3.57278,4.678019,4.857595,3.577829,0.933333,0.666667,0.4,0.6,3.334595,3.118679,1.943038,2.146697,0.497778,0.4,0.293333,0.36
2,684,3.372577,4.34279,5.306635,3.905067,1.0,0.733333,0.6,0.533333,3.372577,3.184712,3.183981,2.082702,0.6,0.488889,0.4,0.462222
3,779,3.515489,4.187252,4.775986,4.52466,0.8,0.533333,0.4,0.6,2.812391,2.233201,1.910394,2.714796,0.693333,0.355556,0.346667,0.44
4,144,2.072464,4.048122,4.945909,4.266429,1.0,0.666667,0.4,0.466667,2.072464,2.698748,1.978364,1.991,0.2,0.444444,0.266667,0.342222


In [14]:
# Calculate the mean of every evaluation metric for each algorithm variant
mean_intralist_diversity = df[['low_intralist_diversity', 'med_intralist_diversity', 'high_intralist_diversity', 'random_intralist_diversity']].mean()
mean_relevance = df[['low_relevance', 'med_relevance', 'high_relevance', 'random_relevance']].mean()
mean_diversity_relevance = df[['low_diversity*relevance', 'med_diversity*relevance', 'high_diversity*relevance', 'random_diversity*relevance']].mean()
mean_serendipity = df[['low_serendipity', 'med_serendipity', 'high_serendipity', 'random_serendipity']].mean()

# Combine the means into a single DataFrame with one row per algorithm.
# Each Series above is indexed by its full column name ('low_relevance', ...), so
# concatenating them unchanged would outer-join 16 distinct labels and leave most
# cells NaN. Re-label each Series by the prefix before the first underscore
# ('low', 'med', 'high', 'random') so the four metrics share the same rows.
# The mean_* Series themselves keep their original index.
metric_means = {
    'Intralist Diversity': mean_intralist_diversity,
    'Relevance': mean_relevance,
    'Diversity*Relevance': mean_diversity_relevance,
    'Serendipity': mean_serendipity,
}
mean_values = pd.concat(
    {
        metric: means.rename(index=lambda column: column.split('_', 1)[0])
        for metric, means in metric_means.items()
    },
    axis=1,
)
mean_values = mean_values.rename_axis('Algorithm').reset_index()

# Melt the DataFrame to long form (one row per algorithm/metric pair) for a grouped bar plot
mean_values_melted = pd.melt(mean_values, id_vars=['Algorithm'], var_name='Category', value_name='Mean Value')

# Create a grouped bar plot comparing the mean values of all categories
fig = px.bar(mean_values_melted, x='Algorithm', y='Mean Value', color='Category', barmode='group', title='Comparison of Algorithm Categories')
# px.bar creates one trace per Category, so the trace name is the category label;
# marker.color would only give the bar colour. <extra></extra> hides the secondary
# hover box, which would otherwise repeat the trace name.
fig.update_traces(hovertemplate='Algorithm: %{x}<br>Category: %{fullData.name}<br>Mean Value: %{y:.2f}<extra></extra>')
fig.update_layout(xaxis_title='Algorithm', yaxis_title='Mean Value')

# Show the plot
fig.show()

In [29]:
# Calculate the mean of every evaluation metric for each algorithm variant
mean_intralist_diversity = df[['low_intralist_diversity', 'med_intralist_diversity', 'high_intralist_diversity', 'random_intralist_diversity']].mean()
mean_relevance = df[['low_relevance', 'med_relevance', 'high_relevance', 'random_relevance']].mean()
mean_diversity_relevance = df[['low_diversity*relevance', 'med_diversity*relevance', 'high_diversity*relevance', 'random_diversity*relevance']].mean()
mean_serendipity = df[['low_serendipity', 'med_serendipity', 'high_serendipity', 'random_serendipity']].mean()

# Display order of the algorithms on the x-axis. Defined here so the cell does not
# rely on a variable left over from another (possibly deleted) cell.
algorithm_order = ['low', 'med', 'high', 'random']

# Combine the means into a single DataFrame with one row per algorithm.
# Each Series above is indexed by its full column name ('low_relevance', ...), so
# concatenating them unchanged would outer-join 16 distinct labels and leave most
# cells NaN. Re-label each Series by the prefix before the first underscore so the
# four metrics share the same rows. The mean_* Series keep their original index.
metric_means = {
    'Intralist Diversity': mean_intralist_diversity,
    'Diversity*Relevance': mean_diversity_relevance,
    'Relevance': mean_relevance,
    'Serendipity': mean_serendipity,
}
mean_values = pd.concat(
    {
        metric: means.rename(index=lambda column: column.split('_', 1)[0])
        for metric, means in metric_means.items()
    },
    axis=1,
)
mean_values = mean_values.rename_axis('Algorithm').reset_index()


# Melt the DataFrame to long form (one row per algorithm/metric pair) for a grouped bar plot
mean_values_melted = pd.melt(mean_values, id_vars=['Algorithm'], var_name='Category', value_name='Mean Value')

fig = go.Figure()

# Metrics whose values lie between 0 and 1 go on the secondary (right) axis,
# the remaining metrics on the primary (left) axis.
algorithms_0_to_1 = ['Relevance', 'Serendipity']
algorithms_above_1 = ['Diversity*Relevance', 'Intralist Diversity']
metric_axes = [(metric, 'y2') for metric in algorithms_0_to_1]
metric_axes += [(metric, 'y') for metric in algorithms_above_1]

# Add one bar trace per metric. Every trace gets its own offsetgroup: without it,
# bars attached to different y-axes are grouped independently and end up drawn on
# top of each other even though barmode='group' is set.
for position, (metric, axis_id) in enumerate(metric_axes):
    metric_rows = mean_values_melted.loc[mean_values_melted['Category'] == metric]
    fig.add_trace(go.Bar(x=metric_rows['Algorithm'],
                         y=metric_rows['Mean Value'],
                         name=metric,
                         yaxis=axis_id,
                         offsetgroup=str(position)))

# Update the layout to include two y-axes and the custom order of algorithms in the x-axis
fig.update_layout(
    title='Comparison of Algorithm Categories',
    xaxis=dict(
        title='Algorithm',
        categoryorder='array',
        categoryarray=algorithm_order,
    ),
    yaxis=dict(
        # title=dict(text=..., font=...) replaces the deprecated `titlefont` attribute
        title=dict(text='Mean Value (Above 1)', font=dict(color='blue')),
        # NOTE(review): the axis starts at 1, so a mean below 1 would not be visible here
        range=[1, mean_values_melted['Mean Value'].max() * 1.1],
        tickfont=dict(color='blue'),
    ),
    yaxis2=dict(
        title=dict(text='Mean Value (0 to 1)', font=dict(color='green')),
        range=[0, 1],
        tickfont=dict(color='green'),
        anchor='x',
        overlaying='y',
        side='right',
    ),
    legend=dict(
        x=1.05,
        y=1,
    ),
    barmode='group',
)

# Show the plot
fig.show()

In [16]:
# Build one standalone bar chart per metric from the mean Series computed earlier.
# Each entry pairs a Series of means with the title of its chart.
metric_chart_specs = [
    (mean_intralist_diversity, 'Mean Intralist Diversity'),
    (mean_relevance, 'Mean Relevance'),
    (mean_diversity_relevance, 'Mean Diversity*Relevance'),
    (mean_serendipity, 'Mean Serendipity'),
]
fig1, fig2, fig3, fig4 = [
    px.bar(metric_means, title=chart_title)
    for metric_means, chart_title in metric_chart_specs
]
metric_figures = (fig1, fig2, fig3, fig4)

# Each chart holds a single series, so hide its legend
for metric_figure in metric_figures:
    metric_figure.update_layout(showlegend=False)

# Render the charts in order
for metric_figure in metric_figures:
    metric_figure.show()