# Data Visualisation for Scenes Ablation Study

In [175]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.ticker import LogLocator

from pathlib import Path
import pandas as pd
import sys

sys.path.append("..")
import visualization

%matplotlib inline
%config InlineBackend.figure_format = 'pdf'

In [176]:
# Load data from files
df1 = pd.read_csv(Path("results.csv")) # Load first 20 samples
df2 = pd.read_csv(Path("results-L-50s.csv")) # Load the remaining 30 samples

df = pd.concat([df1, df2])
df

Unnamed: 0,token_size,qa,index,entities,n_relevant_chunks,n_merged_chunks,is_summarised
0,0,BABILongQuestionType.qa1,1,daniel,1,1,True
1,0,BABILongQuestionType.qa1,1,bathroom,1,1,False
2,0,BABILongQuestionType.qa1,1,john,1,1,False
3,0,BABILongQuestionType.qa1,1,kitchen,1,1,False
4,0,BABILongQuestionType.qa1,1,mary,1,1,False
...,...,...,...,...,...,...,...
634629,128,BABILongQuestionType.qa5,50,third part of his age of reason,1,1,False
634630,128,BABILongQuestionType.qa5,50,several pieces\non religious subjects,1,1,False
634631,128,BABILongQuestionType.qa5,50,prose and verse,1,1,False
634632,128,BABILongQuestionType.qa5,50,great part of his posthumous\npolitical works,1,1,False


In [177]:
# We want grouped by text size, index and qa to get the sum of the individual text
df = df.groupby(['token_size', 'qa', 'index'])[['n_relevant_chunks', 'n_merged_chunks']].sum().reset_index()
df

Unnamed: 0,token_size,qa,index,n_relevant_chunks,n_merged_chunks
0,0,BABILongQuestionType.qa1,1,6,6
1,0,BABILongQuestionType.qa1,2,7,7
2,0,BABILongQuestionType.qa1,3,7,7
3,0,BABILongQuestionType.qa1,4,8,8
4,0,BABILongQuestionType.qa1,5,8,8
...,...,...,...,...,...
2245,128,BABILongQuestionType.qa5,46,5533,4196
2246,128,BABILongQuestionType.qa5,47,6097,4036
2247,128,BABILongQuestionType.qa5,48,5142,3580
2248,128,BABILongQuestionType.qa5,49,5161,3608


In [178]:
# With data for each text, we now mean over qa and indices, to get mean for each token size, for each text
mean_df = df.groupby('token_size')[['n_relevant_chunks', 'n_merged_chunks']].mean().reset_index()
mean_df

Unnamed: 0,token_size,n_relevant_chunks,n_merged_chunks
0,0,16.9,9.06
1,1,49.664,29.648
2,2,93.904,61.892
3,4,173.02,121.94
4,8,340.084,248.02
5,16,666.068,495.636
6,32,1274.46,959.916
7,64,2434.5,1852.092
8,128,4929.164,3713.384


In [179]:
# Melt the dataframe for nicer format
melted_df = mean_df.melt(id_vars='token_size', value_name='chunks', var_name='strategy', value_vars=['n_relevant_chunks', 'n_merged_chunks'])
melted_df['strategy'] = melted_df['strategy'].replace({
    'n_relevant_chunks': 'No Merging',
    'n_merged_chunks': 'Scene Merging'
})
melted_df

Unnamed: 0,token_size,strategy,chunks
0,0,No Merging,16.9
1,1,No Merging,49.664
2,2,No Merging,93.904
3,4,No Merging,173.02
4,8,No Merging,340.084
5,16,No Merging,666.068
6,32,No Merging,1274.46
7,64,No Merging,2434.5
8,128,No Merging,4929.164
9,0,Scene Merging,9.06


In [180]:
visualization.create_line_chart(
    df = melted_df,
    experiment_title = 'Scene Merging Ablation Study',
    x_axis = 'token_size',
    y_axis = 'chunks',
    category = 'strategy',
    n_types = 3,
    x_label = 'Token Size (Log$\mathregular{_2}$ Scale)',
    y_label = 'Mean Prompts Used', 
    y_grid = True,
    y_grid_minor = True,
    y_lims = None,
    ylog = False,
)

  x_label = 'Token Size (Log$\mathregular{_2}$ Scale)',
  sns.lineplot(


<Figure size 1000x600 with 1 Axes>

# Data Visualisation for Delayed Summarisation, ablation study

In [181]:
# Load data from files -> this is pulled from the knowledge base
df1 = pd.read_csv(Path("results.csv"))
df2 = pd.read_csv(Path("results-L-50s.csv"))

df = pd.concat([df1, df2])

We want to show the difference in relevant_chunks (prompts) for summarised entities versus those who are not summarised. Grouped by token size.

In [182]:
summed_df_text = df.groupby(['token_size', 'qa', 'is_summarised', 'index'])['n_relevant_chunks'].sum().reset_index()
summed_df_text

Unnamed: 0,token_size,qa,is_summarised,index,n_relevant_chunks
0,0,BABILongQuestionType.qa1,False,1,5
1,0,BABILongQuestionType.qa1,False,2,6
2,0,BABILongQuestionType.qa1,False,3,6
3,0,BABILongQuestionType.qa1,False,4,7
4,0,BABILongQuestionType.qa1,False,5,7
...,...,...,...,...,...
4337,128,BABILongQuestionType.qa5,True,46,28
4338,128,BABILongQuestionType.qa5,True,47,5
4339,128,BABILongQuestionType.qa5,True,48,8
4340,128,BABILongQuestionType.qa5,True,49,6


In [183]:
# Now we add up the data so false holds all entities, and true only holds those who are summarised. We do this by pivoting
pivoted_summing_df = summed_df_text.pivot(index=['token_size', 'qa', 'index'], columns='is_summarised', values='n_relevant_chunks')
pivoted_summing_df 

Unnamed: 0_level_0,Unnamed: 1_level_0,is_summarised,False,True
token_size,qa,index,Unnamed: 3_level_1,Unnamed: 4_level_1
0,BABILongQuestionType.qa1,1,5.0,1.0
0,BABILongQuestionType.qa1,2,6.0,1.0
0,BABILongQuestionType.qa1,3,6.0,1.0
0,BABILongQuestionType.qa1,4,7.0,1.0
0,BABILongQuestionType.qa1,5,7.0,1.0
...,...,...,...,...
128,BABILongQuestionType.qa5,46,5505.0,28.0
128,BABILongQuestionType.qa5,47,6092.0,5.0
128,BABILongQuestionType.qa5,48,5134.0,8.0
128,BABILongQuestionType.qa5,49,5155.0,6.0


In [184]:
pivoted_summing_df[False] = pivoted_summing_df[False] + pivoted_summing_df[True]
pivoted_summing_df

Unnamed: 0_level_0,Unnamed: 1_level_0,is_summarised,False,True
token_size,qa,index,Unnamed: 3_level_1,Unnamed: 4_level_1
0,BABILongQuestionType.qa1,1,6.0,1.0
0,BABILongQuestionType.qa1,2,7.0,1.0
0,BABILongQuestionType.qa1,3,7.0,1.0
0,BABILongQuestionType.qa1,4,8.0,1.0
0,BABILongQuestionType.qa1,5,8.0,1.0
...,...,...,...,...
128,BABILongQuestionType.qa5,46,5533.0,28.0
128,BABILongQuestionType.qa5,47,6097.0,5.0
128,BABILongQuestionType.qa5,48,5142.0,8.0
128,BABILongQuestionType.qa5,49,5161.0,6.0


In [185]:
# Melt it back into a nicer format
summed_df = pivoted_summing_df.reset_index().melt(id_vars=['token_size', 'qa', 'index'], var_name='is_summarised', value_name='n_relevant_chunks')
summed_df

Unnamed: 0,token_size,qa,index,is_summarised,n_relevant_chunks
0,0,BABILongQuestionType.qa1,1,False,6.0
1,0,BABILongQuestionType.qa1,2,False,7.0
2,0,BABILongQuestionType.qa1,3,False,7.0
3,0,BABILongQuestionType.qa1,4,False,8.0
4,0,BABILongQuestionType.qa1,5,False,8.0
...,...,...,...,...,...
4495,128,BABILongQuestionType.qa5,46,True,28.0
4496,128,BABILongQuestionType.qa5,47,True,5.0
4497,128,BABILongQuestionType.qa5,48,True,8.0
4498,128,BABILongQuestionType.qa5,49,True,6.0


In [186]:
# Now we can mean for token size and is_summarised
grouped_summed_df = summed_df.groupby(['token_size', 'is_summarised'])['n_relevant_chunks'].mean().reset_index()
grouped_summed_df

Unnamed: 0,token_size,is_summarised,n_relevant_chunks
0,0,False,16.86747
1,0,True,2.566265
2,1,False,50.62069
3,1,True,4.922414
4,2,False,95.049774
5,2,True,6.411765
6,4,False,173.936441
7,4,True,6.567797
8,8,False,339.653333
9,8,True,7.613333


In [187]:
# Melt it back into a nicer format
ds_df = grouped_summed_df
ds_df['delayed_summarisation'] = ds_df['is_summarised'].replace({
    True: 'Delayed Summarisation',
    False: 'Full Summarisation'
})
ds_df

Unnamed: 0,token_size,is_summarised,n_relevant_chunks,delayed_summarisation
0,0,False,16.86747,Full Summarisation
1,0,True,2.566265,Delayed Summarisation
2,1,False,50.62069,Full Summarisation
3,1,True,4.922414,Delayed Summarisation
4,2,False,95.049774,Full Summarisation
5,2,True,6.411765,Delayed Summarisation
6,4,False,173.936441,Full Summarisation
7,4,True,6.567797,Delayed Summarisation
8,8,False,339.653333,Full Summarisation
9,8,True,7.613333,Delayed Summarisation


In [188]:
visualization.create_line_chart(
    df = ds_df,
    experiment_title = 'Delayed Summarisation Ablation Study',
    x_axis = 'token_size',
    y_axis = 'n_relevant_chunks',
    category = 'delayed_summarisation',
    n_types = 3,
    x_label = 'Token Size (Log$\mathregular{_2}$ Scale)',
    y_label = 'Mean Prompts Used (Log$\mathregular{_{10}}$ Scale)', 
    y_grid = True,
    y_grid_minor = True,
    y_lims = (1, 1e4),
    ylog = True,
)

  x_label = 'Token Size (Log$\mathregular{_2}$ Scale)',
  y_label = 'Mean Prompts Used (Log$\mathregular{_{10}}$ Scale)',
  sns.lineplot(


<Figure size 1000x600 with 1 Axes>

In [189]:
delayed_sum_n_chunks = ds_df.loc[(ds_df['token_size'] == 128) & (ds_df['delayed_summarisation'] == 'Delayed Summarisation')]['n_relevant_chunks']
full_sum_n_chunks = ds_df.loc[(ds_df['token_size'] == 128) & (ds_df['delayed_summarisation'] == 'Full Summarisation')]['n_relevant_chunks']
delayed_sum_n_chunks, full_sum_n_chunks

(17    8.374468
 Name: n_relevant_chunks, dtype: float64,
 16    4920.238298
 Name: n_relevant_chunks, dtype: float64)

In [190]:
ratio = delayed_sum_n_chunks.iloc[0] / full_sum_n_chunks.iloc[0] * 100
ratio

np.float64(0.17020452218193893)