# Case Study - LLM

In [1]:
import numpy as np
import pandas as pd
import pickle
import os
import time

np.random.seed(2024)

import plotly.express as px

import sys
sys.path.append('../..')

from topomap.visualizations import plot_topomap_comparison_highlight, plot_hierarchical_treemap, plot_projections_discrete_feature, plot_icicle

from topomap import TopoMap, TopoTree, HierarchicalTopoMap

## 1. Load or Prepare Data

In [3]:
df_questions_path = 'ARC_df_questions.csv'
embs_path = 'ARC_embeddings.pkl'

if os.path.isfile(df_questions_path):
    df_questions = pd.read_csv(df_questions_path)
    with open(embs_path, 'rb') as f:
        embs = pickle.load(f)

else:
    levels = ['Easy', 'Challenge']
    sets = ['train', 'test']

    numbers = ['1','2','3','4']

    df_questions = pd.DataFrame()
    embs = []

    for level in levels:
        for set in sets:
            df_set_level = pd.read_csv(f'../../data/LLM/ARC_{level}_{set}_questions_data.csv')
            df_set_level['Level'] = level
            df_set_level = df_set_level[~df_set_level['correct_answer'].isin(numbers)]
            non_number_ids = df_set_level.index.to_list()
            df_questions = pd.concat([df_questions, df_set_level], ignore_index=True)

            emb_all_layers = pickle.load(open(f'../../data/LLM/ARC_{level}_{set}_layers_emb.pkl', 'rb'))
            emb_last_layer = emb_all_layers[-1]
            emb_last_layer_cleaned = []
            for i in non_number_ids:
                emb_last_layer_cleaned.append(emb_last_layer[i])
            embs.extend(emb_last_layer_cleaned)

    embs = np.array(embs)

    mask = df_questions.map(type) != bool
    d = {True: 'True', False: 'False'}
    df_questions = df_questions.drop('Unnamed: 0', axis=1)
    df_questions = df_questions.where(mask, df_questions.replace(d))

    df_questions.to_csv(df_questions_path, index=False)
    with open(embs_path, 'wb') as f:
        pickle.dump(embs, f)

In [4]:
easy_questions = (df_questions['Level']=='Easy').sum()
chal_questions = (df_questions['Level']=='Challenge').sum()

print(f'Easy questions: {easy_questions} | Challenge questions: {chal_questions}')

correct_easy = ((df_questions['Level']=='Easy') & (df_questions['correct']=='True')).sum()
correct_chal = ((df_questions['Level']=='Challenge') & (df_questions['correct']=='True')).sum()

print(f'From the easy questions, {correct_easy} ({correct_easy/easy_questions:.3f}) answer were correct.')
print(f'From the challenge questions, {correct_chal} ({correct_chal/chal_questions:.3f}) answer were correct.')

correct_questions = (df_questions['correct']=='True').sum()
print(f'Total of correct answers: {correct_questions} ({correct_questions/df_questions.shape[0]:.3f})')

Easy questions: 4426 | Challenge questions: 2246
From the easy questions, 0 (0.000) answer were correct.
From the challenge questions, 0 (0.000) answer were correct.
Total of correct answers: 0 (0.000)


In [13]:
print(df_questions.groupby(['model_choice']).count()['question'])
print(df_questions.groupby(['correct_answer']).count()['question'])

model_choice
A    1411
B    2069
C    1785
D    1407
Name: question, dtype: int64
correct_answer
A    1589
B    1721
C    1745
D    1614
E       3
Name: question, dtype: int64


In [14]:
topomap_path = "topomap_llm.pkl"

if os.path.isfile(topomap_path):
    with open(topomap_path, 'rb') as f:
        topomap_llm = pickle.load(f)
    proj_topomap_llm = topomap_llm.projections
else:
    start_time = time.time()

    topomap_llm = TopoMap()
    proj_topomap_llm = topomap_llm.fit_transform(embs)

    topomap_time = time.time()-start_time
    print(f'Time for running TopoMap: {topomap_time:.3f}s')

    with open(topomap_path,'wb') as f:
        pickle.dump(topomap_llm, f)

In [15]:
topomap_path = "topomap_llm_ann.pkl"

if os.path.isfile(topomap_path):
    with open(topomap_path, 'rb') as f:
        topomap_llm = pickle.load(f)
    proj_topomap_llm = topomap_llm.projections
else:
    start_time = time.time()

    topomap_llm = TopoMap(approach='ANN')
    embs_32 = np.float32(embs)
    proj_topomap_llm = topomap_llm.fit_transform(embs_32)

    topomap_time = time.time()-start_time
    print(f'Time for running TopoMap: {topomap_time:.3f}s')

    with open(topomap_path,'wb') as f:
        pickle.dump(topomap_llm, f)

In [16]:
start_time = time.time()

topotree_llm = TopoTree(min_box_size=50)
topotree_llm.mst = topomap_llm.mst
topotree_llm.sorted_edges = topomap_llm.sorted_edges
comp_info_llm = topotree_llm.fit(embs)

topotree_time = time.time()-start_time
print(f'Time for running TopoTree: {topotree_time:.3f}s')

df_comp_llm = pd.DataFrame.from_dict(comp_info_llm)

df_comp_llm

Time for running TopoTree: 0.154s


Unnamed: 0,id,size,points,persistence,created_at,children,parent,died_at,persistence_density
0,0,1206,"[4516, 5128, 3138, 2884, 3063, 1478, 6252, 574...",7.109243,28.509794,817,5.0,35.619038,33.858298
1,1,328,"[1635, 3420, 3475, 3378, 126, 689, 1361, 551, ...",4.845154,31.309727,187,7.0,36.154881,9.072081
2,2,351,"[1585, 1738, 828, 1906, 1203, 4060, 995, 1036,...",5.041439,33.061996,187,8.0,38.103436,9.211768
3,3,151,"[1648, 1694, 208, 449, 4833, 71, 1205, 1688, 4...",1.794464,33.887196,54,6.0,35.68166,4.231866
4,4,65,"[3746, 3755, 3248, 4054, 1392, 3910, 4409, 172...",0.405972,35.213066,9,5.0,35.619038,1.824867
5,5,1285,"[5377, 5707, 4835, 5247, 2268, 2497, 4908, 513...",0.062622,35.619038,13,6.0,35.68166,36.012899
6,6,1654,"[5377, 5707, 4835, 5247, 2268, 2497, 4908, 513...",0.473221,35.68166,148,7.0,36.154881,45.747627
7,7,2953,"[3420, 3475, 3378, 126, 689, 1361, 551, 586, 4...",1.948555,36.154881,752,8.0,38.103436,77.499573
8,8,6672,"[5377, 5707, 4835, 5247, 2268, 2497, 4908, 513...",144.369919,38.103436,2884,,,


In [17]:
fig = plot_hierarchical_treemap(df_comp_llm, color='died_at')
fig.update_layout(title='TopoTree - LLM ARC Dataset',
                  height=500,
                    width=600,)
fig.write_image("TreeMap-LLM.png", scale=3)
fig.show()

In [18]:
fig = plot_icicle(df_comp_llm)
fig.update_layout(title='Icicle - LLM ARC Dataset',
                  height=500,
                  width=1000,)
fig.show()

In [19]:
start_time = time.time()

topotree_llm_small = TopoTree(min_box_size=10)
topotree_llm_small.mst = topomap_llm.mst
topotree_llm_small.sorted_edges = topomap_llm.sorted_edges
comp_info_llm_small = topotree_llm_small.fit(embs)

topotree_time = time.time()-start_time
print(f'Time for running TopoTree: {topotree_time:.3f}s')

df_comp_llm_small = pd.DataFrame.from_dict(comp_info_llm_small)

Time for running TopoTree: 0.115s


In [20]:
fig = plot_icicle(df_comp_llm_small)
fig.update_layout(title='Icicle - LLM ARC Dataset - Smaller components',
                  height=500,
                  width=1000,)
fig.show()

In [21]:
start_time = time.time()

hiertopomap_llm = HierarchicalTopoMap(k_components=5)
hiertopomap_llm.min_points_component = topotree_llm.min_box_size
hiertopomap_llm.mst = topomap_llm.mst
hiertopomap_llm.sorted_edges = topomap_llm.sorted_edges
proj_hier_llm = hiertopomap_llm.fit_transform(embs)

hier_time = time.time()-start_time
print(f'Time for running HierarchicalTreeMap: {hier_time:.3f}s')

Scalling component 0 - Scalar: 16.772 - initial area: 43031876.000... final area: 12104812544.000.
Scalling component 4 - Scalar: 16.791 - initial area: 230552.328... final area: 65003856.000.
Scalling component 3 - Scalar: 15.993 - initial area: 910290.750... final area: 232828688.000.
Scalling component 1 - Scalar: 16.593 - initial area: 3992383.000... final area: 1099231872.000.
Scalling component 2 - Scalar: 15.726 - initial area: 5020282.500... final area: 1241625984.000.
[INFO] Number of edges hit. Edges processed: 6670
Time for running HierarchicalTreeMap: 8.038s


In [22]:
components_to_highligth = hiertopomap_llm.components_to_scale

for c in components_to_highligth:
    comp_ids = hiertopomap_llm.components_info[c]['points']

    print(f'Component {c} - Number of points: {len(comp_ids)}\n')

    print(f'Points by model answer: {df_questions.iloc[comp_ids].groupby("model_choice").count()["question"]}\n')

    print(df_questions.iloc[comp_ids].groupby("correct_answer").count()["question"])
    
    print(f'Points by correct: {df_questions.iloc[comp_ids].groupby("correct").count()["question"]}\n')

    print(df_questions.iloc[comp_ids].groupby("Level").count()["question"])

    print(20*"-")

Component 0 - Number of points: 1206

Points by model answer: model_choice
A    241
B    426
C    361
D    178
Name: question, dtype: int64

correct_answer
A    315
B    297
C    321
D    273
Name: question, dtype: int64
Points by correct: correct
False    599
True     607
Name: question, dtype: int64

Level
Challenge    546
Easy         660
Name: question, dtype: int64
--------------------
Component 2 - Number of points: 351

Points by model answer: model_choice
A      3
B      9
C      5
D    334
Name: question, dtype: int64

correct_answer
A      7
B      6
C     12
D    326
Name: question, dtype: int64
Points by correct: correct
False     20
True     331
Name: question, dtype: int64

Level
Challenge     42
Easy         309
Name: question, dtype: int64
--------------------
Component 1 - Number of points: 328

Points by model answer: model_choice
A    317
B      4
C      4
D      3
Name: question, dtype: int64

correct_answer
A    306
B     10
C      3
D      9
Name: question, dtype:

In [23]:
components_to_highligth = hiertopomap_llm.components_to_scale

fig = plot_topomap_comparison_highlight(proj_topomap_llm, proj_hier_llm, 
                                        components_to_highligth, df_comp_llm)
fig.update_layout(title='LLM ARC Dataset', 
                    height=600,
                    width=1200,
                  )
fig.write_image("Comparison-LLM-ARC.png", scale=3)
fig.show()

In [24]:
mask = df_questions.map(type) != bool
d = {True: 'True', False: 'False'}

df_questions = df_questions.where(mask, df_questions.replace(d))

In [25]:
fig = plot_projections_discrete_feature(proj_hier_llm,
                                        df_questions,
                                        column_color = 'model_choice',
                                        legend_title="LLM's Answer",
                                        column_values=['A','B','C','D','E'],
                                        low_opacity=True,
                                        hiertopomap=hiertopomap_llm)
fig.update_layout(height=500,width=600)
fig.write_image("Projection-ModelAnswer-5Comps.png", scale=3)
fig.show()

In [26]:
fig = plot_projections_discrete_feature(proj_topomap_llm,
                                        df_questions,
                                        column_color = 'model_choice',
                                        legend_title="LLM's Answer",
                                        column_values=['A','B','C','D','E'],
                                        low_opacity=True,
                                        hiertopomap=hiertopomap_llm,
                                        show_hulls=True,
                                        topomap=True)
fig.update_layout(height=500,width=600)
fig.write_image("TopoMap-ModelAnswer-5Comps.png", scale=3)
fig.show()

In [27]:
colors_level = [px.colors.qualitative.T10[3],
                px.colors.qualitative.T10[1]
                ]

fig = plot_projections_discrete_feature(proj_hier_llm,
                                        df_questions,
                                        colors=colors_level,
                                        legend_title='Question Level',
                                        column_color = 'Level',
                                        #low_opacity=True,
                                        hiertopomap=hiertopomap_llm)
fig.update_layout(height=500,width=600)
fig.write_image("Projection-Level-5Comps.png", scale=3)
fig.show()

In [28]:
colors_correct = [px.colors.qualitative.T10[0],
                  px.colors.qualitative.T10[2]
                  ]

fig = plot_projections_discrete_feature(proj_hier_llm,
                                        df_questions,
                                        column_color = 'correct',
                                        legend_title='Answer Correct',
                                        colors = colors_correct,
                                        hiertopomap=hiertopomap_llm)
fig.update_layout(height=500,width=600)
fig.write_image("Projection-Correct-5Comps.png", scale=3)
fig.show()

In [29]:
start_time = time.time()

hiertopomap_llm_classes = HierarchicalTopoMap(components_to_scale=[1,2,3,4], 
                                              blowout_scalar=2, max_scalar=50)
hiertopomap_llm_classes.min_points_component = topotree_llm.min_box_size
hiertopomap_llm_classes.mst = topomap_llm.mst
hiertopomap_llm_classes.sorted_edges = topomap_llm.sorted_edges
proj_hier_llm_classes = hiertopomap_llm_classes.fit_transform(embs)

hier_time = time.time()-start_time
print(f'Time for running HierarchicalTreeMap: {hier_time:.3f}s')

Scalling component 4 - Scalar: 16.791 - initial area: 230552.328... final area: 65003856.000.
Scalling component 3 - Scalar: 15.993 - initial area: 910290.750... final area: 232828688.000.
Scalling component 1 - Scalar: 16.593 - initial area: 3992383.000... final area: 1099231872.000.
Scalling component 2 - Scalar: 15.726 - initial area: 5020282.500... final area: 1241625984.000.
[INFO] Number of edges hit. Edges processed: 6670
Time for running HierarchicalTreeMap: 7.458s


In [30]:
components_to_highligth = hiertopomap_llm.components_to_scale
data_comp = []

for c in components_to_highligth:
    comp_ids = hiertopomap_llm.components_info[c]['points']
    select_df = df_questions.iloc[comp_ids]
    n = len(comp_ids)

    data_comp.append({'Comp_id': c,
                      'n_points': n,
                      'A': len(select_df[select_df['model_choice']=='A'])/n,
                      'B': len(select_df[select_df['model_choice']=='B'])/n,
                      'C': len(select_df[select_df['model_choice']=='C'])/n,
                      'D': len(select_df[select_df['model_choice']=='D'])/n,
                      'Correct': len(select_df[select_df['correct']=='True'])/n,
                      'Easy': len(select_df[select_df['Level']=='Easy'])/n,
                        })

data_comp[0]['name'] = 'Center cluster'
data_comp[1]['name'] = 'Cluster D'
data_comp[2]['name'] = 'Cluster A'
data_comp[3]['name'] = 'Cluster C'
data_comp[4]['name'] = 'Cluster B'

df_comp = pd.DataFrame(data_comp)
df_comp = df_comp.reindex([0,2,4,3,1])

In [38]:
import plotly.express as px
import plotly.graph_objects as go

fig = go.Figure(data=[
        go.Bar(name='A', 
                x=list(df_comp['name']), 
                y=list(df_comp['A']),
                marker_color=px.colors.qualitative.T10[0]
                ),
        go.Bar(name='B', 
                x=list(df_comp['name']), 
                y=list(df_comp['B']),
                marker_color=px.colors.qualitative.T10[1]
                ),
        go.Bar(name='C', 
                x=list(df_comp['name']), 
                y=list(df_comp['C']),
                marker_color=px.colors.qualitative.T10[2]
                ),
        go.Bar(name='D', 
                x=list(df_comp['name']), 
                y=list(df_comp['D']),
                marker_color=px.colors.qualitative.T10[3]
                ),
]
    )
fig.update_layout(yaxis=dict(title='Proportion of answers'),
                  margin=dict(l=0, r=0, t=0, b=0),
                  height=200,
                  width=400,
                  yaxis_range=[0,1],
                  font_family="Arial",
                  legend=dict(title="LLM's Answer",
                              orientation="h",
                              yanchor="bottom",
                              y=1.02,
                              xanchor="right",
                              x=1
                        ),
                  )
fig.write_image("Bar-ModelAnswer-5Comps.png", scale=10)
fig.show()

In [39]:
fig = go.Figure(data=[
        go.Bar(name='Right Answer', 
                x=list(df_comp['name']), 
                y=list(df_comp['Correct']),
                marker_color=px.colors.qualitative.T10[0]
                ),
        go.Bar(name='Easy Question', 
                x=list(df_comp['name']), 
                y=list(df_comp['Easy']),
                marker_color=px.colors.qualitative.T10[3]
                ),   
]
    )
fig.update_layout(yaxis=dict(title='Proportion of answers'),
                  margin=dict(l=0, r=0, t=0, b=0),
                  height=200,
                  width=400,
                  yaxis_range=[0,1],
                  font_family="Arial",
                  legend=dict(orientation="h",
                              yanchor="bottom",
                              y=1.02,
                              xanchor="right",
                              x=1
                        ),
                  )
fig.write_image("Bar-Right-Easy-5Comps.png", scale=10)
fig.show()

In [34]:
from LLM_branch_ids import branch_ids

data_branch = []

for b in range(1,13):
    ids = branch_ids[b]
    select_df = df_questions.iloc[ids]
    n = len(ids)

    data_branch.append({'name': 'Branch '+str(b),
                        'label': str(b),
                      'n_points': n,
                      'A': len(select_df[select_df['model_choice']=='A'])/n,
                      'B': len(select_df[select_df['model_choice']=='B'])/n,
                      'C': len(select_df[select_df['model_choice']=='C'])/n,
                      'D': len(select_df[select_df['model_choice']=='D'])/n,
                      'Correct': len(select_df[select_df['correct']=='True'])/n,
                      'Easy': len(select_df[select_df['Level']=='Easy'])/n,
                        })
    
df_branch = pd.DataFrame(data_branch)

In [37]:
fig = go.Figure(data=[
        go.Bar(name='A', 
                x=list(df_branch['label']), 
                y=list(df_branch['A']),
                marker_color=px.colors.qualitative.T10[0]
                ),
        go.Bar(name='B', 
                x=list(df_branch['label']), 
                y=list(df_branch['B']),
                marker_color=px.colors.qualitative.T10[1]
                ),
        go.Bar(name='C', 
                x=list(df_branch['label']), 
                y=list(df_branch['C']),
                marker_color=px.colors.qualitative.T10[2]
                ),
        go.Bar(name='D', 
                x=list(df_branch['label']), 
                y=list(df_branch['D']),
                marker_color=px.colors.qualitative.T10[3]
                ),
]
    )
fig.update_layout(yaxis=dict(title='Proportion of answers'),
                  margin=dict(l=0, r=0, t=0, b=0),
                  height=200,
                  width=400,
                  yaxis_range=[0,1],
                  font_family="Arial",
                  legend=dict(title="LLM's Answer",
                              orientation="h",
                              yanchor="bottom",
                              y=1.02,
                              xanchor="right",
                              x=1
                        ),
                  )
fig.write_image("Bar-ModelAnswer-5Comps-Branches.png", scale=10)
fig.show()

In [40]:
fig = go.Figure(data=[
        go.Bar(name='Right Answer', 
                x=list(df_branch['label']), 
                y=list(df_branch['Correct']),
                marker_color=px.colors.qualitative.T10[0]
                ),
        go.Bar(name='Easy Question', 
                x=list(df_branch['label']), 
                y=list(df_branch['Easy']),
                marker_color=px.colors.qualitative.T10[3]
                ),   
]
    )
fig.update_layout(yaxis=dict(title='Proportion of answers'),
                  margin=dict(l=0, r=0, t=0, b=0),
                  height=200,
                  width=400,
                  yaxis_range=[0,1],
                  font_family="Arial",
                  legend=dict(orientation="h",
                              yanchor="bottom",
                              y=1.02,
                              xanchor="right",
                              x=1
                        ),
                  )
fig.write_image("Bar-Right-Easy-5Comps-Branches.png", scale=10)
fig.show()