## Install the package

In [None]:
%%capture
!pip install kaleido==0.2.1
!pip install plotly==5.14.1
import plotly.express as px
try:
    from src.visualize import Visualize
    from src.analysis import Analysis
except:
    ! git clone https://github.com/lchen001/LLMDrift
    %cd LLMDrift
    from src.visualize import Visualize
    from src.analysis import Analysis
import pandas

## Specify parameters

In [None]:
# Shared analysis helper; visualize() calls its format_answer/get_score/
# get_verbosity/get_mismatch methods.
MyAnalysis = Analysis()
# Shared plotting helper. NOTE(review): root_path is presumably the directory
# that save_figure writes into — confirm in src/visualize.py.
MyVisual = Visualize(root_path='figure/')
# Font size forwarded to every plot_bars/plot_bar call made by visualize().
fontsize=50

## Create a pipeline for analysis and visualization

In [None]:
def visualize(datapath='generation/PRIME_EVAL.csv',
              name='prime',
              metric='Accuracy',
              v_range_4=[0,1000],
              v_range_35=[0,1200],
              colors=px.colors.qualitative.Plotly,
              y_name=True,
              prefix='YLABEL_',
              models_GPT4 = ['openaichat/gpt-4-0314',"openaichat/gpt-4-0613"],
              models_GPT35 = ['openaichat/gpt-3.5-turbo-0301',"openaichat/gpt-3.5-turbo-0613"],
              GPT_4_MAP = {'openaichat/gpt-4-0314':'March 2023',"openaichat/gpt-4-0613":"June 2023"},
              GPT_35_MAP = {'openaichat/gpt-3.5-turbo-0301':'March 2023',"openaichat/gpt-3.5-turbo-0613":"June 2023"},
             ):
    """Run the analysis for one evaluation CSV and save all of its figures.

    Ten figures are produced, always GPT-4 first and GPT-3.5 second:
    the metric score with ``y_name`` as given (file names start with
    ``prefix``), the metric score with ``y_name=False``, the verbosity,
    the mismatch between the two model versions, and so on for each family.
    Relies on the notebook-level ``MyAnalysis``, ``MyVisual`` and ``fontsize``.

    Args:
        datapath: Path of the evaluation CSV to load.
        name: Stem used in every output file name.
        metric: Metric name passed to ``format_answer``, ``get_score`` and
            ``get_mismatch``.
        v_range_4: ``yrange`` of the GPT-4 verbosity plot.
        v_range_35: ``yrange`` of the GPT-3.5 verbosity plot.
        colors: Color sequence for the bar plots; the mismatch plots use
            ``colors[2:]``.
        y_name: ``y_name`` flag for the first pair of score plots only; all
            other plots pass ``y_name=False``.
        prefix: String prepended to the file names of the first pair of
            score plots.
        models_GPT4: Model ids compared in the GPT-4 mismatch plot.
        models_GPT35: Model ids compared in the GPT-3.5 mismatch plot.
        GPT_4_MAP: ``name_map`` for the GPT-4 plots.
        GPT_35_MAP: ``name_map`` for the GPT-3.5 plots.

    Returns:
        None. The results are the figures written through
        ``MyVisual.save_figure``.

    Note:
        The list/dict defaults are shared between calls; this function only
        reads them and passes them on, it never mutates them itself.
    """
    # Use a context manager so the file handle is closed as soon as the CSV is
    # parsed instead of being left to the garbage collector.
    with open(datapath) as csv_file:
        data = pandas.read_csv(csv_file)
    # Return value is ignored; presumably this normalizes `data` in place.
    MyAnalysis.format_answer(data,metric)

    # Per model family: (file-name suffix, name map, verbosity y-range, model ids).
    # Order matters: GPT-4 figures are always produced before GPT-3.5 ones.
    families = (
        ("_GPT4.svg", GPT_4_MAP, v_range_4, models_GPT4),
        ("_GPT35.svg", GPT_35_MAP, v_range_35, models_GPT35),
    )

    # Metric score: first with the caller's y_name flag (prefixed file names),
    # then again with y_name disabled (plain file names).
    score, scores_std = MyAnalysis.get_score(data,metric)
    for file_prefix, show_y_name in ((prefix, y_name), ('', False)):
        for suffix, name_map, _, _ in families:
            MyVisual.plot_bars(score,scores_std,fontsize=fontsize,name_map=name_map,colors=colors,y_name=show_y_name)
            MyVisual.save_figure(file_prefix+name+suffix)

    # Verbosity
    score, scores_std = MyAnalysis.get_verbosity(data)
    for suffix, name_map, v_range, _ in families:
        MyVisual.plot_bars(score,scores_std,yrange=v_range,fontsize=fontsize,name_map=name_map,colors=colors,y_name=False,no_text=True)
        MyVisual.save_figure(name+"_verbose"+suffix)

    # Mismatch
    for suffix, _, _, models in families:
        score, scores_std = MyAnalysis.get_mismatch(data=data,models=models,name=metric)
        MyVisual.plot_bar(score,scores_std,fontsize=fontsize,colors=colors[2:])
        MyVisual.save_figure(name+"_mismatch"+suffix)

## Perform the analysis and visualize the results for each dataset

In [None]:
# Full prime-number dataset, scored with the 'Accuracy' metric.
visualize(
    datapath='generation/PRIME_FULL_EVAL.csv',
    name='prime_full',
    metric='Accuracy',
    v_range_4=[0, 1000],
    v_range_35=[0, 1200],
)

In [None]:
# Happy-number counting dataset, scored with the 'Math Accuracy' metric.
visualize(
    datapath='generation/COUNTAHAPPYNUMBER_EVAL.csv',
    name='counthappynumber',
    metric='Math Accuracy',
    v_range_4=[0, 2500],
    v_range_35=[0, 3000],
)

In [None]:
# Sensitive-question dataset, scored with the 'Answer Rate' metric.
visualize(
    datapath='generation/SENSITIVEQ_EVAL.csv',
    name='sensitiveq',
    metric='Answer Rate',
    v_range_4=[0, 800],
    v_range_35=[0, 500],
)

In [None]:
# OpinionQA dataset, scored with the 'Survey Rate' metric.
visualize(
    datapath='generation/OPINIONQA_EVAL.csv',
    name='opinionqa',
    metric='Survey Rate',
    v_range_4=[0, 100],
    v_range_35=[0, 40],
)

In [None]:
# HotpotQA dataset, scored with the 'Exact Match' metric. The model ids passed
# here use the 'agent_openai/' prefix instead of the default 'openaichat/' one,
# so both the model lists and the name maps are overridden.
visualize(
    datapath='generation/HOTPOTQA_EVAL.csv',
    name='hotpotqa',
    metric='Exact Match',
    v_range_4=[0, 800],
    v_range_35=[0, 500],
    models_GPT4=['agent_openai/gpt-4-0314', "agent_openai/gpt-4-0613"],
    models_GPT35=['agent_openai/gpt-3.5-turbo-0301', "agent_openai/gpt-3.5-turbo-0613"],
    GPT_4_MAP={
        'agent_openai/gpt-4-0314': 'March 2023',
        "agent_openai/gpt-4-0613": "June 2023",
    },
    GPT_35_MAP={
        'agent_openai/gpt-3.5-turbo-0301': 'March 2023',
        "agent_openai/gpt-3.5-turbo-0613": "June 2023",
    },
)

In [None]:
# LeetCode (easy) dataset, scored with the 'Directly Executable' metric.
visualize(
    datapath='generation/LEETCODE_EASY_EVAL.csv',
    name='leetcode',
    metric='Directly Executable',
    v_range_4=[0, 600],
    v_range_35=[0, 800],
)

In [None]:
# USMLE dataset, scored with the 'Multiple-choice Accuracy' metric.
visualize(
    datapath='generation/USMLE_EVAL.csv',
    name='usmle',
    metric='Multiple-choice Accuracy',
    v_range_4=[0, 100],
    v_range_35=[0, 400],
)

In [None]:
# ARC dataset, scored with the 'Exact Match' metric.
visualize(
    datapath='generation/ARC_EVAL.csv',
    name='ARC',
    metric='Exact Match',
    v_range_4=[0, 400],
    v_range_35=[0, 400],
)