# LLMFAO: Large Language Model Feedback Analysis and Optimization

In [None]:
import json
import numpy as np
import pandas as pd
import plotly.express as px
from plotly.graph_objects import Figure
from gradio_client import Client

In [None]:
# Shared Gradio client for the Pair2Rank Space at this URL; it is bound as the
# default `client` argument of pair2rank() below.
client = Client('https://dustalov-pair2rank.hf.space/')

def pair2rank(path: str, client: Client = client) -> pd.DataFrame:
    """Rank the items of a comparisons file through the remote Pair2Rank app.

    ``path`` is forwarded to ``client.predict`` together with the algorithm
    name ``'Bradley-Terry (1952)'`` and three further positional options
    (``False, False, 0``) whose meaning is defined by the remote app and is
    not visible from this file.

    Args:
        path: Comparisons file handed to the remote endpoint.
        client: Gradio client used for the call. The default is the
            module-level ``client`` as it existed when this function was
            defined.

    Returns:
        A DataFrame built from the ``'data'`` rows and ``'headers'`` column
        names found in the JSON file that the endpoint returns first.
    """
    # predict() must yield exactly two values; only the first one (a path to
    # a JSON file) is used, the second is discarded.
    result_path, _ignored = client.predict(
        path, 'Bradley-Terry (1952)', False, False, 0,
    )

    with open(result_path, 'rb') as handle:
        payload = json.load(handle)

    return pd.DataFrame(data=payload['data'], columns=payload['headers'])

In [None]:
def pairwise(df: pd.DataFrame, n: int = 7) -> Figure:
    """Plot a heatmap of pairwise win fractions for the first and last rows.

    For a row item ``i`` and a column item ``j`` the cell value is
    ``score[i] / (score[i] + score[j])``, so rows are the "Winner" and
    columns the "Loser" (the diagonal is 0.5 for non-zero scores).

    Args:
        df: Ranking table with an ``'item'`` column (axis labels) and a
            ``'score'`` column (numeric). Its row index is expected to be
            unique, because it is used to drop rows that appear in both the
            head and the tail. Presumably the rows are ordered by rank, so
            the head/tail are the best and worst items -- this depends on
            what the caller passes in.
        n: Number of rows taken from the top and from the bottom of ``df``.

    Returns:
        The Plotly figure produced by ``px.imshow``.
    """
    # Select the rows to show *before* building the matrix: only these
    # (at most 2 * n) items are plotted, so there is no need to compute the
    # full N x N matrix and then reindex it down.
    selected = pd.concat((df.head(n), df.tail(n)))
    # When df has fewer than 2 * n rows, head and tail overlap; keep a single
    # copy of each row. keep='last' preserves the original row order.
    selected = selected[~selected.index.duplicated(keep='last')]

    items = selected['item']
    scores = selected['score'].to_numpy()

    # Broadcasting: element [i, j] = scores[i] / (scores[j] + scores[i]).
    win_fraction = scores[:, np.newaxis] / (scores + scores[:, np.newaxis])
    df_pairwise = pd.DataFrame(data=win_fraction, index=items, columns=items)

    fig = px.imshow(df_pairwise, color_continuous_scale='RdBu', text_auto='.2f')
    fig.update_layout(xaxis_title='Loser', yaxis_title='Winner', xaxis_side='top')
    fig.update_traces(hovertemplate='Winner: %{y}<br>Loser: %{x}<br>Fraction of Wins: %{z}<extra></extra>')

    return fig

## Human Judgements

In [None]:
# Rank the items from the crowd (human) comparisons file.
df_crowd = pair2rank('crowd-comparisons.csv')
# Bare expression: shown as the cell output when run in a notebook.
df_crowd

In [None]:
# Heatmap of pairwise win fractions for the first and last rows (default n=7)
# of the crowd ranking.
pairwise(df_crowd)

## Evaluation with GPT-4

In [None]:
# Rank the items from the comparisons file of the GPT-4 evaluation.
df_gpt4 = pair2rank('gpt4-crowd-comparisons.csv')
# Bare expression: shown as the cell output when run in a notebook.
df_gpt4

In [None]:
# Heatmap of pairwise win fractions for the first and last rows (default n=7)
# of the GPT-4 ranking.
pairwise(df_gpt4)

## Evaluation with GPT-3

In [None]:
# Rank the items from the comparisons file of the GPT-3 evaluation.
df_gpt3 = pair2rank('gpt3-crowd-comparisons.csv')
# Bare expression: shown as the cell output when run in a notebook.
df_gpt3

In [None]:
# Heatmap of pairwise win fractions for the first and last rows (default n=7)
# of the GPT-3 ranking.
pairwise(df_gpt3)