# Sorting Tasks (GPT-3.5)

**Global Inputs**
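- Set your OpenAI API key (`api_key`) in the cell below. If you're using Azure, see the code documentation for `OpenAIConfig` for how to modify the configuration.
- Set the aggregate size (`num_aggregates`, the number of reshuffled prompts queried and aggregated per example) in the same cell. The default of 5 works well.
- Set `num_limit` to cap how many examples of each dataset are evaluated: 50 gives fast inference, 100 covers the full dataset.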

In [None]:
import os

# Prefer the OPENAI_API_KEY environment variable so the key never has to be
# saved inside the notebook; replace the '' fallback to hard-code one instead.
api_key = os.environ.get('OPENAI_API_KEY', '')
api_type = 'openai'
num_aggregates = 5  # number of aggregates
num_limit = 50  # set to 100 for the full dataset; 50 is for fast inference

In [None]:
from permsc import SentenceSortRankingPromptBuilder, FastMathSortRankingPromptBuilder, OpenAIPromptPipeline, OpenAIConfig, ChatCompletionPool

# API configuration built from the global inputs; the same `config` object is
# reused when the WordSort and GSM8KSort pipelines are built further down.
config = OpenAIConfig(model_name='gpt-3.5-turbo', api_key=api_key, api_type=api_type)
# Prompt builder used for the MathSort section.
builder = FastMathSortRankingPromptBuilder()
pool = ChatCompletionPool([config] * 5)  # 5 parallel instances
# The pipeline is constructed from the prompt builder and the completion pool.
pipeline = OpenAIPromptPipeline(builder, pool)

In [None]:
from copy import deepcopy
import numpy as np


def run_pipeline(pipeline, dataset, num_aggregates, limit=100):
    """Query the pipeline on several reshuffled copies of each dataset item.

    For each of the first ``limit`` items, a deep copy is reordered with
    ``randomize_order(standardize=True)`` and the value returned by that call
    is recorded as the item's ground truth. ``num_aggregates`` further deep
    copies are then each reordered with ``randomize_order()``, sent to
    ``pipeline.run`` in a single call, and every output is translated through
    the permutation returned for its copy.

    Args:
        pipeline: Object exposing
            ``run(items, temperature=..., request_timeout=...)`` that returns
            one sequence of indices per item.
        dataset: Sliceable collection of items providing ``randomize_order``.
        num_aggregates: Number of reshuffled copies queried per item.
        limit: Maximum number of items taken from the front of ``dataset``.

    Returns:
        Tuple ``(prefs_list, perms_list, gts_list)``: output permutations,
        input permutations, and ground truth, each with one entry per item.
    """
    all_prefs, all_perms, all_gts = [], [], []

    for example in dataset[:limit]:
        base = deepcopy(example)
        all_gts.append(base.randomize_order(standardize=True))

        # Build the reshuffled copies, remembering what each shuffle returned.
        shuffled_items = []
        shuffle_perms = []
        for _ in range(num_aggregates):
            clone = deepcopy(base)
            shuffle_perms.append(clone.randomize_order())
            shuffled_items.append(clone)

        outputs = pipeline.run(shuffled_items, temperature=0, request_timeout=10)

        restored = []
        for output, perm in zip(outputs, shuffle_perms):
            # Position in the shuffled item -> corresponding value of `perm`.
            lookup = dict(enumerate(perm))
            # -1 is passed through unchanged (presumably a marker for a
            # missing entry in the model output -- confirm against permsc).
            lookup[-1] = -1
            restored.append(np.array([lookup[pos] for pos in output]))

        all_prefs.append(np.array(restored))
        all_perms.append(np.array(shuffle_perms))

    return all_prefs, all_perms, all_gts  # output permutations, input permutations, and ground truth

In [None]:
from permsc import KemenyOptimalAggregator

def aggregate(prefs_list):
    """Aggregate every entry of ``prefs_list`` with a ``KemenyOptimalAggregator``.

    Args:
        prefs_list: Iterable whose entries are each passed, one at a time, to
            ``KemenyOptimalAggregator.aggregate``.

    Returns:
        List with one aggregation result per entry, in input order.
    """
    # A single aggregator instance is shared across all entries.
    kemeny = KemenyOptimalAggregator()
    return [kemeny.aggregate(sample_prefs) for sample_prefs in prefs_list]

def compute_individual_taus(prefs_list, gts_list, *, num_runs=None, score_fn=None):
    """Return the mean score of each individual run across all samples.

    Args:
        prefs_list: One entry per sample; ``prefs_list[i][k]`` is the output
            of run ``k`` for sample ``i``.
        gts_list: One ground-truth entry per sample, aligned with
            ``prefs_list``. Each is passed through ``np.argsort`` before
            scoring.
        num_runs: Number of run indices to score. Defaults to the
            notebook-level ``num_aggregates``.
        score_fn: Callable ``(reference, prediction) -> float``. Defaults to
            the notebook-level ``fn``.

    Returns:
        ``np.ndarray`` of length ``num_runs`` holding, for each run index, the
        mean score over the samples that contain that run. A run index with no
        samples yields NaN.
    """
    # Resolve the notebook globals at call time so that cells defining them
    # may be executed after this one.
    if num_runs is None:
        num_runs = num_aggregates
    if score_fn is None:
        score_fn = fn

    # The reference does not depend on the run index; compute it once.
    references = [np.argsort(gt) for gt in gts_list]

    taus = []
    for idx in range(num_runs):
        # Skip samples that have fewer than idx + 1 runs instead of raising
        # IndexError on prefs[idx].
        run_scores = [
            score_fn(reference, prefs[idx])
            for reference, prefs in zip(references, prefs_list)
            if idx < len(prefs)
        ]
        taus.append(np.mean(run_scores) if run_scores else np.nan)

    return np.array(taus)

In [None]:
from permsc import ranks_from_preferences
import scipy.stats as stats

def fn(x, y):
    """Return Kendall's tau between the ranks derived from two preference lists.

    Both arguments are converted with ``ranks_from_preferences`` before being
    passed to ``scipy.stats.kendalltau``. Only the first element of the result
    (the correlation statistic) is returned; the p-value is dropped.
    """
    return stats.kendalltau(ranks_from_preferences(x), ranks_from_preferences(y))[0]

## MathSort

In [None]:
from permsc import MathSortDataset

# Load the MathSort data from a TSV file. The path is relative, so it
# presumably resolves against the notebook's working directory.
ds = MathSortDataset('../data/mathsort.tsv')

In [None]:
# Query the model on the first `num_limit` examples; the input permutations
# (second return value of run_pipeline) are not needed and are discarded.
prefs_list, _, gts_list = run_pipeline(pipeline, ds, num_aggregates, limit=num_limit)

In [None]:
# One aggregated result per example, computed from that example's runs.
results = aggregate(prefs_list)

In [None]:
# Score each aggregated result against argsort(ground truth) with `fn`
# (Kendall's tau) and average over all examples.
aggr_score = np.mean([fn(np.argsort(gt), x) for gt, x in zip(gts_list, results)])
aggr_score  # Aggregate tau

In [None]:
# Mean tau per run index (one value for each of the `num_aggregates` runs).
compute_individual_taus(prefs_list, gts_list)  # Individual runs

## WordSort

In [None]:
from permsc import WordSortRankingPromptBuilder, WordSortDataset

# Rebinds `ds`, `builder`, `pool` and `pipeline`, replacing the MathSort
# objects; re-run the MathSort setup cells before re-running that section.
ds = WordSortDataset('../data/wordsort.tsv')
builder = WordSortRankingPromptBuilder()
pool = ChatCompletionPool([config] * 5)  # 5 parallel instances
pipeline = OpenAIPromptPipeline(builder, pool)

In [None]:
# Same evaluation as for MathSort, now with the WordSort dataset and pipeline;
# this overwrites the MathSort `prefs_list` and `gts_list`.
prefs_list, _, gts_list = run_pipeline(pipeline, ds, num_aggregates, limit=num_limit)

In [None]:
# One aggregated result per WordSort example.
results = aggregate(prefs_list)

In [None]:
# Score each aggregated result against argsort(ground truth) with `fn`
# (Kendall's tau) and average over all examples.
aggr_score = np.mean([fn(np.argsort(gt), x) for gt, x in zip(gts_list, results)])
aggr_score  # Aggregate tau

In [None]:
# Mean tau per run index (one value for each of the `num_aggregates` runs).
compute_individual_taus(prefs_list, gts_list)  # Individual runs

## GSM8KSort
- Please run this task through Azure (see the `OpenAIConfig` documentation for how to configure it), as the OpenAI API seems to break on it.

In [None]:
from permsc import GSM8KSortDataset, SentenceSortRankingPromptBuilder

# Rebinds `ds`, `builder`, `pool` and `pipeline`, replacing the objects of the
# previous section; unlike the other two datasets, this one is a JSONL file.
ds = GSM8KSortDataset('../data/gsm8ksort.jsonl')
builder = SentenceSortRankingPromptBuilder()
pool = ChatCompletionPool([config] * 5)  # 5 parallel instances
pipeline = OpenAIPromptPipeline(builder, pool)

In [None]:
# Same evaluation as above, now with the GSM8KSort dataset and pipeline;
# this overwrites the previous section's `prefs_list` and `gts_list`.
prefs_list, _, gts_list = run_pipeline(pipeline, ds, num_aggregates, limit=num_limit)

In [None]:
# One aggregated result per GSM8KSort example.
results = aggregate(prefs_list)

In [None]:
# Score each aggregated result against argsort(ground truth) with `fn`
# (Kendall's tau) and average over all examples.
aggr_score = np.mean([fn(np.argsort(gt), x) for gt, x in zip(gts_list, results)])
aggr_score  # Aggregate tau

In [None]:
# Mean tau per run index (one value for each of the `num_aggregates` runs).
compute_individual_taus(prefs_list, gts_list)  # Individual runs