In [1]:
import sys
sys.path.insert(1, "../..")

import pandas as pd
import numpy as np
import time
from tqdm import tqdm
import kagglehub
import json
import re
import random

from utils import bipartite
from utils import llm_api
from tasks.db import db_utils

from sklearn.datasets import fetch_openml

  from .autonotebook import tqdm as notebook_tqdm


Utilities for post-processing the reports: they parse stringified lists of integers back into Python lists.

In [2]:
def extract_integer(s):
    """Return the first run of decimal digits found in ``s`` as an ``int``.

    Returns ``None`` when ``s`` contains no digit at all.
    """
    digits = re.search(r'\d+', s)
    if digits is None:
        return None
    return int(digits.group())

def post_process(ls):
    """Turn a whitespace-separated string of numbers into a list of ints.

    Each whitespace-delimited token has the literal ``"int64"`` removed first
    (so the digits inside that literal are never picked up), then the first
    integer in the token is extracted. Tokens without any digit are dropped.
    """
    parsed = (extract_integer(token.replace("int64", "")) for token in ls.split())
    return [value for value in parsed if value is not None]

## Ask GPT

In [3]:
# Download latest version of the DBLP chunk.
# `kagglehub` and `pd` are already imported in the notebook's import cell,
# so no import is repeated here.
path = kagglehub.dataset_download("jakboss/chunk-of-dblp-dataset")

# Keep only the two columns this notebook works with.
df = pd.read_csv(f"{path}/DBLP.csv")[["Author", "Journal"]]

df.head()

Unnamed: 0,Author,Journal
0,Hans Ulrich Simon,Acta Inf.
1,Nathan Goodman,Acta Inf.
2,Norbert Blum,Acta Inf.
3,Juha Honkala,Acta Inf.
4,Chua-Huang Huang,Acta Inf.


In [4]:
MODEL = "llama3.1:8b"

# Read the stored orderings for MODEL and convert the stringified
# `reordered` column back into lists of integers.
orders = pd.read_csv(f"venue_csv_{MODEL}.csv").assign(
    reordered=lambda frame: frame["reordered"].apply(post_process)
)
orders.head()

Unnamed: 0,algorithm,original,reordered,model,size,column,value
0,random,[103441 10041 33390 84633 95269 90856 132...,"[103441, 10041, 33390, 84633, 95269, 90856, 13...",,100,,
1,opt,[ 90036 88632 11249 118195 90856 31196 6...,"[31196, 31138, 28512, 64755, 63783, 34055, 297...",,100,,
2,warmup,[119658 107530 130699 98890 88632 90856 58...,"[119658, 107530, 90856, 58539, 67180, 28512, 2...",llama3.1:8b,100,,
3,bigraph,[130715 90856 106345 110410 80321 21037 52...,"[25227, 29968, 93338, 34050, 103441, 107530, 8...",llama3.1:8b,100,,
4,random,[103056 3074 6120 44267 64090 39614 29...,"[103056, 3074, 6120, 44267, 64090, 39614, 2908...",,100,,


In [6]:
# Natural-language statement of the counting task.
# NOTE(review): no cell visible in this notebook reads `question`; the prompt
# actually sent to the model is hard-coded in the evaluation loop.
question = "How many papers are published in a physics related journal?"

Make the API calls: query the model `SAMPLES` times for each ordering and record the mean absolute counting error.

In [7]:
def get_list_col(col):
    """Parse a stringified list of integers into a Python list.

    ``col`` is first tried as JSON (e.g. ``"[1, 2, 3]"``). If it is not valid
    JSON, or the JSON value is not a list, every run of digits in ``col`` is
    extracted instead (e.g. ``"[1 2 3]"`` -> ``[1, 2, 3]``), so the result is
    always a list.

    Args:
        col: The string to parse.

    Returns:
        A list: the decoded JSON list, or the integers found by the digit scan.
    """
    try:
        parsed = json.loads(col)
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError. Catching only parse failures
        # lets KeyboardInterrupt and genuine bugs propagate.
        parsed = None

    if isinstance(parsed, list):
        return parsed

    # Not a JSON list (invalid JSON, or a bare scalar such as "42"):
    # fall back to collecting every run of digits.
    return [int(x) for x in re.findall(r"\d+", col)]

In [13]:
# Model whose answers are scored; passed as `model=` to llm_api.ask in the loop below.
GOD_MODEL = "gpt-3.5-turbo"
# Number of times the model is queried for each row of `orders`.
SAMPLES = 5

def get_gt(df, indices, journals=("J. Comput. Physics", "SIAM J. Scientific Computing"), column="Journal"):
    """Count the selected rows whose venue is one of ``journals``.

    Args:
        df: DataFrame containing ``column``.
        indices: Positional row indices (as used by ``df.iloc``); repeated
            indices are counted once per occurrence.
        journals: Exact venue names that count as a match. Defaults to the
            two journals this notebook treats as physics related.
        column: Name of the column holding the venue. Defaults to ``"Journal"``.

    Returns:
        The number of matching rows as an ``int``.
    """
    selected = df.iloc[indices]
    return int(selected[column].isin(list(journals)).sum())

def shift_right(lst, n):
    """Return ``lst`` rotated ``n`` positions to the right."""
    if len(lst) == 0:
        return lst  # nothing to rotate; also avoids the modulo-by-zero below
    n = n % len(lst)  # Handle shifts larger than the length of the list
    return lst[-n:] + lst[:-n]


# One entry per row of `orders`, in row order: the mean absolute difference
# between the model's count and the ground-truth count.
errors = []

for i, row in orders.iterrows():
    reordered = row["reordered"]

    # The shifting required to align with the exposure of GPT-4o
    if GOD_MODEL == "gpt-4o-mini":
        reordered = shift_right(reordered, 40)  # IMDB
        # Settings noted for other datasets: shift 35 (Adults); shift 50 (Enroll),
        # the latter followed by swapping slices [20:40] and [60:80].

    # The prompt and the ground truth depend only on the ordering, so they are
    # built once per row instead of once per sample.
    # NOTE(review): the journal names quoted in the prompt ('SIAM J, Scientific',
    # 'J. Comput Physics') are not spelled like the values get_gt matches on --
    # confirm this is intentional before changing either side.
    questions = [
        "Consider the following dataset table:\n" + df.iloc[reordered].to_string(),
        "How many papers in this table have a physics related journal ('SIAM J, Scientific' or 'J. Comput Physics')? Answer with just a number (number of rows)"
    ]
    gt = get_gt(df, reordered)

    sample_errors = []
    for s in range(SAMPLES):
        answer = llm_api.ask(questions=questions, model=GOD_MODEL)
        progress = f"{s + 1} / {SAMPLES} | {i + 1} / {orders.shape[0]} | {row['algorithm']}"

        # The first integer in the reply is taken as the model's count.
        numbers = re.findall(r"\d+", answer)
        if not numbers:
            # A reply without any digit used to raise IndexError and abort the
            # whole run; skip just this sample instead.
            print(f"{progress} | no number in answer, sample skipped")
            continue

        abs_err = abs(gt - int(numbers[0]))
        sample_errors.append(abs_err)
        print(f"{progress} | {abs_err}")

    # Average over the samples that produced a number; NaN if none did, so that
    # `errors` still has exactly one entry per row of `orders`.
    if sample_errors:
        errors.append(sum(sample_errors) / len(sample_errors))
    else:
        errors.append(float("nan"))

1 / 5 | 1 / 40 | random | 7
2 / 5 | 1 / 40 | random | 6
3 / 5 | 1 / 40 | random | 2
4 / 5 | 1 / 40 | random | 6
5 / 5 | 1 / 40 | random | 4
1 / 5 | 2 / 40 | opt | 6
2 / 5 | 2 / 40 | opt | 3
3 / 5 | 2 / 40 | opt | 7
4 / 5 | 2 / 40 | opt | 9
5 / 5 | 2 / 40 | opt | 6
1 / 5 | 3 / 40 | warmup | 6
2 / 5 | 3 / 40 | warmup | 6
3 / 5 | 3 / 40 | warmup | 5
4 / 5 | 3 / 40 | warmup | 7
5 / 5 | 3 / 40 | warmup | 6
1 / 5 | 4 / 40 | bigraph | 4
2 / 5 | 4 / 40 | bigraph | 6
3 / 5 | 4 / 40 | bigraph | 6
4 / 5 | 4 / 40 | bigraph | 7
5 / 5 | 4 / 40 | bigraph | 8
1 / 5 | 5 / 40 | random | 6
2 / 5 | 5 / 40 | random | 3
3 / 5 | 5 / 40 | random | 4
4 / 5 | 5 / 40 | random | 3
5 / 5 | 5 / 40 | random | 3
1 / 5 | 6 / 40 | opt | 0
2 / 5 | 6 / 40 | opt | 0
3 / 5 | 6 / 40 | opt | 5
4 / 5 | 6 / 40 | opt | 0
5 / 5 | 6 / 40 | opt | 7
1 / 5 | 7 / 40 | warmup | 2
2 / 5 | 7 / 40 | warmup | 5
3 / 5 | 7 / 40 | warmup | 7
4 / 5 | 7 / 40 | warmup | 6
5 / 5 | 7 / 40 | warmup | 1
1 / 5 | 8 / 40 | bigraph | 2
2 / 5 | 8 / 40 |

In [14]:
# Store the per-row errors from the evaluation loop in a column named after the judged model.
orders[f"{GOD_MODEL}_error"] = errors

In [15]:
# Mean error of each ordering algorithm, one row per algorithm.
orders.groupby("algorithm")[f"{GOD_MODEL}_error"].mean().reset_index()

Unnamed: 0,algorithm,gpt-3.5-turbo_error
0,bigraph,5.56
1,opt,4.24
2,random,5.18
3,warmup,5.26


In [16]:
# Compute (random - opt) / (bigraph - opt) from the per-algorithm means instead of
# hand-copying the rounded numbers from the table above, so the ratio stays in
# sync with `orders` when the notebook is re-run.
mean_error = orders.groupby("algorithm")[f"{GOD_MODEL}_error"].mean()
(mean_error["random"] - mean_error["opt"]) / (mean_error["bigraph"] - mean_error["opt"])

0.712121212121212