Define the seed for reproducible results

In [1]:
# Seed value passed as the `seed` argument of every API request made by the request loop below.
_SEED = 42

In [2]:
# Candidate tag vocabularies, stored as *strings* holding a Python list literal.
# `create_binary_vector` parses them with `ast.literal_eval`, and the position of a tag
# inside the list is the position of its bit in the resulting binary vector, so the
# order of the tags matters.

# Vocabularies for "our" dataset (top 5 / 10 / 20 tags).
TOP_5_OUR_DATASET_TAGS = "['data structures', 'greedy', 'math', 'implementation', 'dp']"
TOP_10_OUR_DATASET_TAGS = "['greedy', 'dp', 'graphs', 'brute force', 'math', 'constructive algorithms', 'sortings', 'implementation', 'binary search', 'data structures']"
TOP_20_OUR_DATASET_TAGS = "['implementation', 'binary search', 'math', 'number theory', 'greedy', 'graphs', 'data structures', 'geometry', 'sortings', 'dp', 'brute force', 'combinatorics', 'dfs and similar', 'constructive algorithms', 'trees', 'strings', 'two pointers', 'dsu', 'bitmasks', 'divide and conquer']"

# Vocabularies for the "outside" dataset (the one to use with the Kim et al. data,
# per the instructions in the dataset-loading cell).
TOP_5_OUTSIDE_DATASET_TAGS = "['data structures', 'implementation', 'dp', 'math', 'greedy']"
TOP_10_OUTSIDE_DATASET_TAGS = "['greedy', 'math', 'constructive algorithms', 'implementation', 'brute force', 'data structures', 'dp', 'geometry', 'strings', 'dfs and similar']"
TOP_20_OUTSIDE_DATASET_TAGS = "['implementation', 'trees', 'math', 'greedy', 'binary search', 'geometry', 'two pointers', 'data structures', 'sortings', 'combinatorics', 'dp', 'bitmasks', 'brute force', 'number theory', 'dsu', 'divide and conquer', 'strings', 'graphs', 'dfs and similar', 'shortest paths']"


Read the dataset

In [3]:
############################################
# IMPORTANT:
# 1. Change the dataset path to the testing dataset you want to use.
# 2. If the dataset is changed, always update the TOP_TAGS to the corresponding TOP_TAGS of the dataset.
# 3. If you want to use the dataset corresponding to Kim et al., use the TOP_TAGS of the outside dataset.
############################################

import pandas as pd

# Alternative input ("our" dataset); swap the two read_csv lines to switch datasets.
# testing_df = pd.read_csv('../../../01_TASK_DATASETS/03_Task_Datasets/02_DATASETS_WO_TAG_ENCODING/OUR_DATASET/top_5_testing_dataset.csv')
testing_df = pd.read_csv('../../../01_TASK_DATASETS/03_Task_Datasets/02_DATASETS_WO_TAG_ENCODING/PSG_PREDICTING_ALGO/AMT5_test.csv')

# Tag vocabulary offered to the model and used to encode truths/predictions.
# Must match the dataset loaded above (see the IMPORTANT note).
TOP_TAGS = TOP_5_OUTSIDE_DATASET_TAGS

# Preview the data. Jupyter only renders the value of a cell's *last* expression,
# so the preview has to come after the assignment above to actually be displayed.
testing_df.head(10)

Define the API request. Set `model` to the OpenAI model you want to query.

In [None]:
import os
from functools import lru_cache

from openai import OpenAI

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be saved
# inside the notebook; the original placeholder string is kept as the fallback value.
API_KEY = os.environ.get("OPENAI_API_KEY", "API_KEY")


@lru_cache(maxsize=None)
def _get_client(api_key):
    """Return a single shared OpenAI client per API key instead of one per request."""
    return OpenAI(api_key=api_key)


def calculate_relevance(problem_statement, problem_tags, seed=None, model="o3-mini"):
    """Ask a chat model which of the candidate labels apply to a problem statement.

    Args:
        problem_statement: Text of the problem, interpolated into the prompt.
        problem_tags: Candidate label set, interpolated into the prompt as-is.
        seed: Value forwarded as the `seed` argument of the chat-completion request.
        model: Name of the chat model to query. Defaults to "o3-mini"; other models
            previously toggled in this cell were "gpt-4o", "gpt-4o-mini" and "o1-mini".

    Returns:
        The message content of the first (and only requested) completion choice,
        exactly as returned by the API. It is not parsed or validated here.
    """
    # NOTE: the prompt text (including its indentation) is sent verbatim to the model.
    request_prompt = f"""Assign the most relevant labels for PROBLEM_STATEMENT from the given LABELS set.     
    Provide the answer in valid JSON format as follows: "labels": "label1, label2, label3, ..."
    PROBLEM_STATEMENT: "{problem_statement}"
    LABELS: "{problem_tags}"
    """

    response = _get_client(API_KEY).chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": request_prompt}
        ],
        seed=seed,
        n=1,
    )

    return response.choices[0].message.content

Call the GPT API for each row of the testing dataset and save the raw responses

In [None]:
import pandas as pd

# Positional slice of testing_df that is sent to the API.
LOWER_BOUND = 0
UPPER_BOUND = 1500

# Raw model responses, one per processed row, in processing order.
# They are collected in a plain list and turned into a DataFrame only when a file is
# written: growing a DataFrame with pd.concat on every iteration re-copies all
# previous rows each time (quadratic) and starts from an empty frame.
responses = []
results_df = pd.DataFrame()

for index, row in testing_df.iloc[LOWER_BOUND:UPPER_BOUND].iterrows():

    problem_statement = row['problem_statement']
    problem_tags = TOP_TAGS

    print("Row index: ", index)

    responses.append(calculate_relevance(problem_statement, problem_tags, seed=_SEED))

    # Periodic checkpoint so a failure part-way through does not lose everything.
    # `index` is the row's index label in testing_df, not a running counter.
    if index % 50 == 0 and index != 0:
        results_df = pd.DataFrame(responses)
        results_df.to_csv(f'o3_mini_results_checkpoint_{index}.csv', index=False)

# Save the final results: a single column (header "0") holding the raw responses.
results_df = pd.DataFrame(responses)
results_df.to_csv('o3_mini_results.csv', index=False)

In [4]:
import ast

def create_binary_vector(tag_list, unique_tags):
    """Encode a collection of tags as a 0/1 vector over a fixed tag vocabulary.

    Args:
        tag_list: Tags to encode, either as an iterable of tags or as a string
            containing a Python list literal. A value whose string form is 'nan'
            (e.g. a float NaN) is treated as "no tags".
        unique_tags: The vocabulary, as a list or as a string containing a Python
            list literal. Its order defines the positions in the result.

    Returns:
        A list of ints with one entry per vocabulary tag: 1 if that tag occurs in
        `tag_list`, otherwise 0. Tags outside the vocabulary are ignored.
    """
    vocabulary = unique_tags
    if isinstance(vocabulary, str):
        # The vocabulary is stored as the text of a list literal; parse it.
        vocabulary = ast.literal_eval(vocabulary)

    encoded = [0] * len(vocabulary)

    # Missing tags: nothing to mark, return the all-zero vector.
    if str(tag_list) == 'nan':
        return encoded

    tags = tag_list
    if isinstance(tags, str):
        tags = ast.literal_eval(tags)

    for tag in tags:
        if tag in vocabulary:
            encoded[vocabulary.index(tag)] = 1

    return encoded

In [None]:
import pandas as pd
import re
from sklearn.metrics import f1_score, roc_auc_score

# Reload the raw model responses saved by the request loop ('o3_mini_results.csv').
# The code below reads them from the column named '0'.
results_df = pd.read_csv('o3_mini_results.csv')

# Function to transform the raw model response into a list of label strings
def transform_labels(json_string):
    """Extract the predicted labels from a raw model response.

    Two answer shapes are recognised, searched for anywhere in the text:
      * "labels": "label1, label2, ..."   (the format requested in the prompt)
      * "labels": ["label1", "label2"]    (a JSON array, a common deviation)

    Args:
        json_string: Raw response text. Non-string values (e.g. the NaN that
            `read_csv` produces for an empty cell) yield an empty list.

    Returns:
        A list of label strings with surrounding whitespace removed and empty
        entries dropped; an empty list when no labels can be found.
    """
    if not isinstance(json_string, str):
        return []

    # Requested format: one quoted, comma-separated string. Only the first match is used.
    as_string = re.search(r'"labels":\s*"([^"]+)"', json_string)
    if as_string:
        # Split on bare commas and strip, so "a,b" and "a ,  b" both parse correctly.
        return [label.strip() for label in as_string.group(1).split(',') if label.strip()]

    # Fallback: the labels were returned as a JSON array of quoted strings.
    as_array = re.search(r'"labels":\s*\[([^\]]*)\]', json_string)
    if as_array:
        quoted_items = re.findall(r'"([^"]*)"', as_array.group(1))
        return [label.strip() for label in quoted_items if label.strip()]

    return []

# Parse every raw response into a list of predicted label strings.
results_df['0'] = results_df['0'].apply(transform_labels)

# Pair ground truth with predictions by *position*. Building the frame directly from
# two Series aligns them on index labels instead, so any dataset row without a saved
# response would receive a NaN prediction and be scored as an all-zero vector.
truth_tags = testing_df['problem_tags'].reset_index(drop=True)
predicted_tags = results_df['0'].reset_index(drop=True)

if len(truth_tags) != len(predicted_tags):
    # NOTE(review): assumes the saved responses start at the first row of testing_df
    # (i.e. the request loop ran with LOWER_BOUND = 0) - confirm before trusting scores.
    evaluated_rows = min(len(truth_tags), len(predicted_tags))
    print(
        f"Warning: {len(truth_tags)} ground-truth rows but {len(predicted_tags)} predictions; "
        f"evaluating the first {evaluated_rows} rows only."
    )
    truth_tags = truth_tags.iloc[:evaluated_rows]
    predicted_tags = predicted_tags.iloc[:evaluated_rows]

merged_df = pd.DataFrame({
    'truths': truth_tags,
    'predictions': predicted_tags
})

# Encode both sides as 0/1 vectors over the same tag vocabulary.
merged_df['truths'] = merged_df['truths'].apply(lambda x: create_binary_vector(x, TOP_TAGS))
merged_df['predictions'] = merged_df['predictions'].apply(lambda x: create_binary_vector(x, TOP_TAGS))

y_true = merged_df['truths'].tolist()
y_pred = merged_df['predictions'].tolist()

# Calculate F1 macro score
f1_macro = f1_score(y_true, y_pred, average='macro')

# Calculate AUROC (computed from the 0/1 predictions; no probability scores are available)
auroc = roc_auc_score(y_true, y_pred, average='macro')

print(f"F1 Macro Score: {f1_macro}")
print(f"AUROC: {auroc}")