# Prepare Data

In [1]:
import os
import pandas as pd
import json

In [2]:
# Load the MCQs produced by each fine-tuned checkpoint. Every sub-folder that
# contains an ``mcqs.json`` contributes one ``(folder_name, DataFrame)`` entry.
dataframes = []
for root, dirs, _ in os.walk('/kaggle/input/finetuned-mcqs'):
    for dir_name in dirs:  # not ``dir``: that would shadow the builtin
        mcq_path = os.path.join(root, dir_name, 'mcqs.json')
        # os.walk descends into every nested folder; skip the ones that hold
        # no MCQ file instead of aborting with FileNotFoundError.
        if not os.path.isfile(mcq_path):
            continue
        # Explicit encoding so the result does not depend on the locale.
        with open(mcq_path, 'r', encoding='utf-8') as file:
            json_data = json.load(file)
        # Each element of the list is itself a JSON-encoded string, hence the
        # second decoding pass.
        decoded_data = [json.loads(item) for item in json_data]
        df = pd.json_normalize(decoded_data)
        dataframes.append((dir_name, df))

# Prepare the estimator model

In [3]:
!pip install openai

Collecting openai
  Downloading openai-1.55.3-py3-none-any.whl.metadata (24 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Downloading openai-1.55.3-py3-none-any.whl (389 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m389.6/389.6 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jiter-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (343 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m343.6/343.6 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jiter, openai
Successfully installed jiter-0.8.0 openai-1.55.3


In [4]:
from openai import OpenAI
from kaggle_secrets import UserSecretsClient

In [5]:
# The API key is fetched from the Kaggle secret store instead of being
# hard-coded in the notebook.
secrets_client = UserSecretsClient()
OPENAI_KEY = secrets_client.get_secret("OPENAI_KEY")
client = OpenAI(api_key=OPENAI_KEY)

def call_openai_api(system_prompt, user_prompt, model="gpt-4o", temperature=None):
    """Send one system/user prompt pair to the chat-completions endpoint.

    Args:
        system_prompt: Text placed in the ``system`` message.
        user_prompt: Text placed in the ``user`` message.
        model: Chat model to query. Defaults to ``"gpt-4o"``, the value that
            used to be hard-coded here.
        temperature: Optional sampling temperature. When ``None`` (default)
            the argument is not sent, so the API's own default applies; a
            low value such as ``0`` can be passed for more repeatable scores.

    Returns:
        The text content of the first returned choice, or ``None`` when the
        request or the response handling raised. The error is printed rather
        than re-raised so a single failed call does not stop a batch run.
    """
    request_kwargs = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    }
    # Only forward the temperature when the caller asked for one, so the
    # default call is identical to the original request.
    if temperature is not None:
        request_kwargs["temperature"] = temperature

    try:
        response = client.chat.completions.create(**request_kwargs)
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error occurred: {e}")
        return None

In [6]:
# System prompt passed to call_openai_api for every question. It asks the
# model to answer with a single score from 1 to 5.
# NOTE(review): "ambiguity" is defined here in terms of how plausible the
# distractors are — confirm this is the intended metric.
system_prompt = """You are tasked with evaluating the ambiguity of a multiple-choice question intended for use in a medical institution exam. Please assess the quality of ambiguity on a scale of 1 to 5, where:
1 indicates that the question has no meaningful ambiguity, with options that are either irrelevant, too obvious, or completely unrelated to the question.
5 indicates that the question has excellent ambiguity, with distractors that are plausible, logically related to the question, and reflective of common misconceptions or misunderstandings.
Provide only a score from 1 to 5.
"""

# Helper Functions

In [7]:
from tqdm import tqdm
import numpy as np

# Enables DataFrame.progress_apply, which process_dataframe relies on.
tqdm.pandas()

def generate_prompt_for_question(row):
    """Build the user prompt for one MCQ row and return the model's reply.

    Args:
        row: One row of an MCQ DataFrame (as passed by ``apply(axis=1)``),
            providing ``question`` and ``option_a`` ... ``option_d`` entries.

    Returns:
        Whatever ``call_openai_api`` returns for this question, or ``None``
        if that call raised.

    Raises:
        KeyError: If the question or one of the option entries is missing;
            the prompt is built outside the ``try`` block on purpose.
    """
    question_text = row['question']
    options = (
        f"a) {row['option_a']}\n"
        f"b) {row['option_b']}\n"
        f"c) {row['option_c']}\n"
        f"d) {row['option_d']}"
    )
    user_prompt = f"Question:\n{question_text}\nOptions:\n{options}"
    try:
        return call_openai_api(system_prompt, user_prompt)
    except Exception as e:
        # Fall back to the row label when there is no ``id`` entry, so the
        # handler cannot raise itself and hide the original error.
        row_id = row.get('id', getattr(row, 'name', None))
        print(f"Error processing question at index {row_id}: {e}")
        return None

In [8]:
def process_dataframe(model_name, df):
    """Score every question in ``df`` and write the result to a CSV file.

    The replies are stored in a new ``rank`` column on ``df`` itself (the
    frame is modified in place) and the frame is then saved to
    ``/kaggle/working/results_of_<model_name>.csv``. Any exception is printed
    and swallowed, in which case no file is written for this model.
    """
    try:
        scores = df.progress_apply(generate_prompt_for_question, axis=1)
        df['rank'] = scores
        # Persist the scored frame for this model.
        output_path = f'/kaggle/working/results_of_{model_name}.csv'
        df.to_csv(output_path, index=False)
    except Exception as err:
        print(f"Error occurred for model {model_name}: {err}")
        

# Orchestration

In [9]:
# Score each loaded (model_name, DataFrame) pair in turn; process_dataframe
# writes one results CSV per model.
for model_name, df in dataframes:
    print(f"Processing results of model {model_name}")
    process_dataframe(model_name, df)

Processing results of model checkpoint-60


100%|██████████| 131/131 [01:09<00:00,  1.88it/s]


Processing results of model checkpoint-50


100%|██████████| 131/131 [01:06<00:00,  1.98it/s]


Processing results of model checkpoint-80


100%|██████████| 131/131 [01:08<00:00,  1.91it/s]


Processing results of model checkpoint-70


100%|██████████| 131/131 [01:02<00:00,  2.11it/s]


Processing results of model checkpoint-30


100%|██████████| 131/131 [01:03<00:00,  2.07it/s]


Processing results of model checkpoint-40


100%|██████████| 131/131 [01:03<00:00,  2.06it/s]


Processing results of model checkpoint-10


100%|██████████| 131/131 [01:04<00:00,  2.04it/s]


Processing results of model checkpoint-100


100%|██████████| 131/131 [01:04<00:00,  2.03it/s]


Processing results of model checkpoint-90


100%|██████████| 131/131 [01:05<00:00,  1.99it/s]


Processing results of model checkpoint-20


100%|██████████| 131/131 [01:08<00:00,  1.91it/s]


Processing results of model best_model


100%|██████████| 131/131 [01:07<00:00,  1.94it/s]


In [10]:
# json_array =["{\"question\":\"What is the main purpose of an orthotic device classified under 'O. de d\\u00e9charge ou de semi-d\\u00e9charge'?\",\"option_a\":\"To improve mobility in the lower limbs.\",\"option_b\":\"To download and/or partially support the lower limbs.\",\"option_c\":\"To stabilize a limb joint.\",\"option_d\":\"To correct spinal deformity.\",\"correct_option\":\"b\",\"id\":\"OIC-328-06-B\"}",
#     "{\"question\":\"What is the recommended indication for spa treatment that has shown validated clinical effectiveness according to AFRETH's clinical trials?\",\"option_a\":\"Dermatological conditions\",\"option_b\":\"Respiratory tract diseases\",\"option_c\":\"Rheumatological conditions\",\"option_d\":\"Psychosomatic disorders\",\"correct_option\":\"C\",\"id\":\"OIC-328-12-B\"}",
#     "{\"question\":\"Which of the following is NOT an objective of orthotic treatment?\",\"option_a\":\"Stabilization\",\"option_b\":\"Correction\",\"option_c\":\"Downtime\",\"option_d\":\"Muscle strengthening\",\"correct_option\":\"D\",\"id\":\"OIC-328-05-A\"}"]

# decoded_data = [json.loads(item) for item in json_array]
# df = pd.json_normalize(decoded_data)
# df.head()

In [11]:
# Disabled smoke test: scores the small sample frame from the previous cell.
# process_dataframe('test', df)