In [1]:
# https://platform.olimpiada-ai.ro/problems/43
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import torch
import re

In [2]:
# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [3]:
# Read the two competition splits from the Kaggle input mount.
data_dir = "/kaggle/input/residency-exam"
train = pd.read_csv(data_dir + "/train.csv")
test = pd.read_csv(data_dir + "/test.csv")

In [4]:
# Peek at the first three training rows to check the column layout.
train.head(3)

Unnamed: 0,SampleID,Question,Option0,Option1,Option2,Option3,Answer
0,e9ad821a-c438-4965-9f77-760819dfa155,Chronic urethral obstruction due to benign pri...,Hyperplasia,Hyperophy,Atrophy,Dyplasia,2
1,e3d3c4e1-4fb2-45e7-9f88-247cc8f373b3,Which vitamin is supplied from only animal sou...,Vitamin C,Vitamin B7,Vitamin B12,Vitamin D,2
2,dc6794a3-b108-47c5-8b1b-3b4931577249,Growth hormone has its effect on growth through?,Directly,IG1-1,Thyroxine,Intranuclear receptors,1


In [5]:
!pip install llama-cpp-python -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 MB[0m [31m39.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone


In [6]:
from huggingface_hub import hf_hub_download

# Hub coordinates of the GGUF checkpoint used for inference.
gguf_repo = 'mradermacher/MedicalQwen3-Reasoning-4B-i1-GGUF'
gguf_file = 'MedicalQwen3-Reasoning-4B.i1-Q4_K_M.gguf'

# Download into ./medicalqwen; the call returns the local path of the file.
model_path = hf_hub_download(repo_id=gguf_repo, filename=gguf_file, local_dir='medicalqwen')

model_path

MedicalQwen3-Reasoning-4B.i1-Q4_K_M.gguf:   0%|          | 0.00/2.50G [00:00<?, ?B/s]

'medicalqwen/MedicalQwen3-Reasoning-4B.i1-Q4_K_M.gguf'

In [7]:
# Show the GPU model, driver version and current GPU memory usage.
!nvidia-smi

Sun Nov 30 10:47:52 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.172.08             Driver Version: 570.172.08     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off |   00000000:00:04.0 Off |                    0 |
| N/A   32C    P0             27W /  250W |       3MiB /  16384MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [11]:
from llama_cpp import Llama

# Load the GGUF checkpoint downloaded above.
llm = Llama(
    model_path=model_path,
    n_threads=4,
    # Kept on so the load log shows whether layers were offloaded to the GPU.
    verbose=True,
    # -1 offloads every layer to the GPU; the previous value of 1 moved only a
    # single layer and left the rest of the model on the CPU.
    n_gpu_layers=-1,
    # The library default context is 512 tokens; the instruction prompt plus a
    # long question can exceed that, so reserve more room explicitly.
    n_ctx=2048,
)

llama_model_loader: loaded meta data with 51 key-value pairs and 398 tensors from medicalqwen/MedicalQwen3-Reasoning-4B.i1-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen3
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = MedicalQwen3 Reasoning 4B
llama_model_loader: - kv   3:                           general.basename str              = MedicalQwen3-Reasoning
llama_model_loader: - kv   4:                         general.size_label str              = 4B
llama_model_loader: - kv   5:                            general.license str              = apache-2.0
llama_model_loader: - kv   6:                   general.base_model.count u32              = 1
llama_model_loader:

In [9]:
# Re-check GPU state (memory usage / utilisation).
!nvidia-smi

Sun Nov 30 10:49:19 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.172.08             Driver Version: 570.172.08     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off |   00000000:00:04.0 Off |                    0 |
| N/A   32C    P0             27W /  250W |       3MiB /  16384MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
def generate_prompt(df, idx):
    """Build the instruction prompt for one multiple-choice question.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Question`` column and ``Option0`` .. ``Option3`` columns.
    idx : int
        Positional row index (looked up with ``DataFrame.iloc``).

    Returns
    -------
    str
        Prompt listing the answering rules, the question and the four options
        lettered A-D, asking for a reply of the form ``Answer: <letter>``.
    """
    record = df.iloc[idx]
    stem = record['Question']
    choice_a, choice_b, choice_c, choice_d = (record[f'Option{k}'] for k in range(4))

    # One entry per prompt line. The empty first and last entries produce the
    # leading and trailing newline once everything is joined with "\n".
    prompt_lines = [
        "",
        "You are answering a multiple-choice medical question.",
        "",
        "You MUST obey these rules:",
        "1. Do NOT write your reasoning or thinking.",
        "2. Do NOT explain anything.",
        "3. Do NOT output anything except the final letter.",
        "4. Your output MUST follow the format exactly:  ",
        "   Answer: <letter>",
        "",
        f"Question: {stem}",
        "",
        "Options:",
        f"A) {choice_a}",
        f"B) {choice_b}",
        f"C) {choice_c}",
        f"D) {choice_d}",
        "",
        "Give ONLY the final answer letter. Nothing else.",
        "",
    ]
    return "\n".join(prompt_lines)


def extract_idx(output):
    """Parse the model's reply into an option index.

    Two reply shapes are accepted, both case-insensitively:

    * ``Answer: <letter>`` anywhere in the text (the format the prompt asks for);
    * a reply that consists of nothing but the letter, optionally followed by
      ``)``, ``.`` or ``:`` (the prompt also says "Give ONLY the final answer
      letter", so the model may omit the ``Answer:`` prefix).

    Parameters
    ----------
    output : str
        Raw completion text returned by the model.

    Returns
    -------
    int or float
        0-3 for A-D, or ``np.nan`` when no answer letter can be found
        (including when ``output`` is not a string).
    """
    if not isinstance(output, str):
        return np.nan

    # The trailing \b stops a word such as "Both" from being read as option B.
    match = re.search(r"Answer:\s*([A-D])\b", output, flags=re.IGNORECASE)
    if match is None:
        # Fallback: the whole reply is just the letter.
        match = re.fullmatch(r"\s*([A-D])\s*[).:]?\s*", output, flags=re.IGNORECASE)
    if match is None:
        return np.nan

    return "ABCD".index(match.group(1).upper())

In [None]:
# Smoke test: send one training question through the model and show the raw
# completion next to the option index parsed from it.
print("Prompt:\n")

prompt = generate_prompt(train, 3)

print(prompt)

print("------------------------------")

print("Response:\n")

# temperature=0.0 selects greedy decoding, so re-running the cell gives the
# same answer (the library otherwise samples with temperature 0.8).
response = llm(
    prompt,
    stop=['\n', '.', '(', ')', '?', '!'],
    temperature=0.0,
)['choices'][0]['text']
idx = extract_idx(response)

print(response)

print("------------------------------")

print(f"{idx} - Chosen")

In [None]:
# Predict an option index for every test question.
# `answers` holds one entry per test row: 0-3, or NaN when no letter could be
# parsed from the completion; `nan_count` tracks how many rows failed to parse.
answers = []
nan_count = 0

for i in (pbar := tqdm(range(len(test)))):
    prompt = generate_prompt(test, i)
    # Greedy decoding (temperature=0.0) makes the predictions reproducible;
    # the library otherwise samples with temperature 0.8.
    response = llm(
        prompt,
        stop=['\n', '.', '(', ')', '?', '!'],
        temperature=0.0,
    )['choices'][0]['text']
    idx = extract_idx(response)
    answers.append(idx)
    # pd.isna is a value check; `idx is np.nan` only holds for the np.nan
    # singleton and would miss any other NaN object.
    if pd.isna(idx):
        nan_count += 1
    # The tqdm bar already reports progress, so no extra counter prints.
    pbar.set_postfix({'nan_count': nan_count})

print(f"Nan count: {nan_count}/{len(test)}")

In [None]:
# Assemble the submission table: one row per test sample with its predicted
# option index. A nullable integer column keeps predictions as integers
# (written as 2, not 2.0) while still allowing a missing value for rows where
# no answer was parsed; a plain list containing NaN would become float64.
subm = pd.DataFrame({
    'DatapointID': test['SampleID'],
    'PredictedAnswer': pd.array(answers, dtype="Int64")
})

subm

In [None]:
# Write the predictions to submission.csv, leaving out the DataFrame index.
subm.to_csv("submission.csv", index=False)

In [None]:
# Fallback submission: rows with a missing prediction default to option 0.
# Only the prediction column is filled, and it is cast back to int so labels
# are written as 0-3 rather than 0.0-3.0 (a column that held NaN is float).
fill_nans_with_zeros_subm = subm.assign(
    PredictedAnswer=subm['PredictedAnswer'].fillna(0).astype(int)
)

# NOTE(review): the file name is spelled "submisison"; left unchanged in case
# something else refers to it.
fill_nans_with_zeros_subm.to_csv("filled_submisison.csv", index=False)

fill_nans_with_zeros_subm