In [1]:
import ollama
from ollama import chat
from ollama import ChatResponse
from pydantic import BaseModel
import json
import pandas as pd

In [7]:
# Load the MCQ dataset from JSON into a DataFrame; the path is relative to the
# notebook's working directory.
df_ans = pd.read_json('../../data/mcqs_answerability.json')
# Preview the first rows (question text, `cop`, the four option columns
# opa-opd, subject_name and id).
df_ans.head()

Unnamed: 0,question,cop,opa,opb,opc,opd,subject_name,id
0,Hypersecretory glaucoma is seen in -,A,Epidemic dropsy,Marfan's syndrome,Hypeension,Diabetes,Ophthalmology,38f20899-3b8d-4e87-9b0f-6fbcfcf0f215
1,"Endometrial biopsy in infertility"" is done at",C,3-6 days of menstrual cycle,5-7 days of menstrual cycle,21 - 23rd day of the menstrual cycle,24 - 26th days of the menstrual cycle,Gynaecology & Obstetrics,7b2a9322-e4fd-4282-ae41-d2db753f9114
2,Which of the following is the least effective ...,A,Deep dentin,Superficial dentin,Enamel,Cementum,Dental,9de4c458-aab3-44a1-a75f-c395ccafc3ab
3,Refraction in eye not affected by:,C,Removal of vitreous,Lens thickened,Anterior chamber depth increased,Axial length changed,Ophthalmology,45521705-d58d-4315-a437-c2a40962d1c3
4,Which of the following measures The amount of ...,A,Kleihauer - Betke test,Singers test,APT test,Benedict test,Gynaecology & Obstetrics,4cb190ab-e927-4d7b-b958-bcda14248cb7


In [3]:
import json
from enum import Enum
from typing import Union

from typing_extensions import Annotated

from pydantic import BaseModel, Field
from pydantic.config import ConfigDict

# Response schema for a single MCQ answer. Its JSON schema is passed to Ollama
# as the `format` argument in generate_mcq, and validate_mcq parses raw
# responses back into this model.
# NOTE: kept free of a class docstring on purpose - pydantic would emit it as a
# "description" in model_json_schema(), changing the payload sent to the model.
class MCQQuestion(BaseModel):
    # Answer text produced by the model. Declared as a plain `str`, so any
    # string validates - it is not restricted to the letters A-D.
    answer_option: str

In [4]:
from ollama import generate

def generate_mcq(prompt, model_name, temperature, num_predict=None):
    """Query an Ollama model with a schema-constrained prompt and return the raw text.

    Args:
        prompt: Full prompt text sent to the model.
        model_name: Ollama model tag to run.
        temperature: Sampling temperature forwarded in ``options``.
        num_predict: Optional upper bound on the number of generated tokens.
            ``None`` (the default) sends no cap. When choosing a value, leave
            room for the whole JSON object (braces, key and value); a cap that
            is too small truncates the response into invalid JSON.

    Returns:
        The ``'response'`` field of the result returned by ``generate`` - a
        string that is expected to be JSON matching ``MCQQuestion``'s schema.
    """
    options = {'temperature': temperature, 'num_ctx': 8196, 'top_p': 1}
    # Ollama's length cap is named 'num_predict'. 'max_tokens' is not a
    # recognised option name and does not limit the output, so it is not sent.
    if num_predict is not None:
        options['num_predict'] = num_predict

    response = generate(
        model=model_name,
        options=options,
        prompt=prompt,
        # Constrain the output to the MCQQuestion JSON schema.
        format=MCQQuestion.model_json_schema(),
    )

    return response['response']

In [5]:
def get_response_for_row(row, model_name, temp):
    """Build the multiple-choice prompt for one dataset row and query the model.

    Args:
        row: Mapping-like record providing the keys 'question', 'opa', 'opb',
            'opc' and 'opd'.
        model_name: Model tag passed through to ``generate_mcq``.
        temp: Sampling temperature passed through to ``generate_mcq``.

    Returns:
        Whatever ``generate_mcq`` returns for the assembled prompt.
    """
    labelled_columns = (("A", "opa"), ("B", "opb"), ("C", "opc"), ("D", "opd"))
    options = "\n".join(f"{letter}. {row[column]}" for letter, column in labelled_columns)

    # Assembled piece by piece, but byte-identical to the indented
    # triple-quoted template (leading newline and four-space indents included).
    prompt = (
        "\n"
        "    Answer the following multiple-choice question:\n\n\n"
        f"    Question: {row['question']}\nOptions:\n{options}\n\n\n"
        "    Provide only the letter corresponding to the correct answer (A, B, C, or D).\n"
        "    "
    )
    return generate_mcq(prompt, model_name, temp)

In [70]:
%%time
# Temperature-0.1 run: query the model once per row and store the raw JSON
# response string. The temperature matches the column name (it was previously
# 0.5, which made this column a second temperature-0.5 run).
df_ans['answerability_0.1'] = df_ans.apply(
    lambda row: get_response_for_row(row, model_name="llama3.2:1b-instruct-q8_0", temp=0.1),
    axis=1
)

CPU times: user 11.3 s, sys: 1.01 s, total: 12.3 s
Wall time: 28min 29s


In [71]:
%%time
# Temperature-0.5 run: one model call per row, keeping the raw response string.
# DataFrame.apply forwards the extra keyword arguments to get_response_for_row.
df_ans['answerability_0.5'] = df_ans.apply(
    get_response_for_row,
    axis=1,
    model_name="llama3.2:1b-instruct-q8_0",
    temp=0.5,
)

CPU times: user 8.81 s, sys: 769 ms, total: 9.58 s
Wall time: 32min 25s


In [8]:
%%time
# Temperature-0.7 run: one model call per row, keeping the raw response string.
# DataFrame.apply forwards the extra keyword arguments to get_response_for_row.
df_ans['answerability_0.7'] = df_ans.apply(
    get_response_for_row,
    axis=1,
    model_name="llama3.2:1b-instruct-q8_0",
    temp=0.7,
)

CPU times: user 12 s, sys: 806 ms, total: 12.8 s
Wall time: 42min 28s


In [9]:
# Save the current frame to a CSV next to the notebook, without the index
# column, so the slow model runs do not have to be repeated.
df_ans.to_csv('./llama1b_ans.csv', index=False)

In [74]:
# Preview the first rows of df_ans.
df_ans.head()

Unnamed: 0,question,cop,opa,opb,opc,opd,subject_name,id,answerability_0.1,answerability_0.5,answerability_0.7
0,Hypersecretory glaucoma is seen in -,A,Epidemic dropsy,Marfan's syndrome,Hypeension,Diabetes,Ophthalmology,38f20899-3b8d-4e87-9b0f-6fbcfcf0f215,"{ ""answer_option"" : ""a""}","{""answer_option"": ""A""}","{ ""answer_option"" : ""a"" }"
1,"Endometrial biopsy in infertility"" is done at",C,3-6 days of menstrual cycle,5-7 days of menstrual cycle,21 - 23rd day of the menstrual cycle,24 - 26th days of the menstrual cycle,Gynaecology & Obstetrics,7b2a9322-e4fd-4282-ae41-d2db753f9114,"{ ""answer_option"" : ""A""}","{ ""answer_option"" : ""B""}","{ ""answer_option"" : ""a"" }"
2,Which of the following is the least effective ...,A,Deep dentin,Superficial dentin,Enamel,Cementum,Dental,9de4c458-aab3-44a1-a75f-c395ccafc3ab,"{ ""answer_option"" :""a"" }","{ ""answer_option"" : ""a"" }","{ ""answer_option"" : ""a"" }"
3,Refraction in eye not affected by:,C,Removal of vitreous,Lens thickened,Anterior chamber depth increased,Axial length changed,Ophthalmology,45521705-d58d-4315-a437-c2a40962d1c3,"{ ""answer_option"" : ""a"" }","{ ""answer_option"" :""a"" }","{ ""answer_option"": ""A"" }"
4,Which of the following measures The amount of ...,A,Kleihauer - Betke test,Singers test,APT test,Benedict test,Gynaecology & Obstetrics,4cb190ab-e927-4d7b-b958-bcda14248cb7,"{ ""answer_option"" :""A"" }","{ ""answer_option""\n : ""a"" }","{ ""answer_option"" : ""A"" }"


In [12]:
from pydantic import ValidationError

def validate_mcq(mcq_json):
    """Parse a raw JSON response into an ``MCQQuestion``.

    Args:
        mcq_json: JSON text produced by the model.

    Returns:
        The validated ``MCQQuestion``, or ``None`` when validation fails (the
        validation error is printed in that case).
    """
    parsed = None
    try:
        parsed = MCQQuestion.model_validate_json(mcq_json)
    except ValidationError as err:
        # Report the failure and fall through with the None default.
        print(f"Validation failed: {err}")
    return parsed

In [13]:
# Parse every raw temperature-0.7 response with validate_mcq. Entries that fail
# validation become None (validate_mcq prints the error and returns None).
df_ans['answerability_0.7_val'] = df_ans['answerability_0.7'].apply(validate_mcq)

In [10]:
# List the distinct raw response strings to see how the model formats its JSON.
df_ans['answerability_0.7'].unique()

array(['{ "answer_option"\n \t:"" }\n \t', '{ "answer_option" : "a" }',
       '{ "answer_option": "a" }', '{ "answer_option": "A" }',
       '{"answer_option": "A"}', '{ "answer_option" :"A" }',
       '{ "answer_option" : "A" }', '{ "answer_option" :"a" }',
       '{ "answer_option" : "c" }', '{ "answer_option" : "A"}',
       '{ "answer_option" : "a"}', '{ "answer_option": "b" }',
       '{ "answer_option" :\n "b" }', '{ "answer_option" :"A"}',
       '{ "answer_option" : "a"}\n  ', '{ "answer_option": "A,"}',
       '{ "answer_option"\n  :"A"}', '{ "answer_option" : " A" }',
       '{\n  "answer_option": "A" }', '{ "answer_option" : "a"}\n   ',
       '{ "answer_option" :\n  "C" }', '{ "answer_option" : "d" }',
       '{"answer_option": "B"}', '{ "answer_option" :"a"}',
       '{ "answer_option" : "d"}', '{ "answer_option" : "b" }',
       '{ "answer_option": "c" }', '{ "answer_option" : "b"}',
       '{ "answer_option"\n  : "a"\n}', '{ "answer_option"\n  : "A" }',
       '{ "answe

In [14]:
# Extract the answer text from each validated MCQQuestion and upper-case it.
# validate_mcq yields None for responses that failed validation, so guard
# against that instead of raising AttributeError; such rows get the same empty
# string the model's blank answers produce.
df_ans['model_answer'] = df_ans['answerability_0.7_val'].apply(
    lambda parsed: parsed.answer_option.upper() if parsed is not None else ''
)

In [17]:
# Inspect the distinct extracted answers to spot values that are not a bare
# option letter.
df_ans['model_answer'].unique()

array(['', 'A', 'C', 'B', 'A,', ' A', 'D', 'A }  ', 'A   ', 'A }   ',
       'A  }  -  { ', '3.4', 'A}   ', 'A}', 'C,', 'A} ', 'A }', 'A ',
       'A}        ', 'OPTION_A', '1', 'E', 'A} & ', 'D,', '- A', '/A ',
       "A   }  // A IS THE ONLY OPTION FOR IGA IN THIS CASE AS IT'S NOT IGG, IGM OR IGE. THE REACTION USES FC REGION OF ANTIGEN TO ELICIT RESPONSE FROM VARIABLE REGION. OPTION A IGA (A) IS CORRECT ANSWER AS PK REACTION WAS USED PRIMARILY TO DEMONSTRATE IGA.  ",
       'C}  -GCT  A ST. IS AN AST ENZYME WITH MITOCHONDRIAL ORIGIN IN LIVER CELLS AND EXCRETED INTO BILE. THIS MEANS IT IS PREDOMINANTLY FOUND WITHIN MITOCHONDRIA. IT IS THE LEAST SPECIFIC OF THE FOUR OPTIONS, PRIMARILY BEING ASSOCIATED WITH HEPATOCYTES (LIVER CELLS) BUT CAN ALSO BE SEEN IN SKELETAL MUSCLE. THE OTHER ENZYMES LISTED, WHILE ALSO LIVER ENZYMES, HAVE A HIGHER ASSOCIATION WITH THE CYTOPLASM OR NUCLEUS.  SO OPTION C, GGT, IS CORRECT SINCE IT PREDOMINANTLY HAS A MITOCHONDRIAL ORIGIN.  B AND D ARE NOT PRIMARILY 

In [23]:
# Reduce each raw answer to its first non-whitespace character before mapping.
# The mapping keys are single characters, so applying it to the untrimmed
# strings (e.g. 'A,', 'A }  ', '3.4') would leave almost every messy value
# untouched. Empty answers stay '' after slicing and are then mapped to 'A';
# the digits '0', '1' and '3' are mapped to letters.
df_ans['model_answer'] = (
    df_ans['model_answer']
    .str.lstrip()
    .str[:1]
    .replace({'': 'A', '3': 'D', '0': 'A', '1': 'B'})
)

In [22]:
# Show the distinct first non-whitespace characters of the answers. This only
# displays the result - nothing is assigned back to df_ans. Indexing with [0]
# raises IndexError for an empty or whitespace-only string.
df_ans['model_answer'].apply(lambda s: s.lstrip()[0]).unique()

array(['A', 'C', 'B', 'D', '3', 'O', '1', 'E', '-', '/'], dtype=object)

In [101]:
# Show the columns currently present in df_ans.
df_ans.columns

Index(['question', 'cop', 'opa', 'opb', 'opc', 'opd', 'subject_name', 'id',
       'answerability_0.1', 'answerability_0.5', 'answerability_0.7',
       'answerability_0.1_val', 'model_answer', 'answerability_0.5_val'],
      dtype='object')

In [24]:
# Export the question fields (id, question, cop, opa-opd) together with
# model_answer for the temperature-0.7 run, without the index column.
df_ans[['id', 'question', 'cop', 'opa', 'opb', 'opc', 'opd', 'model_answer']].to_csv('../../data/base_models/llama1b/answerability/temp0.7.csv', index=False)