In [1]:
import ollama
from ollama import chat
from ollama import ChatResponse
from pydantic import BaseModel
import json
import pandas as pd

In [2]:
df = pd.read_csv("lisa_sheets.csv")

In [3]:
file_path = "raiforukraine-MARIA/data/train_test_split/test_folders.json"

In [4]:
with open(file_path, "r", encoding="utf-8") as file:
    test_folders = json.load(file)

In [5]:
df_test = df[df.folder.isin(test_folders)]

In [8]:
class MCQQuestion(BaseModel):
    question: str
    option_a: str
    option_b: str
    option_c: str
    option_d: str
    correct_option: str

In [9]:
from ollama import generate

def generate_mcq(content, model_name, temperature):
    prompt = f"""
    Based on the following educational content, generate a multiple-choice question with four answer 
    options where only one is correct. The question should assess understanding of the main ideas, 
    and the options should be clear, informative, and relevant. Ensure that the distractors (incorrect options) 
    follow a logical but incorrect interpretation, based on common misconceptions or misunderstandings of the topic.
    Answer options must be as short as possible.

    **Educational Content**
    {content}
    """
    
    generate_params = {
        'model': model_name,
        'options': {'temperature': temperature, 'num_ctx': 8192, 'top_p': 1}, 
        'prompt': prompt,
        'format': MCQQuestion.model_json_schema()
    }
    
    # Get a response
    response = generate(**generate_params)
    
    return response['response']

In [13]:
%%time
df_test['generated_questions_0.1'] = df_test['content_gpt'].apply(
    lambda content: generate_mcq(content, model_name="phi3.5:3.8b-mini-instruct-q8_0", temperature=0.1)
)

CPU times: user 3.69 s, sys: 232 ms, total: 3.92 s
Wall time: 1h 26min 59s


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [14]:
df_test['generated_questions_0.7'] = df_test['content_gpt'].apply(
    lambda content: generate_mcq(content, model_name="phi3.5:3.8b-mini-instruct-q8_0", temperature=0.7)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['generated_questions_0.7'] = df_test['content_gpt'].apply(


In [22]:
from pydantic import ValidationError

In [23]:
def validate_mcq(mcq_json):
    try:
        return MCQQuestion.model_validate_json(mcq_json)
    except ValidationError as e:
        print(f"Validation failed: {e}")
        return None
        


In [None]:
df["validated_mcq_0.7"] = df['generated_questions_0.7'].apply(validate_mcq)

In [None]:
df["validated_mcq_0.1"] = df['generated_questions_0.1'].apply(validate_mcq)