In [1]:
5+5

10

In [2]:
import psutil

def check_if_running(process_name):
    running = False
    for proc in psutil.process_iter(["name"]):
        if process_name in proc.info["name"]:
            running = True
            break
    return running

ollama_running = check_if_running("ollama")

if not ollama_running:
    raise RuntimeError("Ollama not running. Launch ollama before proceeding.")
print("Ollama running:", check_if_running("ollama"))

Ollama running: True


In [3]:
# This cell is optional; it allows you to restart the notebook
# and only run section 7.7 without rerunning any of the previous code
import json
from tqdm import tqdm

file_path = "instruction-data-with-response.json"

with open(file_path, "r") as file:
    test_data = json.load(file)


def format_input(entry):
    instruction_text = (
        f"Below is an instruction that describes a task. "
        f"Write a response that appropriately completes the request."
        f"\n\n### Instruction:\n{entry['instruction']}"
    )

    input_text = f"\n\n### Input:\n{entry['input']}" if entry["input"] else ""

    return instruction_text + input_text

In [6]:
import urllib.request

def query_model(
    prompt,
    model="gemma3:1b",
    url="http://localhost:11434/api/chat"
):
    # Create the data payload as a dictionary
    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "options": {     # Settings below are required for deterministic responses
            "seed": 123,
            "temperature": 0,
            "num_ctx": 2048
        }
    }


    # Convert the dictionary to a JSON formatted string and encode it to bytes
    payload = json.dumps(data).encode("utf-8")

    # Create a request object, setting the method to POST and adding necessary headers
    request = urllib.request.Request(
        url,
        data=payload,
        method="POST"
    )
    request.add_header("Content-Type", "application/json")

    # Send the request and capture the response
    response_data = ""
    with urllib.request.urlopen(request) as response:
        # Read and decode the response
        while True:
            line = response.readline().decode("utf-8")
            if not line:
                break
            response_json = json.loads(line)
            response_data += response_json["message"]["content"]

    return response_data


model = "gemma3:1b"
result = query_model("مين هي مي عز الدين؟", model)
print(result)

مي عز الدين هي مصورة ومخرجة مصرية، اشتهرت بأسلوبها الفوتوغرافي الفريد الذي يجمع بين الواقعية والجمالية. إليك بعض النقاط الرئيسية عنها:

* **النشأة والتعليم:** ولدت مي في 1976 في القاهرة، وتخرجت من كلية الفنون الجميلة بجامعة القاهرة.
* **الأسلوب الفني:** تتميز أعمالها بتكوينها المبتكر، واستخدامها للضوء والظل، والتركيز على التفاصيل الصغيرة، والتقنيات المتقدمة. غالبًا ما تركز على صور الحياة اليومية المصرية، وتُظهر الجمال في الأشياء البسيطة.
* **العمل الفني:** عملت مي عز الدين في مجال التصوير الفوتوغرافي لمدة 30 عامًا، وشاركت في العديد من المشاريع الفوتوغرافية، بما في ذلك:
    * **"المرأة المصرية"**: مشروعها الأكثر شهرة، يركز على تصوير المرأة المصرية في مختلف المواقف، مع التركيز على الجمال والواقعية.
    * **"المدينة"**: سلسلة صور تظهر الحياة في شوارع القاهرة.
    * **"الخارجة"**: سلسلة صور تجريبية تستكشف موضوعات مثل الهوية والذاكرة.
* **الجوائز والتكريمات:** حصلت مي عز الدين على العديد من الجوائز والتقدير، بما في ذلك:
    * **جائزة "أفضل فوتographer"** في معرض "أفضل فوتographer" في مصر.
 

In [7]:
for entry in test_data[:3]:
    prompt = (
        f"Given the input `{format_input(entry)}` "
        f"and correct output `{entry['output']}`, "
        f"score the model response `{entry['model_response']}`"
        f" on a scale from 0 to 100, where 100 is the best score. "
    )
    print("\nDataset response:")
    print(">>", entry['output'])
    print("\nModel response:")
    print(">>", entry["model_response"])
    print("\nScore:")
    print(">>", query_model(prompt))
    print("\n-------------------------")


Dataset response:
>> The car is as fast as lightning.

Model response:
>> The car is as fast as a horse.

Score:
>> Here's the rewritten sentence and a score:

**Rewritten Sentence:** The car is as fast as lightning.

**Score:** 95

**Explanation:** The simile "as fast as lightning" effectively conveys the speed of the car in a vivid and memorable way, enhancing the description. It’s a strong and appropriate comparison.


-------------------------

Dataset response:
>> The type of cloud typically associated with thunderstorms is cumulonimbus.

Model response:
>> The type of cloud typically associated with thunderstorms is a tropical rainforest.

Score:
>> 90

**Explanation:**

The prompt asks for a specific type of cloud, and the provided answer directly and accurately identifies cumulonimbus as the most relevant cloud type for thunderstorms. The original response is a good, concise answer, making it a strong score.


-------------------------

Dataset response:
>> Jane Austen.

Model

In [8]:
def generate_model_scores(json_data, json_key, model="gemma3:1b"):
    scores = []
    for entry in tqdm(json_data, desc="Scoring entries"):
        prompt = (
            f"Given the input `{format_input(entry)}` "
            f"and correct output `{entry['output']}`, "
            f"score the model response `{entry[json_key]}`"
            f" on a scale from 0 to 100, where 100 is the best score. "
            f"Respond with the integer number only."
        )
        score = query_model(prompt, model)
        try:
            scores.append(int(score))
        except ValueError:
            print(f"Could not convert score: {score}")
            continue

    return scores


scores = generate_model_scores(test_data, "model_response")
print(f"Number of scores: {len(scores)} of {len(test_data)}")
print(f"Average score: {sum(scores)/len(scores):.2f}\n")

Scoring entries:   8%|▊         | 9/110 [00:32<06:19,  3.75s/it]

Could not convert score: Nostalgia washed over her as she looked through the old photos. – 95



Scoring entries:   9%|▉         | 10/110 [00:36<06:33,  3.94s/it]

Could not convert score: Prime numbers: 11, 19
Composite numbers: 14



Scoring entries:  43%|████▎     | 47/110 [02:47<03:38,  3.47s/it]

Could not convert score: Could you provide the meeting time?



Scoring entries:  52%|█████▏    | 57/110 [03:28<03:24,  3.85s/it]

Could not convert score: He will be reading a novel inspired by his grandmother.

Score: 100



Scoring entries:  61%|██████    | 67/110 [04:04<02:38,  3.69s/it]

Could not convert score: Did the dog chase the cat?
Score: 100



Scoring entries:  83%|████████▎ | 91/110 [05:28<01:06,  3.50s/it]

Could not convert score: The food was delicious.



Scoring entries:  85%|████████▍ | 93/110 [05:36<01:08,  4.02s/it]

Could not convert score: 1. Vitamin A
2. Vitamin B12
3. Vitamin C
4. Vitamin K
5. Vitamin K2
Score: 100



Scoring entries: 100%|██████████| 110/110 [06:34<00:00,  3.59s/it]

Could not convert score: 5.5

Number of scores: 102 of 110
Average score: 80.58




