# Tek Bir JSON Dosyasından Veri Çekelim

In [1]:
import json

# Location of the single-model results file used for this walkthrough
file_path = "Model2.json"

# Parse the whole JSON document into memory
with open(file_path, mode="r", encoding="utf-8") as json_file:
    data = json.load(json_file)

entries = []

model_name = data["model_name"]

# Flatten each generation into one record per (word, sentence) pair.
# zip() pairs the two lists positionally and stops at the shorter one.
for generation in data["generations"]:
    level = generation["cefr_level"]
    words = generation["word_list"]
    sentences = generation["generated_sentences"]
    entries.extend(
        dict(model=model_name, level=level, word=word, sentence=sentence)
        for word, sentence in zip(words, sentences)
    )

# Sanity check: print the first few records
for record in entries[:5]:
    print(record)


{'model': 'Llama-3.2-8B-Instruct/Llama-3.2-8B-Instruct-Q4_K_M.gguf', 'level': 'A1', 'word': 'age', 'sentence': 'My birthday is on January 12th.'}
{'model': 'Llama-3.2-8B-Instruct/Llama-3.2-8B-Instruct-Q4_K_M.gguf', 'level': 'A1', 'word': 'animal', 'sentence': 'The cat is a pet animal.'}
{'model': 'Llama-3.2-8B-Instruct/Llama-3.2-8B-Instruct-Q4_K_M.gguf', 'level': 'A1', 'word': 'ask', 'sentence': 'Can you ask your teacher for help?'}
{'model': 'Llama-3.2-8B-Instruct/Llama-3.2-8B-Instruct-Q4_K_M.gguf', 'level': 'A1', 'word': 'computer', 'sentence': 'The computer is very useful for students.'}
{'model': 'Llama-3.2-8B-Instruct/Llama-3.2-8B-Instruct-Q4_K_M.gguf', 'level': 'A1', 'word': 'eat', 'sentence': 'I like to eat pizza on Fridays.'}


# Çoklu Dosyadan Veriyi Al, Etiketle, Karıştır

In [3]:
import os
import json
import random
from collections import defaultdict

# 📁 Folder containing the per-model JSON result files
json_folder = "data/model_results"

# 🏷️ Anonymous labels shown to the evaluator. Their count is also the number
# of sentences (one per model) that every (level, word) group must contain.
labels = ["Sentence A", "Sentence B", "Sentence C", "Sentence D", "Sentence E", "Sentence F"]

# 💾 Flat list of every generated sentence across all files
all_entries = []

# 📥 Read every JSON file. os.listdir() returns names in arbitrary order, so
# sort them to get the same entry order on every run and every platform.
for filename in sorted(os.listdir(json_folder)):
    if not filename.endswith(".json"):
        continue

    filepath = os.path.join(json_folder, filename)
    # Keep the file open only for as long as it takes to parse it
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    model_name = data["model_name"]
    for generation in data["generations"]:
        level = generation["cefr_level"]
        words = generation["word_list"]
        sentences = generation["generated_sentences"]

        # zip() stops at the shorter list; report the mismatch instead of
        # dropping the unmatched items without a trace.
        if len(words) != len(sentences):
            print(
                f"Uyarı: {filename} dosyasında {level} seviyesinde {len(words)} kelime, "
                f"{len(sentences)} cümle var. Fazla olanlar yok sayılıyor."
            )

        for word, sentence in zip(words, sentences):
            all_entries.append({
                "model": model_name,
                "level": level,
                "word": word,
                "sentence": sentence
            })

# ✅ Group the sentences of all models by word
grouped = defaultdict(list)

for entry in all_entries:
    key = (entry["level"], entry["word"])  # e.g. ("A1", "age")
    grouped[key].append(entry)

# ✅ Shuffle the sentences, label them and build the label -> model mapping
all_tasks = []

for (level, word), sentence_group in grouped.items():
    # A task needs exactly one sentence per label; skip incomplete groups
    if len(sentence_group) != len(labels):
        print(f"Uyarı: {level} seviyesinde '{word}' kelimesi için {len(sentence_group)} cümle var. Atlanıyor.")
        continue

    # Shuffle in place so a label's position does not reveal the model
    random.shuffle(sentence_group)

    # (label, sentence) pairs in display order
    labeled_sentences = [
        (label, item["sentence"]) for label, item in zip(labels, sentence_group)
    ]
    # label -> full record, used later to map ratings back to models
    mapping = {
        label: {
            "model": item["model"],
            "level": level,
            "word": word,
            "sentence": item["sentence"]
        }
        for label, item in zip(labels, sentence_group)
    }

    all_tasks.append({
        "level": level,
        "word": word,
        "labeled_sentences": labeled_sentences,
        "mapping": mapping
    })

# 🔍 Sample output (one task); guarded so an empty result does not raise IndexError
if all_tasks:
    example = all_tasks[0]
    print(f"\nSample task for word '{example['word']}' (Level: {example['level']})\n")
    for label, sentence in example["labeled_sentences"]:
        print(f"{label}: {sentence}")
else:
    print("Uyarı: Hiç görev oluşturulamadı; örnek çıktı gösterilemiyor.")



Sample task for word 'age' (Level: A1)

Sentence A: My age is seven years old.
Sentence B: My age is twenty-five years old.
Sentence C: My birthday is on January 12th.
Sentence D: I am 10 years old, and I love playing with my pet dog.
Sentence E: I am ten years old.
Sentence F: I am 20 years old.


# Promptları Otomatik Üretmek

In [4]:
task_prompts = []

prompt_template = """You are an expert English evaluator.

Evaluate the following 6 example sentences that all use the word: "{word}" (CEFR Level: {level}).

Rate each sentence from 1 (poor) to 5 (excellent) on the following four criteria:

1. Word Usage – Is the word used correctly and meaningfully?
2. Clarity – Is the sentence understandable and appropriate for the given CEFR level?
3. Grammar – Is the grammar correct and level-appropriate?
4. Naturalness – Does the sentence sound natural and fluent (not AI-generated)?

⚠️ Do not give any explanations or comments.
⚠️ Just return the ratings in the following format, with no extra output:

Sentence A: <Word Usage>, <Clarity>, <Grammar>, <Naturalness>
Sentence B: ...
Sentence C: ...
Sentence D: ...
Sentence E: ...
Sentence F: ...

Sentences:
Sentence A: {sentence_A}
Sentence B: {sentence_B}
Sentence C: {sentence_C}
Sentence D: {sentence_D}
Sentence E: {sentence_E}
Sentence F: {sentence_F}
"""

# Fill the template once per task
for task in all_tasks:
    word = task["word"]
    level = task["level"]
    labeled = dict(task["labeled_sentences"])  # label -> sentence text

    # Template placeholders sentence_A .. sentence_F, each taken from the
    # matching "Sentence A" .. "Sentence F" label
    sentence_fields = {
        f"sentence_{letter}": labeled[f"Sentence {letter}"] for letter in "ABCDEF"
    }
    prompt = prompt_template.format(word=word, level=level, **sentence_fields)

    # Keep the task identity (level, word) next to its prompt
    task_prompts.append({"level": level, "word": word, "prompt": prompt})


In [11]:
# Print one generated prompt in full as a sanity check.
# NOTE(review): the banner says "First", but index 49 selects the 50th prompt.
banner = "🔍 First Prompt Preview:\n"
print(banner)
preview = task_prompts[49]
print(preview["prompt"])


🔍 First Prompt Preview:

You are an expert English evaluator.

Evaluate the following 6 example sentences that all use the word: "sovereignty" (CEFR Level: C1).

Rate each sentence from 1 (poor) to 5 (excellent) on the following four criteria:

1. Word Usage – Is the word used correctly and meaningfully?
2. Clarity – Is the sentence understandable and appropriate for the given CEFR level?
3. Grammar – Is the grammar correct and level-appropriate?
4. Naturalness – Does the sentence sound natural and fluent (not AI-generated)?

⚠️ Do not give any explanations or comments.
⚠️ Just return the ratings in the following format, with no extra output:

Sentence A: <Word Usage>, <Clarity>, <Grammar>, <Naturalness>
Sentence B: ...
Sentence C: ...
Sentence D: ...
Sentence E: ...
Sentence F: ...

Sentences:
Sentence A: The country's sovereignty is not being respected by the neighboring state.
Sentence B: The country's sovereignty was threatened by the invasion.
Sentence C: The disputed territory's 