In [5]:
import os
import json
import time
from openai import OpenAI

class Evaluator:
    def __init__(self, api_key, base_url, model_name="deepseek-chat", temperature=1.0, delay=2):
        self.client = OpenAI(api_key=api_key, base_url=base_url)
        self.model_name = model_name
        self.temperature = temperature
        self.delay = delay

    def evaluate_prompt(self, prompt):
        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt}],
                temperature=self.temperature
            )
            raw_output = response.choices[0].message.content
            time.sleep(self.delay)
            return raw_output
        except Exception as e:
            print(f"API call error: {e}")
            return None

    @staticmethod
    def parse_scores(raw_response):
        parsed = {}
        if not raw_response:
            return parsed
        lines = raw_response.strip().split("\n")
        for line in lines:
            if ":" in line:
                parts = line.split(":")
                label = parts[0].strip()
                scores = [int(s.strip()) for s in parts[1].split(",") if s.strip().isdigit()]
                if len(scores) == 4:
                    parsed[label] = {
                        "Word Usage": scores[0],
                        "Clarity": scores[1],
                        "Grammar": scores[2],
                        "Naturalness": scores[3]
                    }
        return parsed

def load_json(path):
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def main():
    # ENV değişkenlerinden alabilir veya doğrudan buraya da yazabilirsin
    api_key = os.getenv("sk-9115f967efad41a09f30b761d5f36f53")
    base_url = os.getenv("https://api.deepseek.com", "https://api.deepseek.com/v1")  # Gerekirse değiştir
    model_name = "deepseek-chat"

    evaluator = Evaluator(api_key=api_key, base_url=base_url, model_name=model_name)

    base_dir = os.getcwd()

    tasks_dir = os.path.abspath(os.path.join(base_dir, "..", "data", "tasks"))
    scores_dir = os.path.abspath(os.path.join(base_dir, "..", "data", "scores"))
    prompts_dir = os.path.abspath(os.path.join(base_dir, "..", "data", "prompts"))
    os.makedirs(scores_dir, exist_ok=True)

    levels = ["A1", "A2", "B1", "B2", "C1"]

    for level in levels:
        task_file = os.path.join(tasks_dir, f"tasks_{level}.json")
        prompt_file = os.path.join(prompts_dir, f"prompts_{level}.json")

        tasks = load_json(task_file)
        prompts = load_json(prompt_file)

        prompt_dict = {p["word"]: p["prompt"] for p in prompts}
        scored_tasks = []

        for task in tasks:
            word = task["word"]
            if word not in prompt_dict:
                print(f"Warning: Prompt for word '{word}' not found in level {level}")
                continue

            prompt = prompt_dict[word]
            print(f"Evaluating word: {word} (level {level})")

            raw_response = evaluator.evaluate_prompt(prompt)
            parsed_scores = Evaluator.parse_scores(raw_response)

            if not parsed_scores:
                print(f"⚠️ Empty or invalid response for word: {word}")
                continue

            scored_tasks.append({
                "level": level,
                "word": word,
                "mapping": task["mapping"],
                "scores": parsed_scores
            })

        score_output_path = os.path.join(scores_dir, f"scores_{level}.json")
        with open(score_output_path, "w", encoding="utf-8") as f:
            json.dump(scored_tasks, f, ensure_ascii=False, indent=2)

        print(f"✅ Scores saved to {score_output_path}")

if __name__ == "__main__":
    main()


OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [2]:
import os
import json
import re
from openai import OpenAI
from tqdm import tqdm

client = OpenAI(
    api_key="sk-9115f967efad41a09f30b761d5f36f53",
    base_url="https://api.deepseek.com"
)


# Task dosyalarının olduğu klasör
tasks_dir = os.path.abspath(os.path.join(os.getcwd(), "..", "data", "tasks"))
output_dir = os.path.abspath(os.path.join(os.getcwd(), "..", "data", "ratings"))
os.makedirs(output_dir, exist_ok=True)

# Yardımcı: Model cevabını ayrıştır
def parse_response(response_text):
    pattern = r"Sentence ([A-F]):\s*(\d),\s*(\d),\s*(\d),\s*(\d)"
    results = {}
    for match in re.finditer(pattern, response_text):
        label = match.group(1)
        scores = list(map(int, match.groups()[1:]))
        results[label] = {
            "word_usage": scores[0],
            "clarity": scores[1],
            "grammar": scores[2],
            "naturalness": scores[3]
        }
    return results

# Task dosyalarını sırayla işle
for filename in os.listdir(tasks_dir):
    if not filename.endswith(".json"):
        continue

    with open(os.path.join(tasks_dir, filename), "r", encoding="utf-8") as f:
        tasks = json.load(f)

    all_ratings = []

    for task in tqdm(tasks, desc=f"Processing {filename}"):
        prompt = task["prompt"]
        mapping = task["mapping"]
        task_id = task["task_id"]
        word = task["word"]
        level = task["level"]

        try:
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {"role": "user", "content": prompt}
                ],
                stream=False
            )

            reply = response.choices[0].message.content
            parsed = parse_response(reply)

            for label, rating in parsed.items():
                model = mapping[label]["model"]
                sentence = mapping[label]["sentence"]

                all_ratings.append({
                    "task_id": task_id,
                    "model": model,
                    "level": level,
                    "word": word,
                    "label": label,
                    "sentence": sentence,
                    "ratings": rating
                })

        except Exception as e:
            print(f"[HATA] Task {task_id}: {e}")
            continue

    # Sonuçları JSON olarak kaydet
    out_path = os.path.join(output_dir, f"ratings_{filename}")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(all_ratings, f, ensure_ascii=False, indent=2)
    print(f"✅ {len(all_ratings)} değerlendirme sonucu: {out_path}")


Processing tasks_B2.json:  10%|█▊                | 1/10 [00:09<01:24,  9.37s/it]

[HATA] Task B2_agency: 'A'


Processing tasks_B2.json:  20%|███▌              | 2/10 [00:17<01:10,  8.82s/it]

[HATA] Task B2_alter: 'A'


Processing tasks_B2.json:  30%|█████▍            | 3/10 [00:26<01:00,  8.68s/it]

[HATA] Task B2_heaven: 'A'


Processing tasks_B2.json:  40%|███████▏          | 4/10 [00:34<00:51,  8.63s/it]

[HATA] Task B2_hollow: 'A'


Processing tasks_B2.json:  50%|█████████         | 5/10 [00:42<00:42,  8.45s/it]

[HATA] Task B2_extend: 'A'


Processing tasks_B2.json:  60%|██████████▊       | 6/10 [00:50<00:32,  8.25s/it]

[HATA] Task B2_imply: 'A'


Processing tasks_B2.json:  70%|████████████▌     | 7/10 [00:59<00:24,  8.22s/it]

[HATA] Task B2_numerous: 'A'


Processing tasks_B2.json:  80%|██████████████▍   | 8/10 [01:07<00:16,  8.19s/it]

[HATA] Task B2_opponent: 'A'


Processing tasks_B2.json:  90%|████████████████▏ | 9/10 [01:15<00:08,  8.43s/it]

[HATA] Task B2_process: 'A'





KeyboardInterrupt: 

In [4]:
import os
import json
import re
from openai import OpenAI
from tqdm import tqdm

# DeepSeek API istemcisi
client = OpenAI(
    api_key="sk-9115f967efad41a09f30b761d5f36f53",  # Kendi anahtarını güvenli tut!
    base_url="https://api.deepseek.com"
)

# Klasör yolları
tasks_dir = os.path.abspath(os.path.join(os.getcwd(), "..", "data", "tasks"))
output_dir = os.path.abspath(os.path.join(os.getcwd(), "..", "data", "ratings"))
os.makedirs(output_dir, exist_ok=True)

# Model yanıtlarını ayrıştırmak için fonksiyon
def parse_response(response_text):
    pattern = r"Sentence ([A-F]):\s*(\d),\s*(\d),\s*(\d),\s*(\d)"
    results = {}
    for match in re.finditer(pattern, response_text):
        label = f"Sentence {match.group(1)}"
        scores = list(map(int, match.groups()[1:]))
        results[label] = {
            "word_usage": scores[0],
            "clarity": scores[1],
            "grammar": scores[2],
            "naturalness": scores[3]
        }

    expected_labels = {f"Sentence {ch}" for ch in "ABCDEF"}
    found_labels = set(results.keys())
    missing = expected_labels - found_labels
    if missing:
        print(f"[UYARI] Eksik puanlanan cümleler: {missing}")

    return results


# Task dosyalarını sırayla işle
for filename in os.listdir(tasks_dir):
    if not filename.endswith(".json"):
        continue

    with open(os.path.join(tasks_dir, filename), "r", encoding="utf-8") as f:
        tasks = json.load(f)

    # 🔎 Test için yalnızca ilk 5 görevi al
    tasks = tasks[:5]

    all_ratings = []

    for task in tqdm(tasks, desc=f"Processing {filename}"):
        prompt = task["prompt"]
        mapping = task["mapping"]
        task_id = task["task_id"]
        word = task["word"]
        level = task["level"]

        try:
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {"role": "user", "content": prompt}
                ],
                stream=False
            )

            reply = response.choices[0].message.content
            parsed = parse_response(reply)

            for label, rating in parsed.items():
                if label not in mapping:
                    print(f"[UYARI] Cümle etiketi bulunamadı: {label}")
                    continue

                model = mapping[label]["model"]
                sentence = mapping[label]["sentence"]

                all_ratings.append({
                    "task_id": task_id,
                    "model": model,
                    "level": level,
                    "word": word,
                    "label": label,
                    "sentence": sentence,
                    "ratings": rating
                })

        except Exception as e:
            print(f"[HATA] Task {task_id}: {e}")
            continue

    # 🔐 Sonuçları güvenli şekilde kaydet
    out_path = os.path.join(output_dir, f"ratings_TEST_{filename}")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(all_ratings, f, ensure_ascii=False, indent=2)
    print(f"✅ {len(all_ratings)} değerlendirme sonucu kaydedildi: {out_path}")


Processing tasks_B2.json: 100%|███████████████████| 5/5 [00:41<00:00,  8.34s/it]


✅ 30 değerlendirme sonucu kaydedildi: /home/user/Documents/Tez/Deneyler/LLM_Degerlendirme/data/ratings/ratings_TEST_tasks_B2.json


Processing tasks_C1.json:  40%|███████▌           | 2/5 [00:17<00:25,  8.65s/it]


KeyboardInterrupt: 