In [9]:
import json
import pandas as pd
from tqdm import tqdm
import subprocess
import openai

# === KONFIG ===
PROJECT_ID = "norwegian-language-bank"
LOCATION = "us-central1"
MODEL_ID = "meta/llama3-405b-instruct-maas"

# === HENT TOKEN ===
def get_access_token():
    result = subprocess.run(
        ["gcloud", "auth", "print-access-token"],
        stdout=subprocess.PIPE,
        text=True,
        check=True
    )
    return result.stdout.strip()

# === INITIALISER KLIENT ===
def get_client(token):
    return openai.OpenAI(
        base_url=f"https://{LOCATION}-aiplatform.googleapis.com/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/openapi/chat/completions?",
        api_key=token
    )

# === PROMPT ===
PROMPT = """You are a linguist evaluating sentences in {lang}.
Each sentence should be rated solely for its grammatical acceptability.
Use the following 5-point scale:

1 – completely ungrammatical
2 – mostly ungrammatical
3 – marginal
4 – mostly grammatical
5 – fully grammatical

Return a JSON object with the format:
{{"score": <number>, "explanation": "<short explanation>"}}

Only provide one evaluation per sentence.

Here is the sentence:
{sentence}
"""

# === KJØRING ===
def process_chunk(chunk_text, lang):
    token = get_access_token()
    client = get_client(token)

    response = client.chat.completions.create(
        model=MODEL_ID,
        max_tokens=3000,
        messages=[{
            "role": "user",
            "content": PROMPT.format(lang=lang, sentence=chunk_text)
        }]
    )
    return response



In [15]:
import time

In [3]:
file_path = "../data/merged_test_set.xlsx"
df = pd.read_excel(file_path)
N=5

In [16]:
records=[]
for i, row in df.iloc[:N].iterrows():
    sent = row["sentence"]
    lang = "Norwegian" if row["lang"] == "no" else "English"
    raw_output = process_chunk(sent, lang)
    records.append({
        "example_id": row["id"],
        "sentence": sent,
        "lang": lang,
        "raw_output": raw_output.choices[0].message.content

    })
    time.sleep(0.5)

out_df = pd.DataFrame(records)
out_df.to_csv(output_file, index=False)
print("Done.")


NameError: name 'output_file' is not defined

In [18]:
out_df.style

Unnamed: 0,example_id,sentence,lang,raw_output
0,1a,Hvilken artikkel kastet du uten å lese,Norwegian,"ChatCompletion(id='2A11aNWmILyVmecP44S8YA', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{""score"": 5, ""explanation"": ""The sentence is fully grammatical, with correct word order and use of the verb \'kastet\' in the past tense.""}', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1752501720, model='meta/llama3-405b-instruct-maas', object='chat.completion', service_tier=None, system_fingerprint='', usage=CompletionUsage(completion_tokens=39, prompt_tokens=148, total_tokens=187, completion_tokens_details=None, prompt_tokens_details=None))"
1,1b,Hvilken artikkel kastet du uten å lese den,Norwegian,"ChatCompletion(id='3A11aIqpCLWrmecPr9-EiAg', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{""score"": 5, ""explanation"": ""The sentence is fully grammatical, with correct use of the interrogative pronoun \'hvilken\', verb conjugation, and sentence structure.""}', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1752501724, model='meta/llama3-405b-instruct-maas', object='chat.completion', service_tier=None, system_fingerprint='', usage=CompletionUsage(completion_tokens=42, prompt_tokens=149, total_tokens=191, completion_tokens_details=None, prompt_tokens_details=None))"
2,1c,Hvilken artikkel kastet du den uten å lese,Norwegian,"ChatCompletion(id='3w11aL_qL6eam9IPzujiuAQ', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{""score"": 4, ""explanation"": ""The sentence is mostly grammatical, but it sounds a bit unnatural without a question mark at the end, and \'den\' should probably be \'denne\' or \'den artikkelen\' for clarity. A more natural way to phrase it would be \'Hvilken artikkel kastet du uten å lese?\' or \'Hvilken artikkel kastet du uten å lese den?\'.""}', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1752501727, model='meta/llama3-405b-instruct-maas', object='chat.completion', service_tier=None, system_fingerprint='', usage=CompletionUsage(completion_tokens=99, prompt_tokens=149, total_tokens=248, completion_tokens_details=None, prompt_tokens_details=None))"
3,1d,Hvilken artikkel kastet du den uten å lese den,Norwegian,"ChatCompletion(id='5Q11aPn3EYO5qsMPkMKk6AE', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{""score"": 5, ""explanation"": ""The sentence is fully grammatical, with correct word order and use of pronouns and prepositions.""}', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1752501733, model='meta/llama3-405b-instruct-maas', object='chat.completion', service_tier=None, system_fingerprint='', usage=CompletionUsage(completion_tokens=33, prompt_tokens=150, total_tokens=183, completion_tokens_details=None, prompt_tokens_details=None))"
4,1a,Which article did you throw away without reading,English,"ChatCompletion(id='6A11aNqHJtnYhMIPoKKpSA', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{""score"": 5, ""explanation"": ""The sentence is fully grammatical, with proper use of a wh-question and a subordinate clause.""}', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1752501736, model='meta/llama3-405b-instruct-maas', object='chat.completion', service_tier=None, system_fingerprint='', usage=CompletionUsage(completion_tokens=32, prompt_tokens=142, total_tokens=174, completion_tokens_details=None, prompt_tokens_details=None))"
