In [9]:
import json
import pandas as pd
from tqdm import tqdm
import subprocess
import openai

# === CONFIG ===
# Google Cloud project; interpolated into the path of the client base URL.
PROJECT_ID = "norwegian-language-bank"
# Region; used both as the hostname prefix and inside the path of the base URL.
LOCATION = "us-central1"
# Model identifier sent as `model=` on every chat-completion request.
MODEL_ID = "meta/llama3-405b-instruct-maas"

# === FETCH TOKEN ===
def get_access_token():
    """Return an access token obtained from the local ``gcloud`` CLI.

    Runs ``gcloud auth print-access-token`` and returns whatever it wrote to
    stdout, with leading/trailing whitespace removed.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status (``check=True``).
    """
    command = ["gcloud", "auth", "print-access-token"]
    completed = subprocess.run(command, stdout=subprocess.PIPE, text=True, check=True)
    token = completed.stdout.strip()
    return token

# === INITIALIZE CLIENT ===
def get_client(token):
    """Create an ``openai.OpenAI`` client aimed at the project's aiplatform endpoint.

    Args:
        token: access token, passed to the client as its ``api_key``.

    Returns:
        The configured ``openai.OpenAI`` instance.
    """
    # NOTE(review): the URL already ends in `chat/completions?`; presumably the
    # trailing `?` turns whatever path the client appends into a query string.
    # Confirm against the openai client's URL handling before changing it.
    endpoint = f"https://{LOCATION}-aiplatform.googleapis.com/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/openapi/chat/completions?"
    return openai.OpenAI(base_url=endpoint, api_key=token)

# === PROMPT ===
# Prompt template, filled with str.format using the `lang` and `sentence`
# fields. The doubled braces around the JSON example are format escapes and
# come out as single braces in the rendered prompt.
PROMPT = """You are a linguist evaluating sentences in {lang}.
Each sentence should be rated solely for its grammatical acceptability.
Use the following 5-point scale:

1 – completely ungrammatical
2 – mostly ungrammatical
3 – marginal
4 – mostly grammatical
5 – fully grammatical

Return a JSON object with the format:
{{"score": <number>, "explanation": "<short explanation>"}}

Only provide one evaluation per sentence.

Here is the sentence:
{sentence}
"""

# === RUN ===
def process_chunk(chunk_text, lang):
    """Ask the model to rate one sentence and return the raw response.

    A new access token is fetched and a new client is built on every call.

    Args:
        chunk_text: the sentence inserted into the prompt's ``{sentence}`` slot.
        lang: language name inserted into the prompt's ``{lang}`` slot.

    Returns:
        The object returned by ``client.chat.completions.create`` (not parsed).
    """
    client = get_client(get_access_token())
    user_message = {
        "role": "user",
        "content": PROMPT.format(lang=lang, sentence=chunk_text),
    }
    return client.chat.completions.create(
        model=MODEL_ID,
        max_tokens=3000,
        messages=[user_message],
    )



In [15]:
import time

In [31]:
# Input workbook holding the sentences, and the CSV the scored rows are written to.
file_path = "../data/data_for_processing.xlsx"
output_file = "../results/llama_nb_friday.csv"

# Load the workbook; N is the number of leading rows handed to the scoring loop.
df = pd.read_excel(file_path)
N = df.shape[0]

In [32]:
import json

In [47]:
# Score the first N rows of `df` with the model, one request per sentence.
# A row becomes a record only when the request succeeds AND the reply parses
# as JSON with "score" and "explanation"; every other row is logged in
# `failures` and printed, so nothing disappears silently.
records = []
failures = []  # one dict per row without a record: example_id, stage, error
for i, row in df.iloc[:N].iterrows():
    # Fall back to the frame index if the "Unnamed: 0" column is absent.
    example_id = row.get("Unnamed: 0", i)
    try:
        sent = row["sentence"]
        # Codes containing "-n-" are rated as Norwegian, everything else as English.
        lang = "Norwegian" if "-n-" in row["code"] else "English"
        raw_output = process_chunk(sent, lang)
        content = raw_output.choices[0].message.content
    except Exception as exc:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt
        # (kernel interrupt) still stops the run instead of being swallowed.
        failures.append({"example_id": example_id, "stage": "request", "error": repr(exc)})
        print(f"[request failed] {example_id}: {exc!r}")
        continue

    try:
        parsed = json.loads(content)
        record = {
            "example_id": example_id,
            "sentence": sent,
            "code": row['code'],
            "keyword": row['keyword'],
            "lang": lang,
            'score': parsed["score"],
            # Column name kept for compatibility; it holds the model's explanation.
            "raw_output": parsed['explanation']
        }
    except (TypeError, ValueError, KeyError) as exc:
        # ValueError covers json.JSONDecodeError; TypeError covers a missing
        # or non-object reply; KeyError covers a missing field or column.
        failures.append({"example_id": example_id, "stage": "parse", "error": repr(exc)})
        print(example_id, content)
    else:
        records.append(record)
    # Pause between requests (only reached when the request itself succeeded).
    time.sleep(0.5)

out_df = pd.DataFrame(records)
out_df.to_csv(output_file, index=False)
if failures:
    print(f"{len(failures)} of {len(records) + len(failures)} rows produced no record; inspect `failures`.")
print("Done.")


Done.


In [48]:
# Show the scored results table via the DataFrame's Styler (last expression of the cell).
out_df.style

Unnamed: 0,example_id,sentence,code,keyword,lang,score,raw_output
0,0,You donated money to UNICEF because you believed in them.,lp-e-b,organization,English,5,"The sentence is fully grammatical, with correct subject-verb agreement, pronoun usage, and clause structure."
1,1,Which organization did you donate money to because you believed in?,lp-e-0,organization,English,4,"The sentence is mostly grammatical, but the phrase 'because you believed in' feels slightly incomplete and might be improved with 'because you believed in it' or 'because you believed in their cause' for full clarity and grammaticality."
2,2,Which organization did you donate money to because you believed in it?,lp-e-2,organization,English,5,"The sentence is fully grammatical, with correct use of a subordinate clause and proper word order."
3,3,Which organization did you donate money to it because you believed in?,lp-e-1,organization,English,2,"The sentence is mostly ungrammatical due to the unnecessary pronoun 'it', which creates a redundancy. A grammatically correct alternative would be 'Which organization did you donate money to because you believed in it?' or 'Which organization did you donate money to because you believed in its cause?'"
4,4,Which organization did you donate money to it because you believed in it?,lp-e-3,organization,English,2,"The sentence is mostly ungrammatical due to the redundant pronoun 'it' after 'to', which is already implied by the preposition, and the unclear connection between the two instances of 'it'."
5,6,Hvilken organisasjon donerte du penger til fordi du trodde på den?,lp-n-2,organization,Norwegian,5,"The sentence is fully grammatical, with correct use of subordinate clause 'fordi du trodde på den' and interrogative pronoun 'Hvilken'."
6,7,Hvilken organisasjon donerte du penger til den fordi du trodde på?,lp-n-1,organization,Norwegian,5,"The sentence is fully grammatical, with correct word order and use of subordinate clause 'fordi du trodde på den'."
7,8,Hvilken organisasjon donerte du penger til den fordi du trodde på den?,lp-n-3,organization,Norwegian,5,"The sentence is fully grammatical, with correct use of subordinate clauses and word order."
8,9,Hvilken organisasjon donerte du penger til fordi du trodde på?,lp-n-0,organization,Norwegian,5,"The sentence is fully grammatical, with correct use of the interrogative pronoun 'hvilken', verb 'donerte', and subordinate clause 'fordi du trodde på'."
9,10,Du donerte penger til Frelsesarmeen fordi du trodde på den.,lp-n-b,organization,Norwegian,5,"The sentence is fully grammatical, with correct verb conjugation, word order, and use of prepositions and subordinate clause structure."
