In [1]:
import re
import os
import sys
import tqdm
import json
import pandas as pd
from langchain import PromptTemplate, FewShotPromptTemplate

In [None]:
# TRANSFORMS THE DATA FROM JSONL TO CSV WITH SRC, TGT, OUTPUT

def jsonl_to_csv(jsonl_path: str, csv_path: str) -> None:
    """Convert a JSONL file of prompt/label records into a src/tgt/output CSV.

    Every non-blank line of ``jsonl_path`` is parsed as a JSON object that
    must provide an ``"input"`` string and an ``"output"`` value. The source
    and target sentences are pulled out of ``"input"`` with a
    ``Source: ...`` line immediately followed by a ``Target: ...`` line;
    records whose input does not match are reported on stdout and skipped.

    Args:
        jsonl_path: Path of the JSONL file to read (decoded as UTF-8).
        csv_path: Path of the CSV file to write. Missing parent directories
            are created.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``"input"`` or ``"output"`` key.
    """
    with open(jsonl_path, "r", encoding="utf-8") as f:
        # Blank lines (typically a trailing newline at the end of the file)
        # are not valid JSON documents, so drop them before parsing.
        data = [json.loads(line) for line in f if line.strip()]

    # Without re.DOTALL each capture stops at the end of its own line.
    pattern_srctgt = re.compile(r"Source: (.*)\nTarget: (.*)")
    records = []
    for sample in tqdm.tqdm(data, total=len(data), desc="Processing jsonl"):
        match = pattern_srctgt.search(sample["input"])
        if not match:
            print(f"Skipping {sample['input']}")
            continue
        records.append({
            "src": match.group(1).strip(),
            "tgt": match.group(2).strip(),
            "output": sample["output"],
        })

    out_dir = os.path.dirname(csv_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Naming the columns explicitly keeps the header even when every record
    # was skipped, so the resulting CSV can always be read back.
    pd.DataFrame(records, columns=["src", "tgt", "output"]).to_csv(csv_path, index=False)
# Convert both splits (dev first, then train) from the shared source
# directory into CSV files under ./data.
data_path = "/fs/startiger0/nmoghe/data/llama/pilot/classification/exp1/ref-free"
for split_name in ("dev", "train"):
    jsonl_to_csv(f"{data_path}/{split_name}.jsonl", f"./data/{split_name}.csv")

In [4]:
# UTILITY TO SAMPLE FEW-SHOT EXAMPLES FROM THE TRAINING SET

# Load the training split and partition it by label so that few-shot
# examples can be drawn separately from each class.
train_df = pd.read_csv("./data/train.csv")
train_minor = train_df[train_df["output"] == "Minor"]
train_major = train_df[train_df["output"] == "Major"]
train_noerror = train_df[train_df["output"] == "No-error"]
# generate_examples() draws 2 rows per class without replacement, so a class
# with a single row would pass a "> 0" check and then fail on every sample.
assert min(len(train_minor), len(train_major), len(train_noerror)) >= 2, (
    "each of Minor / Major / No-error needs at least 2 training rows "
    "for few-shot sampling"
)
def generate_examples(n_per_class=2, random_state=None):
    """Draw a shuffled, class-balanced set of few-shot examples.

    Samples ``n_per_class`` rows without replacement from each of the
    module-level frames ``train_minor``, ``train_major`` and
    ``train_noerror``, then shuffles the combined rows.

    Args:
        n_per_class: Number of rows to draw from each class frame.
        random_state: Optional integer seed. When given, repeated calls with
            the same seed return the same examples in the same order; when
            ``None`` (the default) every call draws fresh random examples.

    Returns:
        A list of ``{"src", "tgt", "output"}`` dicts, one per sampled row.

    Raises:
        ValueError: If a class frame holds fewer than ``n_per_class`` rows.
    """
    pools = (train_minor, train_major, train_noerror)
    # One seed per draw plus one for the final shuffle; offsetting the base
    # seed keeps the individual draws from reusing the same random stream.
    if random_state is None:
        seeds = [None] * (len(pools) + 1)
    else:
        seeds = [random_state + offset for offset in range(len(pools) + 1)]

    sampled = pd.concat([
        pool.sample(n=n_per_class, replace=False, random_state=seed)
        for pool, seed in zip(pools, seeds)
    ])
    # Shuffle so the examples are not grouped by class in the prompt.
    sampled = sampled.sample(frac=1, random_state=seeds[-1])
    return sampled[["src", "tgt", "output"]].to_dict("records")

# Template that renders one labelled demonstration as Source / Target / Output lines.
example_prompt = PromptTemplate(
    template="Source: {src}\nTarget: {tgt}\nOutput: {output}",
    input_variables=["src", "tgt", "output"],
)



In [5]:
# Build one few-shot prompt per dev row; each prompt gets its own freshly
# sampled set of demonstrations followed by the row's unanswered query.
dev_df = pd.read_csv("./data/dev.csv")

data = []
for src_text, tgt_text, gold_label in zip(dev_df["src"], dev_df["tgt"], dev_df["output"]):
    few_shot_template = FewShotPromptTemplate(
        examples=generate_examples(),
        example_prompt=example_prompt,
        suffix="Source: {src}\nTarget: {tgt}\nOutput: ",
        input_variables=["src", "tgt"],
    )
    prompt = few_shot_template.format(src=src_text, tgt=tgt_text)
    # "me" is written empty here; the evaluation step reads it back as the prediction.
    data.append({"prompt": prompt, "output": gold_label, "me": ""})

# Save the prompts alongside their gold labels as CSV.
df = pd.DataFrame(data)
df.to_csv("./data/prompts_fewshot.csv", index=False)


In [7]:
# EVALUATION
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

df = pd.read_csv("./data/fewshot_outputs.csv")

# "output" is the gold label and "me" the prediction being scored.
# Empty "me" cells are loaded as NaN, which would mix floats with strings
# and make the metric functions fail, and stray whitespace would turn a
# correct label into a mismatch, so normalise the predictions first.
valid_labels = ["Minor", "Major", "No-error"]
y_true = df["output"]
y_pred = df["me"].fillna("").astype(str).str.strip()

# Predictions outside the label set are kept (and therefore scored as
# errors) but reported so that they do not go unnoticed.
n_invalid = int((~y_pred.isin(valid_labels)).sum())
if n_invalid:
    print(f"Warning: {n_invalid} of {len(df)} predictions are not one of {valid_labels}")

accuracy = accuracy_score(y_true, y_pred)
# Precision and recall are not reported: for this multi-class task they
# would need an explicit `average=` argument, as f1_score uses below.
f1 = f1_score(y_true, y_pred, average='weighted')

# Print the metrics
print("Accuracy:", accuracy)
print("F1 score:", f1)

Accuracy: 0.3201465201465201
F1 score: 0.32491751609229963
