In [2]:
import os
from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel

load_dotenv(dotenv_path="../.env")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
client = OpenAI(
    api_key = OPENAI_API_KEY,
)

In [7]:
import pandas as pd
df = pd.read_csv("../data/COVID-Fact/test1.tsv", sep="\t")
df

Unnamed: 0,index,sentence1,Sentence2,label
0,0,While each surrogate virus exhibited subtle di...,Measuring sars-cov-2 neutralizing antibody act...,entailment
1,1,While each surrogate virus exhibited subtle di...,Measuring chs-cov-2 neutralizing antibody acti...,not_entailment
2,2,While each surrogate virus exhibited subtle di...,Measuring aces-cov-2 neutralizing antibody act...,not_entailment
3,3,While each surrogate virus exhibited subtle di...,Measuring gps-cov-2 neutralizing antibody acti...,not_entailment
4,4,IgG titers in SARS-CoV-infected healthcare wor...,Long-term persistence of igg antibodies in sar...,entailment


In [None]:
from pydantic import BaseModel

class FactLabel(BaseModel):
    label: str

results = []

for _, row in df.iterrows():
    text = row["Sentence2"]

    # Gọi API model
    response = client.responses.parse(
        model="gpt-4o-mini",
        input=[
            {
                "role": "system",
                "content": (
                    "You are an information verification system. "
                    "Given a statement, determine whether it is SUPPORTED or REFUTED "
                    "based on scientific evidence and general knowledge. "
                    "Output exactly one word: 'SUPPORTED' or 'REFUTED'."
                ),
            },
            {
                "role": "user",
                "content": text,
            },
        ],
        text_format=FactLabel,
    )

    predicted_label = response.output_parsed.label
    results.append(predicted_label)

    # Ghi kết quả ngay lập tức vào file
    row_result = row.to_dict()
    row_result["predicted_label"] = predicted_label
    pd.DataFrame([row_result]).to_csv(
        "results_evaluation_GPT.tsv", sep="\t", mode="a", index=False, header=not bool(_)
    )

print("Done! Predictions saved to results_evaluation_GPT.tsv")


Done! Predictions saved to results.tsv


In [None]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix

# Đọc file kết quả đã lưu
df = pd.read_csv("results_evaluation_GPT.tsv", sep="\t")

# Map label test sang format dự đoán
label_map = {
    "entailment": "SUPPORTED",
    "not_entailment": "REFUTED"
}
df["true_label_mapped"] = df["label"].map(label_map)

# Lấy danh sách để đánh giá
y_true = df["true_label_mapped"]
y_pred = df["predicted_label"]

# Bảng đánh giá chi tiết
report = classification_report(y_true, y_pred, labels=["SUPPORTED", "REFUTED"])
print(report)


              precision    recall  f1-score   support

   SUPPORTED       0.40      1.00      0.57         2
     REFUTED       0.00      0.00      0.00         3

    accuracy                           0.40         5
   macro avg       0.20      0.50      0.29         5
weighted avg       0.16      0.40      0.23         5


Confusion Matrix:
                Pred_SUPPORTED  Pred_REFUTED
True_SUPPORTED               2             0
True_REFUTED                 3             0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
