In [None]:
import os

import numpy as np
import pandas as pd
from openai import OpenAI
from sentence_transformers import SentenceTransformer
# Sentence-transformers encoder used for the BERT baseline.
bert_model = SentenceTransformer('bert-base-nli-mean-tokens')

data = pd.read_csv('../Datasets/all-claims.csv')
# NOTE(review): 'augmented_headliens.csv' looks like a typo for "headlines" —
# confirm against the file name on disk before changing it.
headlineData = pd.read_csv('augmented_headliens.csv')
# The claims frame gets a 'headline' column that simply mirrors its 'text'
# column, so both frames expose a 'headline' column when stacked.
data['headline'] = data['text']
# ignore_index=True gives the stacked frame a unique 0..n-1 index. Without it
# both inputs keep their own labels, so the index contains duplicates and any
# label-based lookup (e.g. series[int_array]) returns the wrong rows.
mergedData = pd.concat([data, headlineData], ignore_index=True)

In [26]:
from sklearn.model_selection import GroupKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def test(x_encode, df, n_splits=10):
    """Run grouped k-fold cross-validation with a logistic-regression classifier.

    Folds are built with ``GroupKFold`` using ``df['text']`` as the group key,
    so rows sharing the same text value never appear on both sides of a split.

    Parameters
    ----------
    x_encode : array-like
        Feature matrix with one row per row of ``df``; it must support
        indexing with an integer index array (e.g. a NumPy array).
    df : pandas.DataFrame
        Frame providing the ``'rating'`` (target) and ``'text'`` (group)
        columns, row-aligned with ``x_encode``.
    n_splits : int, default 10
        Number of folds passed to ``GroupKFold``.

    Returns
    -------
    tuple[list, list]
        ``(y_acc, y_preds)``: the true labels and the predicted labels of
        every held-out row, concatenated in fold order.
    """
    X = x_encode
    # Work on plain arrays: GroupKFold yields *positional* indices, whereas
    # indexing a Series with an integer array is a *label* lookup and picks
    # the wrong rows whenever the frame's index is not a unique 0..n-1 range.
    y = df['rating'].to_numpy()
    groups = df['text'].to_numpy()  # Assuming 'text' is the category you want to group by
    y_acc = []
    y_preds = []

    gkf = GroupKFold(n_splits=n_splits)

    for train_index, test_index in gkf.split(X, y, groups=groups):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        clf = LogisticRegression(random_state=0, max_iter=1000).fit(X_train, y_train)
        y_preds.extend(clf.predict(X_test))
        y_acc.extend(y_test)

    return y_acc, y_preds

        
       

In [19]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
def get_scores(y_acc, y_preds):
    """Print accuracy, weighted F1/precision/recall and the confusion matrix.

    ``y_acc`` is passed as the first argument and ``y_preds`` as the second
    argument of every metric function. Nothing is returned.
    """
    weighted = {'average': 'weighted'}
    # (label, metric function, extra keyword arguments), in print order.
    report = (
        ('Accuracy: ', accuracy_score, {}),
        ('F1 Score: ', f1_score, weighted),
        ('Precision: ', precision_score, weighted),
        ('Recall: ', recall_score, weighted),
        ('Confusion Matrix: ', confusion_matrix, {}),
    )
    for label, metric, extra in report:
        print(label, metric(y_acc, y_preds, **extra))

In [None]:
# BERT baseline. Features and labels must come from the same frame: encoding
# mergedData while passing `data` as the label frame gives the classifier a
# different number of rows for X and y.
# NOTE(review): 'headline' is encoded here to match the OpenAI-embedding cell;
# confirm that this (rather than 'text') is the intended input column.
# .tolist() hands the encoder a plain list of strings instead of a Series.
bert_embeddings = bert_model.encode(mergedData['headline'].tolist())
y_acc, y_preds = test(bert_embeddings, mergedData)
get_scores(y_acc, y_preds)


In [56]:
# Read the key from the environment so a real secret never has to be pasted
# into (and committed with) the notebook. The placeholder is kept as the
# fallback so behaviour is unchanged when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)

def chat(system_msg, user_msg, model="gpt-4o-mini"):
    """Send a system prompt plus one user prompt and return the reply text.

    Parameters
    ----------
    system_msg : str
        Content of the system message.
    user_msg : str
        Content of the user message.
    model : str, default "gpt-4o-mini"
        Model name forwarded to the chat completions endpoint.

    Returns
    -------
    The ``content`` of the first choice's message in the response.
    """
    msgs = [
        {"role": "system", "content": system_msg},
        # The prompt has to be sent with the "user" role. Tagging it as
        # "assistant" presents it to the model as its own previous reply
        # rather than as the request to answer.
        {"role": "user", "content": user_msg},
    ]
    response = client.chat.completions.create(model=model, messages=msgs)
    return response.choices[0].message.content

In [None]:
def get_embedding(text, model="text-embedding-3-small"):
    """Request an embedding for ``text`` from the OpenAI embeddings endpoint.

    Newlines in ``text`` are replaced by spaces before the request is made.
    The text is sent as a single-item input list and the ``embedding`` of the
    first item in the response data is returned.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding

In [None]:
# One embeddings request per headline (slow and billed per call).
mergedData['openAI-large'] = mergedData['headline'].apply(lambda x: get_embedding(x, model='text-embedding-3-large'))
mergedData['openAI-large-clean'] = mergedData['openAI-large'].apply(lambda x: np.array(x))
# Stack the per-row vectors into one 2-D feature matrix. The stack must be
# built from the embedding column; stacking the `data` DataFrame instead
# discards the embeddings entirely.
embedding = np.vstack(mergedData['openAI-large-clean'].to_numpy())

In [None]:
# Grouped cross-validation on the OpenAI embeddings, then print the metrics.
y_acc, y_preds = test(x_encode=embedding, df=mergedData)
get_scores(y_acc=y_acc, y_preds=y_preds)

In [59]:
def label_text(text):
    """Ask the chat model to label ``text`` and return its raw reply.

    ``text`` is sent as the user message together with a fixed fact-checker
    system prompt that requests 1 for a true claim and 0 for a false one.
    The reply from ``chat`` is returned unchanged.
    """
    return chat(
        "You are a fact-checker. For each of the following, return 1 if the claim is true, and 0 if the claim is false. Do not return anything else.",
        text,
    )

In [62]:
# Zero-shot labelling: one chat request per headline. The raw replies are kept
# under their own name so the parsing step can be re-run without new requests.
raw_responses = mergedData['headline'].apply(label_text)
# Strip surrounding whitespace before comparing: a reply such as "1\n" or " 1"
# would otherwise fail the exact match and be silently counted as 0.
# str() keeps non-string replies (e.g. None) mapping to 0.
pred_y = raw_responses.apply(lambda x: 1 if str(x).strip() == '1' else 0)
get_scores(mergedData['rating'], pred_y)