In [32]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import pandas as pd 
from transformers import AutoTokenizer, AutoModelForSequenceClassification

def classify(model, tokenizer, texts, device=None, threshold=3):
    """Label texts by thresholding the first output logit of ``model``.

    Args:
        model: Model called as ``model(input_ids, attention_mask=...)`` whose
            result exposes a ``logits`` tensor indexed as ``logits[:, 0]``.
        tokenizer: Callable that turns ``texts`` into a mapping holding
            ``"input_ids"`` and ``"attention_mask"`` tensors.
        texts: The text (or list of texts) handed to ``tokenizer``.
        device: Device the input tensors are moved to. ``None`` (default)
            uses the device of the model's first parameter, so the inputs
            always land where the model lives instead of on a fixed GPU index.
        threshold: Cut-off applied to the raw first logit (not a
            probability). Defaults to 3.

    Returns:
        A one-element list wrapping the per-text labels, ``[[label, ...]]``,
        where a label is 1 if the first logit is strictly greater than
        ``threshold`` and 0 otherwise. The extra nesting is kept on purpose:
        callers index the result as ``result[0][i]``.
    """
    if device is None:
        # Follow the model rather than assuming a particular GPU.
        device = next(model.parameters()).device

    # Inference only: switch off dropout and other train-time behaviour.
    model.eval()

    # Every input is padded/truncated to exactly 128 tokens.
    inputs = tokenizer(
        texts,
        truncation=True,
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        return_tensors="pt",
    )

    with torch.no_grad():
        outputs = model(
            inputs['input_ids'].to(device),
            attention_mask=inputs['attention_mask'].to(device),
        )

    # NOTE(review): label 1 is driven by the logit at index 0 -- confirm this
    # matches the label order the checkpoint was trained with.
    first_logits = outputs.logits[:, 0]
    flags = (first_logits > threshold).long().cpu().tolist()
    return [flags]

def datapoints(df, model, tokenizer):
    """Run ``classify`` on the "text" column of ``df``, one row at a time.

    Args:
        df: DataFrame with a "text" column.
        model: Passed through to ``classify``.
        tokenizer: Passed through to ``classify``.

    Returns:
        A list with one ``classify`` result per row that succeeded. Rows whose
        classification raises are reported and skipped, so the list can be
        shorter than ``df``; compare the lengths before assigning the result
        back to the frame.

    Raises:
        KeyError: If ``df`` has no "text" column.
    """
    labels = []
    for row_index, text in df["text"].items():
        try:
            labels.append(classify(model, tokenizer, text))
        except Exception as exc:
            # Best effort per row, but say which row failed and why. Catching
            # Exception (not a bare except) lets Ctrl-C still stop the loop.
            print(f"Didn't work (row {row_index!r}): {exc!r}")
    return labels
    
 
    
    


In [33]:
# Checkpoint directory that a later cell passes to
# AutoModelForSequenceClassification.from_pretrained (that cell also rebinds
# `model` to the loaded network).
# NOTE(review): absolute, machine-specific path.
model = (
    "/scratch/venia/socialgpt/venia_worker_vs_gpt/models/"
    "gpturk_size:622_intfloat/e5-base/checkpoint-780"
)

In [34]:
# Prefer the second GPU, as before, but degrade gracefully on hosts that do
# not have one instead of failing when the model is moved to the device.
if torch.cuda.device_count() > 1:
    device = "cuda:1"
elif torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Responses to classify; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/processed_responses.csv")



In [35]:
# datapoints() reads the "text" column, so expose "summary" under that name.
df = df.rename(columns={"summary": "text"})

In [36]:
# `model` is the checkpoint path the first time this cell runs and the loaded
# network afterwards. Only load while it is still a path, so re-running the
# cell does not hand a model object to from_pretrained.
if isinstance(model, str):
    model = AutoModelForSequenceClassification.from_pretrained(model).to(device)

# The tokenizer is loaded from the "intfloat/e5-base" identifier, not from the
# checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained("intfloat/e5-base")

# One entry per successfully classified row of df.
labels = datapoints(df, model, tokenizer)


In [37]:
# Human-readable names for the 0/1 labels produced by classify().
mapping = {
    0: "synthetic",
    1: "real",
}

In [38]:
# Each entry of `labels` is a nested [[label]] from classify(); unwrap it and
# translate the label to its name.
df["predicted"] = [mapping[row_result[0][0]] for row_result in labels]

In [39]:
# Display the frame, which now includes the "predicted" column.
df

Unnamed: 0,HITId,original_text,key_strokes,datetime,text,copied,predicted
0,38Z7YZ2SB3N12WW16H4JJFMLWAMQIO,Comparison of Weight-Loss Diets with Different...,Backspace Delete,"['2023-05-29T22:30:57.844Z', '2023-05-29T22:31...",Because carbs usually provide over half of cal...,False,synthetic
1,38Z7YZ2SB3N12WW16H4JJFMLWAMQIO,Comparison of Weight-Loss Diets with Different...,Control Control Control Control c c Control c ...,"['2023-05-29T20:58:29.931Z', '2023-05-29T20:58...",A study was conducted to compare the effective...,True,real
2,38Z7YZ2SB3N12WW16H4JJFMLWAMQIO,Comparison of Weight-Loss Diets with Different...,ArrowLeft Backspace ArrowLeft ArrowLeft ArrowL...,"['2023-05-30T02:49:46.401Z', '2023-05-30T02:49...",This two-year study found that diets varying i...,True,real
3,3SBX2M1TKD87UPXMLS2FPEC2RSM4QO,Effect of Screening Mammography on Breast-Canc...,Control c Control v Control a Delete Control v...,"['2023-05-29T20:23:32.104Z', '2023-05-29T20:23...",Effect of Screening Mammography on Breast-Canc...,True,real
4,3SBX2M1TKD87UPXMLS2FPEC2RSM4QO,Effect of Screening Mammography on Breast-Canc...,CapsLock A CapsLock s t u d y i n CapsLock N C...,"['2023-05-29T18:40:24.249Z', '2023-05-29T18:40...",A study in Norway found that screening mammogr...,False,real
5,3SBX2M1TKD87UPXMLS2FPEC2RSM4QO,Effect of Screening Mammography on Breast-Canc...,Enter,['2023-05-29T18:23:15.814Z'],The Norwegian breast-cancer screening program ...,False,synthetic
6,38DCH97KHHNXX7G7OZPJVG1L9PBQJ0,"First Results of Phase 3 Trial of RTS,S/AS01 M...",Backspace Backspace Backspace Backspace Backsp...,"['2023-05-29T18:24:22.677Z', '2023-05-29T18:24...",Exciting news from a study on a malaria vaccin...,False,synthetic
7,38DCH97KHHNXX7G7OZPJVG1L9PBQJ0,"First Results of Phase 3 Trial of RTS,S/AS01 M...",Delete Delete Control v t h e 1 c h i s u s e ...,"['2023-05-29T18:19:27.221Z', '2023-05-29T18:19...","A phase 3 study of the malaria vaccine RTS, S/...",True,real
8,38DCH97KHHNXX7G7OZPJVG1L9PBQJ0,"First Results of Phase 3 Trial of RTS,S/AS01 M...",Control v,"['2023-05-29T19:14:18.519Z', '2023-05-29T19:14...","A phase 3 trial of the RTS, S/AS01 malaria vac...",True,real
9,3VI0PC2ZAY5V7F8GQQCGXKE05M8XON,Azithromycin and the Risk of Cardiovascular De...,Control Control Control Control Control Contro...,"['2023-05-29T21:20:34.138Z', '2023-05-29T21:20...",Azithromycin is a broad-spectrum macrolide ant...,True,real
