In [1]:
import numpy as np
import pandas as pd
import torch
from tqdm.auto import tqdm
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Model identifier passed to both `from_pretrained` calls below.
checkpoint = "SravaniNirati/bert_fake_review_detection"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
model = model.to(device)

In [6]:
# Smoke test: push one hand-written review through the tokenizer and the model
# to confirm they work together on the selected device.
inputs = tokenizer("This is super great product. Must Buy.", return_tensors="pt").to(device)
with torch.no_grad():  # inference only, so do not build an autograd graph
    output = model(**inputs)

# Class-id -> label-name mapping stored in the model config (shown as the cell output).
model.config.id2label

{0: 'LABEL_0', 1: 'LABEL_1'}

In [7]:
# Load the labelled review dataset.
df = pd.read_csv("./data/fake_reviews_dataset.csv")
# Preview a few random rows; the fixed seed keeps the preview identical across re-runs.
df.sample(7, random_state=42)

Unnamed: 0,category,rating,label,text_
24091,Kindle_Store_5,5.0,OR,This book is great. A guy turns his life aroun...
19700,Pet_Supplies_5,5.0,OR,Sent to Hurricane Relief site to Best Friends....
14111,Movies_and_TV_5,4.0,OR,"Seriously folks, lighten up a little. it's a 3..."
31172,Books_5,5.0,OR,A guided travel brochure through ancient and R...
27565,Kindle_Store_5,5.0,CG,I must have pestered the author to explain the...
1786,Home_and_Kitchen_5,3.0,CG,looked great to uniform upholstery and the fin...
21217,Pet_Supplies_5,5.0,OR,My two month old chihuahua puppy really likes ...


In [8]:
# Count the missing values in each column before using the data.
missing_per_column = df.isna().sum()
missing_per_column

category    0
rating      0
label       0
text_       0
dtype: int64

In [9]:
# Encode the target column as integers: "CG" -> 1, every other value -> 0.
# The dtype guard makes the cell safe to re-run: once the column holds integers,
# comparing against "CG" again would silently turn every label into 0.
if not pd.api.types.is_integer_dtype(df['label']):
    df['label'] = df['label'].eq("CG").astype("int64")

In [10]:
# Display the frame to check the encoded `label` column
# (the rendered output shows only the first and last rows).
df

Unnamed: 0,category,rating,label,text_
0,Home_and_Kitchen_5,5.0,1,"Love this! Well made, sturdy, and very comfor..."
1,Home_and_Kitchen_5,5.0,1,"love it, a great upgrade from the original. I..."
2,Home_and_Kitchen_5,5.0,1,This pillow saved my back. I love the look and...
3,Home_and_Kitchen_5,1.0,1,"Missing information on how to use it, but it i..."
4,Home_and_Kitchen_5,5.0,1,Very nice set. Good quality. We have had the s...
...,...,...,...,...
40427,Clothing_Shoes_and_Jewelry_5,4.0,0,I had read some reviews saying that this bra r...
40428,Clothing_Shoes_and_Jewelry_5,5.0,1,I wasn't sure exactly what it would be. It is ...
40429,Clothing_Shoes_and_Jewelry_5,2.0,0,"You can wear the hood by itself, wear it with ..."
40430,Clothing_Shoes_and_Jewelry_5,1.0,1,I liked nothing about this dress. The only rea...


In [None]:
# Predict a class id for every review and store it in `label_predicted`.
# Reviews are tokenized and scored in batches for efficiency.
batch_size = 64
preds = []

model.eval()  # inference only: make sure dropout is disabled

for i in tqdm(range(0, len(df), batch_size), desc="Predicting batches"):
    batch_texts = df['text_'].iloc[i:i + batch_size].tolist()
    inputs = tokenizer(batch_texts, padding=True, truncation=True, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    # argmax over the class dimension gives one class id per review;
    # .tolist() copies the result off the device as plain Python ints.
    batch_preds = torch.argmax(outputs.logits, dim=1).tolist()
    preds.extend(batch_preds)

# Store the model's class ids unchanged. Earlier this cell inverted them (0 <-> 1)
# and a separate follow-up cell inverted them back; the net result was the raw
# class ids, but re-running the follow-up cell flipped every prediction again.
# Writing the ids once here gives the same values and is safe to re-run.
df['label_predicted'] = preds

In [17]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Compare the encoded ground truth (1 = "CG") with the model's predictions.
y_true, y_pred = df['label'], df['label_predicted']

# Evaluate every scorer on the same (y_true, y_pred) pair, in reporting order.
accuracy, precision, recall, f1 = (
    scorer(y_true, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")

Accuracy:  0.9779
Precision: 0.9618
Recall:    0.9953
F1 Score:  0.9783
