In [1]:
import pandas as pd
from features import categorize_population, encode_categories
from ast import literal_eval
import re
import torch

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Single source of truth for the Hugging Face checkpoint id, so the tokenizer
# and the classifier cannot accidentally be loaded from different repositories.
MODEL_CHECKPOINT = "astrosbd/fake-reviews-distilbert"

# Both objects are used as notebook-level globals by the inference cells below.
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_CHECKPOINT)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read only the `reviews` column, turn each cell from its string form back into
# a Python object with `literal_eval`, then expose the row position as `uuid`.
phila = (
    pd.read_csv('../data/phila/test_features.csv', usecols=['reviews'])
    .assign(reviews=lambda frame: frame['reviews'].apply(literal_eval))
    .reset_index()
    .rename(columns={'index': 'uuid'})
)

In [246]:
# Scratch check: run the classifier on the reviews stored under label 13.
# Padding/truncation make the token lists rectangular so they fit in a tensor.
test = tokenizer(phila['reviews'][13], truncation=True, padding=True)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    logits = model(
        input_ids=torch.tensor(test['input_ids']),
        attention_mask=torch.tensor(test['attention_mask']),
    ).logits

In [258]:
# Number of reviews whose argmax class is 0: batch size minus the sum of the
# predicted class indices. The batch size comes from `logits` itself instead of
# a hard-coded 42, so the check stays correct if a different row is scored.
# NOTE(review): assumes a two-class head (indices 0 and 1) - confirm.
logits.shape[0] - torch.argmax(logits, dim=1).sum()


tensor(3)

In [4]:
def clean_reviews(reviews):
    """Return a new list holding a cleaned copy of every review string.

    Each review is stripped of surrounding whitespace. After that, every
    newline is deleted together with the shortest run of non-newline
    characters following it that ends right before a whitespace character.
    A newline with no whitespace anywhere after it is left in place.

    TODO: the newline handling is a heuristic and may need a better approach.
    """
    return [re.sub(r'\n.*?(?=\s)', '', text.strip()) for text in reviews]

def check_fake(row):
    """Classify every review in ``row`` and count predicted fake/real ones.

    Parameters
    ----------
    row : mapping
        Must provide ``row['reviews']``, a sized collection of review strings
        (a DataFrame row when used with ``DataFrame.apply(axis=1)``).

    Returns
    -------
    tuple
        ``(reviews, predicted, fake_count, real_count, num_reviews)`` where
        ``predicted`` is a 1-D tensor with the argmax class index of each
        review, ``real_count`` is the sum of those indices, and
        ``fake_count`` is ``num_reviews - real_count`` (both plain ints).

    Notes
    -----
    The counting assumes a two-class head in which index 1 means "real" and
    index 0 means "fake" - TODO confirm against the checkpoint's label map.
    Relies on the notebook-level ``tokenizer`` and ``model`` globals.
    """
    reviews = row['reviews']
    num_reviews = len(reviews)

    # Nothing to classify: skip the tokenizer/model round trip, which has no
    # rectangular batch to build from an empty list, and report zero counts.
    if num_reviews == 0:
        return reviews, torch.empty(0, dtype=torch.long), 0, 0, 0

    # Pad/truncate so every review yields equally long token lists.
    tokens = tokenizer(reviews, padding=True, truncation=True)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(
            torch.tensor(tokens['input_ids']),
            attention_mask=torch.tensor(tokens['attention_mask']),
        ).logits

    predicted = torch.argmax(logits, dim=1)

    # Sum the class indices once and derive the complementary count from it.
    real_count = predicted.sum().item()
    fake_count = num_reviews - real_count

    return reviews, predicted, fake_count, real_count, num_reviews

def count_reviews(reviews):
    """Return how many items the given review collection holds."""
    total = len(reviews)
    return total

In [5]:
# Clean the review text first so the classifier sees the normalised strings.
phila['reviews'] = phila['reviews'].apply(clean_reviews)

# Score row by row. `check_fake` returns a 5-tuple that `result_type='expand'`
# spreads over positional columns 0..4, which are then given readable names.
# The resulting frame holds only these five columns (no `uuid`); the row index
# is what still identifies each row.
phila = (
    phila
    .apply(check_fake, axis=1, result_type='expand')
    .rename(columns={0: "reviews", 1: "scores", 2: "num_fake", 3: "num_real", 4: "num_reviews"})
)

In [6]:
# Share of all reviews predicted 'fake' across the whole frame
# (a plain ratio, not multiplied by 100).
phila['num_fake'].sum() / phila['num_reviews'].sum()

0.03112978635260697

In [8]:
# Rows that contain at least one review predicted 'fake'.
phila.loc[phila['num_fake'].gt(0)]

Unnamed: 0,reviews,scores,num_fake,num_real,num_reviews
2,[Six of us went here for an early Saturday lun...,"[tensor(1), tensor(1), tensor(1), tensor(1), t...",1,6,7
13,[If you are looking for a healthy breakfast an...,"[tensor(1), tensor(1), tensor(0), tensor(1), t...",3,39,42
37,[I'd give Agave a 2.5 bc it was really a littl...,"[tensor(1), tensor(1), tensor(1), tensor(1), t...",1,21,22
41,[Awesome creamy crab soup. Overall the food wa...,"[tensor(1), tensor(1), tensor(1), tensor(0), t...",1,6,7
48,[DONT EAT THERE !!! Food taste awful and over ...,"[tensor(1), tensor(0), tensor(1), tensor(1), t...",1,13,14
...,...,...,...,...,...
2129,[Amazing food and amazing drinks!! Highly reco...,"[tensor(1), tensor(1), tensor(1), tensor(1), t...",1,57,58
2132,[Amazing food and amazing drinks!! Highly reco...,"[tensor(1), tensor(1), tensor(1), tensor(1), t...",1,60,61
2134,[Amazing food and amazing drinks!! Highly reco...,"[tensor(1), tensor(1), tensor(1), tensor(1), t...",1,60,61
2136,[Amazing food and amazing drinks!! Highly reco...,"[tensor(1), tensor(1), tensor(1), tensor(1), t...",1,60,61
