In [None]:
import pandas as pd
import random
from rouge import Rouge
rouge = Rouge()
import numpy as np
import os
import re
import math
from transformers import AutoModelForMaskedLM, AutoModelForCausalLM, AutoTokenizer
import torch

In [None]:
# Load the tokenizer stored alongside the generator checkpoint at the path below.
# NOTE(review): `tokenizer` is not referenced by any later cell visible in this
# notebook (perplexity uses `lm_tokenizer`) — confirm whether this cell is still needed.
gen_model_unspervised_name = './review_generate/unsupervised/checkpoints/bart-base-cnn-rating-tokens/checkpoint-47271'
tokenizer = AutoTokenizer.from_pretrained(gen_model_unspervised_name)

In [None]:
def clean_str(string):
    """Normalize text to lower-case ASCII alphanumeric tokens separated by single spaces.

    Every character that is not an ASCII letter or digit (punctuation,
    apostrophes, newlines, non-ASCII letters, ...) is replaced by a space, the
    literal marker ``"sssss "`` is removed, runs of whitespace are collapsed,
    and the result is stripped and lower-cased.

    The contraction and punctuation substitutions that used to follow the first
    step were removed: the first step already deletes every character they
    match on, so they could never fire. The marker removal now happens *before*
    whitespace is collapsed, so it no longer leaves a double space behind and
    the function is idempotent.

    Args:
        string: The raw text (must be a ``str``).

    Returns:
        The normalized string; ``""`` when nothing alphanumeric remains.
    """
    # Anything outside [A-Za-z0-9] becomes a separator.
    string = re.sub(r"[^A-Za-z0-9]", " ", string)
    # Case-sensitive on purpose: the marker is matched before lower-casing.
    # NOTE(review): presumably "sssss" is a sentence-separator token in the data — confirm.
    string = re.sub(r"sssss ", " ", string)
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower()

# Select the dataset to evaluate by leaving exactly one assignment active.
# dataset = 'Digital_Music_data'
# dataset = 'Musical_Instruments_data'
# dataset = 'Video_Games_data'
dataset = 'Office_Products_data'
all_df = pd.read_csv(f'dataset/{dataset}/data.csv')
print(len(all_df))

# Keep rows whose review text is present and non-empty. `.copy()` turns the
# filtered result into an independent frame so that adding a column below does
# not assign into a slice of the original (SettingWithCopyWarning).
has_review_text = all_df['reviews'].notna() & (all_df['reviews'] != '')
all_df = all_df[has_review_text].copy()

all_df['clean_reviews'] = all_df['reviews'].apply(clean_str)
# Reviews made only of punctuation/symbols normalize to '' and are dropped too.
all_df = all_df[all_df['clean_reviews'] != '']

print(len(all_df))

In [None]:
import nltk
from nltk.corpus import stopwords
from utils import get_stopwords
# Raw NLTK English stop-word list; its size is reported before normalization.
nltk_stop_words = stopwords.words('english')
print('nltk stop ' ,len(nltk_stop_words))
# Normalize each stop word the same way review text is normalized and
# de-duplicate in one step. Note that clean_str turns apostrophes into spaces,
# so contractions become multi-token strings.
stop = {clean_str(word) for word in nltk_stop_words}

In [None]:
# Draw one review per item (unseeded, so the sample differs between runs),
# then normalize the sampled texts with clean_str.
sampled_reviews = []
num_of_item = 0
for item_id, item_df in all_df.groupby('item_id'):
    num_of_item += 1
    n_draw = min(1, len(item_df))
    sampled_reviews += item_df.sample(n_draw)['reviews'].tolist()

clean_reviews = list(map(clean_str, sampled_reviews))
print(num_of_item)
print(len(clean_reviews))


### Aspect evaluation

In [None]:
# The aspect count is part of the file name of the aspect-word CSV to load.
n_aspect = 15
aspect_csv_path = f'aspect/data/{dataset}_{n_aspect}.csv'
aspect_df = pd.read_csv(aspect_csv_path)
print(aspect_df.shape)
# Last expression of the cell: displays the first rows.
aspect_df.head()

In [None]:
# Build the aspect vocabulary from the top-`topk` words of every aspect row:
#   aspect_vocab_list - one set of kept words per aspect row
#   aspect_vocab      - union of all kept words
topk = 30
aspect_vocab_list = []
all_aspect_words = []
for serialized_words in aspect_df['aspect_words']:
    # NOTE(review): eval() executes whatever the CSV cell contains. If the
    # column always holds a plain Python list literal, ast.literal_eval would be
    # the safe replacement — confirm the file format before switching.
    top_words = eval(serialized_words)[:topk]
    normalized_words = [clean_str(word) for word in top_words]
    # Keep words longer than two characters that are not stop words
    # (the length test also excludes empty strings).
    kept_words = [word for word in normalized_words if len(word) > 2 and word not in stop]

    aspect_vocab_list.append(set(kept_words))
    all_aspect_words.extend(kept_words)

print(len(all_aspect_words))
aspect_vocab = set(all_aspect_words)
print(len(aspect_vocab))
print(len(aspect_vocab_list))

In [None]:
def eval_aspect(clean_reviews, do_print=True):
    """Measure how much of each review consists of aspect-vocabulary words.

    For every review the share of whitespace-separated tokens found in the
    module-level ``aspect_vocab`` is computed; a review without tokens counts
    as 0.

    Args:
        clean_reviews: Iterable of already normalized review strings.
        do_print: When true, print a header and the mean per-review share.
            The printed label says '%', but the value is a fraction in [0, 1].

    Returns:
        The set of distinct aspect-vocabulary words seen in any review.
    """
    aspect_prob_list = []
    aspect_words = set()
    for review in clean_reviews:
        words = review.split()
        matched = [word for word in words if word in aspect_vocab]
        aspect_words.update(matched)
        # Token-less reviews contribute 0 instead of dividing by zero.
        aspect_prob_list.append(len(matched) / len(words) if words else 0)

    if do_print:
        print('aspect evaluation')
        print('% of aspect words')
        # np.mean([]) would warn and print nan; report 0.0 when there are no reviews.
        print(np.mean(aspect_prob_list) if aspect_prob_list else 0.0)
    return aspect_words

In [None]:
# Report aspect-word coverage for the reviews sampled from the dataset. As the
# last expression of the cell, the returned set of matched aspect words is
# also displayed.
eval_aspect(clean_reviews)

In [None]:
def eval_rouge(df, sample_nums=100):
    """Print the average ROUGE-1/2/L F-score of generated reviews against each other.

    Up to ``sample_nums`` reviews are drawn at random (unseeded) from
    ``df['attack_reviews']`` after normalization with ``clean_str``. Each
    sampled review is scored as hypothesis against every other sampled review
    as reference, and the per-review averages are averaged again.

    Args:
        df: DataFrame with an ``attack_reviews`` column of strings.
        sample_nums: Maximum number of reviews to compare (default 100). When
            fewer usable reviews exist, all of them are used instead of
            raising ``ValueError`` from ``random.sample``.

    Returns:
        None. The three scores are printed; ``nan`` is printed for each when
        fewer than two usable reviews are available.
    """
    reviews = [clean_str(review) for review in df['attack_reviews'].tolist()]
    # Reviews that normalize to '' cannot be scored as hypothesis or reference.
    reviews = [review for review in reviews if review]
    sample_nums = min(sample_nums, len(reviews))

    if sample_nums < 2:
        # A pairwise comparison needs at least two reviews.
        avg_rouge1_score = avg_rouge2_score = avg_rougeL_score = float('nan')
    else:
        reviews = random.sample(reviews, k=sample_nums)

        avg_rouge1_score = 0.0
        avg_rouge2_score = 0.0
        avg_rougeL_score = 0.0

        for i, review in enumerate(reviews):
            # Compare review i against all other sampled reviews.
            ref = reviews[:i] + reviews[i + 1:]
            hyp = [review] * len(ref)
            rouge_scores = rouge.get_scores(hyp, ref, avg=True, ignore_empty=True)

            avg_rouge1_score += rouge_scores['rouge-1']['f']
            avg_rouge2_score += rouge_scores['rouge-2']['f']
            avg_rougeL_score += rouge_scores['rouge-l']['f']

        avg_rouge1_score /= sample_nums
        avg_rouge2_score /= sample_nums
        avg_rougeL_score /= sample_nums

    print('avg rourge1/2/L score from other generated reviews')
    print(f'{avg_rouge1_score:.3f} / {avg_rouge2_score:.3f} / {avg_rougeL_score:.3f}')

In [None]:
def get_ppl(reviews, model, tokenizer, batch_size=8, max_length=128):
    """Return the mean per-batch perplexity of ``reviews`` under a causal LM.

    The texts are tokenized in batches of ``batch_size`` (padded, truncated to
    ``max_length``); for every batch ``exp(loss)`` is computed and the values
    are averaged over batches. Note this is a mean of batch perplexities, not a
    token-weighted corpus perplexity, so a short final batch weighs as much as
    a full one.

    Padding positions are excluded from the loss by setting their labels to
    -100. Without this, padded tokens (the pad token may equal the EOS token)
    would be scored like real text and distort the result.

    Args:
        reviews: Sequence of strings (list or array supporting ``len`` and slicing).
        model: Language model whose forward pass accepts ``labels`` and returns
            an object with a ``loss`` attribute.
        tokenizer: Tokenizer matching ``model``; must have a pad token.
        batch_size: Number of texts per forward pass.
        max_length: Truncation length in tokens.

    Returns:
        The averaged perplexity as ``float``; ``nan`` when ``reviews`` is empty.
    """
    if len(reviews) == 0:
        return float('nan')

    # Run on whatever device the model lives on instead of relying on a global.
    model_device = next(model.parameters()).device

    ppl = 0.0
    count = 0
    for start in range(0, len(reviews), batch_size):
        input_texts = list(reviews[start:start + batch_size])
        encoded_input = tokenizer(input_texts, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
        encoded_input = encoded_input.to(model_device)
        # -100 is the label value the loss ignores; apply it to padded positions.
        labels = encoded_input["input_ids"].masked_fill(encoded_input["attention_mask"] == 0, -100)
        with torch.no_grad():
            loss = model(**encoded_input, labels=labels).loss
        ppl += math.exp(loss.item())
        count += 1
    return ppl / count

In [None]:
# Use the GPU when one is available; otherwise fall back to CPU so the
# notebook still runs.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
lm_pretrained_model = 'distilgpt2'
lm_tokenizer = AutoTokenizer.from_pretrained(lm_pretrained_model, use_fast=True)
# Reuse the EOS token as pad token so batches can be tokenized with padding=True.
lm_tokenizer.pad_token = lm_tokenizer.eos_token

# Fine-tuned language-model checkpoint per dataset. Previously the
# Digital_Music path was assigned first and then always overwritten by the
# if/elif/else chain, so that dataset silently used plain 'distilgpt2'.
# NOTE(review): confirm the Digital_Music checkpoint below is the intended one.
lm_checkpoints = {
    'Digital_Music_data': 'lm/distilgpt2-Digital_Music_data_reviews/checkpoint-7731',
    'Musical_Instruments_data': './lm/distilgpt2_128-Musical_Instruments_data_reviews/checkpoint-41272',
    'Video_Games_data': './lm/distilgpt2_128-Video_Games_data_reviews/checkpoint-59257',
    'Office_Products_data': './lm/distilgpt2_128-Office_Products_data_reviews/checkpoint-45936',
}

if dataset in lm_checkpoints:
    lm_model_path = lm_checkpoints[dataset]
else:
    print('no fine tune dataset for lm model')
    lm_model_path = 'distilgpt2'
print(lm_model_path)

lm_model = AutoModelForCausalLM.from_pretrained(lm_model_path).to(device)
# Inference only: switch to evaluation mode.
lm_model = lm_model.eval()

In [None]:
def get_length(row):
    """Return the number of whitespace-separated tokens in ``row['attack_reviews']``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with an
            ``attack_reviews`` entry.

    Returns:
        The token count. A value that is not a string — such as the NaN that
        pandas produces for an empty CSV cell — counts as 0 instead of raising
        ``AttributeError``.
    """
    review = row['attack_reviews']
    if not isinstance(review, str):
        return 0
    # split() without arguments already ignores leading/trailing whitespace.
    return len(review.split())

# Placeholder: set this to the CSV file containing the generated reviews.
df_path = 'ATTACK_REVIEWS_OUTPUT_PATH'

print(df_path)

df = pd.read_csv(df_path)
# Drop reviews with two or fewer whitespace-separated tokens.
df['review_words'] = df.apply(get_length, axis=1)
df = df[df['review_words'] > 2]
# NOTE(review): meaning of the 'attack_ps' column is not visible here; only its mean is reported.
print(df['attack_ps'].mean())

attack_reviews = df['attack_reviews'].tolist()
clean_reviews = [clean_str(review) for review in attack_reviews]

aspect_words = eval_aspect(clean_reviews)
print('---')
# Perplexity is computed on the raw (not normalized) review text.
ppl = get_ppl(df.attack_reviews.values, lm_model, lm_tokenizer, batch_size=8, max_length=128)
# Print the value as well; previously only the literal label 'ppl' was shown.
print('ppl')
print(f'{ppl:.3f}')
print('-----')
eval_rouge(df)
print()