In [1]:
import utils
import evaluators
import numpy as np
import pandas as pd
from matplotlib import pyplot

In [2]:
# Name of the dataset folder and the location of its gzipped metadata file.
dataset = 'YelpChi'
path = 'Yelp_Dataset/{}/metadata.gz'.format(dataset)

In [3]:
# Read the raw review records and wrap them in a labelled DataFrame.
# NOTE(review): the records pass through a single np.array first; the label
# cell below compares Label against the string '1', so values appear to be
# stored as strings -- confirm before doing arithmetic on Rating/Label.
reviews_array = np.array(utils.load_reviews(path))
reviews = pd.DataFrame(
    data=reviews_array,
    columns=['UserId', 'ProductId', 'Rating', 'Label', 'Date'],
)
reviews

No of Lines read# 67395


Unnamed: 0,UserId,ProductId,Rating,Label,Date
0,201,0,5.0,1,2011-06-08
1,202,0,3.0,1,2011-08-30
2,203,0,5.0,1,2009-06-26
3,204,0,1.0,1,2010-09-16
4,205,0,3.0,1,2010-02-05
...,...,...,...,...,...
67390,14981,199,5.0,1,2012-04-07
67391,38261,199,4.0,1,2009-06-05
67392,38262,200,5.0,1,2012-05-06
67393,26974,200,2.0,1,2011-01-08


In [4]:
# Tally the reviews whose Label equals the string '1' (`one`) against all
# remaining reviews (`zero`), then show both counts.
one = sum(1 for label in reviews.Label if label == '1')
zero = len(reviews.Label) - one
one, zero

(58476, 8919)

In [5]:
def extract_elite_accounts(elite_threshold):
    """Return the user ids that appear in at least `elite_threshold` reviews.

    Reads the module-level `reviews` DataFrame. The ids are returned as a
    list ordered by their integer value, and the number of ids found is
    printed.
    """
    user_ids, review_counts = np.unique(reviews.UserId, return_counts=True)
    meets_threshold = review_counts >= elite_threshold
    elite_accounts = list(user_ids[meets_threshold])
    # Ids are compared numerically, not lexicographically.
    elite_accounts.sort(key=int)
    print("No of elite accounts #", len(elite_accounts))
    return elite_accounts

In [6]:
# Number of distinct products, then number of distinct users, in `reviews`.
print(np.unique(reviews.ProductId).size, np.unique(reviews.UserId).size)

201 38063


In [7]:
def extract_target(total_targets, fixed_targets=('194', '178', '187')):
    """Return the list of product ids to attack.

    Parameters
    ----------
    total_targets : int
        Number of products to select when the selection is derived from the
        data. Ignored while `fixed_targets` is given.
    fixed_targets : sequence or None, optional
        Explicit product ids to use. The default is the hand-picked trio the
        notebook has always used, so `extract_target(n)` keeps returning
        ['194', '178', '187'] for any `n`. Pass `None` to instead pick the
        `total_targets` products with the fewest rows in the module-level
        `reviews` DataFrame.

    Returns
    -------
    list
        The selected product ids. The number of ids is also printed.
    """
    if fixed_targets is not None:
        target_products = list(fixed_targets)
    else:
        values, counts = np.unique(reviews.ProductId, return_counts=True)
        # argsort is ascending, so the slice keeps the least-reviewed products.
        target_products = values[np.argsort(counts)][:total_targets].tolist()
    print("No of Targets: ", len(target_products))
    return target_products

In [8]:
def init(obj):
    """Build a uniform weight map over the items of `obj`.

    Every item becomes a key whose value is 1/len(obj). The resulting dict
    is printed and then returned. An empty `obj` yields an empty dict.
    """
    uniform = {item: 1/len(obj) for item in obj}
    print(uniform)
    return uniform

In [9]:
def get_reviews(user_graph, product_graph, target_products, attack_p, elite_accounts, review_per_target, epsilon):
    """Generate new reviews for every target product.

    The attack for the target at position `k` is taken from the module-level
    list `attackors[k]`; the epsilon-greedy sampling call is commented out,
    so `attack_p` and `epsilon` are currently unused. Reviews are produced by
    `utils.get_new_reviews`, and the attack name plus the number of reviews
    it produced are printed per target.

    Returns
    -------
    (list, dict)
        All generated reviews concatenated in target order, and a mapping
        from each target to the attack that was used for it.
    """
    target_attack_map = {}
    collected_reviews = []
    # Counts how many earlier targets in this call used the 'Singleton'
    # attack; it is forwarded to utils.get_new_reviews as singleton_offset.
    singleton_offset = 0
    for position, product in enumerate(target_products):
        # chosen_attack = utils.e_greedy_sample(attack_p, epsilon)
        chosen_attack = attackors[position]
        target_attack_map[product] = chosen_attack
        generated = utils.get_new_reviews(
            user_graph,
            product_graph,
            chosen_attack,
            elite_accounts,
            review_per_target,
            singleton_offset,
            product,
        )
        print(chosen_attack, len(generated))
        collected_reviews.extend(generated)
        if chosen_attack == 'Singleton':
            singleton_offset += 1
    return collected_reviews, target_attack_map
    

In [10]:
def play_min_max_game(attack_p, detect_q, episodes, elite_threshold, total_targets, review_per_target, learning_rate_attackors, learning_rate_detectors, epsilon, mode='Training'):
    """Play `episodes` rounds of the attacker/detector game and record metrics.

    Each round generates new reviews for the target products, runs the
    detectors on them, drops the reviews listed in `top_k_reviews`, recomputes
    revenue and derives cost, rewards, practical effect and loss through the
    `evaluators` helpers. Progress is printed along the way.

    Besides its arguments the function reads two module-level names:
    `reviews` (to build the user and product graphs) and `top_k` (forwarded
    to `utils.run_detectors`).

    Parameters
    ----------
    attack_p, detect_q : dict
        Maps keyed by attack / detector name. They are re-bound to the results
        of the `evaluators.update_*` helpers only when `mode == 'Training'`.
        NOTE(review): whether the caller's dicts are also mutated in place
        depends on those helpers -- confirm.
    episodes : int
        Number of rounds to play.
    elite_threshold, total_targets : passed to `extract_elite_accounts` and
        `extract_target` respectively.
    review_per_target, epsilon : forwarded to `get_reviews`.
    learning_rate_attackors, learning_rate_detectors : forwarded to
        `evaluators.update_p` / `evaluators.update_q`.
    mode : str
        Any value other than 'Training' skips the attack_p / detect_q updates.

    Returns
    -------
    tuple
        (attack_p, detect_q, all_attack_p, all_detect_q, LOSS, PE, recall).
        `all_attack_p` / `all_detect_q` hold, per key, the initial value
        followed by one value per episode (episodes + 1 entries). `LOSS` and
        `PE` have one entry per episode. `recall` holds one
        (remaining new reviews, total new reviews) count pair per episode.
    """
    LOSS = []
    PE = []
    recall = []
    # Histories are seeded with the starting values, hence episodes + 1 points.
    all_attack_p = {attack: [attack_p[attack]] for attack in attack_p}
    all_detect_q = {detect: [detect_q[detect]] for detect in detect_q}
    
    # One target -> attack mapping per episode; kept locally, not returned.
    target_attack_map = [{} for i in range(episodes)]
    
    elite_accounts = extract_elite_accounts(elite_threshold)
    target_products = extract_target(total_targets)
    user_graph = utils.convert_review_to_user_graph(reviews)
    product_graph = utils.convert_review_to_product_graph(reviews)
    
    # Baseline revenue figures, computed once before any new review is added.
    old_ri, old_eri, old_revenue = evaluators.compute_revenue(product_graph, target_products, elite_accounts)
    old = [old_ri, old_eri, old_revenue]
    print("Old Revenue #", old_revenue)
        
        
    for i in range(episodes):
        print("Startng episode #", i)
        
        new_reviews, target_attack_map[i] = get_reviews(user_graph, product_graph, target_products, attack_p, elite_accounts, review_per_target, epsilon)
        
        # NOTE(review): the same user_graph / product_graph objects are passed
        # in every episode; whether run_detectors mutates them is not visible
        # here -- confirm that episodes do not leak state into each other.
        spam_review_probs, sum_review_probs, detector_review_probs, top_k_reviews, new_product_graph = utils.run_detectors(user_graph, product_graph, new_reviews, detect_q, top_k)

        # New reviews that are not among top_k_reviews remain in play.
        rem_new_reviews = [review for review in new_reviews if review not in top_k_reviews] 
        print("no of remaining_new_reviews ", len(rem_new_reviews))
        new_product_graph = utils.remove_edges(new_product_graph, top_k_reviews)

        new_ri, new_eri, new_revenue = evaluators.compute_revenue(new_product_graph, target_products, elite_accounts)
        new = [new_ri, new_eri, new_revenue]
        print("New Revenue #", new_revenue)
        
        cost = evaluators.compute_cost(old, new, rem_new_reviews, elite_accounts)

        rewards = evaluators.compute_reward(old, new, target_products)
        # Strategy weights are only adjusted while training; in any other mode
        # the incoming attack_p / detect_q are evaluated unchanged.
        if mode == 'Training':
            attack_p = evaluators.update_p(attack_p, target_attack_map[i], rewards, learning_rate_attackors)
            attack_p = evaluators.normalize_map(attack_p)
        
            detect_q = evaluators.update_q(detect_q, cost, detector_review_probs, sum_review_probs, rem_new_reviews, learning_rate_detectors)

        practical_effect = evaluators.compute_pe(old, new, target_products)
        total_loss = evaluators.compute_loss(cost, spam_review_probs)
        
        LOSS.append(total_loss)
        PE.append(practical_effect)
        # Despite the name, this stores raw counts (remaining, generated).
        recall.append((len(rem_new_reviews), len(new_reviews)))
        for attack in attack_p:
            all_attack_p[attack].append(attack_p[attack])
        
        for detect in detect_q:
            all_detect_q[detect].append(detect_q[detect])
        
        print("Practical Effect: ", practical_effect)
        print("Total Loss: ", total_loss)
    return attack_p, detect_q, all_attack_p, all_detect_q, LOSS, PE, recall
    

In [None]:
# Strategy names. `attackors` is also read as a global inside get_reviews,
# which assigns attackors[k] to the k-th target product.
attackors = ['IncBP', 'IncDS', 'IncPR', 'Random', 'Singleton']
detectors = ['GANG', 'Prior', 'SpEagle', 'fBox', 'Fraudar']

# Experiment settings. `top_k` is not passed as an argument; it is read as a
# global inside play_min_max_game and forwarded to utils.run_detectors.
elite_threshold = 10
total_targets = 3
review_per_target = 5
top_k = 0.01
episodes = 5
epsilon = 0.1
learning_rate_attackors = 0.01
learning_rate_detectors = 30

# Start both sides from uniform weights (init prints each map).
attack_p = init(attackors)
detect_q = init(detectors)

# Training run: the names below are re-bound to the learned weights, their
# per-episode histories and the per-episode metrics.
attack_p, detect_q, all_attack_p, all_detect_q, LOSS, PE, recall = play_min_max_game(
    attack_p,
    detect_q,
    episodes,
    elite_threshold,
    total_targets,
    review_per_target,
    learning_rate_attackors,
    learning_rate_detectors,
    epsilon,
)

{'IncBP': 0.2, 'IncDS': 0.2, 'IncPR': 0.2, 'Random': 0.2, 'Singleton': 0.2}
{'GANG': 0.2, 'Prior': 0.2, 'SpEagle': 0.2, 'fBox': 0.2, 'Fraudar': 0.2}
No of elite accounts # 536
No of Targets:  3
Old Revenue # {'194': 0.24138348320260924, '178': 0.13138348320260926, '187': 0.09638348320260925}
Startng episode # 0
IncBP 5
IncDS 5
IncPR 5
Run GANG ...
Run Prior ...
Run SpEagle ...
Run fBox ...
Run Fraudar ...
[('5429', '194'), ('5364', '194'), ('5409', '194'), ('6359', '194'), ('6739', '194'), ('6197', '178'), ('6381', '178'), ('6024', '178'), ('6585', '178'), ('7419', '178'), ('7202', '187'), ('5330', '187'), ('6381', '187'), ('13818', '187'), ('11557', '187')]
('7202', '187')
top k is  337
no of remaining_new_reviews  14
New Revenue # {'194': 0.3103835291015732, '178': 0.3103835291015732, '187': 0.3033835291015732}
Practical Effect:  0.4550001376968918
Total Loss:  0.017828483586196636
Startng episode # 1
IncBP 5
IncDS 5
IncPR 5
Run GANG ...
Run Prior ...
Run SpEagle ...
Run fBox ...
Run

In [None]:
def plotData(x_axis_data, y_axis_data, x_label, y_label, curve_label=""):
    """Draw one curve on the current matplotlib axes and label both axes.

    `curve_label` becomes the curve's legend label. The function neither
    creates a new figure nor calls show(), so consecutive calls draw onto
    the same axes until the caller displays the figure. Returns None.
    """
    current_axes = pyplot.gca()
    current_axes.plot(x_axis_data, y_axis_data, label=curve_label)
    current_axes.set_xlabel(x_label)
    current_axes.set_ylabel(y_label)


In [None]:
# Display the raw training results: per-episode loss and practical effect,
# the weight histories of both sides, and the per-episode review counts.
LOSS, PE, all_attack_p, all_detect_q, recall

In [None]:
# Loss per training episode (episodes are numbered from 1).
plotData(
    x_axis_data=range(1, episodes+1),
    y_axis_data=LOSS,
    x_label="Episodes",
    y_label="Loss",
)

In [None]:
# Practical effect per training episode (episodes are numbered from 1).
plotData(
    x_axis_data=range(1, episodes+1),
    y_axis_data=PE,
    x_label="Episodes",
    y_label="PE",
)

In [None]:
# One curve per attack. Each history holds the initial weight plus one value
# per episode, hence episodes + 1 points on the x axis.
for attack, history in all_attack_p.items():
    plotData(range(1, episodes+2), history, "Episodes", "Attack P", attack)
pyplot.legend()
pyplot.show()

In [None]:
# One curve per detector. Each history holds the initial weight plus one
# value per episode, hence episodes + 1 points on the x axis.
for detect, history in all_detect_q.items():
    plotData(range(1, episodes+2), history, "Episodes", "Detect P", detect)
pyplot.legend()
pyplot.show()

In [None]:
# Compare every detector on its own (weight 1) against the learned detector
# mixture, each for a single episode in "Testing" mode, which leaves attack_p
# and detect_q unchanged.
#
# The test results are stored under their own names. Unpacking them into
# LOSS / PE / recall would overwrite the training histories with one-element
# lists, so re-running the training plot cells afterwards would fail or show
# the wrong data.
#
# NOTE(review): total_targets and review_per_target are both fixed to 3 here,
# while the training run uses review_per_target = 5 -- confirm this is
# intended.
loss_detectors = []
detectors_l = []
for detect in detect_q:
    _, _, _, _, detector_loss, detector_pe, _ = play_min_max_game(
        attack_p, {detect: 1}, 1,
        elite_threshold, 3, 3,
        learning_rate_attackors, learning_rate_detectors, epsilon, "Testing")
    loss_detectors.append(detector_loss)
    detectors_l.append(detect)

    print(detect, detector_loss, detector_pe)

# Same evaluation with the full learned detector weights.
_, _, _, _, nash_loss, nash_pe, _ = play_min_max_game(
    attack_p, detect_q, 1,
    elite_threshold, 3, 3,
    learning_rate_attackors, learning_rate_detectors, epsilon, "Testing")
loss_detectors.append(nash_loss)
detectors_l.append("nash detect")

In [None]:
# Test loss for each single detector and for the learned mixture.
plotData(
    x_axis_data=detectors_l,
    y_axis_data=loss_detectors,
    x_label="Detectors",
    y_label="Loss",
)