In [1]:
# Execute random_attack.py inside this kernel, passing the "alpha" dataset and
# the "fraudar" algorithm as command-line options.
# NOTE(review): later cells use `G` and `data_gt_df` without defining them;
# presumably this script leaves them in the notebook namespace - confirm.
%run random_attack.py --data alpha --alg fraudar

Namespace(alg='fraudar', data='alpha')


In [2]:
from UGFraud.Utils.helper import *
from UGFraud.Detector.Fraudar import *

In [39]:
def listToSparseMatrix(edgesSource, edgesDest):
    """Build a 0/1 sparse adjacency matrix from two parallel index lists.

    ``edgesSource[i]`` and ``edgesDest[i]`` are the row and column index of the
    i-th edge. The result has shape
    ``(max(edgesSource) + 1, max(edgesDest) + 1)`` and holds 1 wherever at
    least one edge was listed (repeated edges collapse to a single 1) and 0
    elsewhere.
    """
    shape = (max(edgesSource) + 1, max(edgesDest) + 1)
    ones = [1 for _ in edgesSource]
    edge_counts = sparse.coo_matrix((ones, (edgesSource, edgesDest)), shape=shape)
    # Comparing with zero turns edge multiplicities into a boolean mask,
    # which is then cast back to integers.
    has_edge = edge_counts > 0
    return has_edge.astype('int')
def runFraudar(graph, multiple=0):
    """Run Fraudar on a user-product review graph and convert blocks to scores.

    Args:
        graph: Graph object indexable as ``graph[user_id]``, yielding a mapping
            keyed by the products that user is connected to. User nodes and
            review edges are selected through ``node_attr_filter`` and
            ``edge_attr_filter`` on the ``'types'`` attribute.
        multiple: ``0`` runs ``detect_blocks``; any other value is passed to
            ``detectMultiple`` as the number of blocks to detect.

    Returns:
        ``(user_prob, review_prob)``:

        * ``user_prob`` maps a user id to its score. Every user that appears as
          the first element of a review edge starts at ``1e-6``; users found in
          a detected block get the rescaled score of the first block in the
          result list that contains them.
        * ``review_prob`` maps ``(user_id, product_id)`` to ``1e-6`` by default
          and to the user's score for every edge of a detected user.

        Block scores are rescaled as ``(score - last) / (first - last)``, where
        ``first`` / ``last`` are the scores of the first and last returned
        blocks. When the two are equal (for example a single block) every
        detected user receives ``1.0`` instead of dividing by zero.
    """
    # Only the keys of these mappings are needed below.
    review_keys = edge_attr_filter(graph, 'types', 'review', 'prior')
    user_nodes = node_attr_filter(graph, 'types', 'user', 'types')

    # Assign consecutive integer indices to users (in iteration order) and to
    # products (in order of first appearance) while collecting the edge list.
    u_id2idx = {}
    idx2u_id = {}
    p_id2idx = {}
    edgesSource = []
    edgesDest = []
    for u_id in user_nodes:
        u_idx = u_id2idx.setdefault(u_id, len(u_id2idx))
        idx2u_id[u_idx] = u_id
        for p_id in graph[u_id].keys():
            edgesSource.append(u_idx)
            edgesDest.append(p_id2idx.setdefault(p_id, len(p_id2idx)))

    if edgesSource:
        M = listToSparseMatrix(edgesSource, edgesDest)
        if multiple == 0:
            # detect all dense blocks
            res = detect_blocks(M, logWeightedAveDegree)
        else:
            # detect the top #multiple dense blocks
            res = detectMultiple(M, logWeightedAveDegree, multiple)
    else:
        # No user-product edge at all: nothing can be detected.
        res = []

    # Each result entry is ((row_indices, col_indices), score); a user keeps
    # the score of the first block in which it appears.
    detected_users = {}
    for block in res:
        weight = block[1]
        for u_idx in block[0][0]:
            detected_users.setdefault(idx2u_id[u_idx], weight)

    if res:
        min_den = res[-1][1]
        den_interval = res[0][1] - min_den
    else:
        min_den = 0
        den_interval = 0

    user_prob = {}
    review_prob = {}
    for review in review_keys:
        review_prob[review] = 1e-6
        user_prob[review[0]] = 1e-6
    print(len(detected_users))

    for user, weight in detected_users.items():
        # Guard against a zero score range (single block / identical scores).
        if den_interval == 0:
            density = 1.0
        else:
            density = (weight - min_den) / den_interval
        user_prob[user] = density
        for prod in graph[user].keys():
            review_prob[(user, prod)] = density

    return user_prob, review_prob

In [45]:
from UGFraud.Detector.fBox import *
def runfBox(graph, t, k):
    """Run fBox on ``graph`` and turn its detections into user/review scores.

    Args:
        graph: Graph passed to ``fBox`` and to the attribute filters; its
            ``edges`` attribute is iterated as ``(source, target, ...)`` tuples.
        t: First argument forwarded to ``fBox.run``.
        k: Second argument forwarded to ``fBox.run``.

    Returns:
        ``(user_prob, review_prob)``:

        * ``user_prob`` maps every user with a ``'prior'`` to that prior when
          the user was detected, otherwise to ``1e-7``.
        * ``review_prob`` maps every edge ``(source, target)`` to the review
          prior (``None`` if the edge has none) when ``source`` is a detected
          user, otherwise to ``0``.
    """
    user_priors = node_attr_filter(graph, 'types', 'user', 'prior')
    review_priors = edge_attr_filter(graph, 'types', 'review', 'prior')

    # run fBox; only the per-degree user detections are used below
    model = fBox(graph)
    detected_users_by_degree, _ = model.run(t, k)

    detected_users = set()
    for user_list in detected_users_by_degree.values():
        detected_users.update(user_list)

    user_prob = {
        user: (prior if user in detected_users else 1e-7)
        for user, prior in user_priors.items()
    }

    review_prob = {}
    for edge in graph.edges:
        key = (edge[0], edge[1])
        # NOTE(review): assumes the first element of each edge is the user id.
        if edge[0] in detected_users:
            review_prob[key] = review_priors.get(key)
        else:
            review_prob[key] = 0

    return user_prob, review_prob

In [3]:
# Collect the 'label' attribute of the edges of G selected by 'types' == 'review'.
# NOTE(review): the displayed result is empty, presumably because G's edges do
# not carry a 'types' attribute at this point - confirm.
review_ground_truth = edge_attr_filter(G, 'types', 'review', 'label')

In [4]:
# Display the collected labels.
review_ground_truth

{}

In [5]:
# Load the Yelp demo graph shipped with UGFraud; the following cells inspect
# its node and edge attributes.
G_test = load_graph("../../UGFraud/UGFraud/Demo/Yelp_graph_data.json")

Loaded ../../UGFraud/UGFraud/Demo/Yelp_graph_data.json into the nextorkx graph


In [6]:
# List the 'types' attribute of every node in the demo graph and show the
# distinct values.
l = [G_test.nodes[n]["types"] for n in G_test.nodes]
set(l)

{'prod', 'user'}

In [7]:
# Show the attribute dict of one demo-graph edge.
G_test.edges[("201", "0")]

{'rating': 1,
 'label': 1,
 'date': 'None',
 'prior': 0.35048557119705304,
 'types': 'review'}

In [8]:
# Show the attribute dict of one demo-graph node.
G_test.nodes["0"]

{'prior': 0.3951113464084369, 'types': 'prod'}

In [9]:
# Show the attribute dict of one node of G for comparison with the demo graph.
G.nodes["u7188"]

{}

In [59]:
# Work on a copy of G so the attributes added here do not touch the original.
G2 = G.copy()

# Give every node the attributes the detectors filter on: node ids starting
# with "u" are users, everything else is a product; priors start at 0.
nodes_update = {
    n: {
        "types": "user" if n[0] == "u" else "prod",
        "prior": 0,
       }
    for n in G2.nodes
}

# Every edge is treated as a review with prior 0.
edges_update = {
    e: {
        "prior": 0,
        "types": "review",
    }
    for e in G2.edges
}

nx.set_node_attributes(G2, nodes_update)
nx.set_edge_attributes(G2, edges_update)

# Attach ground-truth labels to users and to the edges leaving each user.
# NOTE(review): (label + 1) / 2 presumably maps labels in {-1, +1} to {0, 1} -
# confirm against data_gt_df.
for n, label in zip("u" + data_gt_df["id"].astype(str), data_gt_df["label"]):
    if n in G2.nodes:
        node_label = (label + 1) / 2
        G2.nodes[n]["label"] = node_label
        # Label this user's own out-edges (previously hard-coded to "u3").
        for e in G2.out_edges(n):
            G2.edges[e]["label"] = node_label

In [60]:
# Make the `cp` alias for the copy module available in the notebook namespace,
# then run Fraudar on G2 detecting all dense blocks (multiple=0).
import copy as cp
userBelief, reviewBelief = runFraudar(G2, multiple=0)

1969


In [61]:
# Show the score assigned to user "u3" by the run above.
userBelief["u3"]

1.0

In [62]:
# Run fBox on G2, then print the largest user score and the smallest review
# score it produced.
userBelief, reviewBelief = runfBox(G2, t=0.5, k=70)
print(max(userBelief.values()))
print(min(reviewBelief.values()))

Finished run in 0.093 secs
1e-07
0


In [63]:
# Show the score assigned to user "u3" by the fBox run above.
userBelief["u3"]

1e-07

In [155]:
# Seed NumPy's global random generator and draw one integer from [0, 10) to
# check that the draw is reproducible.
np.random.seed(0)
# NOTE(review): uuid is not used in any cell visible here.
import uuid
np.random.randint(0, 10)

5