In [4]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
import multiprocessing as mp

from utils import load_data, split_data_by_time, build_nx, normalize_dict
from detecters import do_fraudar, do_rev2, do_rsd

import cvxpy
import networkx as nx

In [32]:
# Load the "alpha" dataset through the project helper; it returns two objects,
# bound here as the network data and the ground-truth data
# (the `_df` suffix suggests pandas DataFrames — TODO confirm in utils.load_data).
data_nw_df, data_gt_df = load_data(data_name="alpha")
# Build the graph object from the network data with the project helper.
G = build_nx(data_nw_df)
# Freeze the node order in a list: it is used as the `nodelist` when the
# adjacency matrix is built and to map matrix indices back to node labels.
G_nodes = list(G.nodes)

In [35]:
# Sparse adjacency matrix of G; row i / column i correspond to G_nodes[i].
# `nx.to_scipy_sparse_matrix` was removed in NetworkX 3.0, so fall back to
# `to_scipy_sparse_array` there. The result is wrapped in `csr_matrix` so that
# later cells keep seeing a scipy sparse *matrix* (they call matrix-only
# methods such as `.asfptype()`).
if hasattr(nx, "to_scipy_sparse_matrix"):
    sm = nx.to_scipy_sparse_matrix(G, nodelist=G_nodes)
else:
    from scipy.sparse import csr_matrix

    sm = csr_matrix(nx.to_scipy_sparse_array(G, nodelist=G_nodes))

In [36]:
# Sanity check: compare the sum of all adjacency-matrix entries with the number
# of edges in G. The two agree when every stored entry is 1 and each edge
# occupies exactly one matrix cell (presumably a directed, unweighted graph —
# confirm against build_nx).
es = sm.sum()
print(f"number of edge {es} == {len(G.edges)}")

number of edge 24186 == 24186


In [37]:
# Display the (rows, columns) dimensions of the adjacency matrix.
sm.shape

(7040, 7040)

In [38]:
from specgreedy.greedy import *

In [39]:
# --- Search configuration ---------------------------------------------------
w_g = "even"   # selects the density scoring function below
topk = 10      # upper bound on the number of singular components examined
alpha = 1.0    # forwarded unchanged to the greedy scoring function

# 'even' -> avgdeg_even, 'sqrt' -> avgdeg_sqrt, anything else -> avgdeg_log.
greedy_func = {'even': avgdeg_even, 'sqrt': avgdeg_sqrt}.get(w_g, avgdeg_log)

# Binarise the matrix only when `w_g` is falsy.
# NOTE(review): `w_g` is a non-empty string in this notebook, so this branch
# never runs and edge weights are used as-is — confirm that is intended.
if not w_g:
    print("max edge weight: {}".format(sm.max()))
    sm = (sm > 0).astype('int')

# --- Basic statistics of the matrix that will be decomposed ------------------
es = sm.sum()
ms, ns = sm.shape
print("graph: #node: {},  #edge: {}".format((ms, ns), es))
print("matrix max: {}, min: {}, shape: {}\n".format(sm.max(), sm.min(), sm.shape))

graph: #node: (7040, 7040),  #edge: 24186
matrix max: 1, min: 0, shape: (7040, 7040)



In [40]:
# Spectral search for the densest block of `sm`.
#
# Repeatedly computes a truncated SVD of the adjacency matrix, turns each pair
# of singular vectors into candidate row/column sets, scores the induced
# sub-matrix with `greedy_func`, and keeps the best-scoring block.
#
# Reads:  sm, ms, ns, topk, alpha, greedy_func, np, and `linalg` (expected to be
#         in scope through the `specgreedy.greedy` star import; it must provide
#         `svds`).
# Writes: opt_k, opt_density, cans (candidate rows/cols of the best component),
#         orgnds (row/col indices of the best block, in `sm` coordinates),
#         fin_pms, fin_pns (size of the best block).

# Best result found so far.
opt_k = -1
opt_density = 0.0
orgnds, cans = None, None
fin_pms, fin_pns = 0, 0

k = 0          # singular components processed so far
decom_n = 0    # how many times the decomposition has been enlarged

start = 3      # number of singular triplets in the first decomposition
step = 3       # additional triplets requested on every enlargement
isbreak = False

# Loop invariants: compute once instead of on every pass.
sm_fp = sm.asfptype()
max_rank = min(ms, ns) - 1          # svds requires k < min(sm.shape)
row_thresh = 1.0 / np.sqrt(ms)
col_thresh = 1.0 / np.sqrt(ns)

while k < topk:
    n_comp = start + decom_n * step
    if n_comp > max_rank:
        # Asking svds for more triplets than the matrix supports raises;
        # stop with whatever has been found so far instead.
        print("STOP: {} singular triplets requested, at most {} available".format(n_comp, max_rank))
        break

    print("\ncurrent ks: {}".format(n_comp))
    U, S, Vt = linalg.svds(sm_fp, k=n_comp, which='LM', tol=1e-4)
    # Put the components in descending singular-value order explicitly rather
    # than assuming svds hands them back ascending.
    order = np.argsort(S)[::-1]
    U, S, V = U[:, order], S[order], Vt.T[:, order]
    print("lambdas: {}".format(S))

    kth = k
    # The last computed component is never processed: S[kth] is read *after*
    # kth has been advanced, so one extra singular value must be available.
    while kth < n_comp - 1 and kth < topk:
        # Fix the sign so that the entry of largest magnitude is positive.
        if abs(U[:, kth].max()) < abs(U[:, kth].min()):
            U[:, kth] *= -1
        if abs(V[:, kth].max()) < abs(V[:, kth].min()):
            V[:, kth] *= -1

        # Candidates: entries at or above 1/sqrt(dimension). np.where returns
        # the indices in ascending order.
        row_cans = list(np.where(U[:, kth] >= row_thresh)[0])
        col_cans = list(np.where(V[:, kth] >= col_thresh)[0])
        if len(row_cans) <= 1 or len(col_cans) <= 1:
            print("SKIP_ERROR: candidates size: {}".format((len(row_cans), len(col_cans))))
            kth += 1
            k += 1
            continue

        sm_part = sm[row_cans, :][:, col_cans]
        nds_gs, avgsc_gs = greedy_func(sm_part, alpha)
        print("k_cur: {} size: {}, density: {}".format(kth, (len(nds_gs[0]), len(nds_gs[1])),
                                                              avgsc_gs))
        kth += 1
        k += 1

        if avgsc_gs > opt_density:
            # NOTE(review): kth was already advanced above, so this stores the
            # 0-based component index + 2 (the notebook prints "optimal k: 2"
            # for the first component). Kept as-is; confirm the intended meaning.
            opt_k, opt_density = kth + 1, avgsc_gs
            (sm_pms, sm_pns) = sm_part.shape
            fin_pms, fin_pns = len(nds_gs[0]), len(nds_gs[1])
            print("+++=== svd tops shape (candidates size): {}".format((sm_pms, sm_pns)))
            print("+++=== size: {}, score: {}\n".format((fin_pms, fin_pns), avgsc_gs))

            # Map positions inside sm_part back to row/column indices of sm.
            # The candidate lists are already ascending, so position i of the
            # sub-matrix is simply row_cans[i] / col_cans[i].
            org_rownds = [row_cans[pos] for pos in nds_gs[0]]
            org_calnds = [col_cans[pos] for pos in nds_gs[1]]
            cans = [row_cans, col_cans]
            orgnds = [org_rownds, org_calnds]

        # Stop once twice the best density reaches the next singular value
        # (presumably a spectral upper bound on what later components can
        # achieve — confirm against the SpecGreedy reference).
        if 2.0 * opt_density >= S[kth]:
            print("k_cur = {},  optimal density: {}, compare: {}".format(kth, opt_density, S[kth]))
            isbreak = True
            break

    if isbreak:
        break
    decom_n += 1

print("\noptimal k: {}, density: {}".format(opt_k, opt_density))


current ks: 3
lambdas: [42.35891874 23.69143501 21.83563143]
k_cur: 0 size: (136, 133), density: 14.059479553903346
+++=== svd tops shape (candidates size): (434, 472)
+++=== size: (136, 133), score: 14.059479553903346

k_cur = 1,  optimal density: 14.059479553903346, compare: 23.6914350087886

optimal k: 2, density: 14.059479553903346


In [49]:
# Largest row index among the SVD candidate rows (`cans[0]`) kept for the best
# component found by the search.
max(cans[0])

3875

In [45]:
# Translate the matrix indices of the selected block back into graph node
# labels: orgnds[0] holds the row indices, orgnds[1] the column indices.
us = list(map(G_nodes.__getitem__, orgnds[0]))
up = list(map(G_nodes.__getitem__, orgnds[1]))

In [46]:
# Display the node labels on the column side of the selected block.
up

['p1',
 'p11',
 'p35',
 'p44',
 'p10',
 'p89',
 'p20',
 'p87',
 'p142',
 'p156',
 'p9',
 'p22',
 'p15',
 'p255',
 'p4',
 'p113',
 'p249',
 'p96',
 'p118',
 'p146',
 'p154',
 'p158',
 'p115',
 'p174',
 'p124',
 'p95',
 'p116',
 'p491',
 'p67',
 'p29',
 'p123',
 'p117',
 'p7603',
 'p75',
 'p72',
 'p90',
 'p155',
 'p42',
 'p2',
 'p37',
 'p40',
 'p185',
 'p188',
 'p31',
 'p8',
 'p47',
 'p17',
 'p145',
 'p107',
 'p24',
 'p7',
 'p66',
 'p21',
 'p14',
 'p13',
 'p19',
 'p51',
 'p77',
 'p133',
 'p70',
 'p211',
 'p272',
 'p140',
 'p106',
 'p43',
 'p5',
 'p111',
 'p16',
 'p5342',
 'p85',
 'p3',
 'p219',
 'p93',
 'p25',
 'p52',
 'p97',
 'p83',
 'p39',
 'p65',
 'p7552',
 'p798',
 'p92',
 'p6',
 'p207',
 'p103',
 'p34',
 'p56',
 'p120',
 'p32',
 'p105',
 'p197',
 'p36',
 'p30',
 'p136',
 'p7595',
 'p88',
 'p163',
 'p244',
 'p295',
 'p12',
 'p203',
 'p288',
 'p49',
 'p2336',
 'p205',
 'p27',
 'p135',
 'p173',
 'p125',
 'p266',
 'p100',
 'p53',
 'p62',
 'p172',
 'p7604',
 'p159',
 'p177',
 'p26',
 'p5