In [2]:
import ujson as json
import pandas as pd

data_file = 'graphnodes_edgecase_node_9.ndjson'

# Newline-delimited JSON: one document per line. The context manager closes
# the file handle (the previous bare open() leaked it), and the records are
# materialised inside the block because the file is closed on exit.
with open(data_file, encoding="utf-8") as fh:
    # Blank lines (e.g. a trailing newline at EOF) are skipped rather than
    # handed to the JSON parser, which would raise on them.
    records = [json.loads(line) for line in fh if line.strip()]
df = pd.DataFrame.from_records(records)

In [3]:
# Explode the per-node "node" column into one row per (node, neighbor) pair.
# Each element of df["node"] is iterated as a sequence of dicts whose items
# are read as neighbor -> direction.
neighbor_nodes = [
    {"node_id": node_id, "neighbor_nodes": neighbor, "direction": direction}
    for node_id, neighbor_entries in zip(df["node_id"], df["node"])
    for entry in neighbor_entries
    for neighbor, direction in entry.items()
]
df_1 = pd.DataFrame(neighbor_nodes)

In [4]:
# Flatten the per-node transaction lists into one value per row of df_1.
# NOTE(review): this relies on the flattened lists having exactly as many
# entries as df_1 has rows, in the same order -- confirm against the data.
transaction_list = [val for vals in df["transaction_val"] for val in vals]
transaction_time = [stamp for stamps in df["transaction_time"] for stamp in stamps]
df_1["transaction_val"] = transaction_list
df_1["transaction_time"] = transaction_time

# Pull the two label entries out of the "labels" mapping into their own columns.
for label_name in ("FA_1_case_1", "FA_1_case_2"):
    df[label_name] = df["labels"].apply(lambda x, key=label_name: x[key])

# Attach the per-node labels to every transaction row of that node.
label_columns = df[["node_id", "FA_1_case_1", "FA_1_case_2"]]
df_final = pd.merge(df_1, label_columns, on="node_id", how="left")

In [5]:
# Amount threshold shared by the per-transaction filter and the group total.
FA1_AMOUNT_THRESHOLD = 10000

# Drop (node_id, direction) groups that consist of a single transaction.
trans_counts = (
    df_final.groupby(["node_id", "direction"])["neighbor_nodes"]
    .count()
    .reset_index(name="n_trans")
)
tmp = trans_counts.loc[trans_counts["n_trans"] != 1, ["node_id", "direction"]]

# Create the df_mask dataframe.
# mask_1_case_1 is True when the row is an "out" relation.
# mask_1_case_2 is True when the row is an "in" relation.
df_mask = pd.merge(tmp, df_final, on=["node_id", "direction"], how="left")
df_mask["mask_1_case_1"] = df_mask["direction"] == "out"
df_mask["mask_1_case_2"] = df_mask["direction"] == "in"

# Keep only individual transactions strictly below the threshold.
# NOTE(review): the row filter uses `<` and the group total below uses `>`,
# so values exactly equal to the threshold are excluded in both places --
# confirm this is intended.
df_mask = df_mask[df_mask["transaction_val"] < FA1_AMOUNT_THRESHOLD]

# Total remaining amount per (node_id, direction). The column is selected
# before aggregating so that unrelated (string/bool) columns are not summed.
tmp_2 = (
    df_mask.groupby(["node_id", "direction"])["transaction_val"]
    .sum()
    .reset_index()
)
tmp_2["threshold_tot_trans_val"] = tmp_2["transaction_val"] > FA1_AMOUNT_THRESHOLD
df_mask = pd.merge(
    df_mask,
    tmp_2.drop(columns=["transaction_val"]),
    on=["node_id", "direction"],
    how="left",
)

# mask_2_case_1: "out" rows whose group total exceeds the threshold.
# mask_2_case_2: "in" rows whose group total exceeds the threshold.
df_mask["mask_2_case_1"] = df_mask["mask_1_case_1"] & df_mask["threshold_tot_trans_val"]
df_mask["mask_2_case_2"] = df_mask["mask_1_case_2"] & df_mask["threshold_tot_trans_val"]

In [6]:
def amount_time_check_FA1(df_node, start_index, amount_threshold=10000, time_window=172800):
    """Report whether some run of consecutive transactions reaches the amount
    threshold inside a single time window.

    Rows of ``df_node`` from ``start_index`` onwards are scanned with a
    sliding window: for each row ``i`` the window holds every earlier row whose
    ``transaction_time`` is less than ``time_window`` before row ``i``'s. The
    function returns True as soon as the ``transaction_val`` total of such a
    window is at least ``amount_threshold``.

    The previous recursive version kept looping after a restart with a stale
    running total (false positives), only ever restarted the window at the row
    that crossed the threshold (missed windows), and could overwrite an earlier
    True result.

    Parameters
    ----------
    df_node : pandas.DataFrame
        Must provide ``transaction_val`` and ``transaction_time`` columns.
        Rows are expected in ascending ``transaction_time`` order; the function
        does not sort. Amounts are assumed non-negative -- TODO confirm.
    start_index : int
        Positional index of the first row to consider.
    amount_threshold : number, optional
        Minimum window total that triggers the flag (default 10000).
    time_window : number, optional
        Exclusive upper bound on the time span of a window, in the same unit
        as ``transaction_time`` (default 172800). Must be positive.

    Returns
    -------
    bool
        True if a qualifying window exists, otherwise False.

    Raises
    ------
    ValueError
        If ``time_window`` is not positive.
    """
    if time_window <= 0:
        raise ValueError("time_window must be positive")

    amounts = df_node['transaction_val'].values
    times = df_node['transaction_time'].values

    window_start = start_index
    window_total = 0
    for i in range(start_index, len(amounts)):
        window_total += amounts[i]
        # Evict rows that are too old to share a window with row i. The left
        # edge only ever moves forward because times are ascending.
        while window_start < i and times[i] - times[window_start] >= time_window:
            window_total -= amounts[window_start]
            window_start += 1
        if window_total >= amount_threshold:
            return True
    return False

In [7]:
def flagger_FA1(df_candidates):
    """Compute one flag per distinct node in ``df_candidates``.

    For every distinct ``node_id`` the node's rows are extracted, sorted by
    ``transaction_time``, re-indexed from 0 and passed to
    ``amount_time_check_FA1`` starting at row 0.

    Fixes relative to the previous implementation:
    * the result of ``sort_values`` was discarded, so rows were never sorted;
    * the "same node as previous row" test compared against ``int(prev)``,
      which never matches string ids (every row was recomputed), raises for
      non-numeric ids, and skipped the first node entirely for integer ids.

    Parameters
    ----------
    df_candidates : pandas.DataFrame
        Must provide ``node_id`` and ``transaction_time`` columns, plus
        whatever ``amount_time_check_FA1`` reads.

    Returns
    -------
    dict
        Maps each distinct ``node_id`` (in order of first appearance) to the
        value returned by ``amount_time_check_FA1`` for that node.
    """
    node_flags = {}
    # drop_duplicates keeps first-appearance order and visits each node once,
    # regardless of whether its rows are contiguous.
    for node in df_candidates['node_id'].drop_duplicates():
        df_node = (
            df_candidates[df_candidates['node_id'] == node]
            .sort_values(by='transaction_time')
            .reset_index(drop=True)
        )
        node_flags[node] = amount_time_check_FA1(df_node, 0)
    return node_flags

In [8]:
# Rows selected by each case's second-stage mask column of df_mask.
case1_candidates = df_mask.loc[df_mask['mask_2_case_1']]
case2_candidates = df_mask.loc[df_mask['mask_2_case_2']]

In [11]:
# Run the flagger once per candidate set, case 1 first, then case 2.
case1_flags, case2_flags = (
    flagger_FA1(candidates)
    for candidates in (case1_candidates, case2_candidates)
)

In [12]:
case1_flags

{'0': True, '6': False}

In [13]:
case2_flags

{'11': True, '9': True}

In [14]:
# Attach the per-node flags to every row of df_final. Nodes that have no entry
# in the corresponding flag dict get False. A single vectorised lookup per
# column replaces the former row-by-row loop, which re-assigned a full-frame
# mask once for every row.
df_final['Case1_Flag'] = (
    df_final['node_id'].map(lambda node: case1_flags.get(node, False)).astype(bool)
)
df_final['Case2_Flag'] = (
    df_final['node_id'].map(lambda node: case2_flags.get(node, False)).astype(bool)
)

In [15]:
df_final

Unnamed: 0,node_id,neighbor_nodes,direction,transaction_val,transaction_time,FA_1_case_1,FA_1_case_2,Case1_Flag,Case2_Flag
0,0,1,out,9000,1655244000,True,False,True,False
1,0,2,out,5000,1655262000,True,False,True,False
2,0,3,out,6000,1655413200,True,False,True,False
3,1,0,in,9000,1655244000,False,False,False,False
4,1,4,out,15000,1655330400,False,False,False,False
5,1,5,out,1000,1655416800,False,False,False,False
6,2,0,in,5000,1655262000,False,False,False,False
7,3,0,in,6000,1655413200,False,False,False,False
8,3,6,out,5000,1655503200,False,False,False,False
9,4,1,in,15000,1655330400,False,False,False,False
