In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
# Set display option to show all columns
pd.set_option('display.max_columns', None)

# 1. Data Loading

## 1.1 Read CSV

In [2]:
df = pd.read_csv('SAML-D.csv')

In [3]:
df.head()

Unnamed: 0,Time,Date,Sender_account,Receiver_account,Amount,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type
0,10:35:19,2022-10-07,8724731955,2769355426,1459.15,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits
1,10:35:20,2022-10-07,1491989064,8401255335,6019.64,UK pounds,Dirham,UK,UAE,Cross-border,0,Normal_Fan_Out
2,10:35:20,2022-10-07,287305149,4404767002,14328.44,UK pounds,UK pounds,UK,UK,Cheque,0,Normal_Small_Fan_Out
3,10:35:21,2022-10-07,5376652437,9600420220,11895.0,UK pounds,UK pounds,UK,UK,ACH,0,Normal_Fan_In
4,10:35:21,2022-10-07,9614186178,3803336972,115.25,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9504852 entries, 0 to 9504851
Data columns (total 12 columns):
 #   Column                  Dtype  
---  ------                  -----  
 0   Time                    object 
 1   Date                    object 
 2   Sender_account          int64  
 3   Receiver_account        int64  
 4   Amount                  float64
 5   Payment_currency        object 
 6   Received_currency       object 
 7   Sender_bank_location    object 
 8   Receiver_bank_location  object 
 9   Payment_type            object 
 10  Is_laundering           int64  
 11  Laundering_type         object 
dtypes: float64(1), int64(3), object(8)
memory usage: 870.2+ MB


## 1.2 Data Checking

# 2. Data Cleaning

## 2.1 Handle Null Values

In [5]:
df.isna().sum()

Time                      0
Date                      0
Sender_account            0
Receiver_account          0
Amount                    0
Payment_currency          0
Received_currency         0
Sender_bank_location      0
Receiver_bank_location    0
Payment_type              0
Is_laundering             0
Laundering_type           0
dtype: int64

# 3. Feature Engineering

## 3.1 Create A New Column Called 'Different_Currency'

In [6]:
def is_different_currency(Payment_currency, Received_currency):
    """Return "Yes" when the two currency values differ, otherwise "No"."""
    return "No" if Payment_currency == Received_currency else "Yes"

# Flag rows whose payment and received currencies differ ("Yes") or match ("No").
# This is the column-wise equivalent of is_different_currency: one vectorised
# comparison instead of one Python function call per row.
df['Different_Currency'] = np.where(
    df['Payment_currency'] == df['Received_currency'], 'No', 'Yes'
)

In [7]:
df.head()

Unnamed: 0,Time,Date,Sender_account,Receiver_account,Amount,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type,Different_Currency
0,10:35:19,2022-10-07,8724731955,2769355426,1459.15,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits,No
1,10:35:20,2022-10-07,1491989064,8401255335,6019.64,UK pounds,Dirham,UK,UAE,Cross-border,0,Normal_Fan_Out,Yes
2,10:35:20,2022-10-07,287305149,4404767002,14328.44,UK pounds,UK pounds,UK,UK,Cheque,0,Normal_Small_Fan_Out,No
3,10:35:21,2022-10-07,5376652437,9600420220,11895.0,UK pounds,UK pounds,UK,UK,ACH,0,Normal_Fan_In,No
4,10:35:21,2022-10-07,9614186178,3803336972,115.25,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits,No


## 3.2 Create a New Column Called 'Amount_USD'

In [8]:
df['Payment_currency'].unique()

array(['UK pounds', 'Indian rupee', 'Albanian lek', 'Swiss franc',
       'Pakistani rupee', 'Naira', 'Yen', 'Euro', 'Dirham',
       'Mexican Peso', 'Turkish lira', 'US dollar', 'Moroccan dirham'],
      dtype=object)

In [9]:
# USD value of one unit of each payment currency.
# Exchange rate around 12 FEB 2024
USD_EXCHANGE_RATES = {
    'UK pounds': 1.2636,
    'Indian rupee': 0.012,
    'Albanian lek': 0.0104,
    'Swiss franc': 1.1441,
    'Pakistani rupee': 0.0036,
    'Naira': 0.00069,
    'Yen': 0.0067,
    'Euro': 1.0796,
    'Dirham': 0.2723,
    'Mexican Peso': 0.0586,
    'Turkish lira': 0.0326,
    'US dollar': 1,
    'Moroccan dirham': 0.1,
}


def get_us_dollar(Amount, Payment_currency):
    """Convert an amount in the given payment currency to US dollars.

    Parameters
    ----------
    Amount : number
        Amount expressed in ``Payment_currency``.
    Payment_currency : str
        Currency name; must be a key of ``USD_EXCHANGE_RATES``.

    Returns
    -------
    number
        ``Amount`` multiplied by the fixed rate for ``Payment_currency``.

    Raises
    ------
    KeyError
        If ``Payment_currency`` has no configured rate.
    """
    # The rate table lives at module level so it is built once rather than
    # on every call (this function is applied to every row of the dataset).
    return Amount * USD_EXCHANGE_RATES[Payment_currency]

# Convert every amount to USD. The rate is looked up once per distinct
# currency (get_us_dollar(1.0, c) is the USD value of one unit of c) and then
# applied with a single column-wise multiply, instead of calling the Python
# function once per row. A currency without a configured rate still raises
# KeyError from get_us_dollar.
usd_rate_by_currency = {
    currency: get_us_dollar(1.0, currency)
    for currency in df['Payment_currency'].unique()
}
df['Amount_USD'] = df['Amount'] * df['Payment_currency'].map(usd_rate_by_currency)


In [10]:
df.head()

Unnamed: 0,Time,Date,Sender_account,Receiver_account,Amount,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type,Different_Currency,Amount_USD
0,10:35:19,2022-10-07,8724731955,2769355426,1459.15,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits,No,1843.78194
1,10:35:20,2022-10-07,1491989064,8401255335,6019.64,UK pounds,Dirham,UK,UAE,Cross-border,0,Normal_Fan_Out,Yes,7606.417104
2,10:35:20,2022-10-07,287305149,4404767002,14328.44,UK pounds,UK pounds,UK,UK,Cheque,0,Normal_Small_Fan_Out,No,18105.416784
3,10:35:21,2022-10-07,5376652437,9600420220,11895.0,UK pounds,UK pounds,UK,UK,ACH,0,Normal_Fan_In,No,15030.522
4,10:35:21,2022-10-07,9614186178,3803336972,115.25,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits,No,145.6299


## 3.3 Create New Columns Called 'Year', 'Month', 'Day'

In [11]:
# 'Date' is a 'YYYY-MM-DD' string: split it on '-' and store the three
# pieces as integer columns.
df[['Year', 'Month', 'Day']] = (
    df['Date']
    .str.split('-', expand=True)
    .astype(int)
)

In [12]:
df.head()

Unnamed: 0,Time,Date,Sender_account,Receiver_account,Amount,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type,Different_Currency,Amount_USD,Year,Month,Day
0,10:35:19,2022-10-07,8724731955,2769355426,1459.15,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits,No,1843.78194,2022,10,7
1,10:35:20,2022-10-07,1491989064,8401255335,6019.64,UK pounds,Dirham,UK,UAE,Cross-border,0,Normal_Fan_Out,Yes,7606.417104,2022,10,7
2,10:35:20,2022-10-07,287305149,4404767002,14328.44,UK pounds,UK pounds,UK,UK,Cheque,0,Normal_Small_Fan_Out,No,18105.416784,2022,10,7
3,10:35:21,2022-10-07,5376652437,9600420220,11895.0,UK pounds,UK pounds,UK,UK,ACH,0,Normal_Fan_In,No,15030.522,2022,10,7
4,10:35:21,2022-10-07,9614186178,3803336972,115.25,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits,No,145.6299,2022,10,7


## 3.4 Create New Columns Called 'Hour', 'Minute'

In [13]:
# 'Time' holds 'HH:MM:SS' strings; split once and keep the pieces as lists.
hour_minute = df['Time'].str.split(':')

# Take the first two pieces (hour, minute) and store them as integers.
# The seconds piece is not extracted.
df['Hour'] = hour_minute.str[0].astype(int)
df['Minute'] = hour_minute.str[1].astype(int)

In [14]:
df.head()

Unnamed: 0,Time,Date,Sender_account,Receiver_account,Amount,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type,Different_Currency,Amount_USD,Year,Month,Day,Hour,Minute
0,10:35:19,2022-10-07,8724731955,2769355426,1459.15,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits,No,1843.78194,2022,10,7,10,35
1,10:35:20,2022-10-07,1491989064,8401255335,6019.64,UK pounds,Dirham,UK,UAE,Cross-border,0,Normal_Fan_Out,Yes,7606.417104,2022,10,7,10,35
2,10:35:20,2022-10-07,287305149,4404767002,14328.44,UK pounds,UK pounds,UK,UK,Cheque,0,Normal_Small_Fan_Out,No,18105.416784,2022,10,7,10,35
3,10:35:21,2022-10-07,5376652437,9600420220,11895.0,UK pounds,UK pounds,UK,UK,ACH,0,Normal_Fan_In,No,15030.522,2022,10,7,10,35
4,10:35:21,2022-10-07,9614186178,3803336972,115.25,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits,No,145.6299,2022,10,7,10,35


In [15]:
# Rearrange columns: time parts, date parts, accounts, amounts, currencies,
# bank locations, then payment type and labels.
df = df[[
    'Time', 'Hour', 'Minute',
    'Date', 'Year', 'Month', 'Day',
    'Sender_account', 'Receiver_account',
    'Amount', 'Amount_USD',
    'Payment_currency', 'Received_currency',
    'Sender_bank_location', 'Receiver_bank_location',
    'Payment_type', 'Is_laundering', 'Laundering_type', 'Different_Currency',
]]

In [16]:
df.head()

Unnamed: 0,Time,Hour,Minute,Date,Year,Month,Day,Sender_account,Receiver_account,Amount,Amount_USD,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type,Different_Currency
0,10:35:19,10,35,2022-10-07,2022,10,7,8724731955,2769355426,1459.15,1843.78194,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits,No
1,10:35:20,10,35,2022-10-07,2022,10,7,1491989064,8401255335,6019.64,7606.417104,UK pounds,Dirham,UK,UAE,Cross-border,0,Normal_Fan_Out,Yes
2,10:35:20,10,35,2022-10-07,2022,10,7,287305149,4404767002,14328.44,18105.416784,UK pounds,UK pounds,UK,UK,Cheque,0,Normal_Small_Fan_Out,No
3,10:35:21,10,35,2022-10-07,2022,10,7,5376652437,9600420220,11895.0,15030.522,UK pounds,UK pounds,UK,UK,ACH,0,Normal_Fan_In,No
4,10:35:21,10,35,2022-10-07,2022,10,7,9614186178,3803336972,115.25,145.6299,UK pounds,UK pounds,UK,UK,Cash Deposit,0,Normal_Cash_Deposits,No


## 3.5 Create a Column Called 'Transaction_id'

In [17]:
import pandas as pd

# Build a 'T'-prefixed identifier from the row index plus one (index 0 -> 'T1').
# The IDs are derived from df.index, so they are unique only while that index is unique.
df['Transaction_id'] = 'T' + (df.index + 1).astype(str)

In [18]:
# Rearrange columns so that 'Transaction_id' comes first.
df = df[[
    'Transaction_id',
    'Time', 'Hour', 'Minute',
    'Date', 'Year', 'Month', 'Day',
    'Sender_account', 'Receiver_account',
    'Amount', 'Amount_USD',
    'Payment_currency', 'Received_currency',
    'Sender_bank_location', 'Receiver_bank_location',
    'Payment_type', 'Is_laundering', 'Laundering_type', 'Different_Currency',
]]

In [19]:
df.tail()

Unnamed: 0,Transaction_id,Time,Hour,Minute,Date,Year,Month,Day,Sender_account,Receiver_account,Amount,Amount_USD,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type,Different_Currency
9504847,T9504848,10:57:01,10,57,2023-08-23,2023,8,23,2453933570,519744068,2247.25,2839.6251,UK pounds,UK pounds,UK,UK,ACH,0,Normal_Small_Fan_Out,No
9504848,T9504849,10:57:06,10,57,2023-08-23,2023,8,23,9805510177,5416607878,927.18,1171.584648,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Small_Fan_Out,No
9504849,T9504850,10:57:06,10,57,2023-08-23,2023,8,23,7282330957,2995527149,1455.14,1838.714904,UK pounds,UK pounds,UK,UK,ACH,0,Normal_Small_Fan_Out,No
9504850,T9504851,10:57:11,10,57,2023-08-23,2023,8,23,940337377,4812815165,25995.7,32848.16652,UK pounds,UK pounds,UK,UK,ACH,0,Normal_Fan_In,No
9504851,T9504852,10:57:12,10,57,2023-08-23,2023,8,23,105185176,6824994831,9586.08,12112.970688,UK pounds,UK pounds,UK,UK,ACH,0,Normal_Fan_Out,No


## 3.6 Create Column 'Last_transaction_time_elapsed_minutes'

In [20]:
# Join 'Date' ('YYYY-MM-DD') and 'Time' ('HH:MM:SS') into one string and parse
# it as a datetime. The format is given explicitly so that parsing does not
# depend on per-column format inference, and so that a value which does not
# match raises instead of being guessed at.
df['Timestamp'] = pd.to_datetime(
    df['Date'] + ' ' + df['Time'],
    format='%Y-%m-%d %H:%M:%S',
)

In [21]:
# Rearrange columns so that 'Timestamp' follows 'Transaction_id'.
df = df[[
    'Transaction_id', 'Timestamp',
    'Time', 'Hour', 'Minute',
    'Date', 'Year', 'Month', 'Day',
    'Sender_account', 'Receiver_account',
    'Amount', 'Amount_USD',
    'Payment_currency', 'Received_currency',
    'Sender_bank_location', 'Receiver_bank_location',
    'Payment_type', 'Is_laundering', 'Laundering_type', 'Different_Currency',
]]

In [22]:
# Order rows by sender, then chronologically within each sender, so that
# consecutive rows of one account are consecutive transactions in time.
df = df.sort_values(['Sender_account', 'Timestamp'])

In [23]:
import pandas as pd

# Minutes elapsed since the same sender's previous transaction.
# Requires df to be sorted by 'Sender_account' and 'Timestamp' already.
# diff() yields NaN for each sender's first transaction; those are filled with 0,
# so 0 covers both "no earlier transaction" and "same timestamp as the previous one".
df['Last_transaction_time_elapsed_minutes'] = (
    df.groupby('Sender_account')['Timestamp']
    .diff()
    .dt.total_seconds()
    .div(60)
    .fillna(0)
)

In [24]:
# Rearrange columns so that the elapsed-time feature follows 'Timestamp'.
df = df[[
    'Transaction_id', 'Timestamp', 'Last_transaction_time_elapsed_minutes',
    'Time', 'Hour', 'Minute',
    'Date', 'Year', 'Month', 'Day',
    'Sender_account', 'Receiver_account',
    'Amount', 'Amount_USD',
    'Payment_currency', 'Received_currency',
    'Sender_bank_location', 'Receiver_bank_location',
    'Payment_type', 'Is_laundering', 'Laundering_type', 'Different_Currency',
]]

In [25]:
df.head()

Unnamed: 0,Transaction_id,Timestamp,Last_transaction_time_elapsed_minutes,Time,Hour,Minute,Date,Year,Month,Day,Sender_account,Receiver_account,Amount,Amount_USD,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type,Different_Currency
8572082,T8572083,2023-07-22 09:51:28,0.0,09:51:28,9,51,2023-07-22,2023,7,22,9018,2388293593,3319.06,4193.964216,UK pounds,Euro,UK,Germany,Cross-border,0,Normal_Foward,Yes
3210514,T3210515,2023-01-24 23:28:15,0.0,23:28:15,23,28,2023-01-24,2023,1,24,28511,3072405466,6371.25,8050.7115,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No
4191567,T4191568,2023-02-24 23:31:38,44643.383333,23:31:38,23,31,2023-02-24,2023,2,24,28511,3072405466,3878.0,4900.2408,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No
5018226,T5018227,2023-03-24 20:51:59,40160.35,20:51:59,20,51,2023-03-24,2023,3,24,28511,3072405466,4109.92,5193.294912,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No
5938108,T5938109,2023-04-24 19:38:10,44566.183333,19:38:10,19,38,2023-04-24,2023,4,24,28511,3072405466,7147.58,9031.682088,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No


In [26]:
# Display only the rows labelled as laundering (Is_laundering equal to 1).
mask = df['Is_laundering'].eq(1)
df[mask]

Unnamed: 0,Transaction_id,Timestamp,Last_transaction_time_elapsed_minutes,Time,Hour,Minute,Date,Year,Month,Day,Sender_account,Receiver_account,Amount,Amount_USD,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type,Different_Currency
3997063,T3997064,2023-02-18 23:23:58,723.800000,23:23:58,23,23,2023-02-18,2023,2,18,92172,6611372120,10690.147860,111.177538,Albanian lek,UK pounds,Albania,UK,Cross-border,1,Layered_Fan_Out,Yes
4046983,T4046984,2023-02-20 15:12:06,919.566667,15:12:06,15,12,2023-02-20,2023,2,20,92172,9886773415,10163.542406,105.700841,Albanian lek,UK pounds,Albania,UK,Cross-border,1,Layered_Fan_Out,Yes
2700829,T2700830,2023-01-07 16:22:38,14572.983333,16:22:38,16,22,2023-01-07,2023,1,7,155434,7748494288,7605.210000,9609.943356,UK pounds,US dollar,UK,USA,Cross-border,1,Structuring,Yes
706710,T706711,2022-10-31 22:39:39,1779.250000,22:39:39,22,39,2022-10-31,2022,10,31,1021972,3575435324,7988.530000,10094.306508,UK pounds,Euro,UK,Pakistan,Cross-border,1,Deposit-Send,Yes
1711243,T1711244,2022-12-04 18:12:07,2687.300000,18:12:07,18,12,2022-12-04,2022,12,4,1021972,2433989556,20821.230000,26309.706228,UK pounds,UK pounds,UK,UK,Debit card,1,Deposit-Send,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7130349,T7130350,2023-06-03 22:42:38,694.300000,22:42:38,22,42,2023-06-03,2023,6,3,9990989594,7286271407,23081.230000,29165.442228,UK pounds,UK pounds,UK,UK,ACH,1,Scatter-Gather,No
1810913,T1810914,2022-12-08 08:10:29,26117.666667,08:10:29,8,10,2022-12-08,2022,12,8,9992249143,5980835605,4477.640000,5657.945904,UK pounds,UK pounds,UK,UK,Credit card,1,Cycle,No
5982166,T5982167,2023-04-26 08:32:50,1395.883333,08:32:50,8,32,2023-04-26,2023,4,26,9992526432,9254521765,4004.440000,5060.010384,UK pounds,UK pounds,UK,UK,Cash Deposit,1,Deposit-Send,No
8492195,T8492196,2023-07-19 16:02:26,1468.166667,16:02:26,16,2,2023-07-19,2023,7,19,9994647302,5336032152,4500.120000,5686.351632,UK pounds,Turkish lira,UK,Turkey,Cross-border,1,Structuring,Yes


## 3.7 Finally Save DataFrame as feather

In [27]:
import pandas as pd

# Write the engineered DataFrame `df` to a Feather file (not a pickle),
# so it can be reloaded without repeating the steps above.
df.to_feather('df.feather')

In [28]:
# Reload the engineered DataFrame from the Feather file 'df.feather'
df = pd.read_feather('df.feather')

In [29]:
df.head()

Unnamed: 0,Transaction_id,Timestamp,Last_transaction_time_elapsed_minutes,Time,Hour,Minute,Date,Year,Month,Day,Sender_account,Receiver_account,Amount,Amount_USD,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type,Different_Currency
8572082,T8572083,2023-07-22 09:51:28,0.0,09:51:28,9,51,2023-07-22,2023,7,22,9018,2388293593,3319.06,4193.964216,UK pounds,Euro,UK,Germany,Cross-border,0,Normal_Foward,Yes
3210514,T3210515,2023-01-24 23:28:15,0.0,23:28:15,23,28,2023-01-24,2023,1,24,28511,3072405466,6371.25,8050.7115,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No
4191567,T4191568,2023-02-24 23:31:38,44643.383333,23:31:38,23,31,2023-02-24,2023,2,24,28511,3072405466,3878.0,4900.2408,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No
5018226,T5018227,2023-03-24 20:51:59,40160.35,20:51:59,20,51,2023-03-24,2023,3,24,28511,3072405466,4109.92,5193.294912,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No
5938108,T5938109,2023-04-24 19:38:10,44566.183333,19:38:10,19,38,2023-04-24,2023,4,24,28511,3072405466,7147.58,9031.682088,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No


# 4. Graph Construction

## 4.1 Create all unique account nodes

In [71]:
launder_graph = nx.DiGraph()

In [72]:
all_account = pd.concat([df['Sender_account'], df['Receiver_account']])
print(f"Total Unique Account: {all_account.nunique()}")

Total Unique Account: 855460


In [73]:
# Register every account as a node. all_account repeats each account once per
# transaction it takes part in, so it is de-duplicated first: unique() keeps
# first-seen order and tolist() yields plain Python ints, which gives the same
# nodes in the same order as adding the raw Series.
launder_graph.add_nodes_from(all_account.unique().tolist())
# Release the concatenated Series; it is no longer needed.
del all_account

In [74]:
print(f"Number of nodes: {len(launder_graph.nodes())}")

Number of nodes: 855460


## 4.2 Create Transaction Edges

In [75]:
df.columns

Index(['Transaction_id', 'Time', 'Hour', 'Minute', 'Date', 'Year', 'Month',
       'Day', 'Sender_account', 'Receiver_account', 'Amount', 'Amount_USD',
       'Payment_currency', 'Received_currency', 'Sender_bank_location',
       'Receiver_bank_location', 'Payment_type', 'Is_laundering',
       'Laundering_type', 'Different_Currency'],
      dtype='object')

In [76]:
# Collapse the transaction table into one directed edge per
# (sender, receiver) pair and accumulate per-pair totals on that edge:
#   Transaction_ID      list of the transaction ids on this pair
#   Total_Amount_USD    sum of Amount_USD over those transactions
#   Total_Transactions  number of those transactions
# NOTE: this cell is not idempotent. Re-running it without recreating
# launder_graph adds every transaction to the existing totals again.
edge_columns = ['Transaction_id', 'Sender_account', 'Receiver_account', 'Amount_USD']

# itertuples over only the needed columns yields plain tuples; iterrows would
# build a full pandas Series for each row.
for transaction_id, source_node, target_node, amount_usd in df[edge_columns].itertuples(index=False, name=None):
    if launder_graph.has_edge(source_node, target_node):
        # Edge already exists: fold this transaction into its running totals.
        edge_data = launder_graph.edges[source_node, target_node]
        edge_data['Transaction_ID'].append(transaction_id)
        edge_data['Total_Amount_USD'] += amount_usd
        edge_data['Total_Transactions'] += 1
    else:
        # First transaction seen for this ordered pair: create the edge.
        launder_graph.add_edge(source_node, target_node, relationship="PERFORM_TRANSACTION",
                               Transaction_ID=[transaction_id],
                               Sender_Account=source_node,
                               Receiver_Account=target_node,
                               Total_Amount_USD=amount_usd,
                               Total_Transactions=1,
                               )



In [77]:
len(launder_graph.edges())

887497

In [78]:
import networkx as nx

import pickle

# Save the graph as a pickle file.
# nx.write_gpickle was deprecated in NetworkX 2.6 and removed in 3.0, so the
# standard-library pickle module is used directly. This is also the format
# that the pickle.load call which reloads the graph expects.
with open("launder_graph.pickle", "wb") as graph_file:
    pickle.dump(launder_graph, graph_file, pickle.HIGHEST_PROTOCOL)

In [2]:
import pickle

# Restore the transaction graph from its pickle file.
# NOTE(review): unpickling can execute arbitrary code; only load a file you created yourself.
with open("launder_graph.pickle", "rb") as graph_file:
    launder_graph = pickle.load(graph_file)

# 5. Graph Features Extraction

In [3]:
def get_subgraph(graph, target_node, radius):
    """Return the subgraph induced by all nodes within ``radius`` hops of a node.

    Hops follow edges in either direction (``nx.all_neighbors``), so both
    predecessors and successors are collected.

    Parameters
    ----------
    graph : networkx graph
        Graph to extract from.
    target_node : node
        Centre of the neighbourhood; it is always part of the result.
    radius : int
        Maximum number of hops. Any value of 1 or less yields the 1-hop
        neighbourhood.

    Returns
    -------
    networkx subgraph
        ``graph.subgraph(...)`` over the collected nodes.
    """
    # First hop: the target plus everything directly connected to it.
    neighbors = {target_node}
    neighbors.update(nx.all_neighbors(graph, target_node))

    if radius > 1:
        # Only nodes discovered in the previous hop can reach new nodes, so
        # expand from that frontier rather than rescanning every collected node.
        frontier = neighbors - {target_node}
        for _ in range(radius - 1):
            if not frontier:
                # Nothing new was reached; further hops cannot add nodes.
                break
            next_hop_neighbors = set()
            for node in frontier:
                next_hop_neighbors.update(nx.all_neighbors(graph, node))
            frontier = next_hop_neighbors - neighbors
            neighbors |= frontier

    return graph.subgraph(neighbors)

def get_ego_graph(graph, target_node):
    """Return the subgraph induced by ``target_node`` and its direct neighbours.

    Neighbours are taken in either edge direction (``nx.all_neighbors``).
    """
    ego_nodes = {target_node}
    ego_nodes.update(nx.all_neighbors(graph, target_node))
    return graph.subgraph(ego_nodes)

def get_egored_subgraph(graph, target_node):
    """Return the reduced ego subgraph of ``target_node``.

    Starts from the direct neighbours of ``target_node`` (either edge
    direction) and drops every neighbour whose ``graph.degree`` is exactly 1.
    The target node itself is always kept.
    """
    # Keep only neighbours whose degree in `graph` is not exactly 1.
    kept_nodes = {
        neighbor
        for neighbor in nx.all_neighbors(graph, target_node)
        if graph.degree(neighbor) != 1
    }
    kept_nodes.add(target_node)
    return graph.subgraph(kept_nodes)
    

def get_graph_features(graph, account_node, cycle_subgraph_radius=20, verbose=True, show_graph=True):
    '''
    Part 1: Egonet Features:
    f_degree_in’: indegree of the node in egonet subgraph

    ‘f_degree_out’: outdegree of the node in egonet subgraph
    
    ‘f_amount_in’: total amount received by node from neighbors in egonet subgraph
    
    ‘f_amount_out’: total amount sent by node to neighbors in egonet subgraph
    
    ‘f_nr_trans_in’: number of transactions to the node in egonet subgraph
    
    ‘f_nr_trans_out’: number of transactions from the node in egonet subgraph
    
    ‘f_ego_nr_nodes’: number of nodes in the egonet
    
    ‘f_ego_nr_edges’: number of edges in the egonet

    'f_average_amount_in’: amount-in divided by the number of transactions in egonet subgraph

    ‘f_average_amount_out’: same, for amount-out

    ‘f_ego_edge_density’: number of egonet edges divided by number of egonet nodes


    Part 2: Egored Features:

    ‘f_egored_degree_in’

    ‘f_egored_degree_out’
    
    ‘f_egored_amount_in’
    
    ‘f_egored_amount_out’
    
    ‘f_egored_nr_trans_in’
    
    ‘f_egored_nr_trans_out’
    
    ‘f_egored_nr_nodes’
    
    ‘f_egored_nr_edges’

    ‘f_egored_average_amount_in’: same as above, but in egored context

    ‘f_egored_average_amount_out’:

    ‘f_egored_edge_density’: same, for egored
    
    ‘f_egored_degree_in_rel’: egored indegree divided by indegree
    
    ‘f_egored_degree_out_rel’: same, for outdegree
    
    ‘f_egored_amount_in_rel’: egored amount-in divided by amount-in
    
    ‘f_egored_amount_out_rel’: same, for amount-out
    
    ‘f_egored_average_amount_in_rel’: egored average amount-in divided by average amount-in
    
    ‘f_egored_average_amount_out_rel’: same, for amount-out
    
    ‘f_egored_nr_nodes_rel’: number of nodes in egored divided by number of nodes in egonet
    
    ‘f_egored_nr_edges_rel’: same, for edges

    '''

    ego_subgraph = get_ego_graph(graph, account_node)
    egored_subgraph = get_egored_subgraph(ego_subgraph, account_node)

    # ------------------------------------------------------------------------------------------------------------------------------
    # Part 1: Get Egonet Features 
    # ------------------------------------------------------------------------------------------------------------------------------  
    
    # Calculate out-degree nodes (successors) and in-degree nodes (predecessors)
    egonet_out_degree_nodes = set(ego_subgraph.successors(account_node))
    egonet_in_degree_nodes = set(ego_subgraph.predecessors(account_node))

    # 1. Get f_degree_in
    f_degree_in = len(egonet_in_degree_nodes)

    # 2. Get f_degree_out
    f_degree_out = len(egonet_out_degree_nodes)

    # 3. Get f_amount_out and f_nr_trans_in
    f_amount_in = 0
    f_nr_trans_in = 0
    for node in egonet_in_degree_nodes:
        f_amount_in += ego_subgraph[node][account_node]['Total_Amount_USD']
        f_nr_trans_in += ego_subgraph[node][account_node]['Total_Transactions']

    # 4. Get f_amount_in f_nr_trans_out
    f_amount_out = 0
    f_nr_trans_out = 0
    for node in egonet_out_degree_nodes:
        f_amount_out += ego_subgraph[account_node][node]['Total_Amount_USD']
        f_nr_trans_out += ego_subgraph[account_node][node]['Total_Transactions']

    # 5. Get f_ego_nr_nodes
    f_ego_nr_nodes = len(ego_subgraph.nodes())
    
    # 6. Get f_ego_nr_edges
    f_ego_nr_edges = len(ego_subgraph.edges())

    # 7. f_average_amount_in
    f_average_amount_in = (f_amount_in / f_nr_trans_in) if f_nr_trans_in > 0 else 0

    # 8. Get f_average_amount_out
    f_average_amount_out = (f_amount_out / f_nr_trans_out) if f_nr_trans_out > 0 else 0
    
    # 9. Get f_ego_edge_density
    f_ego_edge_density = f_ego_nr_edges / f_ego_nr_nodes

    # ------------------------------------------------------------------------------------------------------------------------------
    # Part 2: Get Egored Features 
    # ------------------------------------------------------------------------------------------------------------------------------
    
    # Calculate out-degree nodes (successors) and in-degree nodes (predecessors)
    egored_out_degree_nodes = set(egored_subgraph.successors(account_node))
    egored_in_degree_nodes = set(egored_subgraph.predecessors(account_node))

    # 1. Get f_egored_degree_in
    f_egored_degree_in = len(egored_in_degree_nodes)

    # 2. Get f_egored_degree_out
    f_egored_degree_out = len(egored_out_degree_nodes)

    # 3. Get f_egored_amount_in and f_nr_trans_in
    f_egored_amount_in = 0
    f_egored_nr_trans_in = 0
    for node in egored_in_degree_nodes:
        f_egored_amount_in += egored_subgraph[node][account_node]['Total_Amount_USD']
        f_egored_nr_trans_in += egored_subgraph[node][account_node]['Total_Transactions']

    # 4. Get f_amount_in f_nr_trans_out
    f_egored_amount_out = 0
    f_egored_nr_trans_out = 0
    for node in egored_out_degree_nodes:
        f_egored_amount_out += egored_subgraph[account_node][node]['Total_Amount_USD']
        f_egored_nr_trans_out += egored_subgraph[account_node][node]['Total_Transactions']

    # 5. Get f_egored_nr_nodes
    f_egored_nr_nodes = len(egored_subgraph.nodes())

    # 6. Get f_egored_nr_edges
    f_egored_nr_edges = len(egored_subgraph.edges())

    # 7. Get f_egored_average_amount_in
    f_egored_average_amount_in = (f_egored_amount_in / f_egored_nr_trans_in) if f_egored_nr_trans_in > 0 else 0
    
    # 8. Get f_egored_average_amount_out
    f_egored_average_amount_out = (f_egored_amount_out / f_egored_nr_trans_out) if f_egored_nr_trans_out > 0 else 0

    # 9. Get f_egored_edge_density 
    f_egored_edge_density = f_egored_nr_edges / f_egored_nr_nodes

    # 10. f_egored_degree_in_rel
    f_egored_degree_in_rel = (f_egored_degree_in / f_degree_in) if f_degree_in > 0 else 0

    # 11. f_egored_degree_out_rel
    f_egored_degree_out_rel = (f_egored_degree_out / f_degree_out) if f_degree_out > 0 else 0

    # 12. Get f_egored_amount_in_rel
    f_egored_amount_in_rel = (f_egored_amount_in / f_amount_in) if f_amount_in > 0 else 0 

    # 13. Get f_egored_amount_out_rel
    f_egored_amount_out_rel = (f_egored_amount_out / f_amount_out) if f_amount_out > 0 else 0

    # 14. Get f_egored_average_amount_in_rel
    f_egored_average_amount_in_rel = (f_egored_average_amount_in / f_average_amount_in) if f_average_amount_in > 0 else 0

    # 15. Get f_egored_average_amount_out_rel
    f_egored_average_amount_out_rel = (f_egored_average_amount_out / f_average_amount_out) if f_average_amount_out > 0 else 0

    # 16. Get f_egored_nr_nodes_rel
    f_egored_nr_nodes_rel = (f_egored_nr_nodes / f_ego_nr_nodes)

    # 17. Get f_egored_nr_edges_rel
    f_egored_nr_edges_rel = (f_egored_nr_edges / f_ego_nr_edges)

    # ------------------------------------------------------------------------------------------------------------------------------
    # Part 3: Get Cycle Features 
    # ------------------------------------------------------------------------------------------------------------------------------

    cycle_subgraph = get_subgraph(graph, account_node, cycle_subgraph_radius)
    cycles_list = list(nx.simple_cycles(cycle_subgraph))
    cycles_list  = [this_list for this_list in cycles_list if account_node in this_list]
    del cycle_subgraph
    max_cycles = max(cycles_list, key=len) if len(cycles_list) > 0 else []

    # 1. Get max_cycle_length
    max_cycle_length = len(max_cycles)

    # 2. Get cycle_total_amount_USD
    cycle_total_amount_USD = 0
    if max_cycle_length > 0:
        subgraph_edges = graph.subgraph(max_cycles).edges(data=True)
        cycle_total_amount_USD = sum(edge_data['Total_Amount_USD'] for _, _, edge_data in subgraph_edges)


    if verbose:
        # print(f"{account_node} Egonet In degree nodes: {egonet_in_degree_nodes}")
        # print(f"{account_node} Egonet Out degree nodes: {egonet_out_degree_nodes}")
        # print(f"{account_node} Egored In degree nodes: {egored_in_degree_nodes}")
        # print(f"{account_node} Egored Out degree nodes: {egored_out_degree_nodes}")
        print("-" * 50)
        print(f"Graph features for account {account_node}")
        
        # Display Egonet Features
        print(f"\nEgonet Features: ")
        print(f"f_degree_in: {f_degree_in}")
        print(f"f_degree_out: {f_degree_out}")
        print(f"f_amount_in: {f_amount_in}")
        print(f"f_amount_out: {f_amount_out}")
        print(f"f_nr_trans_in: {f_nr_trans_in}")
        print(f"f_nr_trans_out: {f_nr_trans_out}")
        print(f"f_ego_nr_nodes: {f_ego_nr_nodes}")
        print(f"f_ego_nr_edges: {f_ego_nr_edges}")
        print(f"f_average_amount_in: {f_average_amount_in}")
        print(f"f_average_amount_out: {f_average_amount_out}")
        print(f"f_ego_edge_density: {f_ego_edge_density}")

        # Display Egored Features
        print(f"\nEgored Features: ")
        print(f"f_egored_degree_in: {f_egored_degree_in}")
        print(f"f_egored_degree_out: {f_egored_degree_out}") 
        print(f"f_egored_amount_in: {f_egored_amount_in}")
        print(f"f_egored_amount_out: {f_egored_amount_out}")
        print(f"f_egored_nr_trans_in: {f_egored_nr_trans_in}")
        print(f"f_egored_nr_trans_out: {f_egored_nr_trans_out}")
        print(f"f_egored_nr_nodes: {f_egored_nr_nodes}")
        print(f"f_egored_nr_edges: {f_egored_nr_edges}")
        print(f"f_egored_average_amount_in: {f_egored_average_amount_in}")
        print(f"f_egored_average_amount_out: {f_egored_average_amount_out}")
        print(f"f_egored_edge_density: {f_egored_edge_density}")
        print(f"f_egored_degree_in_rel: {f_egored_degree_in_rel}")
        print(f"f_egored_degree_out_rel: {f_egored_degree_out_rel}")
        print(f"f_egored_amount_in_rel: {f_egored_amount_in_rel}")
        print(f"f_egored_amount_out_rel: {f_egored_amount_out_rel}")
        print(f"f_egored_average_amount_in_rel: {f_egored_average_amount_in_rel}")
        print(f"f_egored_average_amount_out_rel: {f_egored_average_amount_out_rel}")
        print(f"f_egored_nr_nodes_rel: {f_egored_nr_nodes_rel}")
        print(f"f_egored_nr_edges_rel: {f_egored_nr_edges_rel}")

        print(f"\nCycle Features: ")
        print(f"max_cycle_length: {max_cycle_length}")
        print(f"cycle_total_amount_USD: {cycle_total_amount_USD}")
        print("-" * 50)
        print("\n")

    # Complete this for me
    graph_features_dictionary = {
    'f_degree_in': f_degree_in,
    'f_degree_out': f_degree_out,
    'f_amount_in': f_amount_in,
    'f_amount_out': f_amount_out,
    'f_nr_trans_in': f_nr_trans_in,
    'f_nr_trans_out': f_nr_trans_out,
    'f_ego_nr_nodes': f_ego_nr_nodes,
    'f_ego_nr_edges': f_ego_nr_edges,
    'f_average_amount_in': f_average_amount_in,
    'f_average_amount_out': f_average_amount_out,
    'f_ego_edge_density': f_ego_edge_density,
    'f_egored_degree_in': f_egored_degree_in,
    'f_egored_degree_out': f_egored_degree_out,
    'f_egored_amount_in': f_egored_amount_in,
    'f_egored_amount_out': f_egored_amount_out,
    'f_egored_nr_trans_in': f_egored_nr_trans_in,
    'f_egored_nr_trans_out': f_egored_nr_trans_out,
    'f_egored_nr_nodes': f_egored_nr_nodes,
    'f_egored_nr_edges': f_egored_nr_edges,
    'f_egored_average_amount_in': f_egored_average_amount_in,
    'f_egored_average_amount_out': f_egored_average_amount_out,
    'f_egored_edge_density': f_egored_edge_density,
    'f_egored_degree_in_rel': f_egored_degree_in_rel,
    'f_egored_degree_out_rel': f_egored_degree_out_rel,
    'f_egored_amount_in_rel': f_egored_amount_in_rel,
    'f_egored_amount_out_rel': f_egored_amount_out_rel,
    'f_egored_average_amount_in_rel': f_egored_average_amount_in_rel,
    'f_egored_average_amount_out_rel': f_egored_average_amount_out_rel,
    'f_egored_nr_nodes_rel': f_egored_nr_nodes_rel,
    'f_egored_nr_edges_rel': f_egored_nr_edges_rel,
    'max_cycle_length': max_cycle_length,
    'cycle_total_amount_USD': cycle_total_amount_USD
    }

        
    if show_graph: 
        plt.figure(figsize=(10, 7))
        plt.title(f"Egonet of account {account_node}")
        # Draw the egonet subgraph
        pos = nx.spring_layout(ego_subgraph)  # You can choose a different layout if needed
        nx.draw(ego_subgraph, pos, with_labels=True, node_size=500, node_color='skyblue', font_size=8)
        # Highlight the target node
        nx.draw_networkx_nodes(ego_subgraph, pos, nodelist=[account_node], node_size=500, node_color='purple')
        plt.show()
    
        # draw egored subgraph
        plt.figure(figsize=(10, 7))
        plt.title(f"Egored of account {account_node}")
        # Draw the subgraph
        pos = nx.spring_layout(egored_subgraph)  # You can choose a different layout if needed
        nx.draw(egored_subgraph, pos, with_labels=True, node_size=500, node_color='skyblue', font_size=8)
        # Highlight the target node
        nx.draw_networkx_nodes(egored_subgraph, pos, nodelist=[account_node], node_size=500, node_color='purple')
        plt.show()


    return graph_features_dictionary

'''
cycle account: 9992249143 18775565

normal account: # 9997320086 9018 28511

'''


# Spot-check the feature extractor on 9992249143, one of the accounts noted
# above as a cycle account; the returned feature dict is shown as the cell output.
get_graph_features(launder_graph, 9992249143, cycle_subgraph_radius=30, verbose=False, show_graph=False)

{'f_degree_in': 12,
 'f_degree_out': 5,
 'f_amount_in': 1508524.1353009925,
 'f_amount_out': 495964.693224,
 'f_nr_trans_in': 120,
 'f_nr_trans_out': 8,
 'f_ego_nr_nodes': 16,
 'f_ego_nr_edges': 17,
 'f_average_amount_in': 12571.034460841604,
 'f_average_amount_out': 61995.586653,
 'f_ego_edge_density': 1.0625,
 'f_egored_degree_in': 2,
 'f_egored_degree_out': 2,
 'f_egored_amount_in': 273504.109788,
 'f_egored_amount_out': 387644.162724,
 'f_egored_nr_trans_in': 11,
 'f_egored_nr_trans_out': 5,
 'f_egored_nr_nodes': 3,
 'f_egored_nr_edges': 4,
 'f_egored_average_amount_in': 24864.009980727275,
 'f_egored_average_amount_out': 77528.8325448,
 'f_egored_edge_density': 1.3333333333333333,
 'f_egored_degree_in_rel': 0.16666666666666666,
 'f_egored_degree_out_rel': 0.4,
 'f_egored_amount_in_rel': 0.1813057566582641,
 'f_egored_amount_out_rel': 0.7815962870343321,
 'f_egored_average_amount_in_rel': 1.9778809817265175,
 'f_egored_average_amount_out_rel': 1.2505540592549313,
 'f_egored_nr_node

In [9]:
# Names of the graph features collected for every account. The order here is
# the column order of acc_df (after the leading 'Account_node' column); every
# name must be a key of the dict returned by get_graph_features().
graph_feature_names = [
    'f_degree_in',
    'f_degree_out',
    'f_amount_in',
    'f_amount_out',
    'f_nr_trans_in',
    'f_nr_trans_out',
    'f_ego_nr_nodes',
    'f_ego_nr_edges',
    'f_average_amount_in',
    'f_average_amount_out',
    'f_ego_edge_density',
    'f_egored_degree_in',
    'f_egored_degree_out',
    'f_egored_amount_in',
    'f_egored_amount_out',
    'f_egored_nr_trans_in',
    'f_egored_nr_trans_out',
    'f_egored_nr_nodes',
    'f_egored_nr_edges',
    'f_egored_average_amount_in',
    'f_egored_average_amount_out',
    'f_egored_edge_density',
    'f_egored_degree_in_rel',
    'f_egored_degree_out_rel',
    'f_egored_amount_in_rel',
    'f_egored_amount_out_rel',
    'f_egored_average_amount_in_rel',
    'f_egored_average_amount_out_rel',
    'f_egored_nr_nodes_rel',
    'f_egored_nr_edges_rel',
    'max_cycle_length',
    'cycle_total_amount_USD',
]

# One list per output column; 'Account_node' comes first so it is the first
# column of the resulting DataFrame.
account_feats_dict = {column: [] for column in ['Account_node', *graph_feature_names]}

# Loop through the nodes in launder_graph and record one row of features each
for account_node in launder_graph.nodes():
    feature_dict = get_graph_features(launder_graph, account_node, cycle_subgraph_radius=30, verbose=False, show_graph=False)

    account_feats_dict['Account_node'].append(account_node)
    # A missing key raises KeyError here, exactly as an explicit lookup would.
    for feature_name in graph_feature_names:
        account_feats_dict[feature_name].append(feature_dict[feature_name])

# Creating DataFrame: one row per graph node, one column per feature
acc_df = pd.DataFrame(account_feats_dict)

In [37]:
# Boolean selector for the transactions flagged as laundering
mask = df['Is_laundering'].eq(1)
launder_df = df.loc[mask]

# Every account that appears as sender or receiver of a flagged transaction,
# de-duplicated (senders are listed before receivers).
suspicious_acc_list = pd.concat(
    [launder_df[side] for side in ('Sender_account', 'Receiver_account')]
).unique()

In [41]:
suspicious_acc_list[0:10]

array([   92172,   155434,  1021972,  2491634, 11230718, 14479020,
       16226026, 16683594, 18775565, 19195430], dtype=int64)

In [43]:
def add_fraud_acc_label(Account_node, suspicious_acc_list):
    """Return 1 if ``Account_node`` is contained in ``suspicious_acc_list``, else 0.

    Containment is decided by Python's ``in`` operator, so the result follows
    the membership semantics of whatever container is passed in.
    """
    is_suspicious = Account_node in suspicious_acc_list
    return int(is_suspicious)
    

# Label each account: 1 if its id appears in suspicious_acc_list, 0 otherwise.
# Series.isin tests all rows in one vectorized pass; the previous
# apply(lambda ...) re-scanned the whole suspicious-account array once per row.
# The explicit int64 cast keeps the column dtype the row-wise version produced.
acc_df['fraud_account'] = acc_df['Account_node'].isin(suspicious_acc_list).astype('int64')


In [44]:
acc_df.head()

Unnamed: 0,Account_node,f_degree_in,f_degree_out,f_amount_in,f_amount_out,f_nr_trans_in,f_nr_trans_out,f_ego_nr_nodes,f_ego_nr_edges,f_average_amount_in,f_average_amount_out,f_ego_edge_density,f_egored_degree_in,f_egored_degree_out,f_egored_amount_in,f_egored_amount_out,f_egored_nr_trans_in,f_egored_nr_trans_out,f_egored_nr_nodes,f_egored_nr_edges,f_egored_average_amount_in,f_egored_average_amount_out,f_egored_edge_density,f_egored_degree_in_rel,f_egored_degree_out_rel,f_egored_amount_in_rel,f_egored_amount_out_rel,f_egored_average_amount_in_rel,f_egored_average_amount_out_rel,f_egored_nr_nodes_rel,f_egored_nr_edges_rel,max_cycle_length,cycle_total_amount_USD,fraud_account
0,8724731955,0,1,0.0,35214.0,0,15,2,1,0.0,2347.60032,0.5,0,0,0.0,0.0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0,0.0,0
1,1491989064,16,46,2592373.0,3617478.0,170,495,61,62,15249.251148,7308.036402,1.016393,2,2,251844.541948,234597.676248,12,4,3,4,20987.045162,58649.419062,1.333333,0.125,0.043478,0.097148,0.064851,1.376267,8.025332,0.04918,0.064516,2,389168.936208,0
2,287305149,27,27,3431549.0,2839036.0,271,221,49,54,12662.542221,12846.315661,1.102041,6,6,369956.277448,749721.338028,27,7,7,12,13702.08435,107103.04829,1.714286,0.222222,0.222222,0.10781,0.264076,1.082096,8.337258,0.142857,0.222222,2,358671.545856,0
3,5376652437,0,1,0.0,120805.3,0,9,2,1,0.0,13422.810475,0.5,0,0,0.0,0.0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0,0.0,0
4,9614186178,0,1,0.0,8104.01,0,6,2,1,0.0,1350.668358,0.5,0,0,0.0,0.0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0,0.0,0


In [45]:
# Save the DataFrame as a Feather file
acc_df.to_feather('acc_df.feather')

In [4]:
import pandas as pd
# Load the Feather file into a DataFrame
acc_df = pd.read_feather('acc_df.feather')

In [5]:
acc_df.head()

Unnamed: 0,Account_node,f_degree_in,f_degree_out,f_amount_in,f_amount_out,f_nr_trans_in,f_nr_trans_out,f_ego_nr_nodes,f_ego_nr_edges,f_average_amount_in,f_average_amount_out,f_ego_edge_density,f_egored_degree_in,f_egored_degree_out,f_egored_amount_in,f_egored_amount_out,f_egored_nr_trans_in,f_egored_nr_trans_out,f_egored_nr_nodes,f_egored_nr_edges,f_egored_average_amount_in,f_egored_average_amount_out,f_egored_edge_density,f_egored_degree_in_rel,f_egored_degree_out_rel,f_egored_amount_in_rel,f_egored_amount_out_rel,f_egored_average_amount_in_rel,f_egored_average_amount_out_rel,f_egored_nr_nodes_rel,f_egored_nr_edges_rel,max_cycle_length,cycle_total_amount_USD,fraud_account
0,8724731955,0,1,0.0,35214.0,0,15,2,1,0.0,2347.60032,0.5,0,0,0.0,0.0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0,0.0,0
1,1491989064,16,46,2592373.0,3617478.0,170,495,61,62,15249.251148,7308.036402,1.016393,2,2,251844.541948,234597.676248,12,4,3,4,20987.045162,58649.419062,1.333333,0.125,0.043478,0.097148,0.064851,1.376267,8.025332,0.04918,0.064516,2,389168.936208,0
2,287305149,27,27,3431549.0,2839036.0,271,221,49,54,12662.542221,12846.315661,1.102041,6,6,369956.277448,749721.338028,27,7,7,12,13702.08435,107103.04829,1.714286,0.222222,0.222222,0.10781,0.264076,1.082096,8.337258,0.142857,0.222222,2,358671.545856,0
3,5376652437,0,1,0.0,120805.3,0,9,2,1,0.0,13422.810475,0.5,0,0,0.0,0.0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0,0.0,0
4,9614186178,0,1,0.0,8104.01,0,6,2,1,0.0,1350.668358,0.5,0,0,0.0,0.0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0,0.0,0


# 6. Add Graph Features to transaction dataframe

## 6.1 Add Sender Graph Features

In [7]:
'''
all features
['f_degree_in', 'f_degree_out', 'f_amount_in', 'f_amount_out',
           'f_nr_trans_in', 'f_nr_trans_out', 'f_ego_nr_nodes', 'f_ego_nr_edges',
           'f_average_amount_in', 'f_average_amount_out', 'f_ego_edge_density',
           'f_egored_degree_in', 'f_egored_degree_out', 'f_egored_amount_in',
           'f_egored_amount_out', 'f_egored_nr_trans_in', 'f_egored_nr_trans_out',
           'f_egored_nr_nodes', 'f_egored_nr_edges', 'f_egored_average_amount_in',
           'f_egored_average_amount_out', 'f_egored_edge_density',
           'f_egored_degree_in_rel', 'f_egored_degree_out_rel',
           'f_egored_amount_in_rel', 'f_egored_amount_out_rel',
           'f_egored_average_amount_in_rel', 'f_egored_average_amount_out_rel',
           'f_egored_nr_nodes_rel', 'f_egored_nr_edges_rel', 'max_cycle_length',
           'cycle_total_amount_USD']
'''

def add_graph_features(df, acc_df, selected_columns, left_column, suffix):
    """Attach per-account graph features from ``acc_df`` to the rows of ``df``.

    Parameters
    ----------
    df : DataFrame
        Table to enrich; must contain ``left_column``.
    acc_df : DataFrame
        Per-account feature table; must contain ``'Account_node'`` and every
        name in ``selected_columns``. It is not modified.
    selected_columns : iterable of str
        Feature columns of ``acc_df`` to carry over.
    left_column : str
        Column of ``df`` holding the account id to match against
        ``acc_df['Account_node']``.
    suffix : str
        Text prepended to each selected column name in the result (despite
        the parameter name it is used as a prefix, e.g. ``'sender_'``).

    Returns
    -------
    DataFrame
        ``df.merge(...)`` with pandas' default join type, so rows of ``df``
        whose account has no entry in ``acc_df`` are not kept. The
        ``'Account_node'`` key column is part of the result; if ``df`` already
        has a column of that name pandas disambiguates the two as
        ``Account_node_x`` / ``Account_node_y``.
    """
    key = 'Account_node'

    # Columns to pull from acc_df, and the names they get in the output:
    # the join key keeps its name, every feature gets the prefix.
    wanted = [key, *selected_columns]
    renamed = [name if name == key else suffix + name for name in wanted]

    # Selecting with a list yields a new frame, so relabelling it leaves acc_df untouched.
    lookup = acc_df[wanted].set_axis(renamed, axis=1)

    return df.merge(lookup, left_on=left_column, right_on=key)

In [8]:
# Columns of acc_df to copy onto each transaction (listed in in/out pairs).
selected_columns = [
    'f_degree_in', 'f_degree_out',
    'f_amount_in', 'f_amount_out',
    'f_nr_trans_in', 'f_nr_trans_out',
    'f_ego_nr_nodes', 'f_ego_nr_edges',
    'f_average_amount_in', 'f_average_amount_out',
    'f_ego_edge_density',
    'f_egored_degree_in', 'f_egored_degree_out',
    'f_egored_amount_in', 'f_egored_amount_out',
    'f_egored_nr_trans_in', 'f_egored_nr_trans_out',
    'f_egored_nr_nodes', 'f_egored_nr_edges',
    'f_egored_average_amount_in', 'f_egored_average_amount_out',
    'f_egored_edge_density',
    'f_egored_degree_in_rel', 'f_egored_degree_out_rel',
    'f_egored_amount_in_rel', 'f_egored_amount_out_rel',
    'f_egored_average_amount_in_rel', 'f_egored_average_amount_out_rel',
    'f_egored_nr_nodes_rel', 'f_egored_nr_edges_rel',
    'max_cycle_length', 'cycle_total_amount_USD',
]

# Join the features of the sending account onto every transaction; the new
# columns are named sender_<feature>.
trans_df = add_graph_features(
    df,
    acc_df,
    selected_columns,
    left_column='Sender_account',
    suffix='sender_',
)

In [9]:
trans_df.head()

Unnamed: 0,Transaction_id,Timestamp,Last_transaction_time_elapsed_minutes,Time,Hour,Minute,Date,Year,Month,Day,Sender_account,Receiver_account,Amount,Amount_USD,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type,Different_Currency,Account_node,sender_f_degree_in,sender_f_degree_out,sender_f_amount_in,sender_f_amount_out,sender_f_nr_trans_in,sender_f_nr_trans_out,sender_f_ego_nr_nodes,sender_f_ego_nr_edges,sender_f_average_amount_in,sender_f_average_amount_out,sender_f_ego_edge_density,sender_f_egored_degree_in,sender_f_egored_degree_out,sender_f_egored_amount_in,sender_f_egored_amount_out,sender_f_egored_nr_trans_in,sender_f_egored_nr_trans_out,sender_f_egored_nr_nodes,sender_f_egored_nr_edges,sender_f_egored_average_amount_in,sender_f_egored_average_amount_out,sender_f_egored_edge_density,sender_f_egored_degree_in_rel,sender_f_egored_degree_out_rel,sender_f_egored_amount_in_rel,sender_f_egored_amount_out_rel,sender_f_egored_average_amount_in_rel,sender_f_egored_average_amount_out_rel,sender_f_egored_nr_nodes_rel,sender_f_egored_nr_edges_rel,sender_max_cycle_length,sender_cycle_total_amount_USD
0,T8572083,2023-07-22 09:51:28,0.0,09:51:28,9,51,2023-07-22,2023,7,22,9018,2388293593,3319.06,4193.964216,UK pounds,Euro,UK,Germany,Cross-border,0,Normal_Foward,Yes,9018,1,1,4193.964216,4193.964216,1,1,3,2,4193.964216,4193.964216,0.666667,0,0,0.0,0.0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0,0.0
1,T3210515,2023-01-24 23:28:15,0.0,23:28:15,23,28,2023-01-24,2023,1,24,28511,3072405466,6371.25,8050.7115,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No,28511,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2,142754.034852
2,T4191568,2023-02-24 23:31:38,44643.383333,23:31:38,23,31,2023-02-24,2023,2,24,28511,3072405466,3878.0,4900.2408,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No,28511,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2,142754.034852
3,T5018227,2023-03-24 20:51:59,40160.35,20:51:59,20,51,2023-03-24,2023,3,24,28511,3072405466,4109.92,5193.294912,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No,28511,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2,142754.034852
4,T5938109,2023-04-24 19:38:10,44566.183333,19:38:10,19,38,2023-04-24,2023,4,24,28511,3072405466,7147.58,9031.682088,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No,28511,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2,142754.034852


In [10]:
del df

In [11]:
trans_df = trans_df.drop(['Timestamp', 'Time'], axis=1)

## 6.2 Add Receiver Graph Features

In [12]:
# Join the features of the receiving account onto every transaction; the new
# columns are named receiver_<feature>.
# `selected_columns` is the list already defined for the sender merge in
# section 6.1. It is reused here instead of being re-declared so the sender_*
# and receiver_* feature sets cannot drift apart.
trans_df = add_graph_features(trans_df, acc_df, selected_columns, left_column='Receiver_account', suffix='receiver_')

In [13]:
trans_df.head()

Unnamed: 0,Transaction_id,Last_transaction_time_elapsed_minutes,Hour,Minute,Date,Year,Month,Day,Sender_account,Receiver_account,Amount,Amount_USD,Payment_currency,Received_currency,Sender_bank_location,Receiver_bank_location,Payment_type,Is_laundering,Laundering_type,Different_Currency,Account_node_x,sender_f_degree_in,sender_f_degree_out,sender_f_amount_in,sender_f_amount_out,sender_f_nr_trans_in,sender_f_nr_trans_out,sender_f_ego_nr_nodes,sender_f_ego_nr_edges,sender_f_average_amount_in,sender_f_average_amount_out,sender_f_ego_edge_density,sender_f_egored_degree_in,sender_f_egored_degree_out,sender_f_egored_amount_in,sender_f_egored_amount_out,sender_f_egored_nr_trans_in,sender_f_egored_nr_trans_out,sender_f_egored_nr_nodes,sender_f_egored_nr_edges,sender_f_egored_average_amount_in,sender_f_egored_average_amount_out,sender_f_egored_edge_density,sender_f_egored_degree_in_rel,sender_f_egored_degree_out_rel,sender_f_egored_amount_in_rel,sender_f_egored_amount_out_rel,sender_f_egored_average_amount_in_rel,sender_f_egored_average_amount_out_rel,sender_f_egored_nr_nodes_rel,sender_f_egored_nr_edges_rel,sender_max_cycle_length,sender_cycle_total_amount_USD,Account_node_y,receiver_f_degree_in,receiver_f_degree_out,receiver_f_amount_in,receiver_f_amount_out,receiver_f_nr_trans_in,receiver_f_nr_trans_out,receiver_f_ego_nr_nodes,receiver_f_ego_nr_edges,receiver_f_average_amount_in,receiver_f_average_amount_out,receiver_f_ego_edge_density,receiver_f_egored_degree_in,receiver_f_egored_degree_out,receiver_f_egored_amount_in,receiver_f_egored_amount_out,receiver_f_egored_nr_trans_in,receiver_f_egored_nr_trans_out,receiver_f_egored_nr_nodes,receiver_f_egored_nr_edges,receiver_f_egored_average_amount_in,receiver_f_egored_average_amount_out,receiver_f_egored_edge_density,receiver_f_egored_degree_in_rel,receiver_f_egored_degree_out_rel,receiver_f_egored_amount_in_rel,receiver_f_egored_amount_out_rel,receiver_f_egored_average_amount_in_rel,receiver_f_egored_average_amount_out_rel,rec
eiver_f_egored_nr_nodes_rel,receiver_f_egored_nr_edges_rel,receiver_max_cycle_length,receiver_cycle_total_amount_USD
0,T8572083,0.0,9,51,2023-07-22,2023,7,22,9018,2388293593,3319.06,4193.964216,UK pounds,Euro,UK,Germany,Cross-border,0,Normal_Foward,Yes,9018,1,1,4193.964216,4193.964216,1,1,3,2,4193.964216,4193.964216,0.666667,0,0,0.0,0.0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0,0.0,2388293593,1,0,4193.964,0.0,1,0,2,1,4193.964216,0.0,0.5,0,0,0.0,0.0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0,0.0
1,T3210515,0.0,23,28,2023-01-24,2023,1,24,28511,3072405466,6371.25,8050.7115,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No,28511,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2,142754.034852,3072405466,18,34,2814767.0,5140855.0,186,359,49,52,15133.153766,14319.930502,1.061224,4,4,544728.04074,1358748.0,19,7,5,8,28669.896881,194106.89732,1.6,0.222222,0.117647,0.193525,0.264304,1.894509,13.555017,0.102041,0.153846,2,509996.402604
2,T4191568,44643.383333,23,31,2023-02-24,2023,2,24,28511,3072405466,3878.0,4900.2408,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No,28511,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2,142754.034852,3072405466,18,34,2814767.0,5140855.0,186,359,49,52,15133.153766,14319.930502,1.061224,4,4,544728.04074,1358748.0,19,7,5,8,28669.896881,194106.89732,1.6,0.222222,0.117647,0.193525,0.264304,1.894509,13.555017,0.102041,0.153846,2,509996.402604
3,T5018227,40160.35,20,51,2023-03-24,2023,3,24,28511,3072405466,4109.92,5193.294912,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No,28511,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2,142754.034852,3072405466,18,34,2814767.0,5140855.0,186,359,49,52,15133.153766,14319.930502,1.061224,4,4,544728.04074,1358748.0,19,7,5,8,28669.896881,194106.89732,1.6,0.222222,0.117647,0.193525,0.264304,1.894509,13.555017,0.102041,0.153846,2,509996.402604
4,T5938109,44566.183333,19,38,2023-04-24,2023,4,24,28511,3072405466,7147.58,9031.682088,UK pounds,UK pounds,UK,UK,Debit card,0,Normal_Plus_Mutual,No,28511,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1,1,90950.339376,51803.695476,1,7,2,2,90950.339376,7400.527925,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2,142754.034852,3072405466,18,34,2814767.0,5140855.0,186,359,49,52,15133.153766,14319.930502,1.061224,4,4,544728.04074,1358748.0,19,7,5,8,28669.896881,194106.89732,1.6,0.222222,0.117647,0.193525,0.264304,1.894509,13.555017,0.102041,0.153846,2,509996.402604


In [14]:
trans_df = trans_df.drop(['Account_node_x', 'Account_node_y'], axis=1)

In [16]:
del acc_df

In [17]:
import pandas as pd

# Persist the merged transaction table (trans_df) to disk.
# Note: this writes a Feather file, not a pickle.
trans_df.to_feather('trans_df.feather')

In [19]:
del trans_df

In [20]:
# Reload the transaction table from the Feather file 'trans_df.feather'
trans_df = pd.read_feather('trans_df.feather')

In [1]:
trans_df.info()

NameError: name 'trans_df' is not defined