## Money Laundering Patterns Data Set

### Preprocessing

In [None]:
# Parse the raw laundering-pattern text file into one DataFrame row per transaction.
# Imported here so the cell also runs on a fresh kernel.
import pandas as pd

# read the file (explicit encoding so the result does not depend on the platform default)
with open("data/HI-Small_Patterns.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()

patterns_data = []
current_laundering_id = 0
current_pattern_name = ""
# Initialised like the two labels above so that a transaction line appearing
# before the first BEGIN header gets an empty label instead of raising NameError.
laundering_type = ""

# loop over the lines
for line in lines:
    line = line.strip()
    if not line:
        # blank separator line
        continue
    if line.startswith("BEGIN LAUNDERING ATTEMPT"):
        # A header opens a new attempt: bump the running id and remember its labels.
        current_laundering_id += 1
        # Text after " - " is the full type description; the part before ":" is the pattern name.
        laundering_type = line.split(" - ")[1]
        current_pattern_name = laundering_type.split(":")[0].strip()
    elif line.startswith("END LAUNDERING ATTEMPT"):
        # footer lines carry no transaction data
        continue
    else:
        # Transaction line: comma-separated fields, tagged with the labels of the current attempt.
        cols = [col.strip() for col in line.split(",")]
        cols.extend([laundering_type, current_laundering_id, current_pattern_name])
        patterns_data.append(cols)

# convert to dataframe (each row must supply exactly these 14 values)
patterns = pd.DataFrame(patterns_data, columns=['timestamp', 'from_bank', 'from_account', 'to_bank', 'to_account', 'amount_received',
                                 'receiving_currency', 'amount_paid', 'payment_currency', 'payment_format', 'is_laundering', 'laundering_type', 'pattern_id', 'pattern_name'])

# convert columns to appropriate data types (every field is still a string after the split)
patterns['amount_received'] = patterns['amount_received'].astype(float)
patterns['amount_paid'] = patterns['amount_paid'].astype(float)

In [None]:
# One-off export: uncomment to write the parsed frame to the CSV file that the notebook reloads below.
#patterns.to_csv("data/patterns_dataframe.csv", index=False)

In [None]:
# Reload the parsed frame from its CSV cache. If the cache file does not exist yet,
# write it from the in-memory frame first, then read it back so column dtypes are
# inferred by read_csv on both paths. This keeps the notebook runnable top to bottom
# on a fresh checkout, where the cache has never been exported.
try:
    patterns = pd.read_csv("data/patterns_dataframe.csv")
except FileNotFoundError:
    patterns.to_csv("data/patterns_dataframe.csv", index=False)
    patterns = pd.read_csv("data/patterns_dataframe.csv")

In [None]:
# preview ten rows from the middle of the frame (positions 20 through 29)
patterns.iloc[20:30]

Unnamed: 0,timestamp,from_bank,from_account,to_bank,to_account,amount_received,receiving_currency,amount_paid,payment_currency,payment_format,is_laundering,laundering_type,pattern_id,pattern_name
20,2022/09/03 10:20,24856,8090E8EB0,71,804ABCE90,637140.6,Rupee,637140.6,Rupee,ACH,1,CYCLE: Max 10 hops,2,CYCLE
21,2022/09/03 12:08,71,804ABCE90,213737,805494C30,621578.18,Rupee,621578.18,Rupee,ACH,1,CYCLE: Max 10 hops,2,CYCLE
22,2022/09/03 13:24,213737,805494C30,14290,801B949C0,7222.58,Euro,7222.58,Euro,ACH,1,CYCLE: Max 10 hops,2,CYCLE
23,2022/09/04 03:24,14290,801B949C0,10057,803DE1580,892031.21,Yen,892031.21,Yen,ACH,1,CYCLE: Max 10 hops,2,CYCLE
24,2022/09/04 09:44,10057,803DE1580,28628,80ACEE280,11364.12,Australian Dollar,11364.12,Australian Dollar,ACH,1,CYCLE: Max 10 hops,2,CYCLE
25,2022/09/04 15:51,28628,80ACEE280,1467,8013C4030,7945.55,US Dollar,7945.55,US Dollar,ACH,1,CYCLE: Max 10 hops,2,CYCLE
26,2022/09/01 00:04,119,811C597B0,48309,811C599A0,34254.65,Saudi Riyal,34254.65,Saudi Riyal,ACH,1,GATHER-SCATTER: Max 3-degree Fan-In,3,GATHER-SCATTER
27,2022/09/01 19:27,150240,812D22980,48309,811C599A0,5971.98,Saudi Riyal,5971.98,Saudi Riyal,ACH,1,GATHER-SCATTER: Max 3-degree Fan-In,3,GATHER-SCATTER
28,2022/09/04 05:06,222,811B83280,48309,811C599A0,50445.58,Saudi Riyal,50445.58,Saudi Riyal,ACH,1,GATHER-SCATTER: Max 3-degree Fan-In,3,GATHER-SCATTER
29,2022/09/04 05:03,48309,811C599A0,48309,811C599A0,48649.42,Saudi Riyal,48649.42,Saudi Riyal,ACH,1,GATHER-SCATTER: Max 3-degree Fan-In,3,GATHER-SCATTER


In [None]:
# list the column labels of the patterns frame
patterns.columns

Index(['timestamp', 'from_bank', 'from_account', 'to_bank', 'to_account',
       'amount_received', 'receiving_currency', 'amount_paid',
       'payment_currency', 'payment_format', 'is_laundering',
       'laundering_type', 'pattern_id', 'pattern_name'],
      dtype='object')

In [None]:
# show the dtype pandas holds for each column
patterns.dtypes

timestamp              object
from_bank               int64
from_account           object
to_bank                 int64
to_account             object
amount_received       float64
receiving_currency     object
amount_paid           float64
payment_currency       object
payment_format         object
is_laundering           int64
laundering_type        object
pattern_id              int64
pattern_name           object
dtype: object

### Payment Format Deep Dive

In [None]:
# Get the count and percentage of each payment format.
# The count column is named explicitly: pandas >= 2.0 labels the value_counts()
# result "count" rather than after the source column, so looking the column up
# as counts['payment_format'] after a bare to_frame() raises KeyError there.
format_counts = patterns['payment_format'].value_counts()
counts = format_counts.to_frame(name='payment_format')
# Share of all transactions, computed from the Series so it does not depend on the column label.
counts['percentage_of_transaction'] = format_counts / len(patterns) * 100
print(counts)

         payment_format  percentage_of_transaction
ACH                3208                  99.968838
Bitcoin               1                   0.031162


### Pattern Analysis

In [None]:
# how many distinct pattern names occur in the data
pattern_count = patterns.loc[:, 'pattern_name'].nunique()
print("Number of unique patterns:", pattern_count)

Number of unique patterns: 8


In [None]:
# distinct pattern_id values per pattern_name, largest first
pattern_id_count = (
    patterns
    .groupby('pattern_name')['pattern_id']
    .nunique()
    .sort_values(ascending=False)
)
print(pattern_id_count)

pattern_name
CYCLE             54
GATHER-SCATTER    51
BIPARTITE         49
FAN-OUT           48
SCATTER-GATHER    44
STACK             43
RANDOM            41
FAN-IN            40
Name: pattern_id, dtype: int64


In [None]:
# distinct sending accounts (from_account) per pattern_name, largest first
from_account_count = (
    patterns
    .groupby('pattern_name')['from_account']
    .nunique()
    .sort_values(ascending=False)
)
print(from_account_count)

pattern_name
STACK             446
GATHER-SCATTER    380
SCATTER-GATHER    339
FAN-IN            307
CYCLE             271
BIPARTITE         250
RANDOM            181
FAN-OUT            43
Name: from_account, dtype: int64
