In [1]:
import pandas as pd
from joblib import Parallel, delayed
from tqdm import tqdm
from multiprocessing import Manager
import csv
import time

In [2]:
csv_file_path = "aggregated_dataset.csv"
mfg_file_path = "../dataset/processed_data.csv"
# Store aggregated_dataset.csv file in pandas DataFrame
df_csv = pd.read_csv(csv_file_path)

# Store MFG transaction in pandas DataFrame
df_mfg = pd.read_csv(mfg_file_path)

# First-degree Neighbors

In [3]:
# Store list of index from aggregate_data.csv file to a set named D0

D0 = set(df_csv['index'].tolist())         # D0 is the set of 3422 Smart Contract Index

# From MFG Transactions , filter all those transactions where the Smart Contract index are either recieving money or sending money.

df_source_sc_target_all = df_mfg[df_mfg['source'].isin(D0)]
df_source_all_target_sc = df_mfg[df_mfg['target'].isin(D0)]

# The total count of these transactions give us Direct Transactions (txd)

txd = df_source_sc_target_all.shape[0] + df_source_all_target_sc.shape[0]

print(f"Total Direct Transactions (tx-direct) :{txd}")
print("")

# Find all adresses which are used in above transactions . These are either the addresses which recieve money from Smart Contract
# or the addresses which send money to Smart Contract . 
# Also , there are possiblity of Smart Contracts sending / recieving money among themselves.

unique_src_1 = df_source_sc_target_all.source
unique_tar_1 = df_source_sc_target_all.target
unique_src_2 = df_source_all_target_sc.source
unique_tar_2 = df_source_all_target_sc.target

F_list = unique_src_1.tolist()+unique_tar_1.tolist()+unique_src_2.tolist()+unique_tar_2.tolist()

# Remove Duplicate addresses , These are the First Degree Neighbors for all 3422 Smart Contracts

F_addresses = set(F_list)

F = len(F_addresses)

print(f"Total Set of First Degree Neighbours(F) :{F}")
print("")

# Now , We identify all those transactions in which these First Degree Neighbors are either at sending end or recieving end.

df_fn = df_mfg[(df_mfg['source'].isin(F_addresses)) | (df_mfg['target'].isin(F_addresses))]
efd = df_fn.shape[0]

print(f"Total Set of First Degree Transcations (Edges) :{efd}")

Total Direct Transactions (tx-direct) :485205

Total Set of First Degree Neighbours(F) :362012

Total Set of First Degree Transcations (Edges) :29418984


# Second-degree Neighbors

In [None]:
# Process a single first-degree neighbor
def process_first_degree_neighbor(neighbor):
    """Process transactions for a single first-degree neighbor."""
    result = {
        "second_degree_neighbors": [],  # List of second-degree neighbors (duplicates allowed)
        "transactions": []  # List of transactions for second-degree neighbors
    }
    # Filter transactions where the first-degree neighbor is source or target
    df_second_degree = df_mfg[(df_mfg['source'] == neighbor) | (df_mfg['target'] == neighbor)]
    
    # Collect second-degree neighbors
    second_degree_neighbors = df_second_degree['source'].tolist() + df_second_degree['target'].tolist()
    second_degree_neighbors = [n for n in second_degree_neighbors if n not in F_addresses and n not in D0]
    result["second_degree_neighbors"] = second_degree_neighbors
    
    # Store the transactions (source, target, value)
    result["transactions"].extend(df_second_degree[['source', 'target', 'value']].values.tolist())
    
    return result

# Main processing loop
if __name__ == "__main__":
    print("Processing first-degree neighbors one by one...")

    total_second_degree_neighbors = []
    total_second_degree_transactions = []

    for idx, neighbor in enumerate(tqdm(F_addresses, desc="Processing First-Degree Neighbors")):
        start_time = time.time()
        result = process_first_degree_neighbor(neighbor)
        total_second_degree_neighbors.extend(result["second_degree_neighbors"])
        total_second_degree_transactions.extend(result["transactions"])
        end_time = time.time()

        # Print debugging information
        if idx % 1000 == 0:
            print(f"Processed {idx} neighbors. Time for last neighbor: {end_time - start_time:.2f}s")

        # Save partial results every 10,000 neighbors
        if idx % 10000 == 0:
            with open("partial_second_neighbors.txt", "a") as f:
                for neighbor in total_second_degree_neighbors:
                    f.write(f"{neighbor}\n")
            with open("partial_esd.csv", "a", newline="") as f:
                writer = csv.writer(f)
                writer.writerows(total_second_degree_transactions)
            total_second_degree_neighbors.clear()  # Clear bufferz 
            total_second_degree_transactions.clear()

    # Final Save
    with open("second_neighbors.txt", "w") as f:
        for neighbor in total_second_degree_neighbors:
            f.write(f"{neighbor}\n")
    with open("esd.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["source", "target", "value"])  # Header
        writer.writerows(total_second_degree_transactions)

    print(f"Processing complete. Total neighbors: {len(total_second_degree_neighbors)}")
    print(f"Total second-degree transactions: {len(total_second_degree_transactions)}")



Processed 325000 neighbors. Time for last neighbor: 0.11s




Processed 326000 neighbors. Time for last neighbor: 0.11s




Processed 327000 neighbors. Time for last neighbor: 0.11s




Processed 328000 neighbors. Time for last neighbor: 0.11s




Processed 329000 neighbors. Time for last neighbor: 0.11s




Processed 330000 neighbors. Time for last neighbor: 0.10s




Processed 331000 neighbors. Time for last neighbor: 0.10s




Processed 332000 neighbors. Time for last neighbor: 0.12s




Processed 333000 neighbors. Time for last neighbor: 0.11s




Processed 334000 neighbors. Time for last neighbor: 0.11s




Processed 335000 neighbors. Time for last neighbor: 0.11s




Processed 336000 neighbors. Time for last neighbor: 0.13s




Processed 337000 neighbors. Time for last neighbor: 0.11s




Processed 338000 neighbors. Time for last neighbor: 0.12s




Processed 339000 neighbors. Time for last neighbor: 0.11s




Processed 340000 neighbors. Time for last neighbor: 0.12s




Processed 341000 neighbors. Time for last neighbor: 0.11s




Processed 342000 neighbors. Time for last neighbor: 0.12s




Processed 343000 neighbors. Time for last neighbor: 0.11s




Processed 344000 neighbors. Time for last neighbor: 0.13s




Processed 345000 neighbors. Time for last neighbor: 0.12s




Processed 346000 neighbors. Time for last neighbor: 0.11s




Processed 347000 neighbors. Time for last neighbor: 0.12s




Processed 348000 neighbors. Time for last neighbor: 0.11s




Processed 349000 neighbors. Time for last neighbor: 0.12s




Processed 350000 neighbors. Time for last neighbor: 0.12s




Processed 351000 neighbors. Time for last neighbor: 0.11s




Processed 352000 neighbors. Time for last neighbor: 0.11s




Processed 353000 neighbors. Time for last neighbor: 0.10s




Processed 354000 neighbors. Time for last neighbor: 0.10s




Processed 355000 neighbors. Time for last neighbor: 0.12s




Processed 356000 neighbors. Time for last neighbor: 0.10s




Processed 357000 neighbors. Time for last neighbor: 0.10s




Processed 358000 neighbors. Time for last neighbor: 0.11s




Processed 359000 neighbors. Time for last neighbor: 0.10s




Processed 360000 neighbors. Time for last neighbor: 0.10s




Processed 361000 neighbors. Time for last neighbor: 0.11s




Processed 362000 neighbors. Time for last neighbor: 0.10s


Processing First-Degree Neighbors: 100%|██████████| 362012/362012 [13:31:45<00:00,  7.43it/s]

Processing complete. Total neighbors: 7258
Total second-degree transactions: 13114





In [None]:
import gc
i = 0
 
# create a cycle and on each iteration x as a dictionary
# assigned to 1
def create_cycle():
    x = { }
    x[i+1] = x
    print(x)
 
# lists are cleared whenever a full collection or 
# collection of the highest generation (2) is run
collected = gc.collect() # or gc.collect(2)
print("Garbage collector: collected %d objects." % (collected))
 
print("Creating cycles...")
for i in range(10):
    create_cycle()
 
collected = gc.collect()
 
print("Garbage collector: collected %d objects." % (collected))