In [1]:
import os
import pandas as pd

# Paths to datasets (all relative to the notebook's working directory)
# Text file with one `address#index` record per line
alladdress_path = "../dataset/graph analysis/TOIT graph data/alladdress"
# Folder of CCG files; every line holds three '#'-separated fields, the middle one being a target index
ccg_folder_path = "../dataset/graph analysis/net-001/CCG"
# CSV that must provide at least the 'address' and 'target' columns
best_dataset_path = "../dataset/best_dataset.csv"

# Step 1: Build the index -> address lookup from the alladdress file.
# Every record is expected to look like `<address>#<index>`; records that do
# not split into exactly two fields, or whose index is not an integer, are
# reported and skipped.
print("Loading address mapping...")
address_map = {}
try:
    with open(alladdress_path, 'r') as mapping_file:
        for raw_line in mapping_file:
            record = raw_line.strip()
            try:
                hex_address, position = record.split('#')
                key = int(position)
            except ValueError:
                print(f"Error parsing line in alladdress: {record}")
                continue
            address_map[key] = hex_address  # integer index -> address string
except FileNotFoundError:
    print(f"File not found: {alladdress_path}")
except Exception as err:
    print(f"Unexpected error while loading alladdress: {err}")

print(f"Total addresses loaded from mapping: {len(address_map)}")
# Take the first five items lazily instead of materialising every pair.
sample_mappings = [pair for _, pair in zip(range(5), address_map.items())]
print(f"Sample address mappings: {sample_mappings}")

# Step 2: Scan every CCG file and collect the target indices that are known
# to the alladdress mapping. Each record is expected to hold three
# '#'-separated fields, of which only the middle one (the target) is used.
print("\nProcessing CCG files for targets...")
mapped_indices = set()

try:
    for entry_name in os.listdir(ccg_folder_path):
        entry_path = os.path.join(ccg_folder_path, entry_name)
        if not os.path.isfile(entry_path):
            continue  # ignore sub-directories and other non-file entries

        print(f"Processing file: {entry_name}")
        with open(entry_path, 'r') as ccg_file:
            for raw_line in ccg_file:
                record = raw_line.strip()
                try:
                    _, target_field, _ = record.split('#')
                    target_index = int(target_field)
                except ValueError:
                    print(f"Error parsing line in {entry_name}: {record}")
                    continue

                # Keep only targets that have an address in the mapping
                if target_index in address_map:
                    mapped_indices.add(target_index)
except FileNotFoundError:
    print(f"Folder not found: {ccg_folder_path}")
except Exception as err:
    print(f"Unexpected error while processing CCG files: {err}")

# Results
print(f"\nUnique mapped indices: {len(mapped_indices)}")

# Step 3: Load best_dataset.csv and keep only the contracts whose address maps
# to an index that appeared as a target in the CCG files.
#
# Reads:  best_dataset_path, address_map (index -> address), mapped_indices
# Writes: best_dataset (adds 'normalized_address' and 'index' columns),
#         aggregated_dataset, and the file 'aggregated_dataset.csv'
print("Loading best_dataset.csv...")
try:
    best_dataset = pd.read_csv(best_dataset_path)

    # Normalize the 'address' column so it can be compared with the mapping.
    # Hex addresses are case-insensitive, so lowercase them first, then drop
    # the '0x' prefix only when it is actually present (slicing off the first
    # two characters unconditionally corrupts addresses stored without it).
    print("Normalizing addresses...")
    best_dataset['normalized_address'] = (
        best_dataset['address']
        .str.lower()
        .str.replace(r'^0x', '', regex=True)
    )

    # Filter the best_dataset to include only addresses in the mapped indices.
    # The reverse lookup is built from mapped_indices only: those are the only
    # indices the filter below can keep, so inverting the whole address_map
    # would just duplicate it in memory. As a consequence 'index' is populated
    # only for addresses whose index was seen as a CCG target (NaN otherwise).
    print("Filtering dataset by mapped indices...")
    address_to_index = {
        address_map[idx].lower(): idx
        for idx in mapped_indices
        if idx in address_map
    }
    best_dataset['index'] = best_dataset['normalized_address'].map(address_to_index)

    aggregated_dataset = best_dataset[
        best_dataset['index'].isin(mapped_indices)
    ].drop_duplicates()

    # Count total contracts and Ponzi contracts
    total_contracts = len(aggregated_dataset)
    ponzi_contracts = int((aggregated_dataset['target'] == 1).sum())

    # Results
    print("\nAggregated Dataset Summary:")
    print(f"Total unique smart contracts: {total_contracts}")
    print(f"Number of Ponzi contracts (target=1): {ponzi_contracts}")
    print(f"Number of Non-Ponzi contracts (target=0): {total_contracts - ponzi_contracts}")

    # Save aggregated dataset to a new CSV (row index is not written)
    aggregated_dataset.to_csv("aggregated_dataset.csv", index=False)
    print("Aggregated dataset saved to 'aggregated_dataset.csv'.")

except FileNotFoundError:
    print(f"File not found: {best_dataset_path}")
except Exception as e:
    print(f"Unexpected error: {e}")


Loading address mapping...
Total addresses loaded from mapping: 48298522
Sample address mappings: [(42313618, 'dd9fd6b6f8f7ea932997992bbe67eabb3e316f3c'), (42313619, '689c56aef474df92d44a1b70850f808488f9769c'), (42313620, 'b64ffdca47d6c3895608c4e05faba6e617b3a031'), (42313621, '816051e2203ca534c4336d8d6df71987fa3ae0bd'), (42313622, '5a4e849325e9b87bdb918bbe1bbafdea819bbefe')]

Processing CCG files for targets...
Processing file: 32
Processing file: 35
Processing file: 34
Processing file: 33
Processing file: 20
Processing file: 18
Processing file: 27
Processing file: 9
Processing file: 0
Processing file: 11
Processing file: 7
Processing file: 29
Processing file: 16
Processing file: 6
Processing file: 28
Processing file: 17
Processing file: 1
Processing file: 10
Processing file: 19
Processing file: 26
Processing file: 8
Processing file: 21
Processing file: 38
Processing file: 36
Processing file: 31
Processing file: 30
Processing file: 37
Processing file: 24
Processing file: 23
Processing

# First-Degree Neighbors Aggregation
Map each index to its actual address.

In [4]:
import pandas as pd

In [None]:
# Step 1: Define the input and output locations for the alladdress conversion
# Raw text file with `address#index` records
alladdress_input_file = "../dataset/graph analysis/TOIT graph data/alladdress"  # Replace with actual filename if different
# Destination of the two-column CSV produced from the raw file
alladdress_output_file = "../dataset/alladdress.csv"

In [3]:
# Convert the raw alladdress file (`address#index` per line) into a CSV with
# the columns 'address' and 'index'.
#
# Reads:  alladdress_input_file
# Writes: alladdress_df and the file at alladdress_output_file
import pandas as pd

# Stream the file and collect two plain column lists. Reading every line up
# front and building one dict per record keeps several copies of a very large
# file in memory at once.
alladdress_columns = {"address": [], "index": []}
with open(alladdress_input_file, "r") as f:
    for line in f:
        record = line.strip()
        if "#" not in record:  # Ensure the line contains valid data
            continue
        try:
            address, index = record.split("#")
            index = int(index)
        except ValueError:
            # Extra '#' separators or a non-integer index: report and skip the
            # record rather than aborting the whole conversion.
            print(f"Error parsing line in alladdress: {record}")
            continue
        alladdress_columns["address"].append(address)
        alladdress_columns["index"].append(index)

# Convert to a pandas DataFrame and save as CSV (row index is not written)
alladdress_df = pd.DataFrame(alladdress_columns)
alladdress_df.to_csv(alladdress_output_file, index=False)
print(f"Alladdress file saved as: {alladdress_output_file}")

Alladdress file saved as: alladdress.csv


In [6]:
# CSV with the 'index' and 'address' columns used to build the lookup
alladdress_csv = "../dataset/alladdress.csv"
# Transaction CSV whose 'source' and 'target' columns hold indices
txd_csv = "../dataset/first degree/txd.csv"
# Destination for the transactions after indices are replaced by addresses
output_file = "../dataset/first degree/txd_to_address.csv"

In [7]:
# Load the index/address table and the transaction list
alladdress_df = pd.read_csv(alladdress_csv)
txfd = pd.read_csv(txd_csv)

# Step 3: Build the index -> address lookup
index_to_address = dict(zip(alladdress_df["index"], alladdress_df["address"]))

# Step 4: Replace the index in both endpoint columns with its address
# (indices missing from the lookup become NaN)
for endpoint in ("source", "target"):
    txfd[endpoint] = txfd[endpoint].map(index_to_address)


In [8]:
# Write the address-labelled transactions; index=False keeps the DataFrame's
# row index out of the file.
txfd.to_csv(path_or_buf=output_file, index=False)
print(f"Mapped transaction file saved at: {output_file}")

Mapped transaction file saved at: ../dataset/first degree/txd_to_address.csv


In [9]:
# Preview the first five mapped transactions
txfd.head(n=5)

Unnamed: 0.1,Unnamed: 0,source,target,value
0,0,109c4f2ccc82c4d77bde15f306707320294aea3f,881b0a4e9c55d08e31d8d3c022144d75a454211c,260.0
1,1,109c4f2ccc82c4d77bde15f306707320294aea3f,fd2605a2bf58fdbb90db1da55df61628b47f9e8c,82.0
2,2,109c4f2ccc82c4d77bde15f306707320294aea3f,834e9b529ac9fa63b39a06f8d8c9b0d6791fa5df,156.0
3,3,109c4f2ccc82c4d77bde15f306707320294aea3f,17580b766f7453525ca4c6a88b01b50570ea088c,206.0
4,4,109c4f2ccc82c4d77bde15f306707320294aea3f,f88a65846c19d8fc76fff545feaa7bbc7114f667,302.0
