In [13]:
import numpy as np
import pandas as pd
import networkx as nx

import arrow

from tqdm import tqdm

### Change here to select project output

In [14]:
projects = ['bayc', 'coolcats', 'cryptoadz', 'cyberkongz', 'hashmasks', 'mayc', 'meebits', 'mekaverse', 'svs']

### Store base data as a dataframe

In [15]:
def create_base_data(project):
    PATH_TO_DATA = './data/collated/' + project + '.csv'  # Change if needed
    column_names = ["row", "tx_hash", "token_address", "from_address", "to_address", "token_id", "blk_number", "blk_timestamp", "eth_value"]
    
    df = pd.read_csv(PATH_TO_DATA, delimiter=',', skiprows=1, names=column_names)
    
    df["from_address"] = df.from_address.apply(lambda x: x.strip())
    df["to_address"] = df.to_address.apply(lambda x: x.strip())
    
    return df

### Build time-based dataframes

In [16]:
def create_timed_data(df):
    ZERO_ADDRESS = '0x0000000000000000000000000000000000000000'
    column_names = ["date", "days_since_mint", "from_address", "to_address", "token_id", "blk_number", "eth_value"]
    
    df_time = pd.DataFrame(columns=column_names)
    
    for index, row in df.iterrows():
        blk_timestamp = row['blk_timestamp']
        date = arrow.get(blk_timestamp).datetime

        # Remove whitespace from address rows
        from_address = row['from_address']
        to_address = row['to_address']
        token_id = row['token_id']
        blk_number = row['blk_number']
        eth_value = row['eth_value']
        
        # Get days since mint and place a dummy value that we reference below
        if from_address == ZERO_ADDRESS:
            days_since_mint = 0
        else:
            days_since_mint = 1
            
        df_time = df_time.append({
            'date': date,
            'days_since_mint': days_since_mint,
            'from_address': from_address,
            'to_address': to_address,
            'token_id': token_id, 
            'blk_number': blk_number,
            'eth_value': eth_value,
        }, ignore_index=True)
        
    # Replace the dummy value by looking at the original mint date in the original dataframe
    for index, row in df_time.iterrows():
        days_since_mint = row['days_since_mint']
        
        if days_since_mint == 1:
            from_address = row['from_address']
            date_now = row['date']
            
            df_index = df.index[df['to_address'] == from_address]
            date_mint = min(df_time.loc[df_index, 'date'])

            date_diff = date_now - date_mint
            date_diff = date_diff.days
            
            df_time.at[df_index,'days_since_mint'] = date_diff
            
    df_time['days_since_mint'] = df_time['days_since_mint'].fillna(0)
    return df_time

### Build graph objects from time base dataframes

In [17]:
def build_graph_from_timed(df_time):    
    # Building a network per block
    # we will use a weighted and directed graph.
    graph = nx.DiGraph()

    plot_interval = 24 * 3600 * 10 # 1 day
    start_timestamp = df_time['date'].iloc[0]

    # loop over the pandas dataframe.
    for index, row in df_time.iterrows():

        # read the values from the dataframe.
        # token_id  blk_timestamp eth_value 
        date = row['date']
        from_address = row['from_address']
        to_address = row['to_address']
        token_id = row['token_id']
        blk_number = row['blk_number']
        eth_value = row['eth_value']

        # make sure both addresses are in the graph.
        if from_address not in graph:
            graph.add_node(from_address)
        if to_address not in graph:
            graph.add_node(to_address)

        # lookup value of both nodes at the time of this block
        # if (not from_address == '0x0000000000000000000000000000000000000000'):
        #     address = w3.toChecksumAddress(from_address)
        #     value_from = w3.eth.get_balance(address, block_identifier=block)
        # if (not to_address == '0x0000000000000000000000000000000000000000'):
        #     address = w3.toChecksumAddress(to_address)
        #     value_to = w3.eth.get_balance(address, block_identifier=block)

        # set the attributes on this node.
        # TODO: replace dummy values
        nx.set_node_attributes(graph, {from_address:100, to_address:100}, 'value')

        # keep track of how many trades a wallet has done.
        trades = nx.get_node_attributes(graph, "trades")
        if from_address in trades:
            nx.set_node_attributes(graph, {from_address:trades[from_address] + 1}, 'trades')
        else:
            nx.set_node_attributes(graph, {from_address:1}, 'trades')
        if to_address in trades:
            nx.set_node_attributes(graph, {to_address:trades[to_address] + 1}, 'trades')
        else:
            nx.set_node_attributes(graph, {to_address:1}, 'trades')

        # check if this NFT has already been sold and if yes, remove the old sale.
        # this might be a candidate for memoization - c.b.
        remove_edges = []
        for (u,v,d) in graph.edges.data():
            if d['token_id'] == token_id:
                remove_edges.append((u,v))
            # we need to remove them in a seperate step, since otherwise we change the datastructure that we are iterating over.
        for (u,v) in remove_edges:
            graph.remove_edge(u,v)

        # add an edge for the transaction.
        # TODO this will be changed
        value = eth_value  # currently we don't have weth value
        graph.add_edge(from_address, to_address, weight=value, token_id=token_id) # keep track of token id by adding it to the edge.
        
    return graph

In [18]:
for project in tqdm(projects):
    df_time = create_timed_data(create_base_data(project))
    g_time = build_graph_from_timed(df_time)
    
    np.save(f"./memory/{project}.npy", df_time)
    nx.write_gml(g_time, f"./memory/{project}.gml")

100%|████████████████████████████████████████████| 9/9 [49:10<00:00, 327.84s/it]
