In [55]:
import os

import numpy as np
import pandas as pd
import networkx as nx

import arrow
from tqdm import tqdm

# from dotenv import load_dotenv
# from coinbase.wallet.client import Client

# load_dotenv('.env')
# client = Client(os.environ['COINBASE_KEY'], os.environ['COINBASE_SECRET'])

### Change here to select project output

In [56]:
# Names of the projects to process. Each name is used as the CSV file stem under
# ./data/collated and ./data/balances and as the output folder under ./memory.
# Full list, kept for reference:
# projects = ['bayc', 'coolcats', 'cryptoadz', 'cyberkongz', 'hashmasks', 'mayc', 'meebits', 'mekaverse', 'svs']
projects = ['mekaverse']

### Store base data as a dataframe

In [57]:
def create_base_data(project):
    """Load the collated transfer CSV of ``project`` into a DataFrame.

    Reads ``./data/collated/<project>.csv``, skips the file's first line and
    applies the fixed column names below instead, then strips surrounding
    whitespace from the ``from_address`` and ``to_address`` columns.

    Parameters
    ----------
    project : str
        Project name, used as the CSV file stem.

    Returns
    -------
    pandas.DataFrame
        One row per CSV record with the columns ``row``, ``tx_hash``,
        ``token_address``, ``from_address``, ``to_address``, ``token_id``,
        ``blk_number``, ``blk_timestamp`` and ``eth_value``.
    """
    PATH_TO_DATA = './data/collated/' + project + '.csv'  # Change if needed
    column_names = ["row", "tx_hash", "token_address", "from_address", "to_address", "token_id", "blk_number", "blk_timestamp", "eth_value"]

    df = pd.read_csv(PATH_TO_DATA, delimiter=',', skiprows=1, names=column_names)

    # The vectorised .str accessor leaves missing addresses as NaN, whereas
    # calling x.strip() on each element raised AttributeError on them.
    for address_column in ("from_address", "to_address"):
        df[address_column] = df[address_column].str.strip()

    return df

### Transaction data

In [58]:
def get_transaction_data(project):
    """Read ``./data/balances/<project>.csv`` and return it as a DataFrame.

    The file is parsed with pandas' default CSV settings; ``project`` is the
    file stem.
    """
    return pd.read_csv(f"./data/balances/{project}.csv")

# (block, account) pairs for which lookup_transaction_value found no matching
# row. Module-level and shared by every call.
errors = []

def lookup_transaction_value(df, block, account):
    """Return the ``eth_value`` of the first row of ``df`` for ``block`` and ``account``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``block``, ``address`` and ``eth_value`` columns.
    block
        Value compared against the ``block`` column.
    account : str
        Value compared against the ``address`` column.

    Returns
    -------
    The ``eth_value`` of the first matching row. Returns 0 for the zero
    address (no lookup is performed) and 0 when no row matches, in which case
    ``(block, account)`` is also appended to the module-level ``errors`` list.

    Raises
    ------
    KeyError
        If ``df`` lacks one of the required columns. This was previously
        swallowed by a blanket ``except`` and reported as a value of 0.
    """
    if account == '0x0000000000000000000000000000000000000000':
        return 0

    matches = df.loc[(df['block'] == block) & (df['address'] == account), 'eth_value']

    # Only the "no such row" case is treated as a recoverable miss.
    if matches.empty:
        errors.append((block, account))
        return 0

    return matches.iat[0]

# Convert ETH value to USD at specified date
# Cache of spot prices keyed by the `date` argument, filled on first use.
prices = {}
def get_usd_value(date, eth_value):
    """Return ``eth_value`` multiplied by the ETH-USD spot price for ``date``.

    A zero ``eth_value`` is returned unchanged without any price lookup.
    Otherwise the price is taken from the ``prices`` cache; on a cache miss it
    is fetched with ``client.get_spot_price`` and stored under ``date``.

    NOTE(review): the only creation of `client` visible in this notebook is
    commented out in the import cell, so a cache miss raises NameError unless
    `client` is defined elsewhere.
    """
    if eth_value == 0:
        return eth_value

    if date not in prices:
        prices[date] = client.get_spot_price(currency_pair='ETH-USD', date=date)['amount']

    return float(prices[date]) * eth_value

### Build time-based dataframes

In [59]:
def create_timed_data(df, df_transactions):
    """Build the time-annotated transfer table consumed by the graph builders.

    For every row of ``df`` (in iteration order) the block timestamp is parsed
    with ``arrow``, the time since the last seen mint is computed, and the
    value for the sender and receiver at that block is looked up in
    ``df_transactions`` via ``lookup_transaction_value``.

    Parameters
    ----------
    df : pandas.DataFrame
        Transfers with ``blk_timestamp``, ``from_address``, ``to_address``,
        ``token_id``, ``blk_number`` and ``eth_value`` columns.
    df_transactions : pandas.DataFrame
        Frame passed through to ``lookup_transaction_value``.

    Returns
    -------
    pandas.DataFrame
        One row per input row with the columns listed in ``column_names``.
        Column dtypes are inferred by pandas from the collected values.

    Notes
    -----
    * A row whose ``from_address`` is the zero address is treated as a mint:
      ``days_since_mint`` is the integer 0 and the row's date becomes the
      reference date. For all other rows ``days_since_mint`` is the
      ``timedelta`` between the row's date and that reference.
    * NOTE(review): the reference is the most recent mint row seen, not the
      mint of the row's own token, and a non-mint row that appears before any
      mint row raises ``UnboundLocalError`` -- confirm this is intended.
    * The ``usd_value`` column is currently disabled.
    """
    ZERO_ADDRESS = '0x0000000000000000000000000000000000000000'
    column_names = [
        "date",
        "days_since_mint",
        "from_address",
        "to_address",
        "token_id",
        "blk_number",
        "eth_value",
        "from_value",
        "to_value",
    ]

    # Collect plain dicts and build the frame once at the end:
    # DataFrame.append copied the whole frame on every iteration and no
    # longer exists in pandas >= 2.0.
    records = []

    for index, row in tqdm(df.iterrows(), total=df.shape[0]):
        date = arrow.get(row['blk_timestamp']).datetime
        from_address = row['from_address']
        to_address = row['to_address']
        blk_number = row['blk_number']

        if from_address == ZERO_ADDRESS:
            days_since_mint = 0
            mint_date = date
        else:
            days_since_mint = date - mint_date

        records.append({
            'date': date,
            'days_since_mint': days_since_mint,
            'from_address': from_address,
            'to_address': to_address,
            'token_id': row['token_id'],
            'blk_number': blk_number,
            'eth_value': row['eth_value'],
            'from_value': lookup_transaction_value(df_transactions, blk_number, from_address),
            'to_value': lookup_transaction_value(df_transactions, blk_number, to_address),
        })

    # Passing `columns` fixes the column order and yields the right (empty)
    # schema when `df` has no rows.
    return pd.DataFrame(records, columns=column_names)

### Build graph objects from time-based dataframes

In [60]:
def build_graph_from_timed(df_time):
    """Build a directed multigraph of wallets from a time-ordered transfer table.

    Rows are processed in iteration order. Each address becomes a node and
    each token is represented by exactly one edge: the most recent transfer of
    that token seen so far.

    Parameters
    ----------
    df_time : pandas.DataFrame
        Frame with ``from_address``, ``to_address``, ``token_id``,
        ``eth_value``, ``from_value`` and ``to_value`` columns.

    Returns
    -------
    networkx.MultiDiGraph
        Nodes carry ``eth_value`` (the ``from_value`` / ``to_value`` of the
        last row touching the node) and ``trades`` (number of rows the node
        took part in). Edges carry ``weight`` (the row's ``eth_value``) and
        ``token_id``.
    """
    graph = nx.MultiDiGraph()

    # token_id -> (from_address, to_address, edge key) of the edge that
    # currently represents the token. This replaces a scan over all edges per
    # row, and lets us remove exactly that edge: remove_edge(u, v) without a
    # key drops an arbitrary parallel edge, which may belong to another token.
    current_edge = {}

    for index, row in tqdm(df_time.iterrows(), total=df_time.shape[0]):
        from_address = row['from_address']
        to_address = row['to_address']
        token_id = row['token_id']

        # add_node is a no-op for existing nodes and keeps their attributes.
        graph.add_node(from_address)
        graph.add_node(to_address)

        # If both addresses are the same node, to_value wins (as before).
        graph.nodes[from_address]['eth_value'] = row['from_value']
        graph.nodes[to_address]['eth_value'] = row['to_value']

        # Keep track of how many trades a wallet has done. Iterating over a
        # set counts a self-transfer once, matching the previous behaviour.
        for address in {from_address, to_address}:
            attributes = graph.nodes[address]
            attributes['trades'] = attributes.get('trades', 0) + 1

        # If this token was transferred before, remove the edge of that
        # earlier transfer.
        previous = current_edge.pop(token_id, None)
        if previous is not None:
            graph.remove_edge(*previous)

        # Add an edge for the transaction; the token id is stored on the edge
        # and add_edge returns the key identifying it among parallel edges.
        edge_key = graph.add_edge(from_address, to_address, weight=row['eth_value'], token_id=token_id)
        current_edge[token_id] = (from_address, to_address, edge_key)

    return graph

### Build time-based snapshots

In [61]:
def build_graph_snapshots(df_time):
    """Split ``df_time`` into 10 date quantiles and build one graph per quantile.

    Writes the quantile index of every row into ``df_time['date_quantile']``
    (the caller's frame is modified), then builds a graph from the rows of
    each quantile with ``build_graph_from_timed``.

    Returns a dict mapping a bin edge returned by ``pd.qcut`` to the graph of
    the quantile it is paired with; quantile ids (sorted) and bin edges are
    paired positionally.
    """
    quantile_ids, bin_edges = pd.qcut(df_time['date'], 10, labels=False, retbins=True)
    df_time['date_quantile'] = quantile_ids
    bucket_ids = np.unique(df_time["date_quantile"].to_numpy())

    return {
        edge: build_graph_from_timed(df_time[df_time['date_quantile'] == bucket])
        for bucket, edge in zip(bucket_ids, bin_edges)
    }

In [62]:
def build_df_snapshots(df_time):
    """Compute graph metrics for each of 10 date quantiles of ``df_time``.

    Writes the quantile index of every row into ``df_time['date_quantile']``
    (the caller's frame is modified), builds a graph per quantile with
    ``build_graph_from_timed`` and records its metrics.

    Parameters
    ----------
    df_time : pandas.DataFrame
        Frame with a ``date`` column plus the columns required by
        ``build_graph_from_timed``.

    Returns
    -------
    pandas.DataFrame
        One row per quantile, sorted by ``time_bucket``, with the columns in
        ``column_names``. ``avg_clustering``, ``centrality_betweenness``,
        ``centrality_eigenvector``, ``avg_clustering_random`` and
        ``assortativity_random`` are not computed here and stay empty (NaN).
        The two centrality columns that are filled hold the per-node dicts
        returned by networkx.
    """
    column_names = [
        "time_bucket",
        "time_bucket_label",
        "number_of_nodes",
        "avg_clustering",
        "reciprocity",
        "assortativity",
        "assortativity_base",
        "assortativity_out_out",
        "assortativity_in_in",
        "assortativity_in_out",
        "centrality_degree",
        "centrality_closeness",
        "centrality_betweenness",
        "centrality_eigenvector",
        "avg_clustering_random",
        "assortativity_random"
    ]

    df_time['date_quantile'], bins = pd.qcut(df_time['date'], 10, labels=False, retbins=True)
    time_buckets = np.unique(df_time["date_quantile"].to_numpy())

    # Collect one dict per snapshot and build the frame once;
    # DataFrame.append no longer exists in pandas >= 2.0.
    records = []

    for time_bucket, label in zip(time_buckets, bins):
        selection = df_time[(df_time['date_quantile'] == time_bucket)]
        graph_snapshot = build_graph_from_timed(selection)

        records.append({
            "time_bucket": time_bucket,
            "time_bucket_label": label,
            "number_of_nodes": graph_snapshot.number_of_nodes(),
            "reciprocity": nx.reciprocity(graph_snapshot),
            "assortativity": nx.degree_assortativity_coefficient(graph_snapshot),
            "assortativity_base": nx.degree_pearson_correlation_coefficient(graph_snapshot.to_undirected(), weight='weight'),
            "assortativity_out_out": nx.degree_pearson_correlation_coefficient(graph_snapshot, x='out', y='out', weight='weight'),
            "assortativity_in_in": nx.degree_pearson_correlation_coefficient(graph_snapshot, x='in', y='in', weight='weight'),
            "assortativity_in_out": nx.degree_pearson_correlation_coefficient(graph_snapshot, x='in', y='out', weight='weight'),
            "centrality_degree": nx.degree_centrality(graph_snapshot),
            "centrality_closeness": nx.closeness_centrality(graph_snapshot),
        })

    # `columns` keeps the full schema (including the unfilled columns) and
    # the original column order.
    df_snapshots = pd.DataFrame(records, columns=column_names)

    return df_snapshots.sort_values(by=['time_bucket'])

In [63]:
# For every selected project: build the timed transfer table and the full
# graph, then persist both under ./memory/<project>/.
for project in projects:
    # `errors` is a module-level list filled by lookup_transaction_value.
    # Reset it so the printed count belongs to this project only and does not
    # grow across projects or when the cell is re-run.
    errors.clear()

    df_transactions = get_transaction_data(project)
    df_time = create_timed_data(create_base_data(project), df_transactions)
    print(len(errors))

    g_time = build_graph_from_timed(df_time)

    # np.save / write_gml do not create missing directories.
    os.makedirs(f"./memory/{project}", exist_ok=True)

    # The frame is stored as a pickled object array; column names are not
    # preserved and must be re-applied when loading.
    np.save(f"./memory/{project}/full.npy", df_time)
    nx.write_gml(g_time, f"./memory/{project}/full.gml")

100%|██████████| 22262/22262 [04:34<00:00, 80.97it/s]


522


100%|██████████| 22262/22262 [03:08<00:00, 117.85it/s]


In [64]:
# For every selected project: reload the timed transfer table, build the
# per-quantile graphs and the per-quantile metric summary, and persist them
# under ./memory/<project>/snapshots/.
for project in projects:
    # full.npy holds only the values, so the column names are re-applied here.
    column_names = [
        "date",
        "days_since_mint",
        "from_address",
        "to_address",
        "token_id",
        "blk_number",
        "eth_value",
        "from_value",
        "to_value",
    ]

    # NOTE: allow_pickle=True unpickles arbitrary objects; only load files
    # that were written by this notebook.
    np_data = np.load(f"./memory/{project}/full.npy", allow_pickle=True)
    df_time = pd.DataFrame(data=np_data, columns=column_names)

    g_snapshots = build_graph_snapshots(df_time)
    df_snapshot_summary = build_df_snapshots(df_time)

    # write_gml / np.save do not create missing directories.
    os.makedirs(f"./memory/{project}/snapshots", exist_ok=True)

    for i, snapshot in enumerate(g_snapshots.keys()):
        nx.write_gml(g_snapshots[snapshot], f"./memory/{project}/snapshots/{i}.gml")

    # Save the computed summary; previously df_time was written here and the
    # summary was discarded.
    np.save(f"./memory/{project}/snapshots/summary.npy", df_snapshot_summary)

100%|██████████| 2302/2302 [00:02<00:00, 1101.13it/s]
100%|██████████| 2181/2181 [00:01<00:00, 1143.64it/s]
100%|██████████| 2205/2205 [00:02<00:00, 1006.82it/s]
100%|██████████| 2235/2235 [00:02<00:00, 927.45it/s] 
100%|██████████| 2213/2213 [00:02<00:00, 964.64it/s] 
100%|██████████| 2221/2221 [00:02<00:00, 972.62it/s] 
100%|██████████| 2226/2226 [00:02<00:00, 762.80it/s] 
100%|██████████| 2226/2226 [00:02<00:00, 793.12it/s] 
100%|██████████| 2291/2291 [00:02<00:00, 952.11it/s] 
100%|██████████| 2162/2162 [00:02<00:00, 1004.34it/s]
100%|██████████| 2302/2302 [00:02<00:00, 1069.46it/s]
100%|██████████| 2181/2181 [00:01<00:00, 1218.64it/s]
100%|██████████| 2205/2205 [00:01<00:00, 1184.35it/s]
100%|██████████| 2235/2235 [00:02<00:00, 997.97it/s] 
100%|██████████| 2213/2213 [00:01<00:00, 1148.90it/s]
100%|██████████| 2221/2221 [00:02<00:00, 1062.76it/s]
100%|██████████| 2226/2226 [00:02<00:00, 748.15it/s] 
100%|██████████| 2226/2226 [00:02<00:00, 899.07it/s] 
100%|██████████| 2291/2291 [

In [65]:
# Print the balances table of the 'cyberkongz' project for inspection.
# The project name is hard-coded here and independent of the `projects` list.
print(get_transaction_data('cyberkongz'))

         row     block        date  \
0          0  12253142  2021-04-16   
1          2  12253527  2021-04-16   
2          3  12253570  2021-04-16   
3          4  12253684  2021-04-16   
4          5  12253689  2021-04-16   
...      ...       ...         ...   
21229  21230  13711959  2021-11-30   
21230  21231  13711968  2021-11-30   
21231  21232  13711968  2021-11-30   
21232  21233  13711969  2021-11-30   
21233  21234  13711969  2021-11-30   

                                          address  eth_value     usd_value  
0      0x000000048797808be86aa9786c1f402671192d6b   1.341069   3375.509898  
1      0x721931508df2764fd4f70c53da646cb8aed16ace   1.231443   3099.578481  
2      0x721931508df2764fd4f70c53da646cb8aed16ace   1.193960   3005.233539  
3      0xfa6c54de608c9a0a2c2a3220bb7e42b95d1b910b   0.473936   1192.912132  
4      0xfa6c54de608c9a0a2c2a3220bb7e42b95d1b910b   0.432051   1087.484855  
...                                           ...        ...           ...  
2122