In [21]:
import ast
import json
import os
import time
from datetime import date
from itertools import chain

import cpnet
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [6]:
# PATHs
# The directory where the raw transaction and punk data are stored
ORI_DATA_PATH = '../data/ori'

# The directory where the databases are stored.
# exist_ok=True creates the directory only when it is missing, without a
# separate existence check that could race with another process.
DATABASE_PATH = '../data/database'
os.makedirs(DATABASE_PATH, exist_ok=True)

# The directory under which exported figures are written
IMG_PATH = '../img'

In [7]:
# Transaction table, ordered by date
tx_db = pd.read_csv(f'{DATABASE_PATH}/tx_db.csv', index_col=0).sort_values(by='date')

# Punk table. The 'attributes' column is parsed from text back into Python
# objects. ast.literal_eval only accepts literals, so a malformed or malicious
# cell cannot execute code the way eval() would.
punk_db = pd.read_csv(f'{DATABASE_PATH}/punk_db.csv', index_col=0)
punk_db['attributes'] = punk_db['attributes'].apply(ast.literal_eval)

# Every distinct attribute that appears on any punk (a set, despite the name)
attributes_unique_list = set(chain.from_iterable(punk_db['attributes']))

# Address mapping; the context manager closes the file handle after loading
with open('{}/addresses.json'.format(DATABASE_PATH)) as address_file:
    address_dict = json.load(address_file)

In [8]:
# Transactions dated after 2020-12-31. The comparison is against a string,
# which orders correctly for YYYY-MM-DD formatted dates.
tx_db_2021 = tx_db.loc[tx_db['date'] > '2020-12-31']

# Of those, keep rows whose skin tone is not 'Non-human' and whose price
# lies strictly between 0 and 500 ETH.
tx_db_2021_human = tx_db_2021.loc[
    lambda df: df['skin_tone'].ne('Non-human')
    & df['eth_price'].gt(0)
    & df['eth_price'].lt(500)
]
tx_db_2021_human

Unnamed: 0,date,from,to,eth_price,punk_id,type,gender,skin_tone,attr_count,attributes,skin_tone_color,img_url
9737,2021-01-01,1173,6060,11.44,4444,Human,Male,Dark,3,"['Normal Beard Black', 'Shaved Head', 'Mole']",#A4031F,https://www.larvalabs.com/cryptopunks/cryptopu...
6508,2021-01-02,3111,1211,5.30,9683,Human,Male,Dark,2,"['Goat', 'Top Hat']",#A4031F,https://www.larvalabs.com/cryptopunks/cryptopu...
9740,2021-01-02,6287,2234,5.47,9340,Human,Male,Light,3,"['Luxurious Beard', 'Mohawk Thin', 'Earring']",#F2A359,https://www.larvalabs.com/cryptopunks/cryptopu...
9739,2021-01-02,1293,5236,4.97,3410,Human,Male,Light,2,"['Shadow Beard', 'Headband']",#F2A359,https://www.larvalabs.com/cryptopunks/cryptopu...
8285,2021-01-02,412,4715,5.45,4861,Human,Male,Light,2,"['Normal Beard Black', 'Mohawk Thin']",#F2A359,https://www.larvalabs.com/cryptopunks/cryptopu...
...,...,...,...,...,...,...,...,...,...,...,...,...
12450,2022-07-26,980,1939,94.00,9099,Human,Male,Light,4,"['Cigarette', 'Mustache', 'Earring', 'Crazy Ha...",#F2A359,https://www.larvalabs.com/cryptopunks/cryptopu...
17440,2022-07-26,5318,4950,69.00,4430,Human,Female,Albino,2,"['Clown Eyes Green', 'Frumpy Hair']",#F2DC5D,https://www.larvalabs.com/cryptopunks/cryptopu...
17613,2022-07-26,3002,1044,94.90,5600,Human,Male,Albino,3,"['Frumpy Hair', 'Gold Chain', 'Small Shades']",#F2DC5D,https://www.larvalabs.com/cryptopunks/cryptopu...
13214,2022-07-27,3096,1028,69.69,6221,Human,Female,Dark,4,"['Green Eye Shadow', 'Half Shaved', 'Earring',...",#A4031F,https://www.larvalabs.com/cryptopunks/cryptopu...


In [9]:
# Keep only the columns needed to build the transaction graph
tx_2021 = tx_db_2021_human.loc[:, ['date', 'from', 'to', 'eth_price']]

# Total eth_price per (date, from, to) triple, flattened back into columns
agg_data = (
    tx_2021
    .groupby(['date', 'from', 'to'])['eth_price']
    .sum()
    .reset_index()
)
agg_data


Unnamed: 0,date,from,to,eth_price
0,2021-01-01,1173,6060,11.44
1,2021-01-02,412,4715,5.45
2,2021-01-02,1293,5236,4.97
3,2021-01-02,3107,5236,4.90
4,2021-01-02,3111,1211,5.30
...,...,...,...,...
11953,2022-07-26,980,1939,94.00
11954,2022-07-26,3002,1044,94.90
11955,2022-07-26,5318,4950,69.00
11956,2022-07-27,402,1955,104.95


In [14]:
import numpy as np
import pandas as pd
import networkx as nx
import community


def get_network_features(x):
    """Summarise one group of transactions as a single row of graph statistics.

    An undirected graph is built from the ``from``/``to`` columns of ``x``
    (``eth_price`` is attached as an edge attribute). Degree, centrality,
    modularity, transitivity and connected-component statistics are then
    computed on that graph.

    Parameters
    ----------
    x : pandas.DataFrame
        Must provide the columns ``from``, ``to``, ``eth_price`` and ``index``.

    Returns
    -------
    pandas.DataFrame
        A one-row frame. Columns appear in the order the features are
        computed below.
    """
    def mean_and_std(values):
        # Both statistics are always reported together
        return np.mean(values), np.std(values)

    graph = nx.from_pandas_edgelist(
        x, source='from', target='to', edge_attr='eth_price',
        create_using=nx.Graph())
    features = {}

    # Edge count comes from the distinct 'index' values of the input rows
    # (not from the graph); node count is every address seen on either side.
    features['num_edges'] = x['index'].nunique(dropna=False)
    features['num_nodes'] = len(set(x['from']) | set(x['to']))

    # Degree over all nodes
    degrees = [deg for _, deg in graph.degree()]
    features['degree_mean'], features['degree_std'] = mean_and_std(degrees)

    # Degree over the (up to) ten best-connected nodes
    top_degrees = sorted(degrees, reverse=True)[:10]
    features['top10_degree_mean'], features['top10_degree_std'] = mean_and_std(top_degrees)

    # Degree centrality
    degree_centrality = list(nx.degree_centrality(graph).values())
    (features['degree_centrality_mean'],
     features['degree_centrality_std']) = mean_and_std(degree_centrality)

    # Modularity of the partition returned by community.best_partition
    partition = community.best_partition(graph)
    features['modularity'] = community.modularity(partition, graph)

    # Transitivity
    features['transitivity'] = nx.transitivity(graph)

    # Eigenvector centrality. The '_atd' spelling of the std key is kept
    # as-is because it is the column name callers receive.
    eigen_centrality = list(
        nx.eigenvector_centrality(graph, max_iter=20000).values())
    (features['eigenvector_centrality_mean'],
     features['eigenvector_centrality_atd']) = mean_and_std(eigen_centrality)

    # Closeness centrality
    closeness = list(nx.closeness_centrality(graph).values())
    (features['closeness_centrality_mean'],
     features['closeness_centrality_std']) = mean_and_std(closeness)

    # Connected components: how many, and the share of nodes in the largest
    features['num_components'] = nx.number_connected_components(graph)
    largest_component = max(nx.connected_components(graph), key=len)
    features['giant_com_ratio'] = len(largest_component) / graph.number_of_nodes()

    return pd.DataFrame({name: [value] for name, value in features.items()})

print('============ Network Features ============')
time_start = time.time()

# One row of graph statistics per date. reset_index() before grouping exposes
# the row index as the 'index' column that get_network_features reads; the
# 'level_1' column left by the second reset_index() is dropped. The ratio of
# the top-10 degree mean to the overall degree mean is appended last.
network_fea = (
    agg_data
    .reset_index()
    .groupby('date')
    .apply(get_network_features)
    .reset_index()
    .drop(columns=['level_1'])
    .assign(top10_degree_ratio=lambda fea: fea['top10_degree_mean'] / fea['degree_mean'])
)

# Move the freshly appended ratio (last column) to position 7, right after
# top10_degree_std.
cols = list(network_fea.columns)
network_fea = network_fea.loc[:, cols[:7] + cols[-1:] + cols[7:-1]]

network_fea



Unnamed: 0,date,num_edges,num_nodes,degree_mean,degree_std,top10_degree_mean,top10_degree_std,top10_degree_ratio,degree_centrality_mean,degree_centrality_std,modularity,transitivity,eigenvector_centrality_mean,eigenvector_centrality_atd,closeness_centrality_mean,closeness_centrality_std,num_components,giant_com_ratio
0,2021-01-01,1,2,1.000000,0.000000,1.000000,0.000000,1.000000,1.000000,0.000000e+00,0.000000,0.0,0.707107,0.000000,1.000000,0.000000e+00,1,1.000000
1,2021-01-02,5,9,1.111111,0.314270,1.111111,0.314270,1.000000,0.138889,3.928371e-02,0.720000,0.0,0.189683,0.274102,0.148148,3.982558e-02,4,0.333333
2,2021-01-03,4,8,1.000000,0.000000,1.000000,0.000000,1.000000,0.142857,0.000000e+00,0.750000,0.0,0.353553,0.000000,0.142857,0.000000e+00,4,0.250000
3,2021-01-04,5,7,1.428571,1.049781,1.428571,1.049781,1.000000,0.238095,1.749636e-01,0.320000,0.0,0.303047,0.225875,0.360544,1.560924e-01,2,0.714286
4,2021-01-05,13,22,1.181818,0.489560,1.400000,0.663325,1.184615,0.056277,2.331240e-02,0.863905,0.0,0.087818,0.194274,0.064358,2.412360e-02,9,0.181818
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
563,2022-07-22,2,4,1.000000,0.000000,1.000000,0.000000,1.000000,0.333333,0.000000e+00,0.500000,0.0,0.500000,0.000000,0.333333,0.000000e+00,2,0.500000
564,2022-07-23,2,4,1.000000,0.000000,1.000000,0.000000,1.000000,0.333333,0.000000e+00,0.500000,0.0,0.500000,0.000000,0.333333,0.000000e+00,2,0.500000
565,2022-07-25,5,8,1.250000,0.661438,1.250000,0.661438,1.000000,0.178571,9.449112e-02,0.560000,0.0,0.241484,0.258235,0.221429,9.449112e-02,3,0.500000
566,2022-07-26,3,6,1.000000,0.000000,1.000000,0.000000,1.000000,0.200000,2.775558e-17,0.666667,0.0,0.408248,0.000000,0.200000,2.775558e-17,3,0.333333


In [24]:
def get_core_neighbor(tx_df):
    """Detect core nodes in one group of transactions and summarise them.

    An undirected graph is built from the ``from``/``to`` columns of
    ``tx_df`` (``eth_price`` is attached as an edge attribute). The
    ``cpnet.BE`` detector is run on it, and nodes whose coreness equals 1
    are treated as core addresses.

    Parameters
    ----------
    tx_df : pandas.DataFrame
        Must provide the columns ``from``, ``to`` and ``eth_price``.

    Returns
    -------
    pandas.DataFrame
        One row with ``num_core`` (number of core addresses),
        ``avg_core_neighbor`` (mean degree of the core addresses, NaN when
        there are none) and ``core_addresses`` (the list itself).
    """
    # Construct the transaction graph for this group
    graph = nx.from_pandas_edgelist(
        tx_df, 'from', 'to', 'eth_price', nx.Graph())

    # Detect the core-periphery structure
    detector = cpnet.BE()
    detector.detect(graph)
    coreness = detector.get_coreness()  # maps node -> coreness

    core_addresses = [address for address, score in coreness.items() if score == 1]
    core_degrees = [deg for _, deg in graph.degree(core_addresses)]

    # np.mean([]) yields NaN but also raises "Mean of empty slice" /
    # "invalid value" RuntimeWarnings. Return the same NaN explicitly when
    # no core node was found.
    avg_core_neighbor = np.mean(core_degrees) if core_degrees else np.nan

    # NOTE: a significance test via cpnet.qstest was sketched here but is
    # disabled. Re-enabling it would also need detector.get_pair_id().

    return pd.DataFrame({'num_core': [len(core_addresses)],
                         'avg_core_neighbor': [avg_core_neighbor],
                         'core_addresses': [core_addresses]})


# Update network_fea with num_core and avg_core_neighbor.
# Both frames come from the same groupby('date') on agg_data, so their
# default integer indexes line up row for row.
network_fea_t = (
    agg_data
    .reset_index()
    .groupby('date')
    .apply(get_core_neighbor)
    .reset_index()
)
for core_col in ('num_core', 'avg_core_neighbor'):
    network_fea[core_col] = network_fea_t[core_col]
# network_fea['significance'] = network_fea_t['significance']

# How many days each address was detected as a core node
core_addresses_list = list(chain.from_iterable(network_fea_t['core_addresses']))
core_days_cnt = (
    pd.Series(core_addresses_list)
    .value_counts(ascending=False)
    .rename_axis('address')
    .reset_index(name='core_days_cnt')
)


adjacency_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.


adjacency_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.


Mean of empty slice.


invalid value encountered in double_scalars



In [25]:
# Display the feature table, which now also carries the num_core and
# avg_core_neighbor columns added in the previous cell.
network_fea

Unnamed: 0,date,num_edges,num_nodes,degree_mean,degree_std,top10_degree_mean,top10_degree_std,top10_degree_ratio,degree_centrality_mean,degree_centrality_std,modularity,transitivity,eigenvector_centrality_mean,eigenvector_centrality_atd,closeness_centrality_mean,closeness_centrality_std,num_components,giant_com_ratio,num_core,avg_core_neighbor
0,2021-01-01,1,2,1.000000,0.000000,1.000000,0.000000,1.000000,1.000000,0.000000e+00,0.000000,0.0,0.707107,0.000000,1.000000,0.000000e+00,1,1.000000,1,1.00
1,2021-01-02,5,9,1.111111,0.314270,1.111111,0.314270,1.000000,0.138889,3.928371e-02,0.720000,0.0,0.189683,0.274102,0.148148,3.982558e-02,4,0.333333,4,1.25
2,2021-01-03,4,8,1.000000,0.000000,1.000000,0.000000,1.000000,0.142857,0.000000e+00,0.750000,0.0,0.353553,0.000000,0.142857,0.000000e+00,4,0.250000,6,1.00
3,2021-01-04,5,7,1.428571,1.049781,1.428571,1.049781,1.000000,0.238095,1.749636e-01,0.320000,0.0,0.303047,0.225875,0.360544,1.560924e-01,2,0.714286,5,1.60
4,2021-01-05,13,22,1.181818,0.489560,1.400000,0.663325,1.184615,0.056277,2.331240e-02,0.863905,0.0,0.087818,0.194274,0.064358,2.412360e-02,9,0.181818,20,1.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
563,2022-07-22,2,4,1.000000,0.000000,1.000000,0.000000,1.000000,0.333333,0.000000e+00,0.500000,0.0,0.500000,0.000000,0.333333,0.000000e+00,2,0.500000,2,1.00
564,2022-07-23,2,4,1.000000,0.000000,1.000000,0.000000,1.000000,0.333333,0.000000e+00,0.500000,0.0,0.500000,0.000000,0.333333,0.000000e+00,2,0.500000,2,1.00
565,2022-07-25,5,8,1.250000,0.661438,1.250000,0.661438,1.000000,0.178571,9.449112e-02,0.560000,0.0,0.241484,0.258235,0.221429,9.449112e-02,3,0.500000,1,3.00
566,2022-07-26,3,6,1.000000,0.000000,1.000000,0.000000,1.000000,0.200000,2.775558e-17,0.666667,0.0,0.408248,0.000000,0.200000,2.775558e-17,3,0.333333,3,1.00


In [36]:
# Panels of figure 9 as (column of network_fea, subplot title), listed row
# by row. Uncommenting the last two entries adds a third row automatically.
fig9_panels = [
    ('num_components', "Number of components"),
    ('giant_com_ratio', "Giant component size ratio"),
    ('modularity', "Modularity"),
    ('degree_centrality_std', "Std. of degree centrality"),
    # ('avg_core_neighbor', "Avg. of core node neighbor"),
    # ('num_core', "Number of core nodes"),
]
fig9_cols = 2
fig9_rows = -(-len(fig9_panels) // fig9_cols)  # ceiling division

fig = make_subplots(
    rows=fig9_rows,
    cols=fig9_cols,
    subplot_titles=tuple(title for _, title in fig9_panels),
    horizontal_spacing=0.05,
    vertical_spacing=0.09,
)

# One time series per panel, filled left to right, top to bottom
for fig9_pos, (fig9_column, _) in enumerate(fig9_panels):
    fig.add_trace(
        go.Scatter(x=network_fea['date'], y=network_fea[fig9_column]),
        row=fig9_pos // fig9_cols + 1,
        col=fig9_pos % fig9_cols + 1,
    )

fig.update_layout(height=600, width=1400, showlegend=False, margin=dict(l=20, r=20, t=20, b=20))
fig.show()

# Make sure the target directory exists so the export does not fail on a
# fresh checkout.
fig9_dir = f"{IMG_PATH}/analysis"
os.makedirs(fig9_dir, exist_ok=True)
fig.write_image(f"{fig9_dir}/fig9.png")