In [1]:
import requests
import pandas as pd
import numpy as np
from io import BytesIO
import matplotlib.pyplot as plt
import json
import os
import networkx as nx
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
from datetime import datetime
warnings.filterwarnings('ignore')

In [2]:
# Given a DataFrame of transactions, compute every centrality measure and return them as one table.
def cal_centrality(original_df, alpha=0.85):
    """Compute node centralities of the directed sender -> receiver transfer graph.

    Args:
        original_df: DataFrame with at least the columns ``sender_address``,
            ``receiver_address`` and ``amount``. It is not modified.
        alpha: PageRank damping factor (0.85 is the customary value).

    Returns:
        DataFrame indexed by address with the columns ``degree_centrality``,
        ``betweenness_centrality``, ``closeness_centrality`` and ``pagerank``.

    Note:
        None of the centrality calls below is given a weight argument, and the
        edge attribute is called ``amount`` rather than networkx's default
        ``weight``, so all four measures are computed on the unweighted graph.
        The summed ``amount`` is stored on each edge only for later inspection.
    """
    # Drop transactions that moved no tokens; keep only the columns the graph needs.
    moved = original_df.loc[
        original_df['amount'] != 0.0,
        ['sender_address', 'receiver_address', 'amount'],
    ]

    # A DiGraph holds one edge per ordered (sender, receiver) pair, so feeding it
    # repeated rows would silently keep only the last row's amount. Collapse the
    # repeats first and store the total transferred amount instead.
    # sort=False keeps pairs in order of first appearance, so nodes enter the
    # graph (and therefore the result index) in the same order as before.
    # groupby's default drops rows whose sender or receiver is missing.
    edges = moved.groupby(
        ['sender_address', 'receiver_address'], sort=False, as_index=False
    )['amount'].sum()

    # Build the directed network.
    G = nx.from_pandas_edgelist(
        edges,
        'sender_address',
        'receiver_address',
        edge_attr='amount',
        create_using=nx.DiGraph(),
    )

    centrality = {
        # In-degree plus out-degree, normalised by (n - 1); can exceed 1 on a DiGraph.
        'degree_centrality': nx.degree_centrality(G),
        # Exact shortest-path betweenness; this is the expensive step on large graphs.
        'betweenness_centrality': nx.betweenness_centrality(G),
        'closeness_centrality': nx.closeness_centrality(G),
        'pagerank': nx.pagerank(G, alpha=alpha),
    }

    # Each value is a {node: score} dict, so the nodes become the index.
    return pd.DataFrame(centrality)

In [3]:
# Main entry point
def calculate_daily_centrality(df):
    """Compute centrality measures separately for every calendar day.

    Args:
        df: DataFrame with a ``timestamp`` column parseable by
            ``pd.to_datetime`` plus the columns ``cal_centrality`` requires.
            The frame is not modified (the timestamp column is parsed into a
            local Series instead of being overwritten in place).

    Returns:
        DataFrame holding the rows returned by ``cal_centrality`` for each day,
        stacked in day order, with an extra ``date`` column. The index is kept
        from the per-day frames, so the same index label can appear once per
        day. An empty DataFrame is returned when there are no days to process.
    """
    # Parse timestamps without touching the caller's column.
    timestamps = pd.to_datetime(df['timestamp'])

    # Group rows by calendar date.
    grouped = df.groupby(timestamps.dt.date)

    # Collect per-day frames and concatenate once at the end; concatenating
    # inside the loop re-copies the accumulated result on every iteration.
    daily_frames = []

    # tqdm shows progress over the daily groups.
    for date, group in tqdm(grouped):
        daily_centrality = cal_centrality(group)
        daily_centrality['date'] = date
        daily_frames.append(daily_centrality)

    # pd.concat raises on an empty list, so keep the empty-frame result explicit.
    if not daily_frames:
        return pd.DataFrame()

    return pd.concat(daily_frames, ignore_index=False)

In [4]:
ls

Centrality.ipynb                   SOL_Transfer_extraction_code.ipynb
SOL_2020-03-19_2020-06-30_TRX.csv  calculate_components.ipynb
SOL_2020-07-01_2020-09-30_TRX.csv  calculate_components2.ipynb
SOL_2020-10-01_2020-12-31_TRX.csv


In [17]:
# Transactions CSV to process; the output file name is later derived from this string.
file_name = 'SOL_2020-10-01_2020-12-31_TRX.csv'

In [18]:
# Load the whole CSV into memory as the raw transaction table.
df = pd.read_csv(file_name)

In [19]:
# Show the loaded table (the display is truncated to the first and last rows).
df

Unnamed: 0,timestamp,block_height,sender_address,receiver_address,amount,trx_hash,trx_gasPrice
0,2020-10-01 00:00:26,38521310,FrDcCb3DS3nFJ41jG3kvnsKuwjJf8qmWa89QcL4Jt4MA,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,3500.0000,C4Ej7RBWkzEuGZuh15szWUjHStDm6AJaQB5uM3NNdPdW,0.000005
1,2020-10-01 00:00:26,38521311,FrDcCb3DS3nFJ41jG3kvnsKuwjJf8qmWa89QcL4Jt4MA,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,1000.0000,9Mtexf1LnkJVw46aTNuevsFdMW4q8nPqC57PpmrN5brB,0.000005
2,2020-10-01 00:00:27,38521312,BzZeRXC4v3fqddMBYKLSP4Awswp5eSP3qUy9y9HsgXrG,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,300.0000,4JS4YcX4p81w51DjCHicERLnupD2Ek2FWpnvbrDwA7tH,0.000005
3,2020-10-01 00:00:27,38521313,BzZeRXC4v3fqddMBYKLSP4Awswp5eSP3qUy9y9HsgXrG,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,4000.0000,5xcBTif5PWsiVpEkXNHSsZPnLhg77apt3kcXJ7LoqYvg,0.000005
4,2020-10-01 00:00:28,38521315,6gfi6GSjrhqc5xDLtDkVrTR61Hi7GMNPmJknxvbqzb1x,6Mk9P4C77EskX51ov95z6RnFZ8jCpn7tbDVfeeTQbr28,1234.5886,HPfPhCfz9PpmydEpHD9eh3LvYDscKaHXVrUCJ313sbq1,0.000010
...,...,...,...,...,...,...,...
4513120,2020-12-31 23:59:17,58456725,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,FrDcCb3DS3nFJ41jG3kvnsKuwjJf8qmWa89QcL4Jt4MA,2000.0000,5zHfWZc7YJrxCvfp2qGuaZsCfMSWhdctoujDWafpWyTS,0.000005
4513121,2020-12-31 23:59:17,58456725,FrDcCb3DS3nFJ41jG3kvnsKuwjJf8qmWa89QcL4Jt4MA,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,2000.0000,5zHfWZc7YJrxCvfp2qGuaZsCfMSWhdctoujDWafpWyTS,0.000005
4513122,2020-12-31 23:59:44,58456788,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,FrDcCb3DS3nFJ41jG3kvnsKuwjJf8qmWa89QcL4Jt4MA,1800.0000,CX5mVZBsa5N3xPomE9kJ7Lu7avk9nmAfJYuULhDJJ6CL,0.000005
4513123,2020-12-31 23:59:48,58456799,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,GKgXWmK8bC9CjeHrpbGR6Tv9fNQYVJsLT5QpuaHzfoHH,300.0000,Aco6J6UVLGqPhcdKHQsWN6pzJwN1owHu17GcYjn9HXj,0.000005


In [20]:
# Run the per-day centrality computation over the whole table.
# NOTE: slow — the recorded run took about 2h24m for 92 daily groups.
result = calculate_daily_centrality(df)

100%|██████████████████████████████████████████████████████████████████████████████| 92/92 [2:24:27<00:00, 94.22s/it]


In [21]:
# Inspect the stacked per-day centrality table (the index holds the addresses).
result

Unnamed: 0,degree_centrality,betweenness_centrality,closeness_centrality,pagerank,date
FrDcCb3DS3nFJ41jG3kvnsKuwjJf8qmWa89QcL4Jt4MA,0.000813,1.216617e-04,0.299410,0.000193,2020-10-01
8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,0.000813,4.135340e-08,0.199652,0.000196,2020-10-01
BzZeRXC4v3fqddMBYKLSP4Awswp5eSP3qUy9y9HsgXrG,0.000813,1.216617e-04,0.299410,0.000193,2020-10-01
6gfi6GSjrhqc5xDLtDkVrTR61Hi7GMNPmJknxvbqzb1x,1.196828,3.582936e-01,0.598414,0.277564,2020-10-01
6Mk9P4C77EskX51ov95z6RnFZ8jCpn7tbDVfeeTQbr28,0.000407,0.000000e+00,0.299309,0.000110,2020-10-01
...,...,...,...,...,...
5vZUJEfQK16qLT1q693LVVQPKuPTrFDM4zrS1oq8nQNi,0.000179,7.311335e-05,0.253171,0.000058,2020-12-31
25ErA4HzoCTi1KTHh5b7MDBjdxEPuGbDcuCqz931P4nn,0.000179,4.280621e-05,0.385612,0.000047,2020-12-31
GsMzKMTsEdWAztitmoKGxCb58CpKkGehQ26H2FmTJvpQ,0.000179,0.000000e+00,0.385584,0.000047,2020-12-31
CoNug4D8ZP57NB7RZfvh7pGM1NNjb32PZAnZXREKJ9rB,0.000179,7.311335e-05,0.253171,0.000058,2020-12-31


In [22]:
# Derive the output name from the input name by removing the trailing
# '_TRX.csv'. Stripping the suffix explicitly (instead of slicing off a fixed
# number of characters) keeps the name correct if the input is named
# differently; in that case only the extension is removed.
TRX_SUFFIX = '_TRX.csv'
if file_name.endswith(TRX_SUFFIX):
    output_stem = file_name[:-len(TRX_SUFFIX)]
else:
    output_stem = os.path.splitext(file_name)[0]

# The index (addresses) is written as the first CSV column.
result.to_csv(f'{output_stem}_centrality.csv')

In [None]:
# 2021년 wemix centrality 계산중 (note: computing WEMIX centrality for 2021)

In [11]:
# Drop transactions that moved no tokens. Take an explicit copy so the column
# assignment below writes to an independent frame rather than to a filtered
# slice (which pandas flags as chained assignment).
large_df = df[df['amount'] != 0.0].copy()

# Record how many transactions each (sender, receiver) pair has.
large_df['count'] = (
    large_df
    .groupby(['sender_address', 'receiver_address'])['sender_address']
    .transform('count')
)

# Keep only rows whose pair transacted at least twice.
# NOTE: this rebinds `df`, so the unfiltered table is no longer reachable
# under that name after this cell runs.
df = large_df[large_df['count'] >= 2]

In [12]:
# Inspect the filtered table: only pairs that transacted at least twice remain.
df

Unnamed: 0.1,Unnamed: 0,timestamp,block_height,sender_address,receiver_address,amount,trx_hash,trx_gasPrice,count
6,6,2021-01-02 03:10:08,47900804,0x5e97c2b7afab65ba054f2dea529d45614373f164,0x53a89792427370b1793e99edd6a8c7c342fc71d8,0.050211,0x70849bac2b0767d71bf46aea2f0351e0ee40e0c828d7...,0.0,6
7,7,2021-01-02 14:05:07,47940103,0x3ca2c2e883d4ef03eee21124edad2dda8e7bb6ce,0x2bf022788c06b9f94ec1a0eafb7f8dd1ef80e00a,55.000001,0xa98eb87b6d75a90f857931decd38dd69c7ae45ab7328...,0.0,5
8,8,2021-01-02 14:05:50,47940146,0x2bf022788c06b9f94ec1a0eafb7f8dd1ef80e00a,0x7018ac5505cd20d3593421a9e5c57ae1fca62733,55.000001,0xe11c90e820cf5761415f896244bc7d8cc6245f0d53d1...,0.0,5
11,11,2021-01-03 13:42:11,48025126,0x3ca2c2e883d4ef03eee21124edad2dda8e7bb6ce,0x87dc6e300b6c0eec69648a355488d2d7998860d9,95000.001491,0x96743ae04ab4a0e30ec25506a1fb45b2ee0123b87e0e...,0.0,3
12,12,2021-01-03 13:42:34,48025149,0x87dc6e300b6c0eec69648a355488d2d7998860d9,0x7018ac5505cd20d3593421a9e5c57ae1fca62733,95000.001491,0x31a5b15fd8ca13f60e6654f97a7a88d75065236bd796...,0.0,3
...,...,...,...,...,...,...,...,...,...
17856978,16681998,2021-12-31 23:59:54,79181261,0x00d929d5822f1e9ed9059bde07d7eceffeaca7ad,0xf5decaa8991639493ec79e0e81f12b9698e84163,0.229471,0x82229ebe99f2ed3027ff9e1472d4e7577df8cf2e2b41...,0.0,2
17856979,16681999,2021-12-31 23:59:54,79181261,0xd60b1fa18b851f03e4557a461a03e7d64a1e20cd,0xdc5d868d076663daca43e9b02926956a7fdd2294,0.005000,0x3d5197a08471a4289d686edf737c8c210d7042510ba2...,0.0,3
17856980,16682000,2021-12-31 23:59:54,79181261,0xd60b1fa18b851f03e4557a461a03e7d64a1e20cd,0x3cb3890004e8f5a6885c1d8d359e8c2946b504f3,4.791937,0x3d5197a08471a4289d686edf737c8c210d7042510ba2...,0.0,3
17856981,16682001,2021-12-31 23:59:55,79181262,0x3cb3890004e8f5a6885c1d8d359e8c2946b504f3,0x2c5ab11c28d5d82859e2f35ebd1c5f478ab85e18,0.876197,0x33cf9a202d7c75589a46c3830b3f15fbb641df0ca032...,0.0,3
