In [1]:
import requests
import pandas as pd
import numpy as np
from io import BytesIO
import matplotlib.pyplot as plt
import json
import os
from pandas import json_normalize
import networkx as nx
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
from datetime import datetime
warnings.filterwarnings('ignore')

In [2]:
import networkx as nx
import pandas as pd

def cal_centrality(original_df):
    """Compute per-address centrality measures for one table of transfers.

    Rows whose ``amount`` equals 0.0 are discarded, the remaining rows are
    turned into a directed graph (sender -> receiver), and five NetworkX
    measures are evaluated on that graph.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Needs the columns ``sender_address``, ``receiver_address`` and
        ``amount``. The frame is only read, never modified.

    Returns
    -------
    pandas.DataFrame
        One row per address present in the graph (the address is the index)
        with the columns ``indegree_centrality``, ``outdegree_centrality``,
        ``betweenness_centrality``, ``closeness_centrality`` and ``pagerank``.

    Notes
    -----
    * ``amount`` is stored on each edge, but none of the calls below is given
      a weight/distance argument, and the attribute is not named ``weight``
      (the key ``nx.pagerank`` looks up by default), so every measure is
      computed on the unweighted graph.
      NOTE(review): confirm that unweighted centrality is what is wanted.
    * A ``DiGraph`` holds a single edge per (sender, receiver) pair, so
      repeated transfers between the same pair collapse into one edge.
    * The previous per-pair ``count`` column was removed: it was assigned to
      a filtered slice of the input and never read afterwards.
    """
    # Ignore transactions that moved no tokens.
    transfers = original_df[original_df['amount'] != 0.0]

    # Directed graph: one edge per distinct sender -> receiver pair.
    G = nx.from_pandas_edgelist(
        transfers,
        'sender_address',
        'receiver_address',
        edge_attr='amount',
        create_using=nx.DiGraph,
    )

    centrality = {
        # Fraction of other nodes that send to / receive from this node.
        'indegree_centrality': nx.in_degree_centrality(G),
        'outdegree_centrality': nx.out_degree_centrality(G),
        # Share of shortest paths that pass through this node.
        'betweenness_centrality': nx.betweenness_centrality(G),
        'closeness_centrality': nx.closeness_centrality(G),
        # PageRank with the customary damping factor of 0.85.
        'pagerank': nx.pagerank(G, alpha=0.85),
    }

    # Dict of {measure: {address: value}} -> frame indexed by address.
    return pd.DataFrame(centrality)


In [3]:
# Main entry point: centrality measures computed separately for each calendar day.
def calculate_daily_centrality(df):
    """Run ``cal_centrality`` once per calendar day and stack the results.

    Parameters
    ----------
    df : pandas.DataFrame
        Transaction table with a ``timestamp`` column (anything
        ``pd.to_datetime`` accepts) plus the columns ``cal_centrality``
        needs. The frame is not modified; earlier versions overwrote
        ``df['timestamp']`` in place.

    Returns
    -------
    pandas.DataFrame
        The per-day frames returned by ``cal_centrality`` concatenated in day
        order, with an extra ``date`` column. The address index is kept
        (``ignore_index=False``), so an address active on several days
        appears once per day. An empty ``DataFrame`` is returned when the
        input contains no groups.
    """
    # Derive the grouping key without writing back into the caller's frame
    # (the caller may pass a filtered slice of a larger frame).
    day = pd.to_datetime(df['timestamp']).dt.date

    # Collect the daily frames and concatenate once at the end; concatenating
    # inside the loop re-copies all previously accumulated rows every time.
    daily_frames = []
    for date, group in tqdm(df.groupby(day)):  # tqdm shows a progress bar
        daily_centrality = cal_centrality(group)
        daily_centrality['date'] = date
        daily_frames.append(daily_centrality)

    if not daily_frames:
        # pd.concat raises on an empty list; keep the original empty result.
        return pd.DataFrame()

    return pd.concat(daily_frames, ignore_index=False)

In [4]:
ls

 Centrality.ipynb
 GATEIO_WEMIXUSDT_D.xlsx
 WEMIX_2022-10-24~2022-12-31_centrality.csv
 WEMIX_2022-11-24~2023-02-22_cal.csv
 WEMIX_2022-11-24~2023-02-23.csv
 WEMIX_2022_2023_sum_address.csv
'WEMIX_2023-01-01~2023-02-22_centrality(in_out).csv'
 WEMIX_2023-01-01~2023-02-22_centrality.csv
 WEMIX_Pagerank_top300.csv
 WEMIX_betweenness_top300.csv
 WEMIX_final_account.csv
 WEMIX_unique_wallet.csv
 duration_transivity_assortativity.ipynb
 final_moving_graph-Copy1.ipynb
 final_moving_graph.ipynb
 final_project_dataset.ipynb
 interactive_transaction_network2.html
 interactive_transaction_network3.html
 interactive_transaction_network4.html
 temp.html


In [5]:
# Transaction CSV to analyse, given relative to the notebook's working directory.
# NOTE(review): the name says 2022-11-24, but the rows displayed after loading
# start at 2022-10-24 — confirm this is the intended file.
file_name = 'WEMIX_2022-11-24~2023-02-23.csv'

In [6]:
# Load the full transaction table into memory.
# NOTE(review): the displayed frame carries 'Unnamed: 0' and 'Unnamed: 0.1'
# columns, which look like previously saved indexes read back as data —
# confirm whether they should be dropped or read via index_col.
df = pd.read_csv(file_name)

In [7]:
df

Unnamed: 0.1,Unnamed: 0,timestamp,block_height,sender_address,receiver_address,amount,trx_hash,trx_gasPrice
0,0,2022-10-24 00:00:09,104664828,0x75da6ae2c46b93565ac65b6a299ef055d437fd82,0xc6a2ad8cc6e4a7e08fc37cc5954be07d499e7654,237.495004,0x284d63e03b2f796402455f0095b346f2c3ae0aa7e1af...,0.0
1,1,2022-10-24 00:00:09,104664828,0x917eed7ae9e7d3b0d875dd393af93fff3fc301f8,0x278b83fc5b5f66966e9a1afbdbf8da1f797e0d12,0.356243,0x284d63e03b2f796402455f0095b346f2c3ae0aa7e1af...,0.0
2,2,2022-10-24 00:00:09,104664828,0x298b036f0e7f5bec69f3134ed36d87450c16c20d,0x2f1108ceb70fc13777c2cd6ba69a84f9e04badb6,1.771368,0x4aebf66adad5ed8d604505223826c04b439c35550d06...,0.0
3,3,2022-10-24 00:00:09,104664828,0x917eed7ae9e7d3b0d875dd393af93fff3fc301f8,0x71b59e4bc2995b57aa03437ed645ada7dd5b1890,0.356243,0x284d63e03b2f796402455f0095b346f2c3ae0aa7e1af...,0.0
4,4,2022-10-24 00:00:09,104664828,0xc6a2ad8cc6e4a7e08fc37cc5954be07d499e7654,0x917eed7ae9e7d3b0d875dd393af93fff3fc301f8,237.495004,0x284d63e03b2f796402455f0095b346f2c3ae0aa7e1af...,0.0
...,...,...,...,...,...,...,...,...
2504081,2504081,2023-02-22 23:59:47,115201544,0x917eed7ae9e7d3b0d875dd393af93fff3fc301f8,0xc6a2ad8cc6e4a7e08fc37cc5954be07d499e7654,155.965151,0x9a3f76cade9e654b3ab9100c5e988624a15b104436aa...,0.0
2504082,2504082,2023-02-22 23:59:47,115201544,0x68d19a7cae31b12d9ea932cfec4104db0a766ca6,0xf50782a24afcb26acb85d086cf892bfffb5731b5,561.700241,0x9a3f76cade9e654b3ab9100c5e988624a15b104436aa...,0.0
2504083,2504083,2023-02-22 23:59:54,115201551,0x672dbfbd3953875f31ccbb8d9da3d3a38c3d191d,0x957807b362f882d75e473479cbd91ca9f45c4d64,1.582401,0x4d21644b88691ea67d386f35272ead21ba95066ff8a0...,0.0
2504084,2504084,2023-02-22 23:59:54,115201551,0x957807b362f882d75e473479cbd91ca9f45c4d64,0xdbcc06778c023a507fb6745918ec45d201616db1,1.582401,0x4d21644b88691ea67d386f35272ead21ba95066ff8a0...,0.0


In [8]:
# Parse the timestamps, then keep every row stamped at or before
# 2023-02-23 00:00:00.
# NOTE(review): the bound is inclusive, so a row stamped exactly at midnight
# on 2023-02-23 would be kept — confirm that is intended.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.loc[df['timestamp'].le(datetime(2023, 2, 23))]

In [9]:
df

Unnamed: 0.1,Unnamed: 0,timestamp,block_height,sender_address,receiver_address,amount,trx_hash,trx_gasPrice
0,0,2022-10-24 00:00:09,104664828,0x75da6ae2c46b93565ac65b6a299ef055d437fd82,0xc6a2ad8cc6e4a7e08fc37cc5954be07d499e7654,237.495004,0x284d63e03b2f796402455f0095b346f2c3ae0aa7e1af...,0.0
1,1,2022-10-24 00:00:09,104664828,0x917eed7ae9e7d3b0d875dd393af93fff3fc301f8,0x278b83fc5b5f66966e9a1afbdbf8da1f797e0d12,0.356243,0x284d63e03b2f796402455f0095b346f2c3ae0aa7e1af...,0.0
2,2,2022-10-24 00:00:09,104664828,0x298b036f0e7f5bec69f3134ed36d87450c16c20d,0x2f1108ceb70fc13777c2cd6ba69a84f9e04badb6,1.771368,0x4aebf66adad5ed8d604505223826c04b439c35550d06...,0.0
3,3,2022-10-24 00:00:09,104664828,0x917eed7ae9e7d3b0d875dd393af93fff3fc301f8,0x71b59e4bc2995b57aa03437ed645ada7dd5b1890,0.356243,0x284d63e03b2f796402455f0095b346f2c3ae0aa7e1af...,0.0
4,4,2022-10-24 00:00:09,104664828,0xc6a2ad8cc6e4a7e08fc37cc5954be07d499e7654,0x917eed7ae9e7d3b0d875dd393af93fff3fc301f8,237.495004,0x284d63e03b2f796402455f0095b346f2c3ae0aa7e1af...,0.0
...,...,...,...,...,...,...,...,...
2504081,2504081,2023-02-22 23:59:47,115201544,0x917eed7ae9e7d3b0d875dd393af93fff3fc301f8,0xc6a2ad8cc6e4a7e08fc37cc5954be07d499e7654,155.965151,0x9a3f76cade9e654b3ab9100c5e988624a15b104436aa...,0.0
2504082,2504082,2023-02-22 23:59:47,115201544,0x68d19a7cae31b12d9ea932cfec4104db0a766ca6,0xf50782a24afcb26acb85d086cf892bfffb5731b5,561.700241,0x9a3f76cade9e654b3ab9100c5e988624a15b104436aa...,0.0
2504083,2504083,2023-02-22 23:59:54,115201551,0x672dbfbd3953875f31ccbb8d9da3d3a38c3d191d,0x957807b362f882d75e473479cbd91ca9f45c4d64,1.582401,0x4d21644b88691ea67d386f35272ead21ba95066ff8a0...,0.0
2504084,2504084,2023-02-22 23:59:54,115201551,0x957807b362f882d75e473479cbd91ca9f45c4d64,0xdbcc06778c023a507fb6745918ec45d201616db1,1.582401,0x4d21644b88691ea67d386f35272ead21ba95066ff8a0...,0.0


In [10]:
# Run the per-day centrality computation on the filtered transactions.
# The recorded run below took roughly 17 minutes for 122 daily groups.
result = calculate_daily_centrality(df)

100%|█████████████████████████████████████████| 122/122 [17:16<00:00,  8.50s/it]


In [11]:
result

Unnamed: 0,indegree_centrality,outdegree_centrality,betweenness_centrality,closeness_centrality,pagerank,date
0x75da6ae2c46b93565ac65b6a299ef055d437fd82,0.000000,0.000124,0.000000,0.000000,0.000049,2022-10-24
0xc6a2ad8cc6e4a7e08fc37cc5954be07d499e7654,0.115475,0.082994,0.090618,0.133950,0.059237,2022-10-24
0x917eed7ae9e7d3b0d875dd393af93fff3fc301f8,0.001853,0.000865,0.004434,0.097496,0.001144,2022-10-24
0x278b83fc5b5f66966e9a1afbdbf8da1f797e0d12,0.000494,0.000124,0.000052,0.076511,0.000454,2022-10-24
0x298b036f0e7f5bec69f3134ed36d87450c16c20d,0.000000,0.000124,0.000000,0.000000,0.000049,2022-10-24
...,...,...,...,...,...,...
0x267caada1de35259ac5842f4d54a9438122dc415,0.000650,0.000000,0.000000,0.089301,0.000199,2023-02-22
0xa8be5d69f5229249d64a8b7f3cc4032464639a60,0.000650,0.000000,0.000000,0.100342,0.000241,2023-02-22
0x8b37af9d981599639c76a489c1009ebfafab88d0,0.000000,0.000650,0.000000,0.000000,0.000197,2023-02-22
0x45d492af8e39af0dc3e20c7eca6c422d4053bd13,0.000650,0.000000,0.000000,0.089301,0.000199,2023-02-22


In [12]:
# Write the daily centrality table to CSV; the address index is written as the
# first column. The name is a plain string (it had a needless f-prefix).
# NOTE(review): the file name says 2023-01-01, but the displayed `result`
# starts at 2022-10-24 — confirm the name matches the data being saved.
result.to_csv('WEMIX_2023-01-01~2023-02-22_centrality(in_out).csv')
# result.to_csv(f'{file_name[:-7]}centrality.csv')

In [None]:
# 2021년 WEMIX centrality 계산 중 (computing centrality for the 2021 WEMIX data — work in progress)

In [11]:
# NOTE(review): the output displayed for this cell is 2021 data and the
# execution count repeats In [11], so `df` was presumably reloaded by a cell
# that is not part of this notebook as saved — confirm before Run All.

# Drop transactions that moved no tokens. The .copy() makes `large_df` an
# independent frame, so the column assignment below does not write into a
# filtered slice of `df`.
large_df = df[df['amount'] != 0.0].copy()

# For every row, record how many transactions share its (sender, receiver) pair.
large_df['count'] = large_df.groupby(['sender_address', 'receiver_address'])['sender_address'].transform('count')

# Keep only the rows whose pair traded at least twice.
df = large_df[large_df['count'] >= 2]

In [12]:
df

Unnamed: 0.1,Unnamed: 0,timestamp,block_height,sender_address,receiver_address,amount,trx_hash,trx_gasPrice,count
6,6,2021-01-02 03:10:08,47900804,0x5e97c2b7afab65ba054f2dea529d45614373f164,0x53a89792427370b1793e99edd6a8c7c342fc71d8,0.050211,0x70849bac2b0767d71bf46aea2f0351e0ee40e0c828d7...,0.0,6
7,7,2021-01-02 14:05:07,47940103,0x3ca2c2e883d4ef03eee21124edad2dda8e7bb6ce,0x2bf022788c06b9f94ec1a0eafb7f8dd1ef80e00a,55.000001,0xa98eb87b6d75a90f857931decd38dd69c7ae45ab7328...,0.0,5
8,8,2021-01-02 14:05:50,47940146,0x2bf022788c06b9f94ec1a0eafb7f8dd1ef80e00a,0x7018ac5505cd20d3593421a9e5c57ae1fca62733,55.000001,0xe11c90e820cf5761415f896244bc7d8cc6245f0d53d1...,0.0,5
11,11,2021-01-03 13:42:11,48025126,0x3ca2c2e883d4ef03eee21124edad2dda8e7bb6ce,0x87dc6e300b6c0eec69648a355488d2d7998860d9,95000.001491,0x96743ae04ab4a0e30ec25506a1fb45b2ee0123b87e0e...,0.0,3
12,12,2021-01-03 13:42:34,48025149,0x87dc6e300b6c0eec69648a355488d2d7998860d9,0x7018ac5505cd20d3593421a9e5c57ae1fca62733,95000.001491,0x31a5b15fd8ca13f60e6654f97a7a88d75065236bd796...,0.0,3
...,...,...,...,...,...,...,...,...,...
17856978,16681998,2021-12-31 23:59:54,79181261,0x00d929d5822f1e9ed9059bde07d7eceffeaca7ad,0xf5decaa8991639493ec79e0e81f12b9698e84163,0.229471,0x82229ebe99f2ed3027ff9e1472d4e7577df8cf2e2b41...,0.0,2
17856979,16681999,2021-12-31 23:59:54,79181261,0xd60b1fa18b851f03e4557a461a03e7d64a1e20cd,0xdc5d868d076663daca43e9b02926956a7fdd2294,0.005000,0x3d5197a08471a4289d686edf737c8c210d7042510ba2...,0.0,3
17856980,16682000,2021-12-31 23:59:54,79181261,0xd60b1fa18b851f03e4557a461a03e7d64a1e20cd,0x3cb3890004e8f5a6885c1d8d359e8c2946b504f3,4.791937,0x3d5197a08471a4289d686edf737c8c210d7042510ba2...,0.0,3
17856981,16682001,2021-12-31 23:59:55,79181262,0x3cb3890004e8f5a6885c1d8d359e8c2946b504f3,0x2c5ab11c28d5d82859e2f35ebd1c5f478ab85e18,0.876197,0x33cf9a202d7c75589a46c3830b3f15fbb641df0ca032...,0.0,3
