In [1]:
import pandas as pd
import networkx as nx
from datetime import datetime, timedelta
from tqdm import tqdm

In [2]:
# List the working directory to see which quarterly transfer CSVs are available.
ls

Centrality.ipynb                          SOL_2020-10-01_2020-12-31_TRX.csv
SOL_2020-03-19_2020-06-30_TRX.csv         SOL_Transfer_extraction_code.ipynb
SOL_2020-03-19_2020-06-30_centrality.csv  calculate_components.ipynb
SOL_2020-07-01_2020-09-30_TRX.csv         calculate_components2.ipynb


In [11]:
# Transfer export to analyse; the save cell derives the output file name from this value.
file_name = 'SOL_2020-10-01_2020-12-31_TRX.csv'

# Load every transfer row into memory; the bare `df` expression renders a preview.
df = pd.read_csv(file_name)
df

Unnamed: 0,timestamp,block_height,sender_address,receiver_address,amount,trx_hash,trx_gasPrice
0,2020-10-01 00:00:26,38521310,FrDcCb3DS3nFJ41jG3kvnsKuwjJf8qmWa89QcL4Jt4MA,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,3500.0000,C4Ej7RBWkzEuGZuh15szWUjHStDm6AJaQB5uM3NNdPdW,0.000005
1,2020-10-01 00:00:26,38521311,FrDcCb3DS3nFJ41jG3kvnsKuwjJf8qmWa89QcL4Jt4MA,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,1000.0000,9Mtexf1LnkJVw46aTNuevsFdMW4q8nPqC57PpmrN5brB,0.000005
2,2020-10-01 00:00:27,38521312,BzZeRXC4v3fqddMBYKLSP4Awswp5eSP3qUy9y9HsgXrG,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,300.0000,4JS4YcX4p81w51DjCHicERLnupD2Ek2FWpnvbrDwA7tH,0.000005
3,2020-10-01 00:00:27,38521313,BzZeRXC4v3fqddMBYKLSP4Awswp5eSP3qUy9y9HsgXrG,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,4000.0000,5xcBTif5PWsiVpEkXNHSsZPnLhg77apt3kcXJ7LoqYvg,0.000005
4,2020-10-01 00:00:28,38521315,6gfi6GSjrhqc5xDLtDkVrTR61Hi7GMNPmJknxvbqzb1x,6Mk9P4C77EskX51ov95z6RnFZ8jCpn7tbDVfeeTQbr28,1234.5886,HPfPhCfz9PpmydEpHD9eh3LvYDscKaHXVrUCJ313sbq1,0.000010
...,...,...,...,...,...,...,...
4513120,2020-12-31 23:59:17,58456725,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,FrDcCb3DS3nFJ41jG3kvnsKuwjJf8qmWa89QcL4Jt4MA,2000.0000,5zHfWZc7YJrxCvfp2qGuaZsCfMSWhdctoujDWafpWyTS,0.000005
4513121,2020-12-31 23:59:17,58456725,FrDcCb3DS3nFJ41jG3kvnsKuwjJf8qmWa89QcL4Jt4MA,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,2000.0000,5zHfWZc7YJrxCvfp2qGuaZsCfMSWhdctoujDWafpWyTS,0.000005
4513122,2020-12-31 23:59:44,58456788,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,FrDcCb3DS3nFJ41jG3kvnsKuwjJf8qmWa89QcL4Jt4MA,1800.0000,CX5mVZBsa5N3xPomE9kJ7Lu7avk9nmAfJYuULhDJJ6CL,0.000005
4513123,2020-12-31 23:59:48,58456799,8ZLcmKPxQ3151kNB5yXXYbFHnKLdkoLsEP4FtUWXgWkv,GKgXWmK8bC9CjeHrpbGR6Tv9fNQYVJsLT5QpuaHzfoHH,300.0000,Aco6J6UVLGqPhcdKHQsWN6pzJwN1owHu17GcYjn9HXj,0.000005


In [12]:
# Parse timestamps once so rows can be bucketed by calendar day.
df['timestamp'] = pd.to_datetime(df['timestamp'])


def _largest_component_stats(graph, components):
    """Return ``(node_count, edge_count)`` for the largest component.

    ``components`` is a list of node sets of ``graph``. An empty list yields
    ``(0, 0)``. The edge count is taken from the subgraph of ``graph`` induced
    on the largest node set.
    """
    if not components:
        return 0, 0
    largest = max(components, key=len)
    return len(largest), graph.subgraph(largest).number_of_edges()


def _daily_graph_metrics(day, daily_df):
    """Build one day's transfer graph and return its metrics as a dict.

    Parameters
    ----------
    day : value stored under the ``'date'`` key of the returned record.
    daily_df : rows for that day; must provide the ``sender_address``,
        ``receiver_address`` and ``amount`` columns.

    Returns
    -------
    dict or None
        One record per day, or ``None`` when the resulting graph has no nodes.
    """
    # A DiGraph keeps a single edge per (sender, receiver) pair, so repeated
    # transfers between the same pair collapse into one edge. 'amount' is
    # stored on the edge but none of the metrics below read it.
    G = nx.from_pandas_edgelist(
        daily_df,
        'sender_address',
        'receiver_address',
        edge_attr='amount',
        create_using=nx.DiGraph(),
    )
    if G.number_of_nodes() == 0:
        return None

    reciprocity = nx.reciprocity(G)

    # Best effort: a failing assortativity must not abort the whole run, but
    # only ordinary errors are swallowed (Ctrl-C still interrupts) and the
    # failure is reported instead of being silently dropped.
    try:
        assortativity = nx.degree_assortativity_coefficient(G)
    except Exception as exc:
        tqdm.write(f"{day}: degree assortativity failed ({exc!r}); storing None")
        assortativity = None

    strongly_connected = list(nx.strongly_connected_components(G))
    weakly_connected = list(nx.weakly_connected_components(G))
    largest_scc_nodes, largest_scc_edges = _largest_component_stats(G, strongly_connected)
    largest_wcc_nodes, largest_wcc_edges = _largest_component_stats(G, weakly_connected)

    # Transitivity, clustering and articulation points are computed on the
    # undirected view of the graph, built only once the graph is known to be
    # non-empty.
    G_undirected = G.to_undirected()
    transitivity = nx.transitivity(G_undirected)
    # NOTE: this is the mean of the local clustering coefficients; the key name
    # below is kept unchanged so existing output files stay compatible.
    global_clustering = nx.average_clustering(G_undirected)
    num_articulation_points = sum(1 for _ in nx.articulation_points(G_undirected))

    return {
        'date': day,
        'reciprocity': reciprocity,
        'assortativity': assortativity,
        'strongly_connected_components': len(strongly_connected),
        'largest_scc_nodes': largest_scc_nodes,
        'largest_scc_edges': largest_scc_edges,
        'weakly_connected_components': len(weakly_connected),
        'largest_wcc_nodes': largest_wcc_nodes,
        'largest_wcc_edges': largest_wcc_edges,
        'transitivity': transitivity,
        'global_clustering_coefficient': global_clustering,
        'num_articulation_points': num_articulation_points,
    }


# Bucket the rows by calendar day in a single pass instead of re-scanning the
# whole frame for every day. Groups come back in ascending date order and
# days without any transfer simply produce no group (and therefore no row).
daily_groups = df.groupby(df['timestamp'].dt.date)

# One record per day that has at least one transfer.
results = []

# Iterating through tqdm closes the progress bar even if a day raises.
for current_date, daily_df in tqdm(daily_groups, total=daily_groups.ngroups, desc="Processing days"):
    daily_metrics = _daily_graph_metrics(current_date, daily_df)
    if daily_metrics is not None:
        results.append(daily_metrics)

# Collect the per-day records into a single frame.
result_df = pd.DataFrame(results)

# Plain-text summary of the result.
print(result_df)

Processing days: 100%|███████████████████████████████████████████████████████████████| 92/92 [07:38<00:00,  4.98s/it]

          date  reciprocity  assortativity  strongly_connected_components  \
0   2020-10-01     0.994477      -0.916547                             56   
1   2020-10-02     0.988664      -0.782937                             56   
2   2020-10-03     0.993743      -0.793209                             45   
3   2020-10-04     0.998028      -0.811411                             20   
4   2020-10-05     0.979138      -0.771269                            122   
..         ...          ...            ...                            ...   
87  2020-12-27     0.412523      -0.636143                            217   
88  2020-12-28     0.395237      -0.629707                            179   
89  2020-12-29     0.427402      -0.626851                            175   
90  2020-12-30     0.446796      -0.647241                            208   
91  2020-12-31     0.525359      -0.661491                            250   

    largest_scc_nodes  largest_scc_edges  weakly_connected_components  \
0 




In [13]:
# Rich display of the per-day metrics table.
result_df

Unnamed: 0,date,reciprocity,assortativity,strongly_connected_components,largest_scc_nodes,largest_scc_edges,weakly_connected_components,largest_wcc_nodes,largest_wcc_edges,transitivity,global_clustering_coefficient,num_articulation_points
0,2020-10-01,0.994477,-0.916547,56,2945,5890,7,2945,5890,4.866943e-07,0.000414,21
1,2020-10-02,0.988664,-0.782937,56,1905,3810,3,1957,3865,0.000000e+00,0.000000,27
2,2020-10-03,0.993743,-0.793209,45,2473,4946,7,2476,4950,8.936005e-07,0.000909,15
3,2020-10-04,0.998028,-0.811411,20,2028,4056,9,2029,4057,0.000000e+00,0.000000,6
4,2020-10-05,0.979138,-0.771269,122,2159,4318,5,2178,4344,2.338511e-06,0.000781,16
...,...,...,...,...,...,...,...,...,...,...,...,...
87,2020-12-27,0.412523,-0.636143,217,7115,14832,8,7291,15011,1.012500e-07,0.000196,20
88,2020-12-28,0.395237,-0.629707,179,5641,11667,14,5788,11823,5.923076e-04,0.625710,21
89,2020-12-29,0.427402,-0.626851,175,8715,18282,10,8868,18438,7.168616e-08,0.000115,22
90,2020-12-30,0.446796,-0.647241,208,8015,16878,10,8194,17082,8.082340e-08,0.000246,23


In [14]:
# Drop the trailing '_TRX.csv' (8 characters) from the input name and write the
# metrics next to it as '<prefix>_1.csv'.
output_prefix = file_name[:-len('_TRX.csv')]
result_df.to_csv(f'{output_prefix}_1.csv')

In [7]:
# Computing the first-round metrics for 2021 (in progress)