In [1]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import os
from pandas import json_normalize
import networkx as nx
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
from datetime import datetime
import multiprocessing as mp
warnings.filterwarnings('ignore')

In [2]:
# Unweighted variant
def cal_centrality(original_df):
    """Compute unweighted centrality measures for a table of transfers.

    Rows whose ``amount`` equals zero are discarded; every remaining row
    becomes a directed edge ``sender_address -> receiver_address``.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Must provide ``sender_address``, ``receiver_address`` and
        ``amount`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per graph node with the columns ``degree_centrality``,
        ``betweenness_centrality``, ``closeness_centrality`` and
        ``pagerank``.
    """
    # Keep only rows that actually move tokens.
    has_transfer = original_df['amount'] != 0.0
    transfers = original_df[has_transfer]

    # Directed graph; ``amount`` is stored on each edge, but none of the
    # centrality calls below is told to use it as a weight.
    graph = nx.from_pandas_edgelist(
        transfers,
        source='sender_address',
        target='receiver_address',
        edge_attr='amount',
        create_using=nx.DiGraph(),
    )

    degree = nx.degree_centrality(graph)
    betweenness = nx.betweenness_centrality(graph)
    closeness = nx.closeness_centrality(graph)
    rank = nx.pagerank(graph, alpha=0.85)

    # One column per measure, indexed by node.
    return pd.DataFrame({
        'degree_centrality': degree,
        'betweenness_centrality': betweenness,
        'closeness_centrality': closeness,
        'pagerank': rank,
    })

# Weighted variant
def cal_centrality_weight(original_df):
    """Compute amount-weighted centrality measures for a table of transfers.

    Rows whose ``amount`` equals zero are discarded. The remaining rows are
    aggregated per (sender, receiver) pair by summing ``amount`` before the
    directed graph is built. Without this step a ``DiGraph`` keeps only the
    amount of the *last* row for a repeated pair, which makes the weights
    depend on row order.

    Rows with a missing sender or receiver address are dropped by the
    grouping step (pandas ``groupby`` default).

    Parameters
    ----------
    original_df : pandas.DataFrame
        Must provide ``sender_address``, ``receiver_address`` and
        ``amount`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per graph node with the columns ``degree_centrality``,
        ``in_degree_centrality``, ``out_degree_centrality``,
        ``betweenness_centrality``, ``closeness_centrality`` and
        ``pagerank``.
    """
    # Filter transactions with non-zero amount
    nonzero = original_df[original_df['amount'] != 0.0]

    # Collapse repeated sender/receiver pairs into one edge carrying the
    # total amount; sort=False keeps edges in first-appearance order.
    edges = (
        nonzero
        .groupby(['sender_address', 'receiver_address'], as_index=False, sort=False)['amount']
        .sum()
    )

    # Create directed graph with weighted edges
    G = nx.from_pandas_edgelist(
        edges,
        'sender_address',
        'receiver_address',
        edge_attr='amount',
        create_using=nx.DiGraph(),
    )

    # NOTE(review): networkx interprets the weight given to betweenness and
    # closeness as a path *distance*, so larger amounts make nodes "farther
    # apart" -- confirm this is the intended reading of `amount`.
    centrality = {
        'degree_centrality': nx.degree_centrality(G),
        'in_degree_centrality': nx.in_degree_centrality(G),
        'out_degree_centrality': nx.out_degree_centrality(G),
        'betweenness_centrality': nx.betweenness_centrality(G, weight='amount'),
        'closeness_centrality': nx.closeness_centrality(G, distance='amount'),
        'pagerank': nx.pagerank(G, alpha=0.85, weight='amount'),
    }

    # Convert results to DataFrame (one column per measure, indexed by node)
    return pd.DataFrame(centrality)

In [3]:
# Main entry point (unweighted)
def calculate_daily_centrality(df):
    """Compute unweighted centrality measures separately for each calendar day.

    The rows of ``df`` are grouped by the date part of ``timestamp`` and
    ``cal_centrality`` is run on each group. The caller's DataFrame is not
    modified: timestamps are parsed into a separate Series.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``timestamp`` column parseable by
        ``pandas.to_datetime`` plus the columns ``cal_centrality`` needs.

    Returns
    -------
    pandas.DataFrame
        The per-day results stacked on top of each other, each with an
        extra ``date`` column. The per-day index is kept, so index values
        repeat across days. An empty DataFrame is returned when ``df``
        contains no groups.
    """
    # Parse into a separate Series so the input frame is left untouched.
    timestamps = pd.to_datetime(df['timestamp'])

    # Group by calendar date
    grouped = df.groupby(timestamps.dt.date)

    # Collect per-day frames and concatenate once at the end; concatenating
    # inside the loop re-copies all previous rows on every iteration.
    daily_frames = []

    # tqdm shows progress over the days
    for date, group in tqdm(grouped):
        daily_centrality = cal_centrality(group)
        daily_centrality['date'] = date
        daily_frames.append(daily_centrality)

    # pd.concat raises on an empty list, so keep the empty-input result explicit.
    if not daily_frames:
        return pd.DataFrame()
    return pd.concat(daily_frames, ignore_index=False)

# Main entry point (weighted)
def calculate_daily_centrality_weight(df):
    """Compute amount-weighted centrality measures for each calendar day.

    The rows of ``df`` are grouped by the date part of ``timestamp`` and
    ``cal_centrality_weight`` is run on each group, one day after another.
    The caller's DataFrame is not modified: timestamps are parsed into a
    separate Series.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``timestamp`` column parseable by
        ``pandas.to_datetime`` plus the columns ``cal_centrality_weight``
        needs.

    Returns
    -------
    pandas.DataFrame
        The per-day results stacked on top of each other, each with an
        extra ``date`` column. The per-day index is kept, so index values
        repeat across days. An empty DataFrame is returned when ``df``
        contains no groups.
    """
    # Parse into a separate Series so the input frame is left untouched.
    timestamps = pd.to_datetime(df['timestamp'])

    # Group by calendar date
    grouped = df.groupby(timestamps.dt.date)

    # Collect per-day frames and concatenate once at the end; concatenating
    # inside the loop re-copies all previous rows on every iteration.
    daily_frames = []

    # tqdm shows progress over the days
    for date, group in tqdm(grouped):
        daily_centrality = cal_centrality_weight(group)
        daily_centrality['date'] = date
        daily_frames.append(daily_centrality)

    # pd.concat raises on an empty list, so keep the empty-input result explicit.
    if not daily_frames:
        return pd.DataFrame()
    return pd.concat(daily_frames, ignore_index=False)

In [4]:
# Parallel-processing version of the weighted pipeline (rewritten with Claude)
def cal_centrality_weight(group):
    """Compute amount-weighted centrality measures for one group of transfers.

    Rows whose ``amount`` equals zero are discarded. The remaining rows are
    aggregated per (sender, receiver) pair by summing ``amount`` before the
    directed graph is built. Without this step a ``DiGraph`` keeps only the
    amount of the *last* row for a repeated pair, which makes the weights
    depend on row order.

    Rows with a missing sender or receiver address are dropped by the
    grouping step (pandas ``groupby`` default).

    Parameters
    ----------
    group : pandas.DataFrame
        Must provide ``sender_address``, ``receiver_address`` and
        ``amount`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per graph node with the columns ``degree_centrality``,
        ``in_degree_centrality``, ``out_degree_centrality``,
        ``betweenness_centrality``, ``closeness_centrality`` and
        ``pagerank``.
    """
    # Filter transactions with non-zero amount
    nonzero = group[group['amount'] != 0.0]

    # Collapse repeated sender/receiver pairs into one edge carrying the
    # total amount; sort=False keeps edges in first-appearance order.
    edges = (
        nonzero
        .groupby(['sender_address', 'receiver_address'], as_index=False, sort=False)['amount']
        .sum()
    )

    # Create directed graph with weighted edges
    G = nx.from_pandas_edgelist(
        edges,
        'sender_address',
        'receiver_address',
        edge_attr='amount',
        create_using=nx.DiGraph(),
    )

    # NOTE(review): networkx interprets the weight given to betweenness and
    # closeness as a path *distance*, so larger amounts make nodes "farther
    # apart" -- confirm this is the intended reading of `amount`.
    centrality = {
        'degree_centrality': nx.degree_centrality(G),
        'in_degree_centrality': nx.in_degree_centrality(G),
        'out_degree_centrality': nx.out_degree_centrality(G),
        'betweenness_centrality': nx.betweenness_centrality(G, weight='amount'),
        'closeness_centrality': nx.closeness_centrality(G, distance='amount'),
        'pagerank': nx.pagerank(G, alpha=0.85, weight='amount'),
    }

    # Convert results to DataFrame (one column per measure, indexed by node)
    return pd.DataFrame(centrality)

def process_daily_centrality(args):
    """Worker: compute weighted centrality for one ``(date, group)`` pair.

    Takes a single tuple argument so it can be handed directly to
    ``Pool.imap``. Returns the DataFrame produced by
    ``cal_centrality_weight`` with an added ``date`` column.
    """
    day, day_rows = args
    # assign() appends the date as the last column of the result.
    return cal_centrality_weight(day_rows).assign(date=day)

def calculate_daily_centrality_weight(df):
    """Compute amount-weighted centrality per calendar day using a process pool.

    The rows of ``df`` are grouped by the date part of ``timestamp`` and each
    ``(date, group)`` pair is handed to ``process_daily_centrality`` in a
    ``multiprocessing.Pool`` with one worker per CPU. The caller's DataFrame
    is not modified: timestamps are parsed into a separate Series.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``timestamp`` column parseable by
        ``pandas.to_datetime`` plus the columns the worker needs.

    Returns
    -------
    pandas.DataFrame
        The per-day results stacked in group order, each with a ``date``
        column. The per-day index is kept, so index values repeat across
        days. An empty DataFrame is returned when ``df`` contains no groups.
    """
    # Parse into a separate Series so the input frame is left untouched.
    timestamps = pd.to_datetime(df['timestamp'])

    # Group by calendar date
    grouped = df.groupby(timestamps.dt.date)

    # The with-block terminates the workers if anything below raises, so a
    # failing day no longer leaves orphaned worker processes behind.
    with mp.Pool(processes=mp.cpu_count()) as pool:
        # imap yields results in input order; tqdm shows progress over the days.
        results = list(
            tqdm(pool.imap(process_daily_centrality, grouped), total=len(grouped))
        )
        # Normal path: shut the pool down gracefully before leaving the block.
        pool.close()
        pool.join()

    # pd.concat raises ValueError on an empty list; return an empty frame instead.
    if not results:
        return pd.DataFrame()

    # Combine the per-day frames into a single DataFrame
    return pd.concat(results, ignore_index=False)

In [5]:
# List the working directory to see which input/output files are present
# (bare `ls` relies on IPython's automagic shell command).
ls

9.11_SOL_2020-03-19_2020_12_31_calculate_componentes.csv
9.11_SOL_2020-03-26_2020-12-31_centrality.csv
Centrality.ipynb
Centrality_weight.ipynb
SOL_2020-03-19_2020-06-30_TRX.csv
SOL_2020-07-01_2020-09-30_TRX.csv
SOL_2020-10-01_2020-12-31_TRX.csv
SOL_Transfer_extraction_code.ipynb
calculate_components.ipynb
calculate_components2.ipynb


In [6]:
# Collect the input transfer files from the working directory, sorted by name.
# Only names ending in the exact '_TRX.csv' suffix are accepted: a plain
# substring test on 'TRX' would also pick up non-CSV files, and the output
# naming downstream assumes this 8-character suffix.
TRX_SUFFIX = '_TRX.csv'
file_list = sorted(name for name in os.listdir() if name.endswith(TRX_SUFFIX))

In [8]:
# Compute the weighted centrality for every input file and save one CSV each.
TRX_SUFFIX = '_TRX.csv'
for file in file_list:
    # Load the data; calculate_daily_centrality_weight parses the
    # `timestamp` column itself, so no conversion is needed here.
    df = pd.read_csv(file)
    result = calculate_daily_centrality_weight(df)

    # Derive the output name by removing the input suffix explicitly rather
    # than slicing off a fixed number of characters.
    if file.endswith(TRX_SUFFIX):
        stem = file[:-len(TRX_SUFFIX)]
    else:
        stem = os.path.splitext(file)[0]
    result.to_csv(f'{stem}_centrality_weight.csv')  # save the result

100%|███████████████████████████████████████████████████████████████████████████████| 90/90 [00:00<00:00, 149.95it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 90/90 [10:59<00:00,  7.33s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 92/92 [59:18<00:00, 38.68s/it]
