In [None]:
import datetime
import threading
import pandas as pd
from web3 import Web3
from pprint import pprint as pp
# 여러개의 CSV를 처리하기 위해서 사용
from multiprocessing import Process, Pool
# 한 CSV에서 탐색하기 위해서 사용
from threading import Thread, Lock
from secrete import *

# Infura mainnet endpoint prefix; an API key from INFURA_URL_List is appended to it.
base_URL = "https://mainnet.infura.io/v3/"
# Directories the input chunk CSVs are read from and the results are written to.
base_Input_PATH = "./input/"
base_Output_PATH = "./output/"
# NOTE(review): not referenced in the visible code - presumably the total number of chunk files.
MAX_Chunk_Number = 362
# Query budget tracked per API key.
MAX_Quiry = 100000
# Remaining budget a key must still have before a worker thread may reserve it.
MAX_Thread_Quiry = 20000
# Per-key counter of queries issued so far, starting at zero for every key.
INFURA_URL_Limit_List = dict.fromkeys(INFURA_URL_List, 0)
# Keys currently reserved by a worker thread.
used_urls = set()

In [None]:
def is_eoa(w3, address, lock):
    """Return True when *address* has no deployed bytecode (i.e. is an EOA).

    Every call is charged against the API key behind *w3* by incrementing
    its entry in ``INFURA_URL_Limit_List`` while holding *lock*.

    Args:
        w3: Web3 instance whose HTTP provider URI is ``base_URL`` + API key.
        address: Ethereum address to inspect (any casing).
        lock: Lock guarding ``INFURA_URL_Limit_List``.

    Returns:
        True if ``eth.get_code`` returns empty bytes for the address,
        False otherwise.

    Raises:
        Exception: if the address cannot be converted, the key is unknown
            to ``INFURA_URL_Limit_List``, or the RPC call fails. The
            original error is attached as ``__cause__``.
    """
    # Resolve the key before the try block so the error message can always
    # name it, even when the failure happens on the very first statement.
    uri = w3.provider.endpoint_uri.replace(base_URL, "")
    try:
        checksum_address = w3.to_checksum_address(address)
        with lock:
            INFURA_URL_Limit_List[uri] += 1
        return w3.eth.get_code(checksum_address) == b''
    except Exception as e:
        # Actually raise: silently returning None here would feed None
        # values into the caller's boolean mask.
        raise Exception(f"{uri} key is expired") from e
        

def find_available_url(lock):
    """Reserve and return the first API key that is free and has budget left.

    A key qualifies when it is not in ``used_urls`` and its remaining
    budget (``MAX_Quiry`` minus the queries already counted for it) is at
    least ``MAX_Thread_Quiry``. The chosen key is added to ``used_urls``
    before it is returned. The whole scan runs while holding *lock*.

    Raises:
        Exception: when no key in ``INFURA_URL_List`` qualifies.
    """
    with lock:
        for candidate in INFURA_URL_List:
            # Skip keys another thread has already reserved.
            if candidate in used_urls:
                continue
            remaining = MAX_Quiry - INFURA_URL_Limit_List[candidate]
            # Skip keys that cannot cover one thread's worth of queries.
            if remaining < MAX_Thread_Quiry:
                continue
            used_urls.add(candidate)
            return candidate

        raise Exception("All API Keys expired or reached limit")


def work_thread(ith, chunk_df, return_df, lock):
    """Filter one slice of transactions down to EOA-to-EOA rows.

    Reserves an API key, converts both address columns to checksum form,
    keeps only rows where ``is_eoa`` is true for both ``from_address`` and
    ``to_address``, and inserts the resulting frame into *return_df*.

    Args:
        ith: Position at which the result is inserted into *return_df*.
        chunk_df: DataFrame slice with ``from_address`` and ``to_address``
            columns; it is copied, not modified.
        return_df: Shared list collecting each thread's filtered frame.
        lock: Lock shared with ``find_available_url`` and ``is_eoa``.

    Raises:
        Exception: whatever ``find_available_url``, the address conversion
            or ``is_eoa`` raises; nothing is inserted in that case.
    """
    url_INFURA = find_available_url(lock)
    try:
        w3 = Web3(Web3.HTTPProvider(base_URL + url_INFURA))
        print(f"{threading.current_thread().name} Start")
        print(f"Using API-Key:{url_INFURA}")

        # Work on a copy so the caller's frame is left untouched.
        chunk_df = chunk_df.copy()
        chunk_df['from_address'] = chunk_df['from_address'].apply(w3.to_checksum_address)
        chunk_df['to_address'] = chunk_df['to_address'].apply(w3.to_checksum_address)

        # `lock` is not re-entrant and is_eoa acquires it, so it must not
        # be held while these masks are being built.
        from_is_eoa = chunk_df['from_address'].apply(lambda x: is_eoa(w3, x, lock))
        to_is_eoa = chunk_df['to_address'].apply(lambda x: is_eoa(w3, x, lock))
        eoa_df = chunk_df[from_is_eoa & to_is_eoa]

        # list.insert clamps an out-of-range index to the end, so the final
        # order depends on which threads finish first.
        return_df.insert(ith, eoa_df)
    finally:
        # Always hand the key back, under the same lock that guards the
        # reservation, so a failed thread does not keep it reserved.
        with lock:
            used_urls.discard(url_INFURA)
            
def refine_INFURA(nProcess: str, file_Name: str):
    """Filter one input CSV down to EOA-to-EOA rows using four threads.

    Reads ``base_Input_PATH + file_Name``, drops rows with a missing
    ``from_address`` or ``to_address``, hands consecutive 10000-row slices
    to four ``work_thread`` workers, and writes the combined result to
    ``base_Output_PATH``.

    NOTE(review): only the first 40000 rows that remain after the NaN
    filter are examined; any rows beyond that are ignored.

    Args:
        nProcess: Label used only in progress messages.
        file_Name: Name of the CSV file inside ``base_Input_PATH``.

    Raises:
        Exception: if a thread cannot be started, or if any worker
            finished without delivering a result.
    """
    print(f"Process{nProcess} start reading {file_Name}")
    file_Path = base_Input_PATH + file_Name
    chunk_df = pd.read_csv(file_Path)

    # Drop rows with NaN or empty values in 'from_address' or 'to_address'
    chunk_df = chunk_df.dropna(subset=['from_address', 'to_address'])

    # One process drives four threads, each reserving its own Infura key.
    thread_Count = 4
    addition_Count = 10000
    lock = Lock()
    thread_List = []
    output_df_list = []

    for nThread in range(thread_Count):
        start = nThread * addition_Count
        end = start + addition_Count

        # iloc is positional and half-open. Label-based .loc[start:end]
        # includes both ends, so boundary rows would go to two threads.
        worker = Thread(
            target=work_thread,
            args=(nThread, chunk_df.iloc[start:end], output_df_list, lock),
        )
        thread_List.append(worker)

        try:
            worker.start()
        except Exception as e:
            raise Exception(f"All API Key expired\nLast FileName: {file_Name}") from e

    print(f"All Thread Started at {datetime.datetime.now()}")

    for thread in thread_List:
        thread.join()

    print("All Thread Complete")

    # A worker that raised never delivers a frame; report that explicitly
    # instead of failing later with an IndexError.
    if len(output_df_list) != thread_Count:
        failed = thread_Count - len(output_df_list)
        raise Exception(
            f"{failed} of {thread_Count} threads failed\nLast FileName: {file_Name}"
        )

    # Slices keep their original index labels, so sorting by index restores
    # input row order regardless of which thread finished first.
    eoa_df = pd.concat(output_df_list).sort_index()

    # Save the filtered dataframe. The "reulst_" spelling is kept so that
    # existing output file names do not change.
    now = datetime.datetime.now().strftime("%Y.%m.%d")
    eoa_df.to_csv(f"{base_Output_PATH}reulst_{file_Name}({now}).csv", index=False)

    print(f"Process{nProcess} Done\nOutput: reulst_{file_Name}({now}).csv")
    pp(INFURA_URL_Limit_List)

In [None]:
if __name__ == "__main__":
    # Process chunk_9.csv and chunk_10.csv one after another. Any failure
    # is printed and ends the run.
    try:
        pp(INFURA_URL_Limit_List)
        for i in (9, 10):
            file_name = f"chunk_{i}.csv"
            nProcess = 1
            refine_INFURA(nProcess, file_name)
    except Exception as e:
        print(e)