### Collecting and preprocessing the transactions table from Etherscan API responses

In [1]:
# Index of the response file to process; it is part of the input file name
# here and of the output file names written by the main loop.
fold = 0
# Hour offset subtracted (as TIMEZONE*3600 seconds) from each timestamp so the
# resulting datetime is expressed in UTC+0.
TIMEZONE = 3

# Load the table of saved responses for this fold and report its size.
resp_path = f'data/external/v2/resp/df{fold}.pkl'
df1 = pd.read_pickle(resp_path)
print(df1.shape)

# function to get all transactions
def get_trnx(resp, wallet):
    """Turn one saved API response into a table of the wallet's transactions.

    Parameters
    ----------
    resp : object with a ``text`` attribute
        Response whose ``text`` is a JSON document with a ``'result'`` entry.
    wallet : str
        Address the response was requested for. It is stored unchanged in the
        ``wallet`` column; matching against ``from``/``to`` is case-insensitive.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp``, ``wallet``, ``wallet_add``, ``direction``,
        ``value``, ``is_error`` (values are kept as they arrive in the JSON,
        no numeric conversion is done here).  ``wallet_add`` is the other side
        of the transfer and ``direction`` is ``-1`` when ``wallet`` is the
        receiver, ``1`` otherwise.  An empty frame is returned when the
        response carries no transaction list.
    """
    out = json.loads(resp.text)['result']
    # A failed call puts a message string (e.g. 'Max rate limit reached') in
    # 'result' instead of a list of transactions; feeding any such string to
    # the DataFrame constructor would raise, so treat every non-list as empty.
    if not isinstance(out, list):
        return pd.DataFrame()
    raw = pd.DataFrame(out)
    if len(raw) == 0:
        return raw

    # Addresses are compared in lower case on both sides; otherwise a
    # mixed-case wallet would never match the lower-cased from/to columns.
    wallet_lc = str(wallet).lower()
    sender = raw['from'].str.lower()
    receiver = raw['to'].str.lower()
    incoming = receiver == wallet_lc

    # Counterparty: the receiver by default, the sender when the wallet itself
    # is the receiver.
    counterparty = receiver.mask(incoming, sender)
    # -1 for transfers into the wallet (including self-transfers), 1 otherwise.
    direction = incoming.map({True: -1, False: 1})

    # Building a fresh frame avoids assigning into a slice of ``raw``.
    return pd.DataFrame({
        'timestamp': raw['timeStamp'],
        'wallet': wallet,
        'wallet_add': counterparty,
        'direction': direction,
        'value': raw['value'],
        'is_error': raw['isError'],
    })

# main loop
# Splits the saved responses into `chunk_num` interleaved chunks, parses every
# response of a chunk into transactions, converts the raw string fields, adds
# per-round voting flags for both sides of each transfer and writes one CSV
# per chunk.
chunk_num = 5
# Row i goes to chunk i % chunk_num.
df1['chunk'] = np.arange(len(df1)) % chunk_num

# The voting table does not depend on the chunk, so it is loaded and pivoted
# once: one row per source wallet, one 0/1 column per round (1 = the wallet
# has at least one row for that round in data_rounds.csv).
df2 = pd.read_csv('data/data_rounds.csv')
round_flags = (df2.groupby(['source_wallet', 'round_name']).size().unstack() > 0).astype(int)

for chunk in range(chunk_num):
    df11 = df1[df1.chunk == chunk]

    # Parse all responses first and concatenate once; concatenating inside the
    # loop re-copies the accumulated table on every iteration.
    frames = [get_trnx(resp, wallet) for wallet, resp in tqdm(zip(df11.wallet, df11.resp))]
    frames = [frame for frame in frames if len(frame) > 0]
    if not frames:
        # Nothing to convert or save; the column accesses below would fail.
        print(f'chunk {chunk}: no transactions, skipped')
        continue
    df = pd.concat(frames)

    # features converting
    # `np.float` was removed from NumPy; the builtin `float` is the same dtype.
    df['value'] = df.value.astype(float) / 10**18
    df['timestamp'] = df.timestamp.astype(int)
    df['is_error'] = df.is_error.astype(int)
    # NOTE(review): fromtimestamp() returns local time, so this yields UTC+0
    # only if the machine's clock is at UTC+TIMEZONE hours -- confirm.
    df['dt'] = df.timestamp.map(lambda x: dt.fromtimestamp(x - TIMEZONE*3600))

    # merge with voting addresses (wallet and wallet_add)
    for key, prefix in (('wallet', 'flg_'), ('wallet_add', 'flg_add_')):
        flags = round_flags.copy()
        flag_cols = [prefix + c for c in flags.columns]
        flags.columns = flag_cols
        flags = flags.reset_index(names=key)
        # Explicit join key instead of relying on the common-column default.
        df = df.merge(flags, how='left', on=key)
        # Addresses absent from the voting table get 0 in every round flag.
        df[flag_cols] = df[flag_cols].fillna(0).astype(int)

    # save to file
    df.to_csv(f'data/external/v2/trnx/df{fold}{chunk}.csv', index=False)
    print(df.shape)

21it [00:00, 187.35it/s]

(20347, 2)


4070it [02:04, 32.71it/s]
22it [00:00, 201.88it/s]

(434536, 19)


4070it [01:52, 36.04it/s]
22it [00:00, 194.65it/s]

(383823, 19)


4069it [02:02, 33.29it/s]
14it [00:00, 131.03it/s]

(414649, 19)


4069it [01:50, 36.70it/s]
11it [00:00, 102.06it/s]

(373370, 19)


4069it [02:12, 30.68it/s]


(471802, 19)
