In [1]:
import sys
import os
import time
import logging
import datetime
from datetime import datetime, timedelta
import yaml
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema


# import local files if necessary
sys.path.append('..//src')
import training_data as td
importlib.reload(td)

# load dotenv
load_dotenv()

# configure logger
logger = dc.setup_logger()
logger.setLevel(logging.INFO)

# Custom format function for displaying numbers
pd.set_option('display.float_format', lambda x: f'{x:.12g}')
# pd.reset_option('display.float_format')


def load_config(file_path='config.yaml'):
    with open(file_path, 'r') as file:
        return yaml.safe_load(file)
config = load_config()

def cw_filter_df(df, coin_id, wallet_address):
    filtered_df = df[
        (df['coin_id'] == coin_id) &
        (df['wallet_address'] == wallet_address)
    ]
    return filtered_df


#### Load the datasets

In [28]:
importlib.reload(td)
config = load_config()


# retrieve prices data
prices_df = td.retrieve_prices_data()

# fill gaps in prices data
prices_df,_ = td.fill_prices_gaps(prices_df,config['data_cleaning']['max_gap_days'])
logger.info(f"Prices data shape: {prices_df.shape}")

# retrieve transfers data
transfers_df = td.retrieve_transfers_data(
    config['modeling']['training_period_start'],
    config['modeling']['modeling_period_start'],
    config['modeling']['modeling_period_end']
    )
logger.info(f"Transfers data shape: {transfers_df.shape}")

# compile profits_df
profits_df = td.prepare_profits_data(transfers_df, prices_df)
profits_df = td.calculate_wallet_profitability(profits_df)
profits_df,_ = td.clean_profits_df(profits_df, config['data_cleaning'])
logger.info(f"Profits data shape: {profits_df.shape}")


# identify sharks
shark_coins_df = td.classify_shark_coins(profits_df, config['modeling'])
shark_wallets_df = td.classify_shark_wallets(shark_coins_df,config['modeling'])


# assess shark performance
shark_agg_performance_df,shark_wallets_performance_df = td.calculate_shark_performance(transfers_df, prices_df, shark_wallets_df, config)
shark_agg_performance_df

[09/Sep/2024 17:02:02] INFO [dreams_core.core.retrieve_prices_data:42] retrieved prices data with shape (120763, 3)
[09/Sep/2024 17:02:02] INFO [dreams_core.core.fill_prices_gaps:126] 382 coins had no gaps, 19 coins had gaps filled, and 38 coins were dropped due to large gaps.
[09/Sep/2024 17:02:02] INFO [dreams_core.core.<module>:10] Prices data shape: (110929, 3)
[09/Sep/2024 17:02:44] INFO [dreams_core.core.retrieve_transfers_data:414] retrieved transfers_df with shape (18608530, 5) after 41.5 seconds.
[09/Sep/2024 17:02:44] INFO [dreams_core.core.<module>:18] Transfers data shape: (18608530, 5)
[09/Sep/2024 17:02:44] INFO [dreams_core.core.prepare_profits_data:455] Preparing profits_df data...
[09/Sep/2024 17:03:25] INFO [dreams_core.core.calculate_wallet_profitability:635] Generated profits df after 15.59 seconds
[09/Sep/2024 17:03:32] INFO [dreams_core.core.clean_profits_df:674] Finished cleaning profits_df after 6.72 seconds. Removed 489 coin-wallet pairs that breached profit or

Unnamed: 0_level_0,count_wallets,median_inflows,median_profits,mean_inflows,min_inflows,max_inflows,percentile_25_inflows,percentile_75_inflows,mean_profits,min_profits,max_profits,percentile_25_profits,percentile_75_profits,total_inflows,total_profits,median_return,return_aggregate
is_shark,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
False,111440,0.0,0.0,43619.0119088,0,402592498.787,0,1097.63044432,14931.4522836,-84562702.326,315400175.944,0,0.0,4860902687.12,1663961042.48,0.0,0.342315234348
True,448,31561.382051,783.389219077,1699612.51009,0,132772421.593,0,104639.45183,1878128.7092,-6942715.80409,548965068.98,0,32119.4762875,761426404.519,841401661.721,0.0248211316542,1.10503346972


In [29]:
shark_wallets_performance_df.sort_values('usd_inflows_cumulative',ascending=False).head(10)

Unnamed: 0,wallet_address,is_shark,usd_inflows_cumulative,profits_cumulative
16329,0x28c6c06298d514db089934071355e5743bf21d60,False,402592498.787,80299992.3823
90326,0xe2fe530c047f2d85298b07d9333c05737f1435fb,False,269297657.994,315400175.944
35411,0x58edf78281334335effa23101bbe3371b6a36a51,False,250229135.848,10770002.818
98922,0xf89d7b9c864f589bbf53a82105107622b35eaa40,False,177189780.308,241009249.468
35991,0x5a52e96bacdabb82fd05763e25335261b270efcb,False,168794743.192,45759316.2816
89134,0xdfd5293d8e347dfe59e90efd55b2956a1343963d,False,143315487.199,11045928.9722
5240,0x0d0707963952f2fba59dd06f2b425ace40b492fe,False,137376125.549,12959228.1507
99275,0xf977814e90da44bfa03b6295a0616a897441acec,True,132772421.593,548965068.98
13509,0x21a31ee1afc51d94c2efccaa2092ad1028285549,False,128841640.853,9163758.30236
85374,0xd6216fc19db775df9774a6e33526131da7d19a2c,True,117337172.67,3753172.08586


In [33]:
w = '0x28c6c06298d514db089934071355e5743bf21d60'
# transfers_df[transfers_df['wallet_address']==w]
profits_df[profits_df['wallet_address']==w].max()
# # shark_coins_df[shark_coins_df['wallet_address']==w]
# shark_wallets_df[shark_wallets_df['wallet_address']==w]

coin_id                         f7b278de-7fa7-4f87-ba5d-3eb2e57d933a
wallet_address            0x28c6c06298d514db089934071355e5743bf21d60
date                                             2024-03-31 00:00:00
net_transfers                                          606423282.325
balance                                                977855538.477
price                                                  4.56376579198
profits_change                                         3138643.67412
profits_cumulative                                     5590237.22904
usd_balance                                            30833820.4018
usd_net_transfers                                      19612255.6234
usd_inflows                                            19612255.6234
usd_inflows_cumulative                                  79992850.072
total_return                                           40.3629086599
dtype: object

In [14]:
# Filter transfers for the modeling period
modeling_period_transfers_df = transfers_df[
    (transfers_df['date'] >= config['modeling']['modeling_period_start']) &
    (transfers_df['date'] <= config['modeling']['modeling_period_end'])
]

# Create profits_df for the modeling period
modeling_period_profits_df = td.prepare_profits_data(modeling_period_transfers_df, prices_df)
modeling_period_profits_df = td.calculate_wallet_profitability(modeling_period_profits_df)

# Retrieve profit state at the end of the period for each coin-wallet pair
modeling_end_profits_df = modeling_period_profits_df[
    modeling_period_profits_df['date'] == config['modeling']['modeling_period_end']
]

# Aggregate wallet-level metrics by summing usd inflows and profits
modeling_end_wallet_profits_df = modeling_end_profits_df.groupby('wallet_address')[
    ['usd_inflows_cumulative', 'profits_cumulative']
].sum()

# Classify wallets by shark status and compare their performance
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_wallet_profits_df,
    on='wallet_address',
    how='left'
)


[09/Sep/2024 16:53:44] INFO [dreams_core.core.calculate_wallet_profitability:635] Generated profits df after 4.03 seconds


In [19]:
modeling_end_wallet_profits_df.reset_index()

Unnamed: 0,wallet_address,usd_inflows_cumulative,profits_cumulative
0,0x0000000000000000000000000000000000000001,0.00132535833114,0.00105268487959
1,0x00000000000006b2ab6decbc6fc7ec6bd2fbc720,1.46705726742,-0.878846106571
2,0x000000000000521fa5ddd7611a38f9dc280cd2f2,0.000289556318089,-4.96377600215e-05
3,0x0000000000007f150bd6f54c40a34d7c3d5e9f56,117.990262157,18.2636326088
4,0x000000000000b6b4c2dc4f3f12159df0163f67e9,0.0392738983218,0.0901029569179
...,...,...,...
908869,zz7XM61sP3VWK8QmhjwgmVaYEyXqpvH54JRt3c2FnVx,200.21834898,-1.92102487523
908870,zzJxpd4Xr3Q8G53dewhyebdsE9gJGbcQGWCW9Tmfmea,52.6498393388,-12.2509873357
908871,zzWFN2FKBA51RkMRJsy1hhyjAheiwJrMSCdw9TQkern,526.32336112,-37.6587698153
908872,zzpbk1H74zNrNUttDwgyoYFC1Adyz8rxwAY52eqBtTJ,8.89761859291,5.3321738643


In [22]:
w = 'BmPLYFnk2wSSQZMd3TZoajeu62fF5fFrAkioi8vxZbHo'

# transfers_df[transfers_df['wallet_address']==w]
# profits_df[profits_df['wallet_address']==w]
# shark_coins_df[shark_coins_df['wallet_address']==w]
# shark_wallets_df[shark_wallets_df['wallet_address']==w]
shark_wallets_df[shark_wallets_df['wallet_address']==w]

Unnamed: 0,wallet_address,total_coins,shark_coins,shark_rate,is_shark


In [23]:

df = modeling_end_wallet_profits_df.reset_index()
df[df['wallet_address']==w]

Unnamed: 0,wallet_address,usd_inflows_cumulative,profits_cumulative
760091,BmPLYFnk2wSSQZMd3TZoajeu62fF5fFrAkioi8vxZbHo,640177680.248,-89447957.2596


In [26]:
shark_performance_df[shark_performance_df['wallet_address']==w]

Unnamed: 0,wallet_address,is_shark,usd_inflows_cumulative,profits_cumulative


In [47]:
# Classify wallets by shark status and compare their performance
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df,
    on='wallet_address',
    how='left'
)

# Replace NaNs with 0s for wallets that had no inflows and profits in the modeling period
shark_performance_df['usd_inflows_cumulative'] = shark_performance_df['usd_inflows_cumulative'].fillna(0)
shark_performance_df['profits_cumulative'] = shark_performance_df['profits_cumulative'].fillna(0)


# Remove wallet_address for aggregation
shark_performance_df = shark_performance_df.groupby('is_shark').agg(
    count_wallets=('wallet_address', 'size'),
    median_inflows=('usd_inflows_cumulative', 'median'),
    median_profits=('profits_cumulative', 'median'),
    mean_inflows=('usd_inflows_cumulative', 'mean'),
    min_inflows=('usd_inflows_cumulative', 'min'),
    max_inflows=('usd_inflows_cumulative', 'max'),
    percentile_25_inflows=('usd_inflows_cumulative', lambda x: np.percentile(x.dropna(), 25) if len(x) > 1 else np.nan),
    percentile_75_inflows=('usd_inflows_cumulative', lambda x: np.percentile(x.dropna(), 75) if len(x) > 1 else np.nan),
    mean_profits=('profits_cumulative', 'mean'),
    min_profits=('profits_cumulative', 'min'),
    max_profits=('profits_cumulative', 'max'),
    percentile_25_profits=('profits_cumulative', lambda x: np.percentile(x.dropna(), 25) if len(x) > 1 else np.nan),
    percentile_75_profits=('profits_cumulative', lambda x: np.percentile(x.dropna(), 75) if len(x) > 1 else np.nan),
    total_inflows=('usd_inflows_cumulative', 'sum'),
    total_profits=('profits_cumulative', 'sum')
)

# Calculate median return
shark_performance_df['median_return'] = np.divide(
    shark_performance_df['median_profits'], 
    shark_performance_df['median_inflows'], 
    out=np.zeros_like(shark_performance_df['median_profits']), 
    where=shark_performance_df['median_inflows'] != 0
)

# Calculate aggregate return
shark_performance_df['return_aggregate'] = np.divide(
    shark_performance_df['total_profits'], 
    shark_performance_df['total_inflows'], 
    out=np.zeros_like(shark_performance_df['total_profits']), 
    where=shark_performance_df['total_inflows'] != 0
)

shark_performance_df

Unnamed: 0_level_0,count_wallets,median_inflows,median_profits,mean_inflows,min_inflows,max_inflows,percentile_25_inflows,percentile_75_inflows,mean_profits,min_profits,max_profits,percentile_25_profits,percentile_75_profits,total_inflows,total_profits,median_return,return_aggregate
is_shark,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
False,111440,0.0,0.0,43619.0119088,0,402592498.787,0,1097.63044432,14931.4522836,-84562702.326,315400175.944,0,0.0,4860902687.12,1663961042.48,0.0,0.342315234348
True,448,31561.382051,783.389219077,1699612.51009,0,132772421.593,0,104639.45183,1878128.7092,-6942715.80409,548965068.98,0,32119.4762875,761426404.519,841401661.721,0.0248211316542,1.10503346972


### Sharkwork

In [3]:
importlib.reload(td)
config = load_config()

# create shark dfs
shark_coins_df = td.classify_shark_coins(profits_df, config['modeling'])
shark_wallets_df = td.classify_shark_wallets(shark_coins_df,config['modeling'])

# assess shark performance
shark_performance_df = td.calculate_shark_performance(transfers_df, prices_df, shark_wallets_df, config)
shark_performance_df

[09/Sep/2024 15:14:04] INFO [dreams_core.core.classify_shark_coins:737] creation of shark_coins_df complete.


In [28]:
# Filter transfers for the modeling period
modeling_period_transfers_df = transfers_df[
    (transfers_df['date'] >= config['modeling']['modeling_period_start']) &
    (transfers_df['date'] <= config['modeling']['modeling_period_end'])
]

# Create profits_df for the modeling period
modeling_period_profits_df = td.prepare_profits_data(modeling_period_transfers_df, prices_df)
modeling_period_profits_df = td.calculate_wallet_profitability(modeling_period_profits_df)

# Retrieve wallet-level profit state at the end of the period
modeling_end_profits_df = modeling_period_profits_df[
    modeling_period_profits_df['date'] == config['modeling']['modeling_period_end']
]
modeling_end_metrics_df = modeling_end_profits_df.groupby('wallet_address')[
    ['usd_inflows_cumulative', 'profits_cumulative']
].sum()

# Classify wallets by shark status and merge with metrics
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df, 
    on='wallet_address', 
    how='left'
)

modeling_end_metrics_df

[09/Sep/2024 16:14:24] INFO [dreams_core.core.calculate_wallet_profitability:635] Generated profits df after 3.93 seconds


Unnamed: 0_level_0,usd_inflows_cumulative,profits_cumulative
wallet_address,Unnamed: 1_level_1,Unnamed: 2_level_1
0x0000000000000000000000000000000000000001,0.00132535833114,0.00105268487959
0x00000000000006b2ab6decbc6fc7ec6bd2fbc720,1.46705726742,-0.878846106571
0x000000000000521fa5ddd7611a38f9dc280cd2f2,0.000289556318089,-4.96377600215e-05
0x0000000000007f150bd6f54c40a34d7c3d5e9f56,117.990262157,18.2636326088
0x000000000000b6b4c2dc4f3f12159df0163f67e9,0.0392738983218,0.0901029569179
...,...,...
zz7XM61sP3VWK8QmhjwgmVaYEyXqpvH54JRt3c2FnVx,200.21834898,-1.92102487523
zzJxpd4Xr3Q8G53dewhyebdsE9gJGbcQGWCW9Tmfmea,52.6498393388,-12.2509873357
zzWFN2FKBA51RkMRJsy1hhyjAheiwJrMSCdw9TQkern,526.32336112,-37.6587698153
zzpbk1H74zNrNUttDwgyoYFC1Adyz8rxwAY52eqBtTJ,8.89761859291,5.3321738643


In [42]:
w = '0x00000000000a78c8727b6ae386f004e7e37a4875'

# modeling_period_transfers_df[modeling_period_transfers_df['wallet_address']==w]
modeling_period_profits_df[modeling_period_profits_df['wallet_address']==w]

Unnamed: 0,coin_id,wallet_address,date,net_transfers,balance,price,profits_change,profits_cumulative,usd_balance,usd_net_transfers,usd_inflows,usd_inflows_cumulative,total_return


In [38]:
print(modeling_end_metrics_df.shape)
modeling_end_metrics_df.isna().sum()

(908874, 2)


usd_inflows_cumulative    0
profits_cumulative        0
dtype: int64

In [36]:
# Classify wallets by shark status and merge with metrics
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df, 
    on='wallet_address', 
    how='left'
)
shark_performance_df



Unnamed: 0,wallet_address,is_shark,usd_inflows_cumulative,profits_cumulative
0,0x000000000000c35e4364deffa9059dbadaefd4f8,False,,
1,0x00000000000a78c8727b6ae386f004e7e37a4875,False,,
2,0x000000000077cdff30a1b5d7c12f3587f921e519,False,,
3,0x000000000085cd7bd617419ce5ff21c722ab2d38,False,,
4,0x0000000000a6f0986c92cf1ec4d2e77afbe1466d,False,168559.586596,381965.986102
...,...,...,...,...
111883,zhVtHwNE8QGtTzN6tX7Ed24wDLKUuBCj7U5gMqxFD82,False,17400.5910248,-2321.39329356
111884,zsZrkzcUBNzDZFHasSmBQ4gb9jYwFa3rLqRuTFsZ5Qm,False,,
111885,zuqaoapDTJhShSV4Us9AwTh6ibPWksTLAuTgmfZ2oTA,False,,
111886,zxVSE7ToQZbA3c7tha48T7cPbVDtui9NCXZNKXFqtjr,False,,


In [32]:
w = '0x00000000000a78c8727b6ae386f004e7e37a4875'

transfers_df[transfers_df['wallet_address']==w]

Unnamed: 0,coin_id,wallet_address,date,net_transfers,balance
5930153,3f4da2ea-7bb0-4707-a329-80ea98797deb,0x00000000000a78c8727b6ae386f004e7e37a4875,2024-01-20,86027.7756137,86027.7756137
5930154,3f4da2ea-7bb0-4707-a329-80ea98797deb,0x00000000000a78c8727b6ae386f004e7e37a4875,2024-01-21,-86027.7756137,0.0
5930155,3f4da2ea-7bb0-4707-a329-80ea98797deb,0x00000000000a78c8727b6ae386f004e7e37a4875,2024-02-29,0.0,0.0
5930156,3f4da2ea-7bb0-4707-a329-80ea98797deb,0x00000000000a78c8727b6ae386f004e7e37a4875,2024-03-01,0.0,0.0
5930157,3f4da2ea-7bb0-4707-a329-80ea98797deb,0x00000000000a78c8727b6ae386f004e7e37a4875,2024-03-31,0.0,0.0


In [30]:
shark_performance_df.isna().sum()

wallet_address                0
is_shark                      0
usd_inflows_cumulative    79081
profits_cumulative        79081
dtype: int64

In [20]:
# Classify wallets by shark status and merge with metrics
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df, 
    on='wallet_address', 
    how='left'
)

shark_performance_df

Unnamed: 0,wallet_address,is_shark,usd_inflows_cumulative,profits_cumulative
0,0x000000000000c35e4364deffa9059dbadaefd4f8,False,,
1,0x00000000000a78c8727b6ae386f004e7e37a4875,False,,
2,0x000000000077cdff30a1b5d7c12f3587f921e519,False,,
3,0x000000000085cd7bd617419ce5ff21c722ab2d38,False,,
4,0x0000000000a6f0986c92cf1ec4d2e77afbe1466d,False,168559.586596,381965.986102
...,...,...,...,...
111883,zhVtHwNE8QGtTzN6tX7Ed24wDLKUuBCj7U5gMqxFD82,False,17400.5910248,-2321.39329356
111884,zsZrkzcUBNzDZFHasSmBQ4gb9jYwFa3rLqRuTFsZ5Qm,False,,
111885,zuqaoapDTJhShSV4Us9AwTh6ibPWksTLAuTgmfZ2oTA,False,,
111886,zxVSE7ToQZbA3c7tha48T7cPbVDtui9NCXZNKXFqtjr,False,,


In [18]:
print(modeling_end_metrics_df.shape)
modeling_end_metrics_df.isna().sum()

(908874, 2)


usd_inflows_cumulative    0
profits_cumulative        0
dtype: int64

In [9]:
# Classify wallets by shark status and merge with metrics
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df, 
    on='wallet_address', 
    how='left'
)

shark_performance_df.shape

(111888, 4)

In [11]:
shark_performance_df.head()

Unnamed: 0_level_0,count_wallets,median_inflows,median_profits,mean_inflows,min_inflows,max_inflows,percentile_25_inflows,percentile_75_inflows,mean_profits,min_profits,max_profits,percentile_25_profits,percentile_75_profits,total_inflows,total_profits
is_shark,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
False,111440,9657.96140487,262.539596629,149690.594867,8.02810174282e-16,402592498.787,,,51241.3710616,-84562702.326,315400175.944,,,4860902687.12,1663961042.48
True,448,61165.4009611,12949.3049988,2279719.77401,2.10705042778e-12,132772421.593,,,2519166.65186,-6942715.80409,548965068.98,,,761426404.519,841401661.721


In [12]:

# Classify wallets by shark status and merge with metrics
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df, 
    on='wallet_address', 
    how='left'
)

# # Remove wallet_address for aggregation
# shark_performance_df.groupby('is_shark').agg(
#     count_wallets=('wallet_address', 'size'),
#     median_inflows=('usd_inflows_cumulative', 'median'),
#     median_profits=('profits_cumulative', 'median'),
#     mean_inflows=('usd_inflows_cumulative', 'mean'),
#     min_inflows=('usd_inflows_cumulative', 'min'),
#     max_inflows=('usd_inflows_cumulative', 'max'),
#     percentile_25_inflows=('usd_inflows_cumulative', lambda x: np.percentile(x, 25)),
#     percentile_75_inflows=('usd_inflows_cumulative', lambda x: np.percentile(x, 75)),
#     mean_profits=('profits_cumulative', 'mean'),
#     min_profits=('profits_cumulative', 'min'),
#     max_profits=('profits_cumulative', 'max'),
#     percentile_25_profits=('profits_cumulative', lambda x: np.percentile(x, 25)),
#     percentile_75_profits=('profits_cumulative', lambda x: np.percentile(x, 75)),
#     total_inflows=('usd_inflows_cumulative', 'sum'),
#     total_profits=('profits_cumulative', 'sum')
# )

# # # Calculate aggregate return
# # shark_performance_df['return_aggregate'] = np.divide(
# #     shark_performance_df['total_profits'], 
# #     shark_performance_df['total_inflows'], 
# #     out=np.zeros_like(shark_performance_df['total_profits']), 
# #     where=shark_performance_df['total_inflows'] != 0
# # )

shark_performance_df.head()

Unnamed: 0,wallet_address,is_shark,usd_inflows_cumulative,profits_cumulative
0,0x000000000000c35e4364deffa9059dbadaefd4f8,False,,
1,0x00000000000a78c8727b6ae386f004e7e37a4875,False,,
2,0x000000000077cdff30a1b5d7c12f3587f921e519,False,,
3,0x000000000085cd7bd617419ce5ff21c722ab2d38,False,,
4,0x0000000000a6f0986c92cf1ec4d2e77afbe1466d,False,168559.586596,381965.986102


In [34]:
# calculate total inflows and total profits
# modeling_end_metrics_df = modeling_end_profits_df.groupby('wallet_address')[['usd_inflows_cumulative','profits_cumulative']].sum()
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_profits_df, 
    on='wallet_address', 
    how='left'
)
shark_performance_df = shark_performance_df.groupby('is_shark').sum()
shark_performance_df['return_aggregate'] = shark_performance_df['profits_cumulative'] / shark_performance_df['usd_inflows_cumulative']

Unnamed: 0_level_0,wallet_address,usd_inflows_cumulative,profits_cumulative
is_shark,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,0x000000000000c35e4364deffa9059dbadaefd4f80x00...,19352465806.3,12626288983.6
True,0x000000d40b595b94918a28b27d1e2c66f43a51d30x00...,2524867560.5,2968641511.65


In [35]:
# calculate total inflows and total profits
# modeling_end_metrics_df = modeling_end_profits_df.groupby('wallet_address')[['usd_inflows_cumulative','profits_cumulative']].sum()
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_profits_df, 
    on='wallet_address', 
    how='left'
)
shark_performance_df.groupby('is_shark').sum()

In [27]:
w = '0x0000000000000000000000000000000000000002'
profits_df[profits_df['wallet_address'==w]]

KeyError: False

In [20]:
print(len(shark_wallets_df['wallet_address']))
len(shark_wallets_df['wallet_address'].drop_duplicates())

111888


111888

In [17]:

modeling_period_profits_df.head()

Unnamed: 0,coin_id,wallet_address,date,net_transfers,balance,price,profits_change,profits_cumulative,usd_balance,usd_net_transfers,usd_inflows,usd_inflows_cumulative,total_return
2,04f6120a-f0dd-4260-bb2b-b8f827fdba61,0x00000000009e50a7ddb7a7b0e2ee6604fd120e49,2024-03-31,0,0.002920774,27.5991647724,0.0587838849452,0.0587841752714,0.0806109228888,0,0,0.0218267476174,2.69321734515
4,04f6120a-f0dd-4260-bb2b-b8f827fdba61,0x000000004685666c7653cc148f566f0511901b37,2024-03-31,0,2.38776413,27.5991647724,60.6366406422,60.6366406422,65.9002956614,0,0,5.26365501921,11.5198736279
6,04f6120a-f0dd-4260-bb2b-b8f827fdba61,0x00000000a991c429ee2ec6df19d40fe0c80088b8,2024-03-31,0,216.04104,27.5991647724,5486.30526016,5486.30526016,5962.55226055,0,0,476.24700039,11.5198736279
10,04f6120a-f0dd-4260-bb2b-b8f827fdba61,0x00000023c10000eecb940000b914cdfd76cc83d1,2024-03-31,0,37.55949184,27.5991647724,91.1447501104,267.338970969,1036.61060406,0,0,769.271633089,0.347522200832
12,04f6120a-f0dd-4260-bb2b-b8f827fdba61,0x00000047bb99ea4d791bb749d970de71ee0b1a34,2024-03-31,0,12.498298146,27.5991647724,323.420276906,323.420276906,344.942589906,0,0,21.5223129995,15.0272081311


In [None]:
def filter_df(df, coin_id, wallet_address):
    filtered_df = df[
        (df['coin_id'] == coin_id) &
        (df['wallet_address'] == wallet_address)
    ]
    return filtered_df

c=

filter_df(modeling_period_profits_df,c,w)

In [13]:
transfers_df[
    (transfers_df['date'] >= config['modeling']['modeling_period_start'])
    (transfers_df['date'] <= config['modeling']['modeling_period_end'])
]

TypeError: 'Series' object is not callable

In [None]:
# Calculate and clean profits data for the modeling period only
profits_df = td.prepare_profits_data(transfers_df, prices_df)
profits_df = td.calculate_wallet_profitability(profits_df)

modeling_period_profits_df 

In [58]:
importlib.reload(td)
config = load_config()

def calculate_modeling_period_profitability(profits_df, training_period_end, modeling_period_end):
    """
    Calculate profitability during the modeling period by finding the difference in total_profits between
    the end of the training period and the end of the modeling period.
    
    Parameters:
        profits_df (DataFrame): DataFrame containing wallet profitability data.
        training_period_end (str): End date of the training period.
        modeling_period_end (str): End date of the modeling period.
        
    Returns:
        modeling_period_profits_df (DataFrame): DataFrame with wallet_address, coin_id, and profitability change.
    """
    # Step 1: Filter for the end of the training period and the modeling period
    training_profits_df = profits_df[profits_df['date'] == training_period_end][['wallet_address', 'coin_id', 'profits_cumulative']]
    modeling_profits_df = profits_df[profits_df['date'] == modeling_period_end][['wallet_address', 'coin_id', 'profits_cumulative']]

    # Step 2: Merge the two DataFrames on wallet_address and coin_id
    merged_profits_df = pd.merge(
        training_profits_df,
        modeling_profits_df,
        on=['wallet_address', 'coin_id'],
        suffixes=('_training', '_modeling')
    )

    # Step 3: Calculate the difference in total profits between the two periods
    merged_profits_df['profit_during_modeling'] = merged_profits_df['profits_cumulative_modeling'] - merged_profits_df['profits_cumulative_training']

    return merged_profits_df[['wallet_address', 'coin_id', 'profit_during_modeling']]


training_period_end = config['modeling']['training_period_end']
modeling_period_end = config['modeling']['modeling_period_end']
modeling_period_profits_df = calculate_modeling_period_profitability(profits_df, training_period_end, modeling_period_end)
modeling_period_profits_df

Unnamed: 0,wallet_address,coin_id,profit_during_modeling
0,0x000000000005af2ddc1a93a03e9b7014064d3b8d,0b9d343d-4e25-4d22-b49c-fa17509a0333,-5.17081809181e-08
1,0x000000000035b5e5ad9019092c665357240f594e,0b9d343d-4e25-4d22-b49c-fa17509a0333,-6.72206351936e-07
2,0x00000000003b3cc22af3ae1eac0440bcee416b40,0b9d343d-4e25-4d22-b49c-fa17509a0333,-0.00262284576889
3,0x00000000009726632680fb29d3f7a9734e3010e2,0b9d343d-4e25-4d22-b49c-fa17509a0333,-234.011423626
4,0x00000000009e50a7ddb7a7b0e2ee6604fd120e49,0b9d343d-4e25-4d22-b49c-fa17509a0333,-0.233809693934
...,...,...,...
1888161,0xffe91fda27c3d39663d3adc16d3ac4bce17a1f0a,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,534.286148136
1888162,0xffed43322e064fce09bef0e949701da17f067569,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,0
1888163,0xfff07d6cb3d1e67563f3bfa335c94db34f59c0a4,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,82.2364102505
1888164,0xfff2246f89868eb0e06e5a28a84ff53d2652266a,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,75.0893117248


In [6]:
def assess_megashark_modeling_period_performance(modeling_period_profits_df, shark_wallets_df):
    """
    Assess the performance of megasharks during the modeling period by comparing their profitability changes
    to non-megasharks.
    
    Parameters:
        modeling_period_profits_df (DataFrame): DataFrame containing the change in profitability during the modeling period.
        shark_wallets_df (DataFrame): DataFrame with wallets classified as megasharks.
        
    Returns:
        megasharks_performance_df (DataFrame): DataFrame comparing megashark performance vs. non-megasharks.
    """
    # Step 1: Filter for megasharks and non-megasharks
    megasharks_df = shark_wallets_df[shark_wallets_df['is_megashark']]
    non_megasharks_df = shark_wallets_df[~shark_wallets_df['is_megashark']]

    # Step 2: Merge megasharks with modeling period profits data
    megasharks_performance_df = pd.merge(
        megasharks_df[['wallet_address']],
        modeling_period_profits_df,
        on='wallet_address',
        how='inner'
    )

    # Step 3: Merge non-megasharks with modeling period profits data
    non_megasharks_performance_df = pd.merge(
        non_megasharks_df[['wallet_address']],
        modeling_period_profits_df,
        on='wallet_address',
        how='inner'
    )

    # Step 4: Calculate average performance for megasharks and non-megasharks
    megasharks_avg_profit_change = megasharks_performance_df['profit_during_modeling'].mean()
    non_megasharks_avg_profit_change = non_megasharks_performance_df['profit_during_modeling'].mean()

    # Step 5: Create a comparison DataFrame
    performance_comparison_df = pd.DataFrame({
        'group': ['megasharks', 'non-megasharks'],
        'avg_profit_change': [megasharks_avg_profit_change, non_megasharks_avg_profit_change]
    })

    return performance_comparison_df

performance_comparison_df = assess_megashark_modeling_period_performance(modeling_period_profits_df, shark_wallets_df)
performance_comparison_df

NameError: name 'modeling_period_profits_df' is not defined

In [None]:
def calculate_modeling_period_rate_of_return(profits_df, training_period_end, modeling_period_end):
    """
    Calculate the rate of return (ROR) during the modeling period for each wallet-coin pair.

    Parameters:
        profits_df (DataFrame): DataFrame containing wallet balances, inflows, and profits data.
        training_period_end (str): End date of the training period.
        modeling_period_end (str): End date of the modeling period.

    Returns:
        ror_df (DataFrame): DataFrame with wallet_address, coin_id, and rate of return (ROR).
    """
    # Step 1: Filter for the end of the training period and the end of the modeling period
    training_profits_df = profits_df[profits_df['date'] == training_period_end][['wallet_address', 'coin_id', 'balance', 'usd_inflows_cumulative']]
    modeling_profits_df = profits_df[profits_df['date'] == modeling_period_end][['wallet_address', 'coin_id', 'balance', 'price']]

    # Step 2: Merge the two DataFrames on wallet_address and coin_id
    merged_profits_df = pd.merge(
        training_profits_df,
        modeling_profits_df,
        on=['wallet_address', 'coin_id'],
        suffixes=('_training', '_modeling')
    )

    # Step 3: Calculate the total inflows, outflows, and price changes during the modeling period
    merged_profits_df['net_inflows'] = merged_profits_df['usd_inflows_cumulative']  # Assumed to be cumulative inflows
    merged_profits_df['final_value'] = merged_profits_df['balance_modeling'] * merged_profits_df['price']

    # Step 4: Calculate net investment (initial balance + inflows)
    merged_profits_df['net_investment'] = merged_profits_df['balance_training'] + merged_profits_df['net_inflows']

    # Step 5: Calculate the rate of return (ROR)
    merged_profits_df['rate_of_return'] = ((merged_profits_df['final_value'] - merged_profits_df['net_investment']) / merged_profits_df['net_investment']) * 100

    return merged_profits_df[['wallet_address', 'coin_id', 'rate_of_return']]


Unnamed: 0,wallet_address,coin_id,rate_of_return
0,0x000000000005af2ddc1a93a03e9b7014064d3b8d,0b9d343d-4e25-4d22-b49c-fa17509a0333,173.483384026
1,0x000000000035b5e5ad9019092c665357240f594e,0b9d343d-4e25-4d22-b49c-fa17509a0333,173.483384026
2,0x00000000003b3cc22af3ae1eac0440bcee416b40,0b9d343d-4e25-4d22-b49c-fa17509a0333,-71.3555514489
3,0x00000000009726632680fb29d3f7a9734e3010e2,0b9d343d-4e25-4d22-b49c-fa17509a0333,-100
4,0x00000000009e50a7ddb7a7b0e2ee6604fd120e49,0b9d343d-4e25-4d22-b49c-fa17509a0333,-21.8428128321
...,...,...,...
1888161,0xffe91fda27c3d39663d3adc16d3ac4bce17a1f0a,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,-98.9627594542
1888162,0xffed43322e064fce09bef0e949701da17f067569,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,
1888163,0xfff07d6cb3d1e67563f3bfa335c94db34f59c0a4,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,-98.9632455914
1888164,0xfff2246f89868eb0e06e5a28a84ff53d2652266a,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,-98.9651619607
