In [3]:
import sys
import os
import time
import logging
import datetime
from datetime import datetime, timedelta
import yaml
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema


# import local files if necessary
sys.path.append('..//src')
from utils import load_config, cw_filter_df
import training_data as td
importlib.reload(td)

# load dotenv
load_dotenv()

# configure logger
logger = dc.setup_logger()
logger.setLevel(logging.INFO)

# Custom format function for displaying numbers
pd.set_option('display.float_format', lambda x: f'{x:.12g}')
# pd.reset_option('display.float_format')


def load_config(file_path='config.yaml'):
    with open(file_path, 'r') as file:
        return yaml.safe_load(file)
config = load_config()

def cw_filter_df(df, coin_id, wallet_address):
    filtered_df = df[
        (df['coin_id'] == coin_id) &
        (df['wallet_address'] == wallet_address)
    ]
    return filtered_df


ModuleNotFoundError: No module named 'src'

#### Load the datasets

In [2]:
importlib.reload(td)
config = load_config()


# retrieve prices data
prices_df = td.retrieve_prices_data()

# fill gaps in prices data
prices_df,_ = td.fill_prices_gaps(prices_df,config['data_cleaning']['max_gap_days'])
logger.info(f"Prices data shape: {prices_df.shape}")

# retrieve transfers data
transfers_df = td.retrieve_transfers_data(
    config['training_data']['training_period_start'],
    config['training_data']['modeling_period_start'],
    config['training_data']['modeling_period_end']
    )
logger.info(f"Transfers data shape: {transfers_df.shape}")

# compile profits_df
profits_df = td.prepare_profits_data(transfers_df, prices_df)
profits_df = td.calculate_wallet_profitability(profits_df)
profits_df,_ = td.clean_profits_df(profits_df, config['data_cleaning'])
logger.info(f"Profits data shape: {profits_df.shape}")


# identify sharks
shark_coins_df = td.classify_shark_coins(profits_df, config['training_data'])
shark_wallets_df = td.classify_shark_wallets(shark_coins_df,config['training_data'])


# assess shark performance
shark_agg_performance_df,shark_wallets_performance_df = td.calculate_shark_performance(transfers_df, prices_df, shark_wallets_df, config)
shark_agg_performance_df

[10/Sep/2024 11:39:24] INFO [dreams_core.core.retrieve_prices_data:42] retrieved prices data with shape (120763, 3)
[10/Sep/2024 11:39:25] INFO [dreams_core.core.fill_prices_gaps:126] 382 coins had no gaps, 19 coins had gaps filled, and 38 coins were dropped due to large gaps.
[10/Sep/2024 11:39:25] INFO [dreams_core.core.<module>:10] Prices data shape: (110929, 3)
[10/Sep/2024 11:39:53] INFO [dreams_core.core.retrieve_transfers_data:414] retrieved transfers_df with shape (21636858, 5) after 28.4 seconds.
[10/Sep/2024 11:39:53] INFO [dreams_core.core.<module>:18] Transfers data shape: (21636858, 5)
[10/Sep/2024 11:39:53] INFO [dreams_core.core.prepare_profits_data:455] Preparing profits_df data...
[10/Sep/2024 11:40:44] INFO [dreams_core.core.calculate_wallet_profitability:635] Generated profits df after 19.12 seconds
[10/Sep/2024 11:40:56] INFO [dreams_core.core.clean_profits_df:686] Finished cleaning profits_df after 12.41 seconds.
[10/Sep/2024 11:40:56] INFO [dreams_core.core.<modul

Unnamed: 0_level_0,count_wallets,median_inflows,median_profits,mean_inflows,min_inflows,max_inflows,percentile_25_inflows,percentile_75_inflows,mean_profits,min_profits,...,median_return,return_aggregate,nonzero_count_wallets,nonzero_median_inflows,nonzero_median_profits,nonzero_percentile_25_inflows,nonzero_percentile_75_inflows,nonzero_percentile_25_profits,nonzero_percentile_75_profits,nonzero_median_return
is_shark,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
False,101982,0.0,0,15243.8295517,0,25503660.8657,0,0.0,-12057.740522,-545140012.694,...,0,-0.790991560297,25060,9037.12519559,-2168.74764284,2455.18951417,28994.2497224,-9225.05429859,-106.86000325,-0.23998202923
True,608,8679.89287175,0,128760.043852,0,40567932.8443,0,49333.5117674,-118139.602783,-53839795.8331,...,0,-0.917517571823,345,36747.4716483,-11980.012919,13002.171937,95991.5681558,-38924.5672152,-2195.87737448,-0.326009175097


In [63]:
print(len(shark_coins_df.groupby(['coin_id','wallet_address']).size()))

len(shark_coins_df.groupby(['coin_id','wallet_address']).size().drop_duplicates())

95568


1

In [74]:
duplicates = shark_coins_df.duplicated(subset=['coin_id', 'wallet_address'], keep=False)
set(duplicates)

{False}

In [48]:
def assess_coin_shark_metrics_df(shark_coins_df):
    """
    creates a series of coin-keyed metrics based on shark behavior
    """
    # Step 1: Coin-Level Metrics - Counting the number of sharks per coin
    coin_shark_count = shark_coins_df.groupby('coin_id')['is_shark'].sum().reset_index()
    coin_shark_count.columns = ['coin_id', 'num_sharks']

    # Step 2: Total inflows by sharks for each coin
    coin_shark_inflows = shark_coins_df[shark_coins_df['is_shark']].groupby('coin_id')['usd_inflows_cumulative'].sum().reset_index()
    coin_shark_inflows.columns = ['coin_id', 'total_shark_inflows']

    # Step 3: Merge the coin-level shark metrics
    coin_shark_metrics_df = pd.merge(coin_shark_count, coin_shark_inflows, on='coin_id', how='left')

    return coin_shark_metrics_df

Unnamed: 0,coin_id,num_sharks,total_shark_inflows
0,0b9d343d-4e25-4d22-b49c-fa17509a0333,428,40882358.1574
1,0db96a94-082b-4e13-a315-860850e9ff4f,186,27178344.7173
2,0e1c102e-2e7d-4aed-af2d-1526c2e0720a,15,16064740.7561
3,0eedc336-a78e-4b25-957e-57117227ef78,576,73903604.6467
4,0f96fb26-1ee9-4232-ae0e-c768f38070b3,50,3704418.38065
...,...,...,...
101,eeccf0b6-aaaa-464c-a23e-f2fc9e73a350,89,8368216.99833
102,f0420cea-5dc1-42ac-b1bc-f6e48b7804f1,236,83918538.5148
103,f64ac466-300d-43d4-8c36-ef26a7a48977,0,
104,f68b64ae-61d5-4dd6-b448-4ae9c754bd07,260,56928923.2692
