In [1]:
import sys
import os
import time
import logging
import datetime
from datetime import datetime, timedelta
import yaml
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema


# import local files if necessary
sys.path.append('..//src')
import training_data as td
importlib.reload(td)

# load dotenv
load_dotenv()

# configure logger
logger = dc.setup_logger()
logger.setLevel(logging.INFO)

# Custom format function for displaying numbers
pd.set_option('display.float_format', lambda x: f'{x:.12g}')
# pd.reset_option('display.float_format')


def load_config(file_path='config.yaml'):
    with open(file_path, 'r') as file:
        return yaml.safe_load(file)
config = load_config()

def cw_filter_df(df, coin_id, wallet_address):
    filtered_df = df[
        (df['coin_id'] == coin_id) &
        (df['wallet_address'] == wallet_address)
    ]
    return filtered_df


#### Load the datasets

In [2]:
importlib.reload(td)
config = load_config()


# retrieve prices data
prices_df = td.retrieve_prices_data()

# fill gaps in prices data
prices_df,_ = td.fill_prices_gaps(prices_df,config['data_cleaning']['max_gap_days'])
logger.info(f"Prices data shape: {prices_df.shape}")

# retrieve transfers data
transfers_df = td.retrieve_transfers_data(
    config['modeling']['training_period_start'],
    config['modeling']['modeling_period_start'],
    config['modeling']['modeling_period_end']
    )
logger.info(f"Transfers data shape: {transfers_df.shape}")

# compile profits_df
profits_df = td.prepare_profits_data(transfers_df, prices_df)
profits_df = td.calculate_wallet_profitability(profits_df)
profits_df,_ = td.clean_profits_df(profits_df, config['data_cleaning'])
logger.info(f"Profits data shape: {profits_df.shape}")


# identify sharks
shark_coins_df = td.classify_shark_coins(profits_df, config['modeling'])
shark_wallets_df = td.classify_shark_wallets(shark_coins_df,config['modeling'])


# assess shark performance
shark_agg_performance_df,shark_wallets_performance_df = td.calculate_shark_performance(transfers_df, prices_df, shark_wallets_df, config)
shark_agg_performance_df

[10/Sep/2024 09:15:02] INFO [dreams_core.core.retrieve_prices_data:42] retrieved prices data with shape (120763, 3)
[10/Sep/2024 09:15:02] INFO [dreams_core.core.fill_prices_gaps:126] 382 coins had no gaps, 19 coins had gaps filled, and 38 coins were dropped due to large gaps.
[10/Sep/2024 09:15:02] INFO [dreams_core.core.<module>:10] Prices data shape: (110929, 3)
[10/Sep/2024 09:16:02] INFO [dreams_core.core.retrieve_transfers_data:414] retrieved transfers_df with shape (32489209, 5) after 59.7 seconds.
[10/Sep/2024 09:16:02] INFO [dreams_core.core.<module>:18] Transfers data shape: (32489209, 5)
[10/Sep/2024 09:16:02] INFO [dreams_core.core.prepare_profits_data:455] Preparing profits_df data...
[10/Sep/2024 09:17:52] INFO [dreams_core.core.calculate_wallet_profitability:635] Generated profits df after 35.33 seconds


In [48]:
def assess_coin_shark_metrics_df(shark_coins_df):
    """
    creates a series of coin-keyed metrics based on shark behavior
    """
    # Step 1: Coin-Level Metrics - Counting the number of sharks per coin
    coin_shark_count = shark_coins_df.groupby('coin_id')['is_shark'].sum().reset_index()
    coin_shark_count.columns = ['coin_id', 'num_sharks']

    # Step 2: Total inflows by sharks for each coin
    coin_shark_inflows = shark_coins_df[shark_coins_df['is_shark']].groupby('coin_id')['usd_inflows_cumulative'].sum().reset_index()
    coin_shark_inflows.columns = ['coin_id', 'total_shark_inflows']

    # Step 3: Merge the coin-level shark metrics
    coin_shark_metrics_df = pd.merge(coin_shark_count, coin_shark_inflows, on='coin_id', how='left')

    return coin_shark_metrics_df

Unnamed: 0,coin_id,num_sharks,total_shark_inflows
0,0b9d343d-4e25-4d22-b49c-fa17509a0333,428,40882358.1574
1,0db96a94-082b-4e13-a315-860850e9ff4f,186,27178344.7173
2,0e1c102e-2e7d-4aed-af2d-1526c2e0720a,15,16064740.7561
3,0eedc336-a78e-4b25-957e-57117227ef78,576,73903604.6467
4,0f96fb26-1ee9-4232-ae0e-c768f38070b3,50,3704418.38065
...,...,...,...
101,eeccf0b6-aaaa-464c-a23e-f2fc9e73a350,89,8368216.99833
102,f0420cea-5dc1-42ac-b1bc-f6e48b7804f1,236,83918538.5148
103,f64ac466-300d-43d4-8c36-ef26a7a48977,0,
104,f68b64ae-61d5-4dd6-b448-4ae9c754bd07,260,56928923.2692


In [46]:
sample_shark_coins_profits_df

Unnamed: 0,coin_id,wallet_address,date,usd_inflows_cumulative,profits_cumulative,total_return
0,coin_1,wallet_1,2024-02-15,5000,3000,1.66666666667
1,coin_1,wallet_2,2024-02-20,15000,8000,1.875
2,coin_2,wallet_1,2024-02-18,8000,6000,1.33333333333
3,coin_2,wallet_3,2024-02-25,20000,9000,2.22222222222
4,coin_3,wallet_2,2024-02-22,5000,4000,1.25


In [45]:
shark_coins_df

Unnamed: 0,coin_id,wallet_address,usd_inflows_cumulative,profits_cumulative,is_profits_shark,total_return,is_returns_shark,is_shark
0,coin_1,wallet_2,15000,8000,True,1.875,True,True
1,coin_2,wallet_3,20000,9000,True,2.22222222222,True,True


In [42]:


def sample_shark_coins_profits_df():
    """
    Sample DataFrame for testing classify_shark_coins function
    """
    data = {
        'coin_id': ['coin_1', 'coin_1', 'coin_2', 'coin_2', 'coin_3'],
        'wallet_address': ['wallet_1', 'wallet_2', 'wallet_1', 'wallet_3', 'wallet_2'],
        'date': ['2024-02-15', '2024-02-20', '2024-02-18', '2024-02-25', '2024-02-22'],
        'usd_inflows_cumulative': [5000, 15000, 8000, 20000, 5000],
        'profits_cumulative': [3000, 8000, 6000, 9000, 4000],
    }
    df = pd.DataFrame(data)
    df['total_return'] = df['profits_cumulative'] / df['usd_inflows_cumulative']
    return df


def sample_shark_coins_modeling_config():
    """
    Sample configuration for testing classify_shark_coins function    
    """
    return {
        'modeling_period_start': '2024-03-01',
        'shark_minimum_inflows': 10000,
        'shark_total_profits_threshold': 5000,
        'shark_total_return_threshold': 0.5
    }
sample_shark_coins_profits_df = sample_shark_coins_profits_df()
sample_shark_coins_modeling_config = sample_shark_coins_modeling_config()


# def test_shark_coins_returns_classification(sample_shark_coins_profits_df, sample_shark_coins_modeling_config):
#     """
#     Test 3: Verify that wallets are classified as returns sharks correctly.
#     """
shark_coins_df = td.classify_shark_coins(sample_shark_coins_profits_df, sample_shark_coins_modeling_config)
is_returns_shark_w2 = shark_coins_df[shark_coins_df['wallet_address'] == 'wallet_2']['is_returns_shark'].values[0]
is_returns_shark_w3 = shark_coins_df[shark_coins_df['wallet_address'] == 'wallet_3']['is_returns_shark'].values[0]
assert is_returns_shark_w2, "Wallet_2 should be classified as a returns shark."
assert not is_returns_shark_w3, "Wallet_3 should not be classified as a returns shark."


# def test_shark_coins_combined_shark_classification(sample_shark_coins_profits_df, sample_shark_coins_modeling_config):
#     """
#     Test 4: Ensure wallets are classified as sharks if they meet either profits or returns criteria.
#     """
#     shark_coins_df = td.classify_shark_coins(sample_shark_coins_profits_df, sample_shark_coins_modeling_config)
#     is_shark = shark_coins_df[shark_coins_df['wallet_address'] == 'wallet_2']['is_shark'].values[0]
#     assert is_shark, "Wallet_2 should be classified as a shark."


# def test_shark_coins_modeling_period_filtering(sample_shark_coins_profits_df, sample_shark_coins_modeling_config):
#     """
#     Test 5: Verify that aggregates in shark_coins_df exclude data from the modeling period.
#     """
#     # Run the classify_shark_coins function
#     shark_coins_df = td.classify_shark_coins(sample_shark_coins_profits_df, sample_shark_coins_modeling_config)

#     # Manually calculate expected values for wallet_2 and wallet_3 (both should exclude modeling period data)
#     assert shark_coins_df[shark_coins_df['wallet_address'] == 'wallet_2']['profits_cumulative'].values[0] == 12000, "Profits for wallet_2 should be 12000"
#     assert shark_coins_df[shark_coins_df['wallet_address'] == 'wallet_3']['profits_cumulative'].values[0] == 9000, "Profits for wallet_3 should be 9000"
#     assert shark_coins_df[shark_coins_df['wallet_address'] == 'wallet_2']['total_return'].values[0] == 0.6, "Return for wallet_2 should be 0.6"
#     assert shark_coins_df[shark_coins_df['wallet_address'] == 'wallet_3']['total_return'].values[0] == 0.45, "Return for wallet_3 should be 0.45"




[10/Sep/2024 11:06:51] INFO [dreams_core.core.classify_shark_coins:752] creation of shark_coins_df complete.


AssertionError: Wallet_3 should not be classified as a returns shark.

In [29]:
sample_shark_coins_profits_df

Unnamed: 0,coin_id,wallet_address,date,usd_inflows_cumulative,profits_cumulative,total_return
0,coin_1,wallet_1,2024-02-15,5000,3000,0.3
1,coin_1,wallet_2,2024-02-20,15000,8000,0.5
2,coin_2,wallet_1,2024-02-18,8000,6000,0.6
3,coin_2,wallet_3,2024-02-25,20000,9000,0.9


In [28]:
shark_coins_df

Unnamed: 0,coin_id,wallet_address,usd_inflows_cumulative,profits_cumulative,is_profits_shark,total_return,is_returns_shark,is_shark
0,coin_1,wallet_2,15000,8000,True,0.5,True,True
1,coin_2,wallet_3,20000,9000,True,0.9,True,True
