In [1]:
import sys
import os
import time
import logging
import datetime
from datetime import datetime, timedelta
import yaml
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema


# import local files if necessary
sys.path.append('..//src')
import training_data as td
importlib.reload(td)

# load dotenv
load_dotenv()

# configure logger
logger = dc.setup_logger()
logger.setLevel(logging.INFO)

# Custom format function for displaying numbers
pd.set_option('display.float_format', lambda x: f'{x:.12g}')
# pd.reset_option('display.float_format')


def load_config(file_path='config.yaml'):
    with open(file_path, 'r') as file:
        return yaml.safe_load(file)
config = load_config()


#### Load the datasets

In [2]:
importlib.reload(td)
config = load_config()


# retrieve prices data
prices_df = td.retrieve_prices_data()

# fill gaps in prices data
prices_df,prices_outcomes_df = td.fill_prices_gaps(prices_df,config['data_cleaning']['max_gap_days'])
logger.info(f"Prices data shape: {prices_df.shape}")

# Retrieve or load transfers data
# if 'transfers_df' not in locals():
transfers_df = td.retrieve_transfers_data(
    config['modeling']['training_period_start'],
    config['modeling']['modeling_period_start'],
    config['modeling']['modeling_period_end']
    )
logger.info(f"Transfers data shape: {transfers_df.shape}")


# Calculate and clean profits data
profits_df = td.prepare_profits_data(transfers_df, prices_df)
profits_df = td.calculate_wallet_profitability(profits_df)
profits_df,_ = td.clean_profits_df(profits_df, config['data_cleaning']['profitability_filter'])
logger.info(f"Profits data shape: {profits_df.shape}")

[09/Sep/2024 15:12:18] INFO [dreams_core.core.retrieve_prices_data:42] retrieved prices data with shape (120763, 3)
[09/Sep/2024 15:12:18] INFO [dreams_core.core.fill_prices_gaps:126] 382 coins had no gaps, 19 coins had gaps filled, and 38 coins were dropped due to large gaps.
[09/Sep/2024 15:12:18] INFO [dreams_core.core.<module>:10] Prices data shape: (110929, 3)
[09/Sep/2024 15:13:09] INFO [dreams_core.core.retrieve_transfers_data:414] retrieved transfers_df with shape (18608530, 5) after 51.2 seconds.
[09/Sep/2024 15:13:09] INFO [dreams_core.core.<module>:19] Transfers data shape: (18608530, 5)
[09/Sep/2024 15:13:52] INFO [dreams_core.core.calculate_wallet_profitability:635] Generated profits df after 16.27 seconds
[09/Sep/2024 15:13:59] INFO [dreams_core.core.clean_profits_df:674] Finished cleaning profits_df after 6.88 seconds. Removed 489 coin-wallet pairs that breached profit or loss threshold of $10.0M
[09/Sep/2024 15:13:59] INFO [dreams_core.core.<module>:26] Profits data sha

### Sharkwork

In [3]:
importlib.reload(td)
config = load_config()

shark_coins_df = td.classify_shark_coins(profits_df, config['modeling'])
shark_wallets_df = td.classify_shark_wallets(shark_coins_df,config['modeling'])

[09/Sep/2024 15:14:04] INFO [dreams_core.core.classify_shark_coins:737] creation of shark_coins_df complete.


In [10]:
# create transfers_df for only the modeling period
modeling_period_transfers_df = transfers_df[
    (transfers_df['date'] >= config['modeling']['modeling_period_start'])
    & (transfers_df['date'] <= config['modeling']['modeling_period_end'])
]

# create profits_df for the modeling period only
modeling_period_profits_df = td.prepare_profits_data(modeling_period_transfers_df, prices_df)
modeling_period_profits_df = td.calculate_wallet_profitability(modeling_period_profits_df)

# retrieve profit state at the end of the period for each coin-wallet pair
modeling_end_profits_df = modeling_period_profits_df[modeling_period_profits_df['date']==config['modeling']['modeling_period_end']]

[09/Sep/2024 15:18:46] INFO [dreams_core.core.calculate_wallet_profitability:635] Generated profits df after 3.82 seconds


In [12]:
# calculate total inflows and total profits
# aggregate wallet-level metrics buy summing all coins' usd inflows and profits for each wallet address
modeling_end_metrics_df = modeling_end_profits_df.groupby('wallet_address')[['usd_inflows_cumulative','profits_cumulative']].sum()

shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df, 
    on='wallet_address', 
    how='left'
)
shark_performance_df = shark_performance_df.groupby('is_shark').sum()
shark_performance_df['return_aggregate'] = shark_performance_df['profits_cumulative'] / shark_performance_df['usd_inflows_cumulative']

shark_performance_df

Unnamed: 0_level_0,wallet_address,usd_inflows_cumulative,profits_cumulative,return_aggregate
is_shark,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,0x000000000000c35e4364deffa9059dbadaefd4f80x00...,4860902687.12,1663961042.48,0.342315234348
True,0x000000d40b595b94918a28b27d1e2c66f43a51d30x00...,761426404.519,841401661.721,1.10503346972


In [34]:
# calculate total inflows and total profits
# modeling_end_metrics_df = modeling_end_profits_df.groupby('wallet_address')[['usd_inflows_cumulative','profits_cumulative']].sum()
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_profits_df, 
    on='wallet_address', 
    how='left'
)
shark_performance_df = shark_performance_df.groupby('is_shark').sum()
shark_performance_df['return_aggregate'] = shark_performance_df['profits_cumulative'] / shark_performance_df['usd_inflows_cumulative']

Unnamed: 0_level_0,wallet_address,usd_inflows_cumulative,profits_cumulative
is_shark,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,0x000000000000c35e4364deffa9059dbadaefd4f80x00...,19352465806.3,12626288983.6
True,0x000000d40b595b94918a28b27d1e2c66f43a51d30x00...,2524867560.5,2968641511.65


In [35]:
# calculate total inflows and total profits
# modeling_end_metrics_df = modeling_end_profits_df.groupby('wallet_address')[['usd_inflows_cumulative','profits_cumulative']].sum()
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_profits_df, 
    on='wallet_address', 
    how='left'
)
shark_performance_df.groupby('is_shark').sum()

In [27]:
w = '0x0000000000000000000000000000000000000002'
profits_df[profits_df['wallet_address'==w]]

KeyError: False

In [20]:
print(len(shark_wallets_df['wallet_address']))
len(shark_wallets_df['wallet_address'].drop_duplicates())

111888


111888

In [17]:

modeling_period_profits_df.head()

Unnamed: 0,coin_id,wallet_address,date,net_transfers,balance,price,profits_change,profits_cumulative,usd_balance,usd_net_transfers,usd_inflows,usd_inflows_cumulative,total_return
2,04f6120a-f0dd-4260-bb2b-b8f827fdba61,0x00000000009e50a7ddb7a7b0e2ee6604fd120e49,2024-03-31,0,0.002920774,27.5991647724,0.0587838849452,0.0587841752714,0.0806109228888,0,0,0.0218267476174,2.69321734515
4,04f6120a-f0dd-4260-bb2b-b8f827fdba61,0x000000004685666c7653cc148f566f0511901b37,2024-03-31,0,2.38776413,27.5991647724,60.6366406422,60.6366406422,65.9002956614,0,0,5.26365501921,11.5198736279
6,04f6120a-f0dd-4260-bb2b-b8f827fdba61,0x00000000a991c429ee2ec6df19d40fe0c80088b8,2024-03-31,0,216.04104,27.5991647724,5486.30526016,5486.30526016,5962.55226055,0,0,476.24700039,11.5198736279
10,04f6120a-f0dd-4260-bb2b-b8f827fdba61,0x00000023c10000eecb940000b914cdfd76cc83d1,2024-03-31,0,37.55949184,27.5991647724,91.1447501104,267.338970969,1036.61060406,0,0,769.271633089,0.347522200832
12,04f6120a-f0dd-4260-bb2b-b8f827fdba61,0x00000047bb99ea4d791bb749d970de71ee0b1a34,2024-03-31,0,12.498298146,27.5991647724,323.420276906,323.420276906,344.942589906,0,0,21.5223129995,15.0272081311


In [None]:
def filter_df(df, coin_id, wallet_address):
    filtered_df = df[
        (df['coin_id'] == coin_id) &
        (df['wallet_address'] == wallet_address)
    ]
    return filtered_df

c=

filter_df(modeling_period_profits_df,c,w)

In [13]:
transfers_df[
    (transfers_df['date'] >= config['modeling']['modeling_period_start'])
    (transfers_df['date'] <= config['modeling']['modeling_period_end'])
]

TypeError: 'Series' object is not callable

In [None]:
# Calculate and clean profits data for the modeling period only
profits_df = td.prepare_profits_data(transfers_df, prices_df)
profits_df = td.calculate_wallet_profitability(profits_df)

modeling_period_profits_df 

In [58]:
importlib.reload(td)
config = load_config()

def calculate_modeling_period_profitability(profits_df, training_period_end, modeling_period_end):
    """
    Calculate profitability during the modeling period by finding the difference in total_profits between
    the end of the training period and the end of the modeling period.
    
    Parameters:
        profits_df (DataFrame): DataFrame containing wallet profitability data.
        training_period_end (str): End date of the training period.
        modeling_period_end (str): End date of the modeling period.
        
    Returns:
        modeling_period_profits_df (DataFrame): DataFrame with wallet_address, coin_id, and profitability change.
    """
    # Step 1: Filter for the end of the training period and the modeling period
    training_profits_df = profits_df[profits_df['date'] == training_period_end][['wallet_address', 'coin_id', 'profits_cumulative']]
    modeling_profits_df = profits_df[profits_df['date'] == modeling_period_end][['wallet_address', 'coin_id', 'profits_cumulative']]

    # Step 2: Merge the two DataFrames on wallet_address and coin_id
    merged_profits_df = pd.merge(
        training_profits_df,
        modeling_profits_df,
        on=['wallet_address', 'coin_id'],
        suffixes=('_training', '_modeling')
    )

    # Step 3: Calculate the difference in total profits between the two periods
    merged_profits_df['profit_during_modeling'] = merged_profits_df['profits_cumulative_modeling'] - merged_profits_df['profits_cumulative_training']

    return merged_profits_df[['wallet_address', 'coin_id', 'profit_during_modeling']]


training_period_end = config['modeling']['training_period_end']
modeling_period_end = config['modeling']['modeling_period_end']
modeling_period_profits_df = calculate_modeling_period_profitability(profits_df, training_period_end, modeling_period_end)
modeling_period_profits_df

Unnamed: 0,wallet_address,coin_id,profit_during_modeling
0,0x000000000005af2ddc1a93a03e9b7014064d3b8d,0b9d343d-4e25-4d22-b49c-fa17509a0333,-5.17081809181e-08
1,0x000000000035b5e5ad9019092c665357240f594e,0b9d343d-4e25-4d22-b49c-fa17509a0333,-6.72206351936e-07
2,0x00000000003b3cc22af3ae1eac0440bcee416b40,0b9d343d-4e25-4d22-b49c-fa17509a0333,-0.00262284576889
3,0x00000000009726632680fb29d3f7a9734e3010e2,0b9d343d-4e25-4d22-b49c-fa17509a0333,-234.011423626
4,0x00000000009e50a7ddb7a7b0e2ee6604fd120e49,0b9d343d-4e25-4d22-b49c-fa17509a0333,-0.233809693934
...,...,...,...
1888161,0xffe91fda27c3d39663d3adc16d3ac4bce17a1f0a,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,534.286148136
1888162,0xffed43322e064fce09bef0e949701da17f067569,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,0
1888163,0xfff07d6cb3d1e67563f3bfa335c94db34f59c0a4,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,82.2364102505
1888164,0xfff2246f89868eb0e06e5a28a84ff53d2652266a,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,75.0893117248


In [6]:
def assess_megashark_modeling_period_performance(modeling_period_profits_df, shark_wallets_df):
    """
    Assess the performance of megasharks during the modeling period by comparing their profitability changes
    to non-megasharks.
    
    Parameters:
        modeling_period_profits_df (DataFrame): DataFrame containing the change in profitability during the modeling period.
        shark_wallets_df (DataFrame): DataFrame with wallets classified as megasharks.
        
    Returns:
        megasharks_performance_df (DataFrame): DataFrame comparing megashark performance vs. non-megasharks.
    """
    # Step 1: Filter for megasharks and non-megasharks
    megasharks_df = shark_wallets_df[shark_wallets_df['is_megashark']]
    non_megasharks_df = shark_wallets_df[~shark_wallets_df['is_megashark']]

    # Step 2: Merge megasharks with modeling period profits data
    megasharks_performance_df = pd.merge(
        megasharks_df[['wallet_address']],
        modeling_period_profits_df,
        on='wallet_address',
        how='inner'
    )

    # Step 3: Merge non-megasharks with modeling period profits data
    non_megasharks_performance_df = pd.merge(
        non_megasharks_df[['wallet_address']],
        modeling_period_profits_df,
        on='wallet_address',
        how='inner'
    )

    # Step 4: Calculate average performance for megasharks and non-megasharks
    megasharks_avg_profit_change = megasharks_performance_df['profit_during_modeling'].mean()
    non_megasharks_avg_profit_change = non_megasharks_performance_df['profit_during_modeling'].mean()

    # Step 5: Create a comparison DataFrame
    performance_comparison_df = pd.DataFrame({
        'group': ['megasharks', 'non-megasharks'],
        'avg_profit_change': [megasharks_avg_profit_change, non_megasharks_avg_profit_change]
    })

    return performance_comparison_df

performance_comparison_df = assess_megashark_modeling_period_performance(modeling_period_profits_df, shark_wallets_df)
performance_comparison_df

NameError: name 'modeling_period_profits_df' is not defined

In [None]:
def calculate_modeling_period_rate_of_return(profits_df, training_period_end, modeling_period_end):
    """
    Calculate the rate of return (ROR) during the modeling period for each wallet-coin pair.

    Parameters:
        profits_df (DataFrame): DataFrame containing wallet balances, inflows, and profits data.
        training_period_end (str): End date of the training period.
        modeling_period_end (str): End date of the modeling period.

    Returns:
        ror_df (DataFrame): DataFrame with wallet_address, coin_id, and rate of return (ROR).
    """
    # Step 1: Filter for the end of the training period and the end of the modeling period
    training_profits_df = profits_df[profits_df['date'] == training_period_end][['wallet_address', 'coin_id', 'balance', 'usd_inflows_cumulative']]
    modeling_profits_df = profits_df[profits_df['date'] == modeling_period_end][['wallet_address', 'coin_id', 'balance', 'price']]

    # Step 2: Merge the two DataFrames on wallet_address and coin_id
    merged_profits_df = pd.merge(
        training_profits_df,
        modeling_profits_df,
        on=['wallet_address', 'coin_id'],
        suffixes=('_training', '_modeling')
    )

    # Step 3: Calculate the total inflows, outflows, and price changes during the modeling period
    merged_profits_df['net_inflows'] = merged_profits_df['usd_inflows_cumulative']  # Assumed to be cumulative inflows
    merged_profits_df['final_value'] = merged_profits_df['balance_modeling'] * merged_profits_df['price']

    # Step 4: Calculate net investment (initial balance + inflows)
    merged_profits_df['net_investment'] = merged_profits_df['balance_training'] + merged_profits_df['net_inflows']

    # Step 5: Calculate the rate of return (ROR)
    merged_profits_df['rate_of_return'] = ((merged_profits_df['final_value'] - merged_profits_df['net_investment']) / merged_profits_df['net_investment']) * 100

    return merged_profits_df[['wallet_address', 'coin_id', 'rate_of_return']]


Unnamed: 0,wallet_address,coin_id,rate_of_return
0,0x000000000005af2ddc1a93a03e9b7014064d3b8d,0b9d343d-4e25-4d22-b49c-fa17509a0333,173.483384026
1,0x000000000035b5e5ad9019092c665357240f594e,0b9d343d-4e25-4d22-b49c-fa17509a0333,173.483384026
2,0x00000000003b3cc22af3ae1eac0440bcee416b40,0b9d343d-4e25-4d22-b49c-fa17509a0333,-71.3555514489
3,0x00000000009726632680fb29d3f7a9734e3010e2,0b9d343d-4e25-4d22-b49c-fa17509a0333,-100
4,0x00000000009e50a7ddb7a7b0e2ee6604fd120e49,0b9d343d-4e25-4d22-b49c-fa17509a0333,-21.8428128321
...,...,...,...
1888161,0xffe91fda27c3d39663d3adc16d3ac4bce17a1f0a,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,-98.9627594542
1888162,0xffed43322e064fce09bef0e949701da17f067569,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,
1888163,0xfff07d6cb3d1e67563f3bfa335c94db34f59c0a4,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,-98.9632455914
1888164,0xfff2246f89868eb0e06e5a28a84ff53d2652266a,f7b278de-7fa7-4f87-ba5d-3eb2e57d933a,-98.9651619607
