In [None]:
import sys
import os
import time
import logging
import datetime
from datetime import datetime, timedelta
import yaml
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema


# import local files if necessary
sys.path.append('..//src')
import training_data as td
importlib.reload(td)

# load environment variables from a .env file (python-dotenv)
load_dotenv()

# configure logger
# NOTE(review): dc.setup_logger() comes from dreams_core; presumably it returns a standard
# logging.Logger, since setLevel(logging.INFO) is called on the result — confirm.
logger = dc.setup_logger()
logger.setLevel(logging.INFO)

# Custom format function for displaying numbers: floats are rendered with up to
# 12 significant digits in DataFrame/Series output
pd.set_option('display.float_format', lambda x: f'{x:.12g}')
# pd.reset_option('display.float_format')


def load_config(file_path='config.yaml'):
    """
    Load a YAML configuration file and return its parsed contents.

    Parameters:
        file_path (str): Path to the YAML file. Defaults to 'config.yaml', which is
            resolved against the current working directory.

    Returns:
        The object produced by yaml.safe_load (None for an empty file).
    """
    # Explicit UTF-8 so that parsing does not depend on the platform's locale encoding
    with open(file_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)
config = load_config()

def cw_filter_df(df, coin_id, wallet_address):
    """
    Return the rows of df that belong to a single coin-wallet pair.

    Parameters:
        df (DataFrame): Frame with 'coin_id' and 'wallet_address' columns.
        coin_id: Value to match against df['coin_id'].
        wallet_address: Value to match against df['wallet_address'].

    Returns:
        DataFrame: The matching rows, with their original index labels.
    """
    is_coin = df['coin_id'].eq(coin_id)
    is_wallet = df['wallet_address'].eq(wallet_address)
    return df.loc[is_coin & is_wallet]


#### Load the datasets

In [None]:
# Reload the local training_data module and the config so this cell uses the current
# on-disk versions of both
importlib.reload(td)
config = load_config()


# retrieve prices data
prices_df = td.retrieve_prices_data()

# fill gaps in prices data (the second return value is discarded)
prices_df,_ = td.fill_prices_gaps(prices_df,config['data_cleaning']['max_gap_days'])
logger.info(f"Prices data shape: {prices_df.shape}")

# retrieve transfers data
transfers_df = td.retrieve_transfers_data(
    config['modeling']['training_period_start'],
    config['modeling']['modeling_period_start'],
    config['modeling']['modeling_period_end']
    )
logger.info(f"Transfers data shape: {transfers_df.shape}")

# compile profits_df (the second return value of clean_profits_df is discarded)
profits_df = td.prepare_profits_data(transfers_df, prices_df)
profits_df = td.calculate_wallet_profitability(profits_df)
profits_df,_ = td.clean_profits_df(profits_df, config['data_cleaning'])
logger.info(f"Profits data shape: {profits_df.shape}")


# identify sharks
shark_coins_df = td.classify_shark_coins(profits_df, config['modeling'])
shark_wallets_df = td.classify_shark_wallets(shark_coins_df,config['modeling'])


# assess shark performance; the call returns two frames, and the first one is shown as the cell output
shark_agg_performance_df,shark_wallets_performance_df = td.calculate_shark_performance(transfers_df, prices_df, shark_wallets_df, config)
shark_agg_performance_df

In [None]:
# Top 10 rows of the per-wallet performance frame, ranked by usd_inflows_cumulative (largest first)
shark_wallets_performance_df.sort_values('usd_inflows_cumulative',ascending=False).head(10)

In [None]:
# Spot-check one wallet: column-wise maximum over all of its rows in profits_df
w = '0x28c6c06298d514db089934071355e5743bf21d60'
# transfers_df[transfers_df['wallet_address']==w]
profits_df[profits_df['wallet_address']==w].max()
# # shark_coins_df[shark_coins_df['wallet_address']==w]
# shark_wallets_df[shark_wallets_df['wallet_address']==w]

In [None]:
# Filter transfers for the modeling period (both period bounds are inclusive)
modeling_period_transfers_df = transfers_df[
    (transfers_df['date'] >= config['modeling']['modeling_period_start']) &
    (transfers_df['date'] <= config['modeling']['modeling_period_end'])
]

# Create profits_df for the modeling period
modeling_period_profits_df = td.prepare_profits_data(modeling_period_transfers_df, prices_df)
modeling_period_profits_df = td.calculate_wallet_profitability(modeling_period_profits_df)

# Retrieve profit state at the end of the period for each coin-wallet pair
# (only rows dated exactly modeling_period_end are kept)
modeling_end_profits_df = modeling_period_profits_df[
    modeling_period_profits_df['date'] == config['modeling']['modeling_period_end']
]

# Aggregate wallet-level metrics by summing usd inflows and profits;
# the result is indexed by wallet_address
modeling_end_wallet_profits_df = modeling_end_profits_df.groupby('wallet_address')[
    ['usd_inflows_cumulative', 'profits_cumulative']
].sum()

# Classify wallets by shark status and compare their performance.
# Left merge: every row of shark_wallets_df is kept; wallets absent from the
# modeling-period totals get NaN in the two metric columns.
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_wallet_profits_df,
    on='wallet_address',
    how='left'
)


In [None]:
# Show the wallet-level totals with wallet_address as a regular column instead of the index
modeling_end_wallet_profits_df.reset_index()

In [None]:
# Spot-check one wallet's row(s) in shark_wallets_df; the commented lines are the
# same lookup against the other frames
w = 'BmPLYFnk2wSSQZMd3TZoajeu62fF5fFrAkioi8vxZbHo'

# transfers_df[transfers_df['wallet_address']==w]
# profits_df[profits_df['wallet_address']==w]
# shark_coins_df[shark_coins_df['wallet_address']==w]
# shark_wallets_df[shark_wallets_df['wallet_address']==w]
shark_wallets_df[shark_wallets_df['wallet_address']==w]

In [None]:

# Look up the wallet `w` in the wallet-level modeling-period totals
# (reset_index turns the wallet_address index into a column so it can be filtered)
df = modeling_end_wallet_profits_df.reset_index()
df[df['wallet_address']==w]

In [None]:
# Row(s) for the wallet `w` in shark_performance_df
shark_performance_df[shark_performance_df['wallet_address']==w]

In [None]:
# Classify wallets by shark status and compare their performance.
# The wallet-level totals come from modeling_end_wallet_profits_df, built in the
# modeling-period cell earlier in this section. This cell previously read
# modeling_end_metrics_df, which is only defined further down the notebook, so it
# raised NameError on a fresh top-to-bottom run.
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_wallet_profits_df,
    on='wallet_address',
    how='left'
)

# Replace NaNs with 0s for wallets that had no inflows and profits in the modeling period
shark_performance_df['usd_inflows_cumulative'] = shark_performance_df['usd_inflows_cumulative'].fillna(0)
shark_performance_df['profits_cumulative'] = shark_performance_df['profits_cumulative'].fillna(0)


# Aggregate the wallet-level metrics into one row per is_shark value
shark_performance_df = shark_performance_df.groupby('is_shark').agg(
    count_wallets=('wallet_address', 'size'),
    median_inflows=('usd_inflows_cumulative', 'median'),
    median_profits=('profits_cumulative', 'median'),
    mean_inflows=('usd_inflows_cumulative', 'mean'),
    min_inflows=('usd_inflows_cumulative', 'min'),
    max_inflows=('usd_inflows_cumulative', 'max'),
    percentile_25_inflows=('usd_inflows_cumulative', lambda x: np.percentile(x.dropna(), 25) if len(x) > 1 else np.nan),
    percentile_75_inflows=('usd_inflows_cumulative', lambda x: np.percentile(x.dropna(), 75) if len(x) > 1 else np.nan),
    mean_profits=('profits_cumulative', 'mean'),
    min_profits=('profits_cumulative', 'min'),
    max_profits=('profits_cumulative', 'max'),
    percentile_25_profits=('profits_cumulative', lambda x: np.percentile(x.dropna(), 25) if len(x) > 1 else np.nan),
    percentile_75_profits=('profits_cumulative', lambda x: np.percentile(x.dropna(), 75) if len(x) > 1 else np.nan),
    total_inflows=('usd_inflows_cumulative', 'sum'),
    total_profits=('profits_cumulative', 'sum')
)

# Calculate median return (median profits / median inflows); groups whose median
# inflows are 0 keep the 0 from the pre-filled `out` array instead of dividing by zero
shark_performance_df['median_return'] = np.divide(
    shark_performance_df['median_profits'],
    shark_performance_df['median_inflows'],
    out=np.zeros_like(shark_performance_df['median_profits']),
    where=shark_performance_df['median_inflows'] != 0
)

# Calculate aggregate return (total profits / total inflows), with the same zero guard
shark_performance_df['return_aggregate'] = np.divide(
    shark_performance_df['total_profits'],
    shark_performance_df['total_inflows'],
    out=np.zeros_like(shark_performance_df['total_profits']),
    where=shark_performance_df['total_inflows'] != 0
)

shark_performance_df

### Sharkwork

In [None]:
# Reload the local module and config so this cell uses their current on-disk versions
importlib.reload(td)
config = load_config()

# create shark dfs
shark_coins_df = td.classify_shark_coins(profits_df, config['modeling'])
shark_wallets_df = td.classify_shark_wallets(shark_coins_df,config['modeling'])

# assess shark performance
# The return value is unpacked into two frames, as in the data-loading cell at the top of
# this notebook. Binding the whole return value to one name left shark_performance_df
# holding a tuple rather than a DataFrame.
shark_performance_df, shark_wallets_performance_df = td.calculate_shark_performance(transfers_df, prices_df, shark_wallets_df, config)
shark_performance_df

In [None]:
# Filter transfers for the modeling period (both period bounds are inclusive)
modeling_period_transfers_df = transfers_df[
    (transfers_df['date'] >= config['modeling']['modeling_period_start']) &
    (transfers_df['date'] <= config['modeling']['modeling_period_end'])
]

# Create profits_df for the modeling period
modeling_period_profits_df = td.prepare_profits_data(modeling_period_transfers_df, prices_df)
modeling_period_profits_df = td.calculate_wallet_profitability(modeling_period_profits_df)

# Retrieve wallet-level profit state at the end of the period
# (rows dated exactly modeling_period_end, then summed per wallet; result is indexed by wallet_address)
modeling_end_profits_df = modeling_period_profits_df[
    modeling_period_profits_df['date'] == config['modeling']['modeling_period_end']
]
modeling_end_metrics_df = modeling_end_profits_df.groupby('wallet_address')[
    ['usd_inflows_cumulative', 'profits_cumulative']
].sum()

# Classify wallets by shark status and merge with metrics.
# Left merge: wallets without modeling-period totals get NaN in the metric columns.
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df,
    on='wallet_address',
    how='left'
)

modeling_end_metrics_df

In [None]:
# Spot-check one wallet's rows in the modeling-period profits frame
w = '0x00000000000a78c8727b6ae386f004e7e37a4875'

# modeling_period_transfers_df[modeling_period_transfers_df['wallet_address']==w]
modeling_period_profits_df[modeling_period_profits_df['wallet_address']==w]

In [None]:
# Sanity check: shape of the wallet-level metrics and NaN count per column
print(modeling_end_metrics_df.shape)
modeling_end_metrics_df.isna().sum()

In [None]:
# Classify wallets by shark status and merge with metrics
# (left merge: wallets missing from modeling_end_metrics_df get NaN metrics)
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df,
    on='wallet_address',
    how='left'
)
shark_performance_df



In [None]:
# Spot-check one wallet's rows in the full transfers frame
w = '0x00000000000a78c8727b6ae386f004e7e37a4875'

transfers_df[transfers_df['wallet_address']==w]

In [None]:
# NaN count per column of shark_performance_df
shark_performance_df.isna().sum()

In [None]:
# Classify wallets by shark status and merge with metrics
# (left merge: wallets missing from modeling_end_metrics_df get NaN metrics)
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df,
    on='wallet_address',
    how='left'
)

shark_performance_df

In [None]:
# Sanity check: shape of the wallet-level metrics and NaN count per column
print(modeling_end_metrics_df.shape)
modeling_end_metrics_df.isna().sum()

In [None]:
# Classify wallets by shark status and merge with metrics
# (left merge: wallets missing from modeling_end_metrics_df get NaN metrics)
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df,
    on='wallet_address',
    how='left'
)

# (rows, columns) of the merged frame
shark_performance_df.shape

In [None]:
# Preview the first rows of shark_performance_df
shark_performance_df.head()

In [None]:

# Classify wallets by shark status and merge with metrics
# (left merge: wallets missing from modeling_end_metrics_df get NaN metrics)
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_metrics_df,
    on='wallet_address',
    how='left'
)

# Disabled draft of the per-is_shark aggregation; only the merge above runs in this cell.
# # Remove wallet_address for aggregation
# shark_performance_df.groupby('is_shark').agg(
#     count_wallets=('wallet_address', 'size'),
#     median_inflows=('usd_inflows_cumulative', 'median'),
#     median_profits=('profits_cumulative', 'median'),
#     mean_inflows=('usd_inflows_cumulative', 'mean'),
#     min_inflows=('usd_inflows_cumulative', 'min'),
#     max_inflows=('usd_inflows_cumulative', 'max'),
#     percentile_25_inflows=('usd_inflows_cumulative', lambda x: np.percentile(x, 25)),
#     percentile_75_inflows=('usd_inflows_cumulative', lambda x: np.percentile(x, 75)),
#     mean_profits=('profits_cumulative', 'mean'),
#     min_profits=('profits_cumulative', 'min'),
#     max_profits=('profits_cumulative', 'max'),
#     percentile_25_profits=('profits_cumulative', lambda x: np.percentile(x, 25)),
#     percentile_75_profits=('profits_cumulative', lambda x: np.percentile(x, 75)),
#     total_inflows=('usd_inflows_cumulative', 'sum'),
#     total_profits=('profits_cumulative', 'sum')
# )

# # # Calculate aggregate return
# # shark_performance_df['return_aggregate'] = np.divide(
# #     shark_performance_df['total_profits'],
# #     shark_performance_df['total_inflows'],
# #     out=np.zeros_like(shark_performance_df['total_profits']),
# #     where=shark_performance_df['total_inflows'] != 0
# # )

shark_performance_df.head()

In [None]:
# calculate total inflows and total profits
# modeling_end_metrics_df = modeling_end_profits_df.groupby('wallet_address')[['usd_inflows_cumulative','profits_cumulative']].sum()
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_profits_df,
    on='wallet_address',
    how='left'
)
# numeric_only=True restricts the sum to numeric columns. The merged frame is coin-level and
# also carries identifier/date columns (wallet_address, coin_id, date) that are not meaningful to sum.
shark_performance_df = shark_performance_df.groupby('is_shark').sum(numeric_only=True)
# Aggregate return per is_shark group: total profits divided by total USD inflows
shark_performance_df['return_aggregate'] = shark_performance_df['profits_cumulative'] / shark_performance_df['usd_inflows_cumulative']

In [None]:
# calculate total inflows and total profits
# modeling_end_metrics_df = modeling_end_profits_df.groupby('wallet_address')[['usd_inflows_cumulative','profits_cumulative']].sum()
shark_performance_df = shark_wallets_df[['wallet_address', 'is_shark']].merge(
    modeling_end_profits_df,
    on='wallet_address',
    how='left'
)
# numeric_only=True restricts the sum to numeric columns. The merged frame is coin-level and
# also carries identifier/date columns (wallet_address, coin_id, date) that are not meaningful to sum.
shark_performance_df.groupby('is_shark').sum(numeric_only=True)

In [None]:
# Spot-check one wallet's rows in profits_df
w = '0x0000000000000000000000000000000000000002'
# Compare the wallet_address column to w. The comparison used to sit inside the column
# lookup ('wallet_address'==w evaluates to False), so the cell looked up a column named False.
profits_df[profits_df['wallet_address'] == w]

In [None]:
# Compare the total row count with the number of distinct wallet addresses;
# equal values mean wallet_address is unique in shark_wallets_df
print(len(shark_wallets_df['wallet_address']))
len(shark_wallets_df['wallet_address'].drop_duplicates())

In [None]:

# Preview the first rows of modeling_period_profits_df
modeling_period_profits_df.head()

In [None]:
def filter_df(df, coin_id, wallet_address):
    """
    Select the rows of df for one coin-wallet pair.

    Parameters:
        df (DataFrame): Frame with 'coin_id' and 'wallet_address' columns.
        coin_id: Value to match against df['coin_id'].
        wallet_address: Value to match against df['wallet_address'].

    Returns:
        DataFrame: Rows where both columns match, with their original index labels.
    """
    # NOTE(review): same logic as cw_filter_df in the first cell of this notebook.
    pair_mask = df['coin_id'].eq(coin_id) & df['wallet_address'].eq(wallet_address)
    return df.loc[pair_mask]

# TODO: set `c` to the coin_id to inspect. The assignment was left without a value (`c=`),
# which is a SyntaxError and stopped the whole cell from being parsed. With the None
# placeholder the lookup below returns no rows until a real coin_id is filled in.
c = None

filter_df(modeling_period_profits_df,c,w)

In [None]:
# Transfers dated within the modeling period (both bounds inclusive).
# The two conditions must be joined with `&`; without it the second parenthesised
# expression was parsed as a call on the first comparison result.
transfers_df[
    (transfers_df['date'] >= config['modeling']['modeling_period_start']) &
    (transfers_df['date'] <= config['modeling']['modeling_period_end'])
]

In [None]:
# Calculate and clean profits data for the modeling period only
# NOTE(review): the code below rebuilds profits_df from the full transfers_df (no
# modeling-period filter) and does not call td.clean_profits_df, so the comment above
# does not match what runs — confirm intent.
profits_df = td.prepare_profits_data(transfers_df, prices_df)
profits_df = td.calculate_wallet_profitability(profits_df)

# NOTE(review): this displays modeling_period_profits_df, which this cell does not recompute.
modeling_period_profits_df

In [None]:
importlib.reload(td)
config = load_config()

def calculate_modeling_period_profitability(profits_df, training_period_end, modeling_period_end):
    """
    Compute, per wallet-coin pair, how much profits_cumulative changed between the end of
    the training period and the end of the modeling period.

    Only pairs that have a row dated exactly training_period_end AND a row dated exactly
    modeling_period_end are returned (the two snapshots are inner-joined).

    Parameters:
        profits_df (DataFrame): Frame with 'date', 'wallet_address', 'coin_id' and
            'profits_cumulative' columns.
        training_period_end (str): Date value identifying the training-period snapshot.
        modeling_period_end (str): Date value identifying the modeling-period snapshot.

    Returns:
        DataFrame: Columns wallet_address, coin_id and profit_during_modeling
            (modeling snapshot minus training snapshot).
    """
    pair_keys = ['wallet_address', 'coin_id']
    snapshot_columns = pair_keys + ['profits_cumulative']

    def _snapshot(as_of_date):
        # Rows of profits_df dated exactly as_of_date, reduced to keys + cumulative profits
        return profits_df.loc[profits_df['date'] == as_of_date, snapshot_columns]

    # Pair each wallet-coin's training snapshot with its modeling snapshot
    paired_df = _snapshot(training_period_end).merge(
        _snapshot(modeling_period_end),
        on=pair_keys,
        suffixes=('_training', '_modeling'),
    )

    # Profit earned between the two snapshot dates
    paired_df['profit_during_modeling'] = (
        paired_df['profits_cumulative_modeling'] - paired_df['profits_cumulative_training']
    )

    return paired_df[pair_keys + ['profit_during_modeling']]


# NOTE(review): 'training_period_end' is not read anywhere else in this notebook —
# confirm the key exists under config['modeling'].
training_period_end = config['modeling']['training_period_end']
modeling_period_end = config['modeling']['modeling_period_end']
# Rebinds modeling_period_profits_df to the function's output, whose columns are
# wallet_address, coin_id and profit_during_modeling
modeling_period_profits_df = calculate_modeling_period_profitability(profits_df, training_period_end, modeling_period_end)
modeling_period_profits_df

In [None]:
def assess_megashark_modeling_period_performance(modeling_period_profits_df, shark_wallets_df):
    """
    Compare the average modeling-period profit change of megashark wallets against
    all other wallets in shark_wallets_df.

    The average is taken over the joined rows of modeling_period_profits_df (one value per
    row of that frame), not over one value per wallet.

    Parameters:
        modeling_period_profits_df (DataFrame): Frame with 'wallet_address' and
            'profit_during_modeling' columns.
        shark_wallets_df (DataFrame): Frame with 'wallet_address' and a boolean
            'is_megashark' column.

    Returns:
        DataFrame: Two rows ('megasharks', 'non-megasharks') with columns
            group and avg_profit_change.
    """
    megashark_flag = shark_wallets_df['is_megashark']

    def _mean_profit_change(wallet_subset_df):
        # Inner join keeps only wallets that have modeling-period profit rows
        joined_df = wallet_subset_df[['wallet_address']].merge(
            modeling_period_profits_df,
            on='wallet_address',
            how='inner',
        )
        return joined_df['profit_during_modeling'].mean()

    return pd.DataFrame({
        'group': ['megasharks', 'non-megasharks'],
        'avg_profit_change': [
            _mean_profit_change(shark_wallets_df[megashark_flag]),
            _mean_profit_change(shark_wallets_df[~megashark_flag]),
        ],
    })

# NOTE(review): the function reads shark_wallets_df['is_megashark'], while the other cells
# in this notebook only read 'is_shark' — confirm shark_wallets_df has an 'is_megashark' column.
performance_comparison_df = assess_megashark_modeling_period_performance(modeling_period_profits_df, shark_wallets_df)
performance_comparison_df

In [None]:
def calculate_modeling_period_rate_of_return(profits_df, training_period_end, modeling_period_end):
    """
    Compute a percentage rate of return (ROR) for each wallet-coin pair that has a row
    dated exactly training_period_end and a row dated exactly modeling_period_end.

    Calculation, per pair:
        final_value    = balance at modeling_period_end * price at modeling_period_end
        net_investment = balance at training_period_end
                         + usd_inflows_cumulative at training_period_end
        rate_of_return = (final_value - net_investment) / net_investment * 100

    NOTE(review): final_value multiplies balance by price, but net_investment adds the
    training-period balance directly to a USD inflow figure. This looks like it mixes
    units — confirm the intended formula. No guard is applied when net_investment is 0.

    Parameters:
        profits_df (DataFrame): Frame with 'date', 'wallet_address', 'coin_id', 'balance',
            'usd_inflows_cumulative' and 'price' columns.
        training_period_end (str): Date value identifying the training-period snapshot.
        modeling_period_end (str): Date value identifying the modeling-period snapshot.

    Returns:
        DataFrame: Columns wallet_address, coin_id and rate_of_return (in percent).
    """
    pair_keys = ['wallet_address', 'coin_id']
    row_dates = profits_df['date']

    # Snapshots at the two period ends, reduced to the columns each side contributes
    start_snapshot = profits_df.loc[
        row_dates == training_period_end,
        pair_keys + ['balance', 'usd_inflows_cumulative'],
    ]
    end_snapshot = profits_df.loc[
        row_dates == modeling_period_end,
        pair_keys + ['balance', 'price'],
    ]

    # Inner join on the pair keys; the shared 'balance' column gets the two suffixes
    combined_df = start_snapshot.merge(
        end_snapshot,
        on=pair_keys,
        suffixes=('_training', '_modeling'),
    )

    # Cumulative inflows as of the training-period snapshot
    combined_df['net_inflows'] = combined_df['usd_inflows_cumulative']
    combined_df['final_value'] = combined_df['balance_modeling'] * combined_df['price']
    combined_df['net_investment'] = combined_df['balance_training'] + combined_df['net_inflows']

    # Percentage gain relative to net_investment
    gain = combined_df['final_value'] - combined_df['net_investment']
    combined_df['rate_of_return'] = (gain / combined_df['net_investment']) * 100

    return combined_df[pair_keys + ['rate_of_return']]
