In [20]:
import sys
import os
import time
import logging
import datetime
from datetime import datetime, timedelta
import yaml
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema

# Load environment variables via python-dotenv.
load_dotenv()


# Make the project's local modules importable. The membership guard keeps
# sys.path free of duplicate entries when this cell is re-run in one kernel.
src_dir = '../src'
if src_dir not in sys.path:
    sys.path.append(src_dir)
from utils import load_config, cw_filter_df
# Each local module is reloaded right after import so that edits made to the
# source files are picked up without restarting the kernel.
import training_data as td
importlib.reload(td)
import feature_engineering as fe
importlib.reload(fe)
import coin_wallet_metrics as cwm
importlib.reload(cwm)


# Configure the shared logger and emit INFO-level messages and above.
logger = dc.setup_logger()
logger.setLevel(logging.INFO)

# Display floats with up to 12 significant digits ('g' general format).
pd.set_option('display.float_format', lambda x: f'{x:.12g}')
# Uncomment to restore pandas' default float display:
# pd.reset_option('display.float_format')

#### Load the datasets

In [21]:
# Pick up the latest training_data code and re-read the config before loading.
importlib.reload(td)
config = load_config()
training_cfg = config['training_data']
cleaning_cfg = config['data_cleaning']


# Prices: retrieve, then fill gaps using the configured max_gap_days.
# The second value returned by fill_prices_gaps is discarded.
prices_df, _ = td.fill_prices_gaps(
    td.retrieve_prices_data(),
    cleaning_cfg['max_gap_days'],
)
logger.info(f"Prices data shape: {prices_df.shape}")

# Transfers: bounded by the training/modeling period dates from the config.
transfers_df = td.retrieve_transfers_data(
    training_cfg['training_period_start'],
    training_cfg['modeling_period_start'],
    training_cfg['modeling_period_end'],
)

# Profits: prepare from transfers + prices, compute wallet profitability,
# then clean with the data_cleaning settings (second return value discarded).
profits_df = td.calculate_wallet_profitability(
    td.prepare_profits_data(transfers_df, prices_df)
)
profits_df, _ = td.clean_profits_df(profits_df, cleaning_cfg)


# Sharks: classify at the coin level first, then at the wallet level.
shark_coins_df = td.classify_shark_coins(profits_df, training_cfg)
shark_wallets_df = td.classify_shark_wallets(shark_coins_df, training_cfg)


# # assess shark performance
# shark_agg_performance_df,shark_wallets_performance_df = td.calculate_shark_performance(transfers_df, prices_df, shark_wallets_df, config)
# metrics = ['count_wallets', 'return_aggregate', 'nonzero_count_wallets', 'nonzero_median_return', 'midrange_count_wallets', 'midrange_median_return', 'midrange_return_aggregate']
# shark_agg_performance_df[shark_agg_performance_df['metric'].isin(metrics)]

[11/Sep/2024 21:10:59] INFO [dreams_core.core.retrieve_prices_data:42] retrieved prices data with shape (120763, 3)
[11/Sep/2024 21:10:59] INFO [dreams_core.core.fill_prices_gaps:126] 382 coins had no gaps, 19 coins had gaps filled, and 38 coins were dropped due to large gaps.
[11/Sep/2024 21:10:59] INFO [dreams_core.core.<module>:10] Prices data shape: (110929, 3)
[11/Sep/2024 21:11:26] INFO [dreams_core.core.retrieve_transfers_data:414] retrieved transfers_df with shape (23823401, 5) after 27.0 seconds.
[11/Sep/2024 21:11:27] INFO [dreams_core.core.prepare_profits_data:455] Preparing profits_df data...
[11/Sep/2024 21:12:19] INFO [dreams_core.core.calculate_wallet_profitability:635] Generated profits df after 19.30 seconds
[11/Sep/2024 21:12:42] INFO [dreams_core.core.clean_profits_df:706] Finished cleaning profits_df after 23.04 seconds.
[11/Sep/2024 21:12:50] INFO [dreams_core.core.classify_shark_coins:772] creation of shark_coins_df complete.


In [23]:
# Refresh the local modules and the config so this cell uses the latest edits.
importlib.reload(td)
importlib.reload(cwm)
config = load_config()

# Inputs for generate_buysell_metrics_df(): the wallet cohort is every wallet
# flagged is_shark; the coin set is every coin present in shark_coins_df.
# NOTE(review): the displayed config also defines
# training_data.shark_wallet_type ('is_shark') - confirm whether the column
# name used here should be read from config instead of being hard-coded.
is_shark_mask = shark_wallets_df['is_shark'].eq(True)
cohort_wallets = shark_wallets_df.loc[is_shark_mask, 'wallet_address'].unique()
cohort_coins = shark_coins_df['coin_id'].unique()

buysell_metrics_df = cwm.generate_buysell_metrics_df(
    profits_df,
    config['training_data']['training_period_end'],
    cohort_wallets,
    cohort_coins,
)

[11/Sep/2024 21:15:57] INFO [dreams_core.core.generate_buysell_metrics_df:29] Preparing buysell_metrics_df...


999
139


[11/Sep/2024 21:16:01] INFO [dreams_core.core.generate_buysell_metrics_df:76] Generated buysell_metrics_df after 4.13 seconds.


In [102]:
# Reload feature_engineering first so the call below runs the latest code,
# then read both config files.
importlib.reload(fe)
config = load_config('config.yaml')
config_metrics = load_config('config_metrics.yaml')

# NOTE(review): the recorded run of this cell raised KeyError: 'modeling'.
# The cause is not visible from this notebook - confirm which config
# flatten_coin_date_df expects and that it contains a 'modeling' section.
flattened_buysell_metrics_df = fe.flatten_coin_date_df(
    buysell_metrics_df,
    config_metrics,
)
# Display the transposed result.
flattened_buysell_metrics_df.T

KeyError: 'modeling'

In [94]:
# Display the parsed main config for inspection (the bare last expression is
# rendered as the cell output).
load_config('../notebooks/config.yaml')

{'modeling': {'moon_threshold': 0.3,
  'dump_threshold': -0.2,
  'rolling_window_duration': 7,
  'rolling_lookback_periods': 8},
 'training_data': {'training_period_start': '2024-01-01',
  'training_period_end': '2024-04-30',
  'modeling_period_start': '2024-05-01',
  'modeling_period_end': '2024-5-07',
  'shark_coin_minimum_inflows': 5000,
  'shark_coin_profits_threshold': 20000,
  'shark_coin_return_threshold': 0.5,
  'shark_wallet_type': 'is_shark',
  'shark_wallet_min_coins': 4,
  'shark_wallet_min_shark_rate': 0.4},
 'data_cleaning': {'profitability_filter': 15000000,
  'inflows_filter': 10000000,
  'max_gap_days': 2}}

In [100]:
# Load the main config and the metrics config.
config = load_config('config.yaml')
config_metrics = load_config('config_metrics.yaml')

# Display the metrics config as the cell output. A cell ending in an
# assignment renders nothing, yet a dict was recorded as this cell's output.
# NOTE(review): the recorded dict has a 'metrics' key, so it is presumably
# config_metrics rather than config - confirm.
config_metrics

{'modeling': {'rolling_window_duration': 7, 'rolling_lookback_periods': 8},
 'metrics': {'buyers_new': {'aggregations': ['sum', 'mean', 'max'],
   'rolling': True},
  'total_bought': {'aggregations': ['sum', 'mean']},
  'total_holders': {'aggregations': ['mean', 'max']}}}