In [1]:
import sys
import os
import time
import logging
import datetime
from datetime import datetime, timedelta
import yaml
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema

# load dotenv
load_dotenv()

# import local files if necessary
sys.path.append('..//src')
from utils import load_config, cw_filter_df
import training_data as td
importlib.reload(td)
import feature_engineering as fe
importlib.reload(fe)
import coin_wallet_metrics as cwm
importlib.reload(cwm)
import modeling as m
importlib.reload(m)
import insights as i
importlib.reload(i)

# load configs
config = load_config('../config/config.yaml')
metrics_config = load_config('../config/metrics_config.yaml')
modeling_config = load_config('../config/modeling_config.yaml')
experiments_config = load_config('../config/experiments_config.yaml')

# configure logger
logger = dc.setup_logger()
logger.setLevel(logging.INFO)

# Custom format function for displaying numbers
pd.set_option('display.float_format', lambda x: f'{x:.12g}')
# pd.reset_option('display.float_format')

In [2]:
importlib.reload(td)
importlib.reload(cwm)
importlib.reload(fe)
importlib.reload(m)
importlib.reload(i)
config = load_config('../config/config.yaml')
metrics_config = load_config('../config/metrics_config.yaml')
modeling_config = load_config('../config/modeling_config.yaml')
experiments_config = load_config('../config/experiments_config.yaml')


### Training Data (profits_df) Generation

In [3]:
logger.setLevel(logging.INFO)

# retrieve and clean prices data
prices_df = td.retrieve_prices_data()
prices_df,_ = td.fill_prices_gaps(prices_df,config['data_cleaning']['max_gap_days'])

# retrieve transfers data
transfers_df = td.retrieve_transfers_data(
    config['training_data']['training_period_start'],
    config['training_data']['modeling_period_start'],
    config['training_data']['modeling_period_end']
    )

# compile profits_df
profits_df = td.prepare_profits_data(transfers_df, prices_df)
profits_df = td.calculate_wallet_profitability(profits_df)
profits_df,_ = td.clean_profits_df(profits_df, config['data_cleaning'])



[15/Sep/2024 01:03:41] INFO [dreams_core.core.retrieve_prices_data:42] retrieved prices data with shape (120763, 3)
[15/Sep/2024 01:03:41] INFO [dreams_core.core.fill_prices_gaps:126] 382 coins had no gaps, 19 coins had gaps filled, and 38 coins were dropped due to large gaps.
[15/Sep/2024 01:04:03] INFO [dreams_core.core.retrieve_transfers_data:333] retrieved transfers_df with shape (15703125, 5) after 21.8 seconds.
[15/Sep/2024 01:04:03] INFO [dreams_core.core.prepare_profits_data:374] Preparing profits_df data...
[15/Sep/2024 01:04:34] INFO [dreams_core.core.calculate_wallet_profitability:554] Generated profits df after 11.98 seconds
[15/Sep/2024 01:04:47] INFO [dreams_core.core.clean_profits_df:625] Finished cleaning profits_df after 12.68 seconds.


### Metrics and Feature Engineering

In [12]:
modeling_config[model]

'/outputs/flattened_outputs/'

In [14]:
print(modeling_config['modeling']['modeling_folder'])
print(os.path.join(modeling_config['modeling']['modeling_folder'],'/outputs/flattened_outputs/'))

..//modeling
/outputs/flattened_outputs/


In [24]:
modeling_config['modeling']['train_test_split']

'sharks cohort'

In [4]:
importlib.reload(td)
importlib.reload(cwm)
importlib.reload(fe)
importlib.reload(m)
config = load_config('../config/config.yaml')
metrics_config = load_config('../config/metrics_config.yaml')
modeling_config = load_config('../config/modeling_config.yaml')


# identify cohort
cohort_summary_df = td.classify_wallet_cohort(profits_df, config['wallet_cohorts']['sharks'])

# generate and flatten buysell_metrics
cohort_wallets = cohort_summary_df[cohort_summary_df['in_cohort']==True]['wallet_address']
buysell_metrics_df = cwm.generate_buysell_metrics_df(profits_df,config['training_data']['training_period_end'],cohort_wallets)

# flatten, save, and preprocess the flattened df
flattened_output_directory = os.path.join(modeling_config['modeling']['modeling_folder'],'/outputs/flattened_outputs/')
cohort_name = list(config['wallet_cohorts'].keys())[0]
metric_description = f"{cohort_name}_cohort"

flattened_buysell_metrics_df = fe.flatten_coin_date_df(buysell_metrics_df,metrics_config,config['training_data']['training_period_end'])
flattened_df, flattened_filepath = fe.save_flattened_outputs(flattened_buysell_metrics_df, flattened_output_directory, metric_description, config['training_data']['modeling_period_start'])
preprocessed_df, preprocessed_filepath = fe.preprocess_coin_df(flattened_filepath, modeling_config, metrics_config)

# create the training data df
input_directory = f"{preprocessed_filepath.split('preprocessed_outputs/')[0]}preprocessed_outputs/"
input_filenames = [
    preprocessed_filepath.split('preprocessed_outputs/')[1]
]
training_data_df = fe.create_training_data_df(input_directory, input_filenames)

# create the target variable df
target_variable_df,_ = fe.create_target_variables_mooncrater(prices_df, config['training_data'], modeling_config)

# merge the two into the final model input df
model_input_df = fe.prepare_model_input_df(training_data_df, target_variable_df, modeling_config['modeling']['target_column'])

# split the df into train and test sets
X_train, X_test, y_train, y_test = m.split_model_input(
    model_input_df,
    modeling_config['modeling']['target_column'],
    modeling_config['modeling']['train_test_split'],
    modeling_config['modeling']['train_test_split']
)

[15/Sep/2024 01:04:53] INFO [dreams_core.core.classify_wallet_cohort:714] Wallet cohort classification complete. 759/68423 eligible wallets were added to the cohort.
[15/Sep/2024 01:04:53] INFO [dreams_core.core.generate_buysell_metrics_df:32] Preparing buysell_metrics_df...
[15/Sep/2024 01:04:56] INFO [dreams_core.core.generate_buysell_metrics_df:93] Generated buysell_metrics_df after 2.60 seconds.
[15/Sep/2024 01:04:56] INFO [dreams_core.core.flatten_coin_date_df:84] Flattening columns ['buyers_new', 'buyers_repeat', 'sellers_new', 'sellers_repeat', 'total_bought', 'total_sold', 'total_net_transfers', 'total_holders', 'total_balance'] into coin-level features...
[15/Sep/2024 01:04:56] INFO [dreams_core.core.flatten_coin_date_df:100] Flattened input df into coin-level features with shape (93, 204) after 0.24 seconds.
[15/Sep/2024 01:04:56] INFO [dreams_core.core.preprocess_coin_df:423] Preprocessed file saved at: ..//modeling/outputs/preprocessed_outputs/buysell_metrics_0.1_2024-09-15

## Full Workflow

In [18]:
importlib.reload(td)
importlib.reload(cwm)
importlib.reload(fe)
importlib.reload(m)
importlib.reload(i)
config = load_config('../config/config.yaml')
metrics_config = load_config('../config/metrics_config.yaml')
modeling_config = load_config('../config/modeling_config.yaml')
experiments_config = load_config('../config/experiments_config.yaml')
config_folder = '../config'




def run_experiments(method, config_folder, modeling_folder, max_evals=2):
    """
    Runs experiments using a specified search method (grid or random), builds models,
    and logs the results of each experiment.

    Args:
    - method (str): 'grid' or 'random' to select the search method.
    - config_folder (str): Path to the folder containing all configuration files.
    - modeling_folder (str): Path to the folder where models, logs, and results will be saved.
    - max_evals (int): Number of iterations for Random search (default is 50).
    """

    # 1. Generate the experiment configurations
    configurations = i.generate_experiment_configurations(config_folder, method=method, max_evals=max_evals)

    # 2. Iterate through each configuration
    for config in configurations:
        
        # 2.1 Prepare the full configuration by applying overrides from the current experiment config
        config, metrics_config, modeling_config = i.prepare_configs(config, config_folder)
        
        # 2.2 Retrieve or rebuild profits_df based on config changes
        profits_df = i.rebuild_profits_df_if_necessary(config, modeling_folder)
        
        # 2.3 Build the configured model input data (train/test data)
        X_train, X_test, y_train, y_test = i.build_configured_model_input(profits_df, metrics_config, modeling_config)

        # 2.4 Train the model using the current configuration and log the results
        model, model_id = i.train_model(X_train, y_train, modeling_folder, modeling_config['hyperparameters'])
        
        # 2.5 Evaluate the model's performance on the test set
        metrics = i.evaluate_model(model, X_test, y_test, model_id, modeling_folder)

        # 2.6 Log the experiment results for this configuration
        i.log_experiment_results(modeling_folder, model_id, config, metrics)

    # 3. Compare all experiments and analyze the best-performing configuration
    i.analyze_experiments(modeling_folder)

{'metrics': {'buyers_new': {'aggregations': {'sum': {'scaling': 'standard'},
    'mean': {'scaling': 'standard'},
    'std': {'scaling': 'standard'}},
   'rolling': {'stats': {'sum': {'scaling': 'standard'},
     'mean': {'scaling': 'standard'}},
    'comparisons': {'pct_change': {'scaling': 'None'}},
    'window_duration': 7,
    'lookback_periods': 8}},
  'buyers_repeat': {'aggregations': {'sum': {'scaling': 'standard'},
    'mean': {'scaling': 'standard'},
    'std': {'scaling': 'standard'}},
   'rolling': {'stats': {'sum': {'scaling': 'standard'},
     'mean': {'scaling': 'standard'}},
    'comparisons': {'pct_change': {'scaling': 'None'}},
    'window_duration': 7,
    'lookback_periods': 8}},
  'sellers_new': {'aggregations': {'sum': {'scaling': 'standard'},
    'mean': {'scaling': 'standard'},
    'std': {'scaling': 'standard'}},
   'rolling': {'stats': {'sum': {'scaling': 'standard'},
     'mean': {'scaling': 'standard'}},
    'comparisons': {'pct_change': {'scaling': 'None'}},

In [12]:
model_input_df.describe()

Unnamed: 0,buyers_new_sum_buysell_metrics_0.1,buyers_new_mean_buysell_metrics_0.1,buyers_new_std_buysell_metrics_0.1,buyers_new_sum_7d_period_1_buysell_metrics_0.1,buyers_new_max_7d_period_1_buysell_metrics_0.1,buyers_new_change_7d_period_1_buysell_metrics_0.1,buyers_new_pct_change_7d_period_1_buysell_metrics_0.1,buyers_new_sum_7d_period_2_buysell_metrics_0.1,buyers_new_max_7d_period_2_buysell_metrics_0.1,buyers_new_change_7d_period_2_buysell_metrics_0.1,...,total_bought_change_7d_period_5_buysell_metrics_0.1,total_bought_sum_7d_period_6_buysell_metrics_0.1,total_bought_change_7d_period_6_buysell_metrics_0.1,total_bought_sum_7d_period_7_buysell_metrics_0.1,total_bought_change_7d_period_7_buysell_metrics_0.1,total_bought_sum_7d_period_8_buysell_metrics_0.1,total_bought_change_7d_period_8_buysell_metrics_0.1,total_sold_sum_buysell_metrics_0.1,total_buyers_sum_buysell_metrics_0.1,is_moon
count,93.0,93.0,93.0,93.0,93.0,93.0,93.0,93.0,93.0,93.0,...,93.0,93.0,93.0,93.0,93.0,93.0,93.0,93.0,93.0,93.0
mean,-2.6263340367500002e-17,0.203420467186,-2.3875763970400003e-18,1.33333333333,0.752688172043,0.0645161290323,-1.79211469534,2.62365591398,1.27956989247,-0.0645161290323,...,7983273.21942,69041421.4221,-11182688.3553,141507103.282,2043508.78243,474450902.64,-229108127.947,5.551115123130001e-17,98.247311828,0.645161290323
std,1.0054200939,0.228655264337,1.0054200939,2.65122339822,1.0596955467,0.60444821429,38.1851551314,6.22217978458,3.3794956898,0.804963564027,...,53669669.8503,646200542.882,112641876.554,1343544649.83,19642348.743,4562977937.95,2209460197.45,1.0054200939,118.222308635,0.481057740598
min,-0.894460163916,0.0,-0.813049345613,0.0,0.0,-2.0,-100.0,0.0,0.0,-3.0,...,-365513.152776,0.0,-1084542828.3,0.0,-253121.312169,0.0,-21307260180.5,-0.110742309808,1.0,0.0
25%,-0.72388300347,0.0387931034483,-0.659823412344,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-100.3102,0.0,0.0,0.0,-0.0865474795992,-0.110741274949,15.0,0.0
50%,-0.306916611269,0.133620689655,-0.347080756174,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,223.759006,0.0,10.3299089614,0.0,267.190882673,0.0,-0.110734536865,56.0,1.0
75%,0.223767887897,0.254310344828,0.142876474657,2.0,1.0,0.0,0.0,3.0,1.0,0.0,...,0.0,26774.9619113,0.0,51132.5211365,0.0,50532.9912945,0.0,-0.110692745727,119.0,1.0
max,3.50263997203,1.0,3.87313449822,18.0,5.0,3.0,200.0,45.0,24.0,3.0,...,427471519.59,6232416496.01,51777442.8932,12958361069.0,189428155.026,44004957553.1,1308284.0692,9.5813823922,483.0,1.0
