In [None]:
# pyright: reportMissingImports=false
# pyright: reportMissingModuleSource=false

import uuid
import random
import hashlib
import os
import sys
import gc
import time
import logging
import re
import pdb
from pathlib import Path
import datetime
from datetime import datetime,timedelta
import json
import warnings
import yaml
from typing import Dict,Union,List,Any,Tuple
import pytest
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
import scipy
from scipy import stats
from sklearn.model_selection import ParameterGrid, ParameterSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    r2_score,
    explained_variance_score,
    mean_absolute_percentage_error,
    roc_auc_score
)
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from scipy.signal import argrelextrema
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
import seaborn as sns
import progressbar

# load_dotenv(Path("../../../Local/.env"))

# Render floats with up to 12 significant digits in pandas output
pd.set_option('display.float_format', lambda value: format(value, '.12g'))
# To restore the pandas default: pd.reset_option('display.float_format')

# Ignore warnings whose message begins with "MallocStackLogging"
warnings.filterwarnings("ignore", message="MallocStackLogging")

# Environment variables: hide the pygame support prompt and set the log / alert sound file paths
os.environ.update({
    'PYGAME_HIDE_SUPPORT_PROMPT': "hide",
    'LOGGING_FILE': "../../../Local/logs/wallet_modeling.log",
    'ALERT_SOUND_FILEPATH': "../../../Local/assets/sounds/mixkit-alert-bells-echo-765.wav",
})

# Dark mode charts: dark gray backgrounds with a muted green for all text elements
plt.rcParams.update({
    'figure.facecolor': '#181818',
    'axes.facecolor': '#181818',
    'text.color': '#afc6ba',
    'axes.labelcolor': '#afc6ba',
    'xtick.color': '#afc6ba',
    'ytick.color': '#afc6ba',
    'axes.titlecolor': '#afc6ba',
})

# import local modules
# pyright: reportMissingImports=false
sys.path.append('..//src')
import utils as u
import training_data.data_retrieval as dr
import training_data.profits_row_imputation as pri
import coin_wallet_metrics.coin_wallet_metrics as cwm
import coin_wallet_metrics.indicators as ind
import feature_engineering.feature_generation as fg
import feature_engineering.time_windows_orchestration as tw
import feature_engineering.flattening as flt
import feature_engineering.data_splitting as ds
import feature_engineering.target_variables as tv
import feature_engineering.preprocessing as prp
import modeling as m
import insights.analysis as ia
import insights.experiments as exp

# Wallet modeling
import wallet_modeling.wallet_modeling_orchestrator as wmo
import wallet_modeling.wallet_training_data as wtd
import wallet_modeling.model_reporting as wmr
import wallet_modeling.wallet_model as wm
import wallet_modeling.experiments_manager as wem
from wallet_modeling.wallets_config_manager import WalletsConfig

# Wallet features
import wallet_features.clustering_features as wcl
import wallet_features.market_cap_features as wmc
import wallet_features.market_timing_features as wmt
import wallet_features.performance_features as wpf
import wallet_features.trading_features as wtf
import wallet_features.transfers_features as wts
import wallet_features.features_orchestrator as wfo

# Wallet insights
import wallet_insights.wallet_model_evaluation as wime
import wallet_insights.wallet_validation_analysis as wiwv
import wallet_insights.coin_validation_analysis as wicv
import wallet_insights.coin_validation_model as wicm


# Keep a list of every local module so later cells can reload them after source edits
modules = [
    u, dr, pri, cwm, ind, fg, tw, flt, ds, tv, prp, m, ia, exp,
    wmo, wtd, wmr, wm, wem,
    wcl, wmc, wmt, wpf, wtf, wts, wfo,
    wime, wiwv, wicv, wicm,
]
list(map(importlib.reload, modules))

# Load the shared configs plus the wallet-specific configs
(config,
 metrics_config,
 modeling_config,
 experiments_config) = u.load_all_configs('../config')
wallets_config = WalletsConfig.load_from_yaml('../config/wallets_config.yaml')
wallets_config.reload()
wallets_metrics_config = u.load_config('../config/wallets_metrics_config.yaml')
wallets_features_config = yaml.safe_load(
    Path('../config/wallets_features_config.yaml').read_text(encoding='utf-8')
)

# Set up the logger at INFO level
logger = dc.setup_logger()
# logger = u.setup_local_logging(logger)
logger.setLevel(logging.INFO)

logger.info("Good morning, let's get to work")

In [None]:
# Export the source of the selected code directories via u.export_code.
# Uncomment entries below to include more directories, the configs, or a notebook.
u.export_code(
    code_directories=[
        # 'training_data',
        'wallet_features',
        # 'wallet_modeling',
        # 'wallet_insights'
    ],
    # include_config = True,
    # ipynb_notebook = 'DDA-456 wallet validation performance.ipynb'
)

# presumably reports the memory used by objects in the session — confirm against utils.obj_mem
u.obj_mem()

# Wallet Model Construction

## Training Data Sequence

### retrieve training datasets

In [None]:
# Refresh local modules and configs so on-disk edits are picked up
list(map(importlib.reload, modules))
wallets_config.reload()
wallets_metrics_config = u.load_config('../config/wallets_metrics_config.yaml')
wallets_features_config = yaml.safe_load(
    Path('../config/wallets_features_config.yaml').read_text(encoding='utf-8')
)

# Complete Pre-Training Profits/Market Data
# -----------------------------------------
# Retrieve the training period datasets and save them to temp/wallet_modeling_dfs.
# The three returned objects are discarded; later cells read the parquet files instead.
_, _, _ = wmo.retrieve_period_datasets(
    wallets_config['training_data']['training_period_start'],
    wallets_config['training_data']['training_period_end'],
    parquet_prefix='training',
)


### define cohort and clean training datasets (loadable parquet)

In [None]:
# Refresh local modules and configs so on-disk edits are picked up
list(map(importlib.reload, modules))
wallets_config.reload()
wallets_metrics_config = u.load_config('../config/wallets_metrics_config.yaml')
wallets_features_config = yaml.safe_load(
    Path('../config/wallets_features_config.yaml').read_text(encoding='utf-8')
)


# Add Indicators to Market Data
# ----------------------------------------------------------
# Read the market data parquet that includes pre-training history
training_market_data_df_full = pd.read_parquet(
    "temp/wallet_modeling_dfs/training_market_data_df_full.parquet"
)

# Generate indicators and save file (the returned df is not needed in this cell)
_ = wmo.generate_training_indicators_df(training_market_data_df_full, wallets_metrics_config)


# Identify Wallet Cohort
# ----------------------------------------------------------
# Keep only market data on or after the starting balance date
training_market_data_df = training_market_data_df_full.loc[
    training_market_data_df_full['date']
    >= wallets_config['training_data']['training_starting_balance_date']
]
u.assert_period(
    training_market_data_df,
    wallets_config['training_data']['training_period_start'],
    wallets_config['training_data']['training_period_end'],
)
del training_market_data_df_full
gc.collect()

# Read the full profits history
training_profits_df_full = pd.read_parquet(
    "temp/wallet_modeling_dfs/training_profits_df_full.parquet"
)

# Define the wallet cohort and get the cohort-filtered training_profits_df
training_profits_df, training_wallet_cohort = wmo.define_training_wallet_cohort(
    training_profits_df_full,
    training_market_data_df,
)
u.assert_period(
    training_profits_df,
    wallets_config['training_data']['training_period_start'],
    wallets_config['training_data']['training_period_end'],
)
del training_profits_df_full
gc.collect()


# Generate Cohort-Filtered Profits Data for Training Windows
# ----------------------------------------------------------
# Split the cohort-filtered profits into one df per training window (kept in memory)
training_windows_profits_dfs = wmo.split_training_window_profits_dfs(
    training_profits_df,
    training_market_data_df,
    training_wallet_cohort,
)
training_profits_df.to_parquet(
    "temp/wallet_modeling_dfs/training_profits_df.parquet",
    index=True,
)
del training_profits_df, training_market_data_df
gc.collect()


# Retrieve Transfers Data
# ----------------------------------------------------------
# Transfers data retrieval for the wallet_ids in temp.wallet_modeling_training_cohort
training_transfers_sequencing_df = wts.retrieve_transfers_sequencing()
training_transfers_sequencing_df.to_parquet(
    "temp/wallet_modeling_dfs/training_transfers_sequencing_df.parquet",
    index=True,
)
del training_transfers_sequencing_df
gc.collect()


### generate training features (loadable parquet)

In [None]:
# Refresh local modules and the wallets config
list(map(importlib.reload, modules))
wallets_config.reload()


# Load the parquet files written by the cohort-definition cell
training_profits_df = pd.read_parquet(
    "temp/wallet_modeling_dfs/training_profits_df.parquet"
)
training_market_indicators_data_df = pd.read_parquet(
    "temp/wallet_modeling_dfs/training_market_indicators_data_df.parquet"
)
training_transfers_sequencing_df = pd.read_parquet(
    "temp/wallet_modeling_dfs/training_transfers_sequencing_df.parquet"
)
# The cohort is every distinct wallet_address present in the profits data
training_wallet_cohort = list(set(training_profits_df['wallet_address']))


# Generate Features for the Full Training Period
# ----------------------------------------------------------
logger.info("Generating features for full training period...")
training_wallet_features_df = wfo.calculate_wallet_features(
    training_profits_df,
    training_market_indicators_data_df,
    training_transfers_sequencing_df,
    training_wallet_cohort,
    wallets_config['training_data']['training_period_start'],
    wallets_config['training_data']['training_period_end'],
)

# Start training_data_df from the full-period features, tagging each column with its window
training_data_df = training_wallet_features_df.add_suffix("|all_windows")

# del training_profits_df,training_wallet_features_df
gc.collect()

In [None]:
# Refresh local modules and the wallets config
[importlib.reload(module) for module in modules]
wallets_config.reload()


# Generate Features for Each Individual Window
# ----------------------------------------------------------
# NOTE(review): training_windows_profits_dfs is only assigned in the cohort-definition cell and is
# not written to parquet in this notebook, so that cell must have run in the same kernel session.
# Generate features for each window
for i, window_profits_df in enumerate(training_windows_profits_dfs, 1):
    logger.info("Generating features for window %s...", i)

    # Extract the window_df boundary dates that were validated by split_training_window_profits_dfs()
    # The earliest date is treated as the opening balance row, so the window starts one day later
    window_opening_balance_date = window_profits_df['date'].min()
    window_start_date = window_opening_balance_date + timedelta(days=1)
    window_end_date = window_profits_df['date'].max()

    # Generate the features
    window_wallet_features_df = wfo.calculate_wallet_features(
        window_profits_df,  # profits_df is filtered to the window
        training_market_indicators_data_df,training_transfers_sequencing_df,  # full training period dfs
        training_wallet_cohort,  # full training cohort
        window_start_date.strftime('%Y-%m-%d'), window_end_date.strftime('%Y-%m-%d')  # window-specific dates
    )

    # Check for NaN values and identify problematic columns
    nan_columns = window_wallet_features_df.columns[window_wallet_features_df.isna().any()].tolist()
    if nan_columns:
        raise ValueError(f"NaN values detected in window {i} in columns: {nan_columns}")

    # Add column suffix and join to training_data_df
    window_wallet_features_df = window_wallet_features_df.add_suffix(f'|w{i}')
    training_data_df = training_data_df.join(window_wallet_features_df, how='left')

    # Re-check after the left join: a wallet missing from the window features would surface as NaN here
    nan_columns = training_data_df.columns[training_data_df.isna().any()].tolist()
    if nan_columns:
        raise ValueError(f"NaN values detected in training_data_df after window {i} in columns: {nan_columns}")


# Free the loop variables and the large input dfs.
# NOTE(review): after this del the cell cannot be re-run without re-running the load cell above, and
# the del itself raises NameError if training_windows_profits_dfs is empty (loop variables never bound).
del window_profits_df,window_wallet_features_df,training_market_indicators_data_df,training_transfers_sequencing_df
gc.collect()

u.obj_mem()

In [None]:
# Generate Clusters Using All Other Features
# ----------------------------------------------------------
# Build cluster features from the base training data and namespace them with a 'cluster|' prefix
training_cluster_features_df = (
    wcl.create_basic_cluster_features(training_data_df)
    .add_prefix('cluster|')
)
training_data_df = training_data_df.join(training_cluster_features_df, how='inner')


# Save TRAINING_DATA_DF
# ----------------------------------------------------------
# Every wallet in the input cohort must still be present in the output index
missing_wallets = set(training_wallet_cohort).difference(training_data_df.index)
if missing_wallets:
    raise ValueError(
        f"Lost {len(missing_wallets)} wallets from original cohort during feature generation. "
        f"First few missing: {list(missing_wallets)[:5]}"
    )
logger.info("Feature generation complete. Final training_df shape: %s", training_data_df.shape)


# Persist to parquet, then release the memory
training_data_df.to_parquet(
    "temp/wallet_modeling_dfs/training_data_df.parquet",
    index=True,
)
del training_data_df, training_cluster_features_df
gc.collect()
u.obj_mem()

## Wallet Modeling Data

### Retrieve modeling datasets

In [None]:
# Refresh local modules and configs so on-disk edits are picked up
list(map(importlib.reload, modules))
wallets_config.reload()
wallets_metrics_config = u.load_config('../config/wallets_metrics_config.yaml')
wallets_features_config = yaml.safe_load(
    Path('../config/wallets_features_config.yaml').read_text(encoding='utf-8')
)


# Retrieve Modeling Profits and Market Data
# ----------------------------------------------------------
# Distinct coin_ids from the training indicators file; used to restrict modeling data to training coins
training_coin_cohort = (
    pd.read_parquet(
        "temp/wallet_modeling_dfs/training_market_indicators_data_df.parquet",
        columns=['coin_id'],
    )['coin_id']
    .unique()
)
# Retrieve the datasets for the modeling period, limited to the training coin cohort
(modeling_profits_df_full,
 modeling_market_data_df_full,
 modeling_coin_cohort) = wmo.retrieve_period_datasets(
    wallets_config['training_data']['modeling_period_start'],
    wallets_config['training_data']['modeling_period_end'],
    coin_cohort=training_coin_cohort,
)

# Drop prices from before the modeling starting balance date
modeling_market_data_df = modeling_market_data_df_full.loc[
    modeling_market_data_df_full['date']
    >= wallets_config['training_data']['modeling_starting_balance_date']
]
del modeling_market_data_df_full, training_coin_cohort
gc.collect()


# Keep only profits rows for wallets in the saved training data index
training_wallet_cohort = pd.read_parquet(
    "temp/wallet_modeling_dfs/training_data_df.parquet",
    columns=[],
).index.values
modeling_profits_df = modeling_profits_df_full.loc[
    modeling_profits_df_full['wallet_address'].isin(training_wallet_cohort)
]
del modeling_profits_df_full
gc.collect()


# Assert period, save files, remove from memory
for _period_df in (modeling_market_data_df, modeling_profits_df):
    u.assert_period(
        _period_df,
        wallets_config['training_data']['modeling_period_start'],
        wallets_config['training_data']['modeling_period_end'],
    )
del _period_df
modeling_profits_df.to_parquet(
    "temp/wallet_modeling_dfs/modeling_profits_df.parquet",
    index=False,
)
modeling_market_data_df.to_parquet(
    "temp/wallet_modeling_dfs/modeling_market_data_df.parquet",
    index=False,
)
del modeling_profits_df, modeling_market_data_df
gc.collect()

## Wallet Cohort and Target Variable

### define modeling cohort and features (loadable parquet)

In [None]:
# Refresh local modules and the wallets config
list(map(importlib.reload, modules))
wallets_config.reload()


# Create training_cohort-Indexed modeling_wallet_features_df
# -----------------------------------------------------------
# Read only the index of the saved training data to recover the training wallet cohort
training_wallet_cohort = pd.read_parquet(
    "temp/wallet_modeling_dfs/training_data_df.parquet",
    columns=[],
).index.values
modeling_wallet_features_df = pd.DataFrame(index=training_wallet_cohort)
modeling_wallet_features_df.index.name = 'wallet_address'

# Store feature sets with their prefixes for bulk renaming
feature_column_names = {}


# Identify Modeling Period Cohort
# -----------------------------------------------------------
# Trading features for the modeling period; wallets without a row get 0 in every trading column
modeling_profits_df = pd.read_parquet("temp/wallet_modeling_dfs/modeling_profits_df.parquet")
modeling_trading_features_df = wmo.identify_modeling_cohort(modeling_profits_df)
modeling_wallet_features_df = (
    modeling_wallet_features_df
    .join(modeling_trading_features_df, how='left')
    .fillna(dict.fromkeys(modeling_trading_features_df.columns, 0))
)


# Generate Modeling Period Performance Features
# -----------------------------------------------------------
# Calculate performance metrics for the training cohort (wallets with 0 activity still impact rank orders)
modeling_performance_features_df = wpf.calculate_performance_features(modeling_wallet_features_df)
modeling_wallet_features_df = (
    modeling_wallet_features_df
    .join(modeling_performance_features_df, how='left')
    .fillna(dict.fromkeys(modeling_performance_features_df.columns, 0))
)

### select target variable and build model

In [None]:
# Refresh local modules and the wallets config
list(map(importlib.reload, modules))
wallets_config.reload()

# Create MODELING_DF and Construct Wallet Model
# ----------------------------------------------------------
# Load the training features for the full training wallet cohort
training_data_df = pd.read_parquet("temp/wallet_modeling_dfs/training_data_df.parquet")

# Select the modeling cohort flag and the configured target variable column
modeling_cohort_target_var_df = modeling_wallet_features_df[[
    'in_modeling_cohort',
    wallets_config['modeling']['target_variable'],
]]

# Run the experiment and get results
wallet_model = wm.WalletModel(wallets_config)
model_results = wallet_model.run_experiment(
    training_data_df,
    modeling_cohort_target_var_df,
)
# del training_data_df
# gc.collect()

# Pull the fitted regressor step out of the results pipeline
model = model_results['pipeline'].named_steps['regressor']

# Generate and save all model artifacts
model_id, evaluator, wallet_scores_df = wmr.generate_and_save_model_artifacts(
    base_path='../wallet_modeling',
    model_results=model_results,
)
u.notify()

## Model Analysis

### assess wallet model performance

In [None]:
# Refresh local modules and the wallets config
list(map(importlib.reload, modules))
wallets_config.reload()

# Rebuild the evaluator from the stored model results (picks up any reloaded evaluator code)
evaluator = wime.RegressionEvaluator(
    model=model,
    feature_names=model_results['X_train'].columns.tolist(),
    y_train=model_results['y_train'],
    y_true=model_results['y_test'],
    y_pred=model_results['y_pred'],
    training_cohort_pred=model_results['training_cohort_pred'],
    training_cohort_actuals=model_results['training_cohort_actuals'],
)

# Text summary, evaluation plots, then the importance summary as the cell output
print(evaluator.summary_report())
evaluator.plot_evaluation()
evaluator.importance_summary()

### Cluster analysis

In [None]:
# Refresh local modules and the wallets config
[importlib.reload(module) for module in modules]
wallets_config.reload()


# List of the x features with the highest importance in the model
x_features = 8
top_feature_metrics = list((pd.DataFrame(evaluator.metrics['importances'])
                      .sort_values(by='importance',ascending=False)
                      .head(x_features)['feature']))
# De-duplicate while keeping the importance ranking order. list(set(...)) returned the features
# in arbitrary order, which could differ between kernel sessions and scrambled the ranking.
all_metrics = list(dict.fromkeys(top_feature_metrics))

# Cluster numbers
n_clusters=4


# Build the styled cluster report and display it as the cell output
styled_df = wime.create_cluster_report(training_data_df, model_results, n_clusters, all_metrics)
styled_df

# Validation Period Analysis

### Retrieve validation datasets

In [None]:
# Refresh local modules and the wallets config
list(map(importlib.reload, modules))
wallets_config.reload()


# Retrieve Validation Profits and Market Data
# ----------------------------------------------------------
# Retrieve the datasets for the validation period (no coin cohort restriction is passed)
(validation_profits_df,
 validation_market_data_df_full,
 validation_coin_cohort) = wmo.retrieve_period_datasets(
    wallets_config['training_data']['validation_period_start'],
    wallets_config['training_data']['validation_period_end'],
)

# Drop prices from before the validation starting balance date
validation_market_data_df = validation_market_data_df_full.loc[
    validation_market_data_df_full['date']
    >= wallets_config['training_data']['validation_starting_balance_date']
]
del validation_market_data_df_full
gc.collect()


# Assert period, save files, remove from memory
for _period_df in (validation_market_data_df, validation_profits_df):
    u.assert_period(
        _period_df,
        wallets_config['training_data']['validation_period_start'],
        wallets_config['training_data']['validation_period_end'],
    )
del _period_df
validation_profits_df.to_parquet(
    "temp/wallet_modeling_dfs/validation_profits_df.parquet",
    index=False,
)
validation_market_data_df.to_parquet(
    "temp/wallet_modeling_dfs/validation_market_data_df.parquet",
    index=False,
)
del validation_profits_df, validation_market_data_df
gc.collect()

### generate wallet_validation_features_df

In [None]:
# Refresh local modules and the wallets config
[importlib.reload(module) for module in modules]
wallets_config.reload()

# Load the validation period parquet files written by the "Retrieve validation datasets" cell.
# This previously read modeling_profits_df.parquet into validation_profits_df, which paired
# modeling period profits with validation period market data.
validation_profits_df = pd.read_parquet("temp/wallet_modeling_dfs/validation_profits_df.parquet")
validation_market_data_df = pd.read_parquet("temp/wallet_modeling_dfs/validation_market_data_df.parquet")


# Create a DataFrame with all wallets that should exist
# (training_wallet_cohort must already be in memory from an earlier cell)
validation_wallet_features_df = pd.DataFrame(index=training_wallet_cohort)
validation_wallet_features_df.index.name = 'wallet_address'


# Calculate validation period wallet metrics; wallets without a row get 0 in every trading column
trading_features_df = wtf.calculate_wallet_trading_features(validation_profits_df)
validation_wallet_features_df = validation_wallet_features_df.join(trading_features_df, how='left')\
    .fillna({col: 0 for col in trading_features_df.columns})

# Performance features (inner join, no fill)
performance_features_df = (wpf.calculate_performance_features(validation_wallet_features_df)
                                .drop(['max_investment', 'crypto_net_gain'], axis=1))  # already exist as trading features
validation_wallet_features_df = validation_wallet_features_df.join(performance_features_df, how='inner')
validation_wallet_features_df.describe()

### wallet validation period trading/performance by score quantile

In [None]:
# Refresh local modules and the wallets config
list(map(importlib.reload, modules))
wallets_config.reload()

# Validation period metrics to summarize for each prediction score band
metrics = [
    'return', 'realized_return', 'return_unwinsorized',
    'max_investment', 'crypto_net_gain', 'net_crypto_investment',
    'total_volume',
]

# Report settings: number of score quantiles and the minimum wallet volume threshold (USD)
num_quantiles = 5
min_wallet_volume_usd = 1000

# The report is the cell output
wiwv.create_quantile_report(
    validation_wallet_features_df,
    model_results['y_pred'],
    metrics,
    num_quantiles,
    min_wallet_volume_usd,
)


In [None]:
# Count missing values per column.
# NOTE(review): profits_df is not assigned in any visible cell of this notebook, so this line
# presumably relies on a variable left over in the kernel — confirm which dataframe is intended.
profits_df.isna().sum()

### coin-aggregated wallet metrics by coin performance

In [None]:
# Refresh local modules and the wallets config
list(map(importlib.reload, modules))
wallets_config.reload()

# Generate coin-level features about wallet behavior from the validation profits,
# the wallet scores, and the validation market data
coin_wallet_features_df = wicv.calculate_coin_metrics_from_wallet_scores(
    validation_profits_df,
    wallet_scores_df,
    validation_market_data_df,
)

# Keep coins whose market_cap_filled lies within the configured bounds (inclusive on both ends)
analyze_df = coin_wallet_features_df[
    coin_wallet_features_df['market_cap_filled'].between(
        wallets_config['coin_validation_analysis']['min_market_cap'],
        wallets_config['coin_validation_analysis']['max_market_cap'],
    )
].copy()

# Create styled performance analysis (displayed as the cell output)
wicv.create_top_coins_wallet_metrics_report(analyze_df, percentile=90, method='median')


## Basic coin model testing

In [None]:
# Refresh local modules and the wallets config
[importlib.reload(module) for module in modules]
wallets_config.reload()


# 1. Generate model scores (using existing wallet model results)
# This rebinds wallet_scores_df, replacing the one returned by generate_and_save_model_artifacts
wallet_scores_df = pd.DataFrame({
    'score': model_results['y_pred']
}, index=model_results['X_test'].index)

# 2. Prepare the modeling dataset using modeling period data
# NOTE(review): coin_validation_df is not assigned in any visible cell, and modeling_market_data_df
# is deleted at the end of the "Retrieve modeling datasets" cell — presumably both must be
# reloaded or rebuilt before this cell can run; confirm.
coin_modeling_df = wicm.prepare_features_and_targets(
    coin_validation_df=coin_validation_df,
    modeling_profits_df=modeling_profits_df,
    modeling_market_data_df=modeling_market_data_df,
    wallet_scores_df=wallet_scores_df
)

# 3. Train model and get evaluation
# This rebinds the notebook-level `model` and `evaluator` that earlier held the wallet model objects
model, evaluator = wicm.train_coin_prediction_model(coin_modeling_df)

# 4. View results
print(evaluator.summary_report())
evaluator.plot_evaluation()

# 5. Optional: Generate feature importance summary
evaluator.importance_summary()

# 6. Optional: Analyze predictions by market cap segment
predictions_df = pd.DataFrame({
    'y_true': evaluator.y_true,
    'y_pred': evaluator.y_pred,
    'market_cap': coin_modeling_df['market_cap_filled']
})

segment_results, summary_df = wicv.analyze_market_cap_segments(predictions_df)
wicv.plot_segment_heatmap(summary_df)

In [None]:
# Create a DataFrame with all wallets that should exist
# NOTE(review): wallet_cohort is not assigned in any visible cell; other cells use
# training_wallet_cohort — confirm which name is intended.
validation_wallet_features_df = pd.DataFrame(index=wallet_cohort)
validation_wallet_features_df.index.name = 'wallet_address'


# Calculate wallet trading metrics from validation_profits_df.
# validation_profits_df is reassigned from its own value here, so re-running this cell applies
# add_cash_flow_transfers_logic to its previous output.
validation_profits_df = wtf.add_cash_flow_transfers_logic(validation_profits_df)
trading_features_df = wtf.calculate_wallet_trading_features(validation_profits_df)
validation_wallet_features_df = validation_wallet_features_df.join(trading_features_df, how='left')\
    .fillna({col: 0 for col in trading_features_df.columns})

# Performance features (inner join, no fill)
performance_features_df = (wpf.calculate_performance_features(validation_wallet_features_df)
                                .drop(['max_investment', 'crypto_net_gain'], axis=1))  # already exist as trading features
validation_wallet_features_df = validation_wallet_features_df.join(performance_features_df, how='inner')
validation_wallet_features_df.describe()

In [None]:
# Create coin_modeling_df from a copy of the coin features without the raw market_cap column
coin_modeling_df = coin_wallet_features_df.copy().drop(columns='market_cap')

# Preserve the raw return, then winsorize coin_return with a 0.05 cutoff
coin_modeling_df['coin_return_unwinsorized'] = coin_modeling_df['coin_return']
coin_modeling_df['coin_return'] = u.winsorize(coin_modeling_df['coin_return'], 0.05)

# Keep coins whose market_cap_filled lies within the configured bounds (inclusive on both ends)
coin_modeling_df = coin_modeling_df[
    coin_modeling_df['market_cap_filled'].between(
        wallets_config['coin_validation_analysis']['min_market_cap'],
        wallets_config['coin_validation_analysis']['max_market_cap'],
    )
].copy()


In [None]:
# Display coin_modeling_df for inspection
coin_modeling_df

In [None]:
# Work on a copy so coin_modeling_df itself is left untouched
df = coin_modeling_df.copy()

# 1. Feature prep and a 70/30 train/test split with a fixed seed
X, y = wicm.prepare_features(df)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42,
)

# 2. Small gradient boosted regressor, trained on the training split
model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
model.fit(X_train, y_train)

# 3. Predict on the held-out split
y_pred = model.predict(X_test)

# 4. Evaluate
# NOTE(review): feature_names is derived from df's columns rather than from X; this presumably
# matches the columns wicm.prepare_features keeps — confirm.
feature_names = df.columns.drop(['coin_return', 'market_cap_filled']).tolist()
evaluator = wime.RegressionEvaluator(
    y_train, y_test, y_pred,
    model=model,
    feature_names=feature_names,
)

# 5. Text summary
print(evaluator.summary_report())

# 6. Evaluation plots
evaluator.plot_evaluation()

## experiments beta

In [None]:
# Create modeling dataset using existing pipeline
modeling_wallets_df = wmo.filter_modeling_period_wallets(modeling_profits_df)
target_vars_df = wpf.calculate_performance_features(modeling_wallets_df)


In [None]:
# Run a sequence of wallet model experiments across candidate target variables
[importlib.reload(module) for module in modules]
wallets_config.reload()

# 1. Initialize dependencies
# NOTE(review): this rebinds metrics_config, which the first cell loaded from u.load_all_configs,
# and the new dict is not passed to anything in this cell — confirm whether it is still needed.
metrics_config = {
    'rmse': lambda y_true, y_pred: np.sqrt(mean_squared_error(y_true, y_pred)),
    'r2': r2_score
}

# 2. Define experiment sequence
# One variation per listed target_variable under the 'modeling' config section
sequence_config = {
    'run_baseline': True,
    'parameter_variations': {
        'modeling': {
            'target_variable': [
                'max_investment',
                'crypto_net_gain',
                'return',
                'realized_return',
                'return_unwinsorized',
                'performance_score',
                'size_adjusted_rank'
            ]
        }
    }
}

# 3. Create experiment manager
# training_data_df must already be in memory (it is loaded in the "build model" cell)
exp_manager = wem.ExperimentsManager(
    config=wallets_config.config,
    training_data_df=training_data_df,
)

# 4. Run experiments and get results
results_df = exp_manager.run_experiment_sequence(modeling_profits_df, sequence_config)

# 5. View results
print(results_df)

In [None]:
# Display the experiment results with the notebook's rich DataFrame rendering
results_df

### Validation period assessments

In [None]:
# Refresh local modules and the wallets config
[importlib.reload(module) for module in modules]
wallets_config.reload()

# Compute validation metrics from the test set predictions and the validation profits,
# requesting 10 buckets with the 'ntiles' method
wallet_performance_df, bucketed_performance_df = wicv.calculate_validation_metrics(
    X_test=model_results['X_test'],
    y_pred=model_results['y_pred'],
    validation_profits_df=validation_profits_df,
    n_buckets=10,
    method='ntiles'
)

# Display the bucketed results as the cell output
bucketed_performance_df

## coin performance analysis

### compare wallet metrics for the top n% of coins vs the others

In [None]:
# Refresh local modules and the wallets config
list(map(importlib.reload, modules))
wallets_config.reload()

# Keep coins whose market_cap_filled lies within the configured bounds (inclusive on both ends)
analyze_df = coin_wallet_features_df[
    coin_wallet_features_df['market_cap_filled'].between(
        wallets_config['coin_validation_analysis']['min_market_cap'],
        wallets_config['coin_validation_analysis']['max_market_cap'],
    )
].copy()

# Create styled performance analysis
styled_df = wicv.create_top_coins_wallet_metrics_report(
    analyze_df,
    percentile=90,
    method='median',
)

# Display results
styled_df

### plotting coin feature performance vs market cap

In [None]:
# Refresh local modules and the wallets config
[importlib.reload(module) for module in modules]
wallets_config.reload()


# Get the analysis results
# (this rebinds segment_results and summary_df if the coin model testing cell ran earlier)
segment_results, summary_df = wicv.analyze_market_cap_segments(
    coin_wallet_features_df,
    top_n=10
)

# Plot the segment summary
wicv.plot_segment_heatmap(summary_df)
wicv.plot_metric_consistency(summary_df)  # Optional secondary visualization


### coin performance of top n for each bucket

In [None]:
# Refresh local modules and the wallets config
[importlib.reload(module) for module in modules]
wallets_config.reload()

# Run analysis using the top_n and market cap bounds from the coin_validation_analysis config section
top_n = wallets_config['coin_validation_analysis']['top_n']
max_market_cap = wallets_config['coin_validation_analysis']['max_market_cap']
min_market_cap = wallets_config['coin_validation_analysis']['min_market_cap']

# Note the positional order: max_market_cap is passed before min_market_cap
metric_top_coin_performance_df = wicv.validate_coin_performance(coin_wallet_features_df,top_n,
                                                                max_market_cap, min_market_cap)

# Display the result as the cell output
metric_top_coin_performance_df

### compare performance of high vs low score coins

In [None]:
# Display coin_wallet_features_df for inspection
coin_wallet_features_df

In [None]:
# Refresh local modules and the wallets config
[importlib.reload(module) for module in modules]
wallets_config.reload()

# Run the performance analysis on the coin-level wallet features
wicv.print_performance_analysis(coin_wallet_features_df)

# Junkyard

# Tests failing

In [None]:
# Refresh local modules and the wallets config (this scratch cell does nothing else)
[importlib.reload(module) for module in modules]
wallets_config.reload()