### start

In [2]:
# pyright: reportMissingImports=false
# pyright: reportMissingModuleSource=false

import uuid
import random
import hashlib
import os
import sys
import gc
import time
import copy
import logging
import re
from itertools import chain,combinations
import pdb
import math
from pathlib import Path
import datetime
from datetime import datetime,timedelta
import json
import warnings
import yaml
from typing import Dict,Union,List,Any,Tuple
import pytest
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import IPython
import requests
import warnings
import boto3
import matplotlib as plt
import sklearn

# load_dotenv(Path("../../../Local/.env"))

# Format floats in pandas output with up to 12 significant digits (general format)
pd.set_option('display.float_format', lambda x: f'{x:.12g}')
# pd.reset_option('display.float_format')

# Suppress known-noisy debugger / runtime warnings
os.environ['PYDEVD_DISABLE_FILE_VALIDATION'] = '1'
warnings.filterwarnings('ignore', message='.*frozen modules.*')
warnings.filterwarnings("ignore", message="MallocStackLogging")

# Environment variables set before the local modules are imported further down
# (presumably read by utils for logging / notification sounds -- confirm).
os.environ.update({
    'PYGAME_HIDE_SUPPORT_PROMPT': "hide",  # silence pygame donation request
    'LOGGING_FILE': "../../../Local/logs/wallet_modeling.log",
    'NOTIFICATION_SOUNDS_DIR': "../../../Local",
})

# Dark mode charts.
# NOTE: `plt` is bound to the top-level matplotlib package by this notebook's
# imports (not matplotlib.pyplot); rcParams is available on either.
plt.rcParams.update({
    'figure.facecolor': '#181818',  # Custom background color (dark gray in this case)
    'axes.facecolor': '#181818',
    'text.color': '#afc6ba',
    'axes.labelcolor': '#afc6ba',
    'xtick.color': '#afc6ba',
    'ytick.color': '#afc6ba',
    'axes.titlecolor': '#afc6ba',
    'text.usetex': False,
    'mathtext.default': 'regular',
})

# import local modules
# pyright: reportMissingImports=false
# Make the project's src directory importable. The membership check keeps the
# cell idempotent: re-running it no longer appends a duplicate sys.path entry.
if '..//src' not in sys.path:
    sys.path.append('..//src')

import sagemaker_wallets.wallet_modeler as wm
from sagemaker_wallets.wallet_preprocessor import SageWalletsPreprocessor
import sagemaker_wallets.workflow_orchestrator as wo
import sage_utils.config_validation as ucv
import utils as u
from utils import ConfigError

# Modules passed to importlib.reload by the reload lines throughout the notebook.
# NOTE(review): wallet_preprocessor is not in this list and SageWalletsPreprocessor
# is bound via from-import, so edits to that module are presumably only picked up
# after a kernel restart -- confirm.
modules = [
    wm, wo,
    u, ucv
]

# import utils as u
# Custom error handler (currently disabled): the interactive shell handle is
# fetched here, but the hook registration on the next line is commented out.
ipython = IPython.get_ipython()
# ipython.set_custom_exc((Exception,), u.notify_on_failure)

# Create the utils ambient player and immediately stop all players
# (presumably silences audio left running by an earlier run -- confirm).
player = u.AmbientPlayer()
player.stop_all_players()

# configure logger: built by utils from the given log path, then set to INFO
logger = u.setup_notebook_logger('../logs/notebook_logs.log')
logger.setLevel(logging.INFO)

# Override the highlight style used by IPython's verbose traceback formatter.
# NOTE(review): _tb_highlight is a private attribute and may change between
# IPython versions.
from IPython.core import ultratb
ultratb.VerboseTB._tb_highlight = "bg:#b45827"


# Load both configs through the sage_utils config_validation loaders
sage_wallets_config = ucv.load_sage_wallets_config(
    Path('../config/sage_wallets_config.yaml')
)
sage_wallets_modeling_config = ucv.load_sage_wallets_modeling_config(
    Path('../config/sage_wallets_modeling_config.yaml')
)


# Export the sagemaker_wallets code to a single file under temp/
u.export_code(
    code_directories=['sagemaker_wallets'],  # 'sage_utils' is currently left out
    # include_config = True,
    # ipynb_notebook = 'DDA-769 coin model score dist toggle.ipynb'
    output_file="temp/sagemaker_code.py",
)

# Re-import the local modules so recent source edits take effect
for module in modules:
    importlib.reload(module)
u.notify('retro')

logger.milestone("Good morning, let's get to work")

[0m[17/Jul/25 12:37:39] INFO [utils.export_code:1659] Consolidation complete. All files are saved in temp/sagemaker_code.py[0m
[92m[17/Jul/25 12:37:39] MILESTONE [4259847109.<module>:114] Good morning, let's get to work[0m


# Code begins

### Initiate orchestrator

In [3]:
# Re-import the local modules so recent source edits take effect
for module in modules:
    importlib.reload(module)

# Re-read both configs from disk as plain parsed YAML.
# NOTE(review): the setup cell loads these through the ucv loaders instead;
# confirm that bypassing them here is intended.
sage_wallets_config = yaml.safe_load(
    Path('../config/sage_wallets_config.yaml').read_text(encoding='utf-8')
)
sage_wallets_modeling_config = yaml.safe_load(
    Path('../config/sage_wallets_modeling_config.yaml').read_text(encoding='utf-8')
)

# Date suffixes identifying the training datasets used throughout the notebook
date_suffixes = ['231107', '240306']
workflow_orch = wo.WalletWorkflowOrchestrator(sage_wallets_config)

### Load, preprocess, and upload data

#### load data

In [None]:
# Re-import the local modules so recent source edits take effect
for module in modules:
    importlib.reload(module)

# Re-read both configs from disk as plain parsed YAML
sage_wallets_config = yaml.safe_load(
    Path('../config/sage_wallets_config.yaml').read_text(encoding='utf-8')
)
sage_wallets_modeling_config = yaml.safe_load(
    Path('../config/sage_wallets_modeling_config.yaml').read_text(encoding='utf-8')
)

# Load training data for every date suffix; later cells read the result from
# workflow_orch.training_data
workflow_orch.load_training_data(date_suffixes)

##### inspect features

In [None]:
# # Create combined NaN count and describe statistics
# nan_counts = workflow_orch.training_data['x_train'].isna().sum()
# describe_stats = workflow_orch.training_data['x_train'].describe().T

# # Combine into single DataFrame
# combined_stats = pd.concat([
#     nan_counts.rename('nan_count'),
#     describe_stats
# ], axis=1).sort_index()

# u.display_full(combined_stats.sort_index())

#### preprocess data

In [None]:
# Build a preprocessor from the wallets config and run it over the training data
# held on the orchestrator (populated by the "load data" cell above).
preprocessor = SageWalletsPreprocessor(sage_wallets_config)
preprocessed_data = preprocessor.preprocess_training_data(workflow_orch.training_data)

#### upload data

In [None]:
# Upload and retrieve URIs
# overwrite_existing=True is passed explicitly (presumably replaces any files
# already uploaded for these dates -- confirm against the orchestrator).
workflow_orch.upload_training_data(preprocessed_data, overwrite_existing=True)
# Look up the URIs for each date suffix; the training cells pass s3_uris on to
# the modeler.
s3_uris = workflow_orch.retrieve_training_data_uris(date_suffixes)

### Train Model

#### retrieve file uris

In [4]:
# Entry point for training: redefines the date suffixes and builds a fresh
# orchestrator rather than reusing the ones from the data-preparation cells.
date_suffixes = ['231107', '240306']
workflow_orch = wo.WalletWorkflowOrchestrator(sage_wallets_config)

# Generate URIs for specific dates without any prior setup
s3_uris = workflow_orch.retrieve_training_data_uris(date_suffixes)
# Bare expression so the notebook displays the resulting mapping
s3_uris

[0m[16/Jul/25 22:04:03] INFO [credentials.load:1352] Found credentials in shared credentials file: ~/.aws/credentials[0m


{'231107': {'train': 's3://wallet-training-data/training-data-preprocessed/dda-897/231107/train_cw_crypto_net_gain_crypto_inflows_winsorized.csv',
  'test': 's3://wallet-training-data/training-data-preprocessed/dda-897/231107/test_cw_crypto_net_gain_crypto_inflows_winsorized.csv',
  'eval': 's3://wallet-training-data/training-data-preprocessed/dda-897/231107/eval_cw_crypto_net_gain_crypto_inflows_winsorized.csv',
  'val': 's3://wallet-training-data/training-data-preprocessed/dda-897/231107/val_cw_crypto_net_gain_crypto_inflows_winsorized.csv'},
 '240306': {'train': 's3://wallet-training-data/training-data-preprocessed/dda-897/240306/train_cw_crypto_net_gain_crypto_inflows_winsorized.csv',
  'test': 's3://wallet-training-data/training-data-preprocessed/dda-897/240306/test_cw_crypto_net_gain_crypto_inflows_winsorized.csv',
  'eval': 's3://wallet-training-data/training-data-preprocessed/dda-897/240306/eval_cw_crypto_net_gain_crypto_inflows_winsorized.csv',
  'val': 's3://wallet-training-d

#### train model

In [5]:
# Re-import the local modules so recent source edits take effect
for module in modules:
    importlib.reload(module)

# Re-read both configs from disk as plain parsed YAML
sage_wallets_config = yaml.safe_load(
    Path('../config/sage_wallets_config.yaml').read_text(encoding='utf-8')
)
sage_wallets_modeling_config = yaml.safe_load(
    Path('../config/sage_wallets_modeling_config.yaml').read_text(encoding='utf-8')
)

# Train on the second date suffix in the list
date_suffix = date_suffixes[1]

# Modeler for that date, given the training-data URIs retrieved above
modeler = wm.WalletModeler(sage_wallets_config, sage_wallets_modeling_config, date_suffix, s3_uris)



In [6]:
# Run training through the modeler. In the recorded output below this launched a
# SageMaker XGBoost training job and took several minutes before logging the
# model artifact location.
modeling_results = modeler.train_model()

[0m[16/Jul/25 22:04:09] INFO [wallet_modeler.train_model:113] Starting SageMaker training...[0m
[0m[16/Jul/25 22:04:09] INFO [image_uris._processor:530] Ignoring unnecessary instance type: None.[0m
[0m[16/Jul/25 22:04:09] INFO [wallet_modeler.train_model:141] SageMaker XGBoost container: 246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.7-1[0m
[0m[16/Jul/25 22:04:09] INFO [wallet_modeler.train_model:147] Container version tag: 1.7-1[0m
[0m[16/Jul/25 22:04:09] INFO [wallet_modeler.train_model:151] Requested framework version: 1.7-1[0m
[0m[16/Jul/25 22:04:14] INFO [wallet_modeler.train_model:205] Launching training job: wallet-xgb-dda-897-240306-20250716-220414[0m
[0m[16/Jul/25 22:04:14] INFO [wallet_modeler.train_model:206] Model output path: s3://wallet-training-data/sagemaker-models/dda-897/[0m
[0m[16/Jul/25 22:04:14] INFO [telemetry_logging.wrapper:92] SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose iss

2025-07-17 05:04:15 Starting - Starting the training job...
2025-07-17 05:04:29 Starting - Preparing the instances for training...
2025-07-17 05:04:51 Downloading - Downloading input data...
2025-07-17 05:05:47 Downloading - Downloading the training image......
2025-07-17 05:06:38 Training - Training image download completed. Training in progress...[2025-07-17 05:06:42.940 ip-10-0-182-73.us-west-2.compute.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2025-07-17 05:06:42.963 ip-10-0-182-73.us-west-2.compute.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.
[2025-07-17:05:06:43:INFO] Imported framework sagemaker_xgboost_container.training
[2025-07-17:05:06:43:INFO] Failed to parse hyperparameter objective value reg:linear to Json.
Returning the value itself
[2025-07-17:05:06:43:INFO] No GPUs detected (normal if no gpus installed)
[2025-07-17:05:06:43:INFO] Running XGBoost Sagemaker in algorithm mode
[2025-07-17:05:06:43:INFO] Determined 0 GPU(

[0m[16/Jul/25 22:09:42] INFO [wallet_modeler.train_model:220] Training completed. Model stored at: s3://wallet-training-data/sagemaker-models/dda-897/wallet-xgb-dda-897-240306-20250716-220414/output/model.tar.gz[0m


### Score validation set

#### get existing model uri

In [2]:
# Re-import the local modules so recent source edits take effect
for module in modules:
    importlib.reload(module)

# Re-read both configs from disk as plain parsed YAML
sage_wallets_config = yaml.safe_load(
    Path('../config/sage_wallets_config.yaml').read_text(encoding='utf-8')
)
sage_wallets_modeling_config = yaml.safe_load(
    Path('../config/sage_wallets_modeling_config.yaml').read_text(encoding='utf-8')
)

# Score against the second date suffix in the list
date_suffixes = ['231107', '240306']
date_suffix = date_suffixes[1]

# No training-data URIs are passed here: this modeler is only used to load an
# existing model and drive endpoints.
modeler = wm.WalletModeler(sage_wallets_config, sage_wallets_modeling_config, date_suffix)

# In the recorded output this picked up the most recent trained model
model_metadata = modeler.load_existing_model()

[0m[16/Jul/25 22:24:53] INFO [credentials.load:1352] Found credentials in shared credentials file: ~/.aws/credentials[0m
[0m[16/Jul/25 22:24:53] INFO [wallet_modeler.load_existing_model:298] Loaded most recent model (timestamp: 20250716-220414): s3://wallet-training-data/sagemaker-models/dda-897/wallet-xgb-dda-897-240306-20250716-220414/output/model.tar.gz[0m


#### deploy endpoint for predictions

In [3]:
# Check which endpoints are already active before deploying; the returned list
# is displayed as the cell output.
modeler.list_active_endpoints()

[0m[16/Jul/25 22:24:56] INFO [wallet_modeler.list_active_endpoints:619] Active endpoints: ['xgboost-dda-897-20250716-220942'][0m


['xgboost-dda-897-20250716-220942']

In [11]:
# Deploy an endpoint for predictions. In the recorded run an endpoint was already
# active, the deployment was cancelled by the user, and the existing endpoint's
# name was returned.
modeler.deploy_endpoint()

[0m[16/Jul/25 22:17:06] INFO [wallet_modeler.list_active_endpoints:611] Active endpoints: ['xgboost-dda-897-20250716-220942'][0m
[0m[16/Jul/25 22:17:08] INFO [wallet_modeler.deploy_endpoint:554] Deployment cancelled by user; using existing endpoint.[0m


'xgboost-dda-897-20250716-220942'

#### load validation df and predict

In [None]:
# Read the validation features for the selected date suffix from the local
# upload queue; the subdirectory comes from the training_data config section.
x_val = pd.read_parquet(
    Path(
        "../s3_uploads/wallet_training_data_queue",
        sage_wallets_config['training_data']['local_directory'],
        f"x_val_{date_suffix}.parquet",
    )
)
# Bare expression so the notebook displays (rows, columns)
x_val.shape



(58092, 221)

#### preprocess df

In [None]:
# Preprocess the validation features with a preprocessor built from the wallets
# config. The recorded log shows NaN values being filled for inference.
preprocessor = SageWalletsPreprocessor(sage_wallets_config)
x_val_processed = preprocessor.preprocess_x_df(x_val)

[0m[16/Jul/25 22:24:59] INFO [wallet_preprocessor._handle_missing_values:198] Filled NaN values in 100 columns for inference.[0m


In [9]:
# Score the preprocessed validation features through the deployed endpoint.
# In the recorded run the endpoint was auto-detected, requests were sent in
# chunks, and the predictions were also saved to a CSV under
# temp/endpoint_predictions/.
predictions = modeler.predict_using_endpoint(x_val_processed)
# Bare expression so the notebook displays the returned array
predictions

[0m[16/Jul/25 22:25:35] INFO [wallet_modeler.list_active_endpoints:619] Active endpoints: ['xgboost-dda-897-20250716-220942'][0m
[0m[16/Jul/25 22:25:35] INFO [wallet_modeler.predict_using_endpoint:480] Using detected endpoint: xgboost-dda-897-20250716-220942[0m
[0m[16/Jul/25 22:25:35] INFO [wallet_modeler.predict_using_endpoint:499] Prediction preview: 58092 rows across 20.0 chunks (102.10MB estimated total size)[0m
[0m[16/Jul/25 22:25:37] INFO [wallet_modeler.predict_using_endpoint:506] Beginning endpoint predictions for 20.0 chunks...[0m
  return bound(*args, **kwds)
[0m[16/Jul/25 22:27:14] INFO [wallet_modeler._save_endpoint_predictions:691] Predictions saved to temp/endpoint_predictions/endpoint_predictions_dda_897_td_column_ordering_v1_240306.csv[0m
[0m[16/Jul/25 22:27:14] INFO [wallet_modeler.predict_using_endpoint:530] Endpoint predictions completed successfully.[0m


array([0.12944555, 0.26320183, 0.12195228, ..., 0.41422129, 0.37756491,
       0.0749698 ])

#### delete all endpoints

In [10]:
# Destructive: in the recorded run this deleted every endpoint reported as
# active. Run only once predictions are finished.
modeler.delete_all_endpoints()

[0m[16/Jul/25 22:27:19] INFO [wallet_modeler.list_active_endpoints:619] Active endpoints: ['xgboost-dda-897-20250716-220942'][0m
[0m[16/Jul/25 22:27:19] INFO [wallet_modeler.delete_endpoint:645] Deleting endpoint: xgboost-dda-897-20250716-220942[0m
[0m[16/Jul/25 22:27:19] INFO [wallet_modeler.delete_endpoint:647] Successfully deleted endpoint: xgboost-dda-897-20250716-220942[0m


In [14]:
# Confirm the teardown. Only the last expression's value is displayed, so the
# first call's return value is discarded (its log line still appears) and the
# cell output is the result of list_all_endpoints().
modeler.list_active_endpoints()
modeler.list_all_endpoints()

[0m[16/Jul/25 22:27:40] INFO [wallet_modeler.list_active_endpoints:619] Active endpoints: [][0m


[]