In [None]:
# pyright: reportMissingImports=false
# pyright: reportMissingModuleSource=false

import uuid
import random
import hashlib
import os
import sys
import time
import logging
import datetime
import json
from datetime import datetime, timedelta
import yaml
import pytest
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from sklearn.model_selection import ParameterGrid, ParameterSampler
from scipy.signal import argrelextrema
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
import seaborn as sns
import progressbar


# Load environment variables from a .env file (python-dotenv)
load_dotenv()


# Import the project's local modules.
# pyright: reportMissingImports=false
# Register the src directory on the import path only once: this cell is
# typically re-run many times, and an unconditional append would keep adding
# duplicate entries to sys.path.
_src_dir = '..//src'
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

# Each module is reloaded right after import so that re-running this cell
# picks up edits made to the source files since the kernel first imported them.
import training_data as td
importlib.reload(td)
import feature_engineering as fe
importlib.reload(fe)
import coin_wallet_metrics as cwm
importlib.reload(cwm)
import modeling as m
importlib.reload(m)
import insights as i
importlib.reload(i)
import utils as u
importlib.reload(u)


# Configure the shared logger; start verbose, later cells lower it to INFO
logger = dc.setup_logger()
logger.setLevel(logging.DEBUG)

# Custom format function for displaying numbers: up to 12 significant digits
pd.set_option('display.float_format', lambda x: f'{x:.12g}')
# To restore the pandas default: pd.reset_option('display.float_format')


# Load all configs. Each one is bound to a lowercase working name (which later
# cells rebind when they reload configs) and an UPPERCASE alias of the same
# object. No `global` statement is needed: names assigned at the top level of
# a cell are already module-level globals.
config = u.load_config('../config/config.yaml')
metrics_config = u.load_config('../config/metrics_config.yaml')
modeling_config = u.load_config('../config/modeling_config.yaml')
experiments_config = u.load_config('../config/experiments_config.yaml')
CONFIG = config
METRICS_CONFIG = metrics_config
MODELING_CONFIG = modeling_config
EXPERIMENTS_CONFIG = experiments_config

# Folder handed to the profits rebuild and model training calls in later cells
MODELING_FOLDER = MODELING_CONFIG['modeling']['modeling_folder']
modeling_folder = MODELING_FOLDER

## Overall Sequencing

In [None]:
# Refresh the project modules (same order as before) and re-read the base
# configs so this cell reflects the latest source and YAML edits.
for project_module in (td, cwm, fe, m, i, u):
    importlib.reload(project_module)
config = u.load_config('../config/config.yaml')
metrics_config = u.load_config('../config/metrics_config.yaml')
modeling_config = u.load_config('../config/modeling_config.yaml')
experiments_config = u.load_config('../config/experiments_config.yaml')
logger.setLevel(logging.INFO)


# Date range spanning the start of training through the end of modeling
training_data_config = config['training_data']
start_date = training_data_config['training_period_start']
end_date = training_data_config['modeling_period_end']

# Market data: keep the first frame returned by the coverage split and
# discard the second, then take a price-only copy.
market_data_df = td.retrieve_market_data()
market_data_df, _ = cwm.split_dataframe_by_coverage(
    market_data_df,
    start_date,
    end_date,
    id_column='coin_id',
)
prices_df = market_data_df[['coin_id','date','price']].copy()

# Profits data: pass along the profits_df left by an earlier run of this
# notebook if there is one, otherwise None.
profits_df = globals().get('profits_df')
profits_df = i.rebuild_profits_df_if_necessary(config, modeling_folder, prices_df, profits_df)

# Optionally restrict market data (and the derived prices) to coins that
# appear in profits_df.
if config['data_cleaning']['exclude_coins_without_transfers']:
    coin_has_profits = market_data_df['coin_id'].isin(profits_df['coin_id'])
    market_data_df = market_data_df[coin_has_profits]
    prices_df = market_data_df[['coin_id','date','price']].copy()


In [None]:
# Refresh the project modules and re-read the base configs so this cell
# reflects the latest source and YAML edits.
for project_module in (td, cwm, fe, m, i, u):
    importlib.reload(project_module)
config = u.load_config('../config/config.yaml')
metrics_config = u.load_config('../config/metrics_config.yaml')
modeling_config = u.load_config('../config/modeling_config.yaml')
experiments_config = u.load_config('../config/experiments_config.yaml')
logger.setLevel(logging.INFO)


# Resolve the config folder once, from the freshly loaded base modeling config.
# modeling_config is rebound inside the loop below, so reading the folder from
# it on every iteration would depend on whatever the previous window's
# overrides returned.
config_folder = modeling_config['modeling']['config_folder']

# Per-window pieces, concatenated after the loop
X_train_all, X_test_all = [], []
y_train_all, y_test_all = [], []
returns_test_all = []

time_windows = i.generate_experiment_configurations(config_folder)

for window in time_windows:

    # Prepare the full configuration by applying overrides from the current trial config
    config, metrics_config, modeling_config = i.prepare_configs(config_folder, window)

    # Define window start and end dates
    start_date = config['training_data']['training_period_start']
    end_date = config['training_data']['modeling_period_end']

    # Rebuild market data for this window's date range
    # NOTE(review): retrieve_market_data() takes no arguments, so it may be
    # loop-invariant and hoistable - confirm it does not depend on per-window state.
    market_data_df = td.retrieve_market_data()
    market_data_df, _ = cwm.split_dataframe_by_coverage(market_data_df, start_date, end_date, id_column='coin_id')
    prices_df = market_data_df[['coin_id','date','price']].copy()

    # Rebuild profits_df (relies on profits_df already existing from an earlier cell)
    profits_df = i.rebuild_profits_df_if_necessary(config, modeling_folder, prices_df, profits_df)

    # Build the configured model input data for this window
    X_train_n, X_test_n, y_train_n, y_test_n, returns_test_n = i.build_configured_model_input(
                                        profits_df,
                                        market_data_df,
                                        config,
                                        metrics_config,
                                        modeling_config)

    # Append the current window's data to the lists
    X_train_all.append(X_train_n)
    X_test_all.append(X_test_n)
    y_train_all.append(y_train_n)
    y_test_all.append(y_test_n)
    returns_test_all.append(returns_test_n)

# pd.concat raises an opaque "No objects to concatenate" ValueError on an empty
# list; fail with the same exception type but a message that names the cause.
if not X_train_all:
    raise ValueError(
        f"No time windows were generated from config folder {config_folder!r}; "
        "there is no model input to concatenate."
    )

# Concatenate all the data for each part
X_train = pd.concat(X_train_all, axis=0)
X_test = pd.concat(X_test_all, axis=0)
y_train = pd.concat(y_train_all, axis=0)
y_test = pd.concat(y_test_all, axis=0)
returns_test = pd.concat(returns_test_all, axis=0)

In [None]:
# 3.4 Train on the concatenated training windows with the current modeling
# config; m.train_model returns the model together with its model_id.
model, model_id = m.train_model(X_train, y_train, modeling_folder, modeling_config)



In [None]:
# Refresh the project modules and re-read the base configs so this cell
# reflects the latest source and YAML edits.
for project_module in (td, cwm, fe, m, i, u):
    importlib.reload(project_module)
config = u.load_config('../config/config.yaml')
metrics_config = u.load_config('../config/metrics_config.yaml')
modeling_config = u.load_config('../config/modeling_config.yaml')
experiments_config = u.load_config('../config/experiments_config.yaml')
logger.setLevel(logging.INFO)


# 3.5 Evaluate the trained model on the concatenated test set; the call
# returns the metrics dict, the predictions and the predicted probabilities.
evaluation_outputs = m.evaluate_model(
    model,
    X_test,
    y_test,
    model_id,
    returns_test,
    modeling_config,
)
metrics_dict, y_pred, y_pred_prob = evaluation_outputs


# Last expression in the cell, so the notebook displays the metrics
metrics_dict

In [None]:
# Display how many entries m.evaluate_model returned in y_pred_prob
len(y_pred_prob)

In [None]:
# Compute the running profitability score from the predictions and test-set
# returns, using the winsorization cutoff from the evaluation config.
winsorization_cutoff = modeling_config["evaluation"]["winsorization_cutoff"]
running_profitability_scores = m.calculate_running_profitability_score(
    y_pred,
    returns_test,
    winsorization_cutoff,
)

# Draw the scores as a line chart
running_profitability_scores.plot(kind='line')
plt.show()

## Junkyard

## Tests Failing