In [None]:
# pyright: reportMissingImports=false
# pyright: reportMissingModuleSource=false

import uuid
import random
import hashlib
import os
import sys
import time
import logging
import datetime
import json
from datetime import datetime, timedelta
import yaml
import pytest
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from sklearn.model_selection import ParameterGrid, ParameterSampler
from scipy.signal import argrelextrema
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
import seaborn as sns
import progressbar

# Load environment variables from the local .env file
load_dotenv()

# Custom format function for displaying numbers: up to 12 significant digits.
# Undo with: pd.reset_option('display.float_format')
pd.set_option('display.float_format', lambda x: f'{x:.12g}')

# Dark mode charts: one shared background color and one shared text color
plt.rcParams.update({
    'figure.facecolor': '#181818',  # Custom background color (dark gray in this case)
    'axes.facecolor': '#181818',
    'text.color': '#afc6ba',
    'axes.labelcolor': '#afc6ba',
    'xtick.color': '#afc6ba',
    'ytick.color': '#afc6ba',
    'axes.titlecolor': '#afc6ba',
})

# import local modules
# pyright: reportMissingImports=false
sys.path.append('..//src')
import training_data.data_retrieval as dr
import training_data.profits_row_imputation as pri
import feature_engineering as fe
import coin_wallet_metrics.coin_wallet_metrics as cwm
import coin_wallet_metrics.indicators as ind
import modeling as m
import insights.analysis as ia
import insights.model_input_flows as mif
import utils as u


# Re-import every local module so source edits are picked up without restarting the kernel
modules = [dr, pri, fe, cwm, ind, m, ia, mif, u]
for module in modules:
    importlib.reload(module)

# Load the four config objects from the ../config folder
(config,
 metrics_config,
 modeling_config,
 experiments_config) = u.load_all_configs('../config')

# Create the project logger and show INFO-level messages and above
logger = dc.setup_logger()
logger.setLevel(logging.INFO)


## Pre-Modeling sequence

In [None]:
# Pick up any module or config edits made since the previous run
for module in modules:
    importlib.reload(module)
config, metrics_config, modeling_config, experiments_config = u.load_all_configs('../config')


# 1. Data Retrieval, Cleaning, Indicator Calculation
# --------------------------------------------------
# Macro trends: retrieve, clean, and add indicators.
# The third argument is None here and 'coin_id' for market data below.
macro_trends_df = dr.retrieve_macro_trends_data()
macro_trends_df = dr.clean_macro_trends(macro_trends_df, config)
macro_trends_df = ind.generate_time_series_indicators(
    macro_trends_df.reset_index(),
    metrics_config['macro_trends'],
    None,
)

# Profits: retrieve precalculated profits data, then clean it.
# The second value returned by clean_profits_df is discarded.
profits_df = dr.retrieve_profits_data(
    config['training_data']['earliest_window_start'],
    config['training_data']['modeling_period_end'],
    config['data_cleaning']['minimum_wallet_inflows'],
)
profits_df, _ = dr.clean_profits_df(profits_df, config['data_cleaning'])

# Market data: retrieve, clean, and add indicators
market_data_df = dr.retrieve_market_data()
market_data_df = dr.clean_market_data(market_data_df, config)
market_data_df = ind.generate_time_series_indicators(
    market_data_df,
    metrics_config['time_series']['market_data'],
    'coin_id',
)



# 2. Filtering based on dataset overlap
# -------------------------------------
# Optionally keep only market data rows whose coin also appears in profits_df
if config['data_cleaning']['exclude_coins_without_transfers']:
    market_data_df = market_data_df.loc[market_data_df['coin_id'].isin(profits_df['coin_id'])]

# prices_df: lightweight (coin_id, date, price) reference for other functions
prices_df = market_data_df.loc[:, ['coin_id','date','price']].copy()

# Keep only profits rows whose coin is still present in market_data_df after cleaning
profits_df = profits_df.loc[profits_df['coin_id'].isin(market_data_df['coin_id'])]





### Refactored Model Sequence

In [None]:
for module in modules:  # Reload all modules
    importlib.reload(module)
(config,
 metrics_config,
 modeling_config,
 experiments_config) = u.load_all_configs('../config')  # Reload all configs

# Build the per-window config overrides, then work with the first one (index 0)
time_windows = mif.generate_time_windows(config)
n = 0
time_window = time_windows[n]

In [None]:
for module in modules:  # Reload all modules
    importlib.reload(module)
(config,
 metrics_config,
 modeling_config,
 experiments_config) = u.load_all_configs('../config')  # Reload all configs


# Replace the freshly loaded configs with the versions prepared for `time_window`
# (`time_window` must already be bound by an earlier cell)
config, metrics_config, modeling_config = mif.prepare_configs(
    modeling_config['modeling']['config_folder'],
    time_window,
)


# Market data: restrict to the window's date range, tracked per coin_id
window_market_data_df = cwm.split_dataframe_by_coverage(
    market_data_df,
    config['training_data']['training_period_start'],
    config['training_data']['modeling_period_end'],
    id_column='coin_id',
    drop_outside_date_range=True,
)

# Macro trends: same date range, no id column
window_macro_trends_df = cwm.split_dataframe_by_coverage(
    macro_trends_df,
    config['training_data']['training_period_start'],
    config['training_data']['modeling_period_end'],
    id_column=None,
    drop_outside_date_range=True,
)



In [None]:
"""
profits_df
1. identify all dates needed
    all cohort lookback window starts
    training_period_start
    training_period_end
    modeling_period_start
    modeling_period_end
2. impute them
3. filter df to only dates between earliest and latest dates
4. wallet cohorts and buysell metrics
5. indicators
6. filter to window
"""


In [None]:
for module in modules:  # Reload all modules
    importlib.reload(module)
(config,
 metrics_config,
 modeling_config,
 experiments_config) = u.load_all_configs('../config')  # Reload all configs

# Build the per-window config overrides, then work with the first one (index 0)
time_windows = mif.generate_time_windows(config)
n = 0
time_window = time_windows[n]

# Swap in the configs prepared for the selected window
config, metrics_config, modeling_config = mif.prepare_configs(
    modeling_config['modeling']['config_folder'],
    time_window,
)

# Dates that need an imputed profits row
imputation_dates = mif.identify_imputation_dates(config)

# Impute all required dates, then keep only rows dated between the earliest and
# latest imputation date (both bounds inclusive)
window_profits_df = pri.impute_profits_for_multiple_dates(
    profits_df,
    prices_df,
    imputation_dates,
    n_threads=24,
)
window_profits_df = window_profits_df[
    window_profits_df['date'].between(pd.to_datetime(min(imputation_dates)),
                                      pd.to_datetime(max(imputation_dates)))
]


In [None]:
# Inspect the column dtypes of profits_df
profits_df.dtypes

In [None]:
# Project helper from utils — presumably reports the frame's memory footprint; confirm in utils
u.df_mem(profits_df)

In [None]:
"""
IN WINDOW FUNCTIONS

market_data_df: just filter to window
macro_trends_df: just filter to window

profits_df
1. identify all dates needed
    all cohort lookback window starts
    training_period_start
    training_period_end
    modeling_period_start
    modeling_period_end
2. impute them
3. filter df to only dates between earliest and latest dates
4. wallet cohorts and buysell metrics
5. indicators
6. filter to window
"""

# Scratch copy of the body of build_time_window_model_input, run at cell level while it is
# being refactored. The original signature and docstring are kept below for reference.
#
# def build_time_window_model_input(n, window, config, metrics_config, modeling_config):
#     """
#     Generates training data for each of the config.training_data.additional_windows.

#     Params:
#         n (int): The lookback number of the time window (e.g 0,1,2)
#         window (Dict): The config override dict with the window's modeling_period_start
#         config: config.yaml
#         metrics_config: metrics_config.yaml
#         modeling_config: modeling_config.yaml

#     Returns:
#         model_data (Dict): Dictionary containing all of the modeling features and variables:
#             X_train, X_test (DataFrame): Model training features
#             y_train, y_test (pd.Series): Model target variables
#             returns_test (DataFrame): The actual returns of each coin_id in each time_window.
#                 - coin_id: Index (str)
#                 - time_window: Index (int)
#                 - returns: value column (float)
#     """

# Prepare the full configuration by applying the selected window's overrides.
# At cell level the helper must be qualified with `mif.`, and the selected window is bound
# as `time_window` by the earlier cells (`window` only exists as a loop variable further down).
config, metrics_config, modeling_config = mif.prepare_configs(modeling_config['modeling']['config_folder'],
                                                              time_window)

# Define window start and end dates
start_date = config['training_data']['training_period_start']
end_date = config['training_data']['modeling_period_end']

# Rebuild market data (this overwrites the notebook-level market_data_df and prices_df)
market_data_df = dr.retrieve_market_data()
market_data_df, _ = cwm.split_dataframe_by_coverage(market_data_df, start_date, end_date, id_column='coin_id')
prices_df = market_data_df[['coin_id','date','price']].copy()

# Retrieve macro trends data
macro_trends_df = dr.retrieve_macro_trends_data()
macro_trends_df = cwm.generate_macro_trends_features(macro_trends_df, config)

# Rebuild profits_df, passing None when no profits_df exists yet in this namespace
if 'profits_df' not in locals():
    profits_df = None
# NOTE(review): this helper was called unqualified in the pasted function body, so it is
# presumably defined in (or imported into) mif like prepare_configs — confirm.
profits_df = mif.rebuild_profits_df_if_necessary(config, prices_df, profits_df)

# Build the configured model input data for the nth window
# NOTE(review): same assumption as above about this helper living in mif — confirm.
X_train, X_test, y_train, y_test, returns_test = mif.build_configured_model_input(
                                    profits_df,
                                    market_data_df,
                                    macro_trends_df,
                                    config,
                                    metrics_config,
                                    modeling_config)

# Add time window indices to dfs with coin_ids.
# NOTE: these mutate the frames in place, so re-running only this part of the cell would fail
# because 'time_window' would already be an index level.
X_train['time_window'] = n
X_train.set_index('time_window', append=True, inplace=True)
X_test['time_window'] = n
X_test.set_index('time_window', append=True, inplace=True)
returns_test['time_window'] = n
returns_test.set_index('time_window', append=True, inplace=True)

model_data = {
    'X_train': X_train,
    'X_test': X_test,
    'y_train': y_train,
    'y_test': y_test,
    'returns_test': returns_test
}

# return model_data


## Modeling Sequence

In [None]:


# One config-override entry per time window
time_windows = mif.generate_time_windows(config)

# Per-window pieces are collected here and concatenated after the loop
X_train_list = []
X_test_list = []
y_train_list = []
y_test_list = []
returns_test_list = []

for n, window in enumerate(time_windows):

    # Build the train/test features, targets and returns for window n
    model_data = mif.build_time_window_model_input(
        n,
        window,
        config,
        metrics_config,
        modeling_config,
    )

    # Store each piece of this window's output in its matching list
    X_train_list.append(model_data['X_train'])
    X_test_list.append(model_data['X_test'])
    y_train_list.append(model_data['y_train'])
    y_test_list.append(model_data['y_test'])
    returns_test_list.append(model_data['returns_test'])


# Stack the windows row-wise (pd.concat's default axis) into single objects
X_train = pd.concat(X_train_list)
X_test = pd.concat(X_test_list)
y_train = pd.concat(y_train_list)
y_test = pd.concat(y_test_list)
returns_test = pd.concat(returns_test_list)

In [None]:
for module in modules:  # Reload all modules
    importlib.reload(module)
(config,
 metrics_config,
 modeling_config,
 experiments_config) = u.load_all_configs('../config')  # Reload all configs
logger.setLevel(logging.INFO)


# 3.4 Train the model using the current configuration and log the results
model, model_id = m.train_model(X_train, y_train, modeling_config)

# 3.5 Evaluate and save the model performance on the test set to a CSV
metrics_dict, y_pred, y_pred_prob = m.evaluate_model(
    model,
    X_test,
    y_test,
    model_id,
    returns_test,
    modeling_config,
)

# Display the evaluation metrics as the cell output
metrics_dict

In [None]:
# Pair each training column with the importance the fitted model assigned to it
features = X_train.columns  # Feature names
feature_importances = model.feature_importances_

# Build the table and order it from most to least important in one chain
importance_df = (
    pd.DataFrame({'Feature': features, 'Importance': feature_importances})
    .sort_values(by='Importance', ascending=False)
)

# Show the 20 most important features
importance_df.head(20)

In [None]:
# Display the full sorted importance table (the previous cell shows only the first 20 rows)
importance_df

In [None]:

for module in modules:
    importlib.reload(module)


# Select y_pred_prob from the classifier, or y_pred from a regressor.
# `y_pred_prob or y_pred` would raise "truth value ... is ambiguous" whenever y_pred_prob is
# a multi-element numpy array or pandas Series, so the choice is made with an explicit
# None/empty check instead of truthiness.
if y_pred_prob is not None and len(y_pred_prob) > 0:
    predictions = y_pred_prob
else:
    predictions = y_pred
returns = returns_test['returns']
winsorization_cutoff = modeling_config["evaluation"]["winsorization_cutoff"]


ia.generate_profitability_curves(predictions, returns, winsorization_cutoff)

## Time Window Sequencing

In [None]:
# (rows, columns) of the current market_data_df
market_data_df.shape

### checking profits_df downcasts

In [None]:
# Scratch version of the profits retrieval: builds the BigQuery SQL inline, runs it, and
# converts columns so the resulting dtypes can be inspected in the cells below.
# The three values below are interpolated directly into the SQL text via the f-string.
start_date = config['training_data']['earliest_window_start']
end_date = config['training_data']['modeling_period_end']
minimum_wallet_inflows = config['data_cleaning']['minimum_wallet_inflows']

# SQL query to retrieve profits data.
# NOTE(review): several SQL comments inside the string refer to the "period end" while the
# surrounding SQL filters on start_date — confirm the wording inside the query.
query_sql = f"""
    -- STEP 1: retrieve profits data and apply USD inflows filter
    -------------------------------------------------------------
    with profits_base as (
        select coin_id
        ,date
        ,wallet_address
        ,profits_cumulative
        ,usd_balance
        ,usd_net_transfers
        ,usd_inflows
        ,usd_inflows_cumulative
        from core.coin_wallet_profits
        where date <= '{end_date}'
    ),

    usd_inflows_filter as (
        select coin_id
        ,wallet_address
        ,max(usd_inflows_cumulative) as total_usd_inflows
        from profits_base
        -- we don't need to include coin-wallet pairs that have no transactions between
        -- the start and end dates
        group by 1,2
    ),

    profits_base_filtered as (
        select pb.*
        from profits_base pb
        join usd_inflows_filter f on f.coin_id = pb.coin_id
            and f.wallet_address = pb.wallet_address
        where f.total_usd_inflows >= {minimum_wallet_inflows}
    ),


    -- STEP 2: create new records for all coin-wallet pairs as of the training_period_start
    ---------------------------------------------------------------------------------------
    -- compute the starting profits and balances as of the training_period_start
    training_start_existing_rows as (
        -- identify coin-wallet pairs that already have a balance as of the period end
        select *
        from profits_base_filtered
        where date = '{start_date}'
    ),
    training_start_needs_rows as (
        -- for coin-wallet pairs that don't have existing records, identify the row closest to the period end date
        select t.*
        ,cmd_previous.price as price_previous
        ,cmd_training.price as price_current
        ,row_number() over (partition by t.coin_id,t.wallet_address order by t.date desc) as rn
        from profits_base_filtered t
        left join training_start_existing_rows e on e.coin_id = t.coin_id
            and e.wallet_address = t.wallet_address

        -- obtain the last price used to compute the balance and profits data
        join core.coin_market_data cmd_previous on cmd_previous.coin_id = t.coin_id and cmd_previous.date = t.date

        -- obtain the training_period_start price so we can update the calculations
        join core.coin_market_data cmd_training on cmd_training.coin_id = t.coin_id and cmd_training.date = '{start_date}'
        where t.date < '{start_date}'
        and e.coin_id is null
    ),
    training_start_new_rows as (
        -- create a new row for the period end date by carrying the balance from the closest existing record
        select t.coin_id
        ,cast('{start_date}' as datetime) as date
        ,t.wallet_address
        -- profits_cumulative is the previous profits_cumulative + the change in profits up to the start_date
        ,((t.price_current / t.price_previous) - 1) * t.usd_balance + t.profits_cumulative as profits_cumulative
        -- usd_balance is previous balance * (1 + % change in price)
        ,(t.price_current / t.price_previous) * t.usd_balance as usd_balance
        -- there were no transfers
        ,0 as usd_net_transfers
        -- there were no inflows
        ,0 as usd_inflows
        -- no change since there were no inflows
        ,usd_inflows_cumulative as usd_inflows_cumulative

        from training_start_needs_rows t
        where rn=1

    ),

    -- STEP 3: merge all records together
    -------------------------------------
    profits_merged as (
        select * from profits_base_filtered
        -- transfers prior to the training period are summarized in training_start_new_rows
        where date >= '{start_date}'

        union all

        select * from training_start_new_rows
    )

    select coin_id
    ,date

    -- replace the memory-intensive address strings with integers
    ,DENSE_RANK() OVER (ORDER BY wallet_address) as wallet_address

    ,profits_cumulative
    ,usd_balance
    ,usd_net_transfers
    ,usd_inflows
    -- set a floor of $0.01 to avoid divide by 0 errors caused by rounding
    ,greatest(0.01,usd_inflows_cumulative) as usd_inflows_cumulative
    from profits_merged
"""

# Run the SQL query using dgc's run_sql method.
# NOTE: this overwrites the notebook-level profits_df built in the pre-modeling cell.
profits_df = dgc().run_sql(query_sql)

logger.info('Converting columns to memory-optimized formats...')

# Convert coin_id to categorical and date to pandas datetime
profits_df['coin_id'] = profits_df['coin_id'].astype('category')
profits_df['date'] = pd.to_datetime(profits_df['date'])

# Add total_return column: cumulative profits divided by cumulative USD inflows
# (the query floors usd_inflows_cumulative at 0.01, so the denominator is never 0)
profits_df['total_return'] = (profits_df['profits_cumulative']
                                / profits_df['usd_inflows_cumulative'])



In [None]:
# NOTE(review): safe_downcast is called unqualified but is not defined or imported in the
# visible part of this notebook — presumably a project helper (e.g. in utils); confirm.
profits_df = safe_downcast(profits_df, 'wallet_address', 'int32')

In [None]:
# Preview the first rows of market_data_df
market_data_df.head()

In [None]:
# Dtype-level check only: asks whether the market_cap column's dtype can be cast to int32
# under numpy's 'safe' casting rule. It does not look at the values stored in the column.
np.can_cast(market_data_df['market_cap'].dtype, 'int32', casting='safe')


In [None]:
# NOTE(review): safe_downcast is not defined or imported in the visible part of this
# notebook; confirm where it lives and how it treats values that do not fit in int32.
market_data_df = safe_downcast(market_data_df, 'volume', 'int32')

### Market Data resequencing

In [None]:
for module in modules:  # Reload all modules
    importlib.reload(module)
config, metrics_config, modeling_config, experiments_config = u.load_all_configs('../config')  # Reload all configs


# Generate time_windows config overrides that will modify each window's config settings
time_windows = mif.generate_time_windows(config)
n = 0
window = time_windows[n]

# Prepare the full configuration by applying overrides from the current trial config.
# The original cell then regenerated time_windows/window a second time from this already
# overridden config (a copy-pasted block). That duplicate is removed so `window` stays the
# window the configs were actually prepared for.
config, metrics_config, modeling_config = mif.prepare_configs(modeling_config['modeling']['config_folder'], window)


# market_data_df, _ = cwm.split_dataframe_by_coverage(market_data_df, start_date, end_date, id_column='coin_id')
# prices_df = market_data_df[['coin_id','date','price']].copy()

# Keep an untouched snapshot so later cells can restart from the same market data
market_data_df_full = market_data_df.copy()
market_data_df.shape

In [None]:
# Preview the snapshot taken in the previous cell
market_data_df_full.head()

In [None]:
for module in modules:  # Reload all modules
    importlib.reload(module)
(config,
 metrics_config,
 modeling_config,
 experiments_config) = u.load_all_configs('../config')  # Reload all configs


# Start again from the untouched snapshot and compare the columns before and after
market_data_df = market_data_df_full.copy()
print(market_data_df.columns)
# NOTE(review): this call passes (dataset name, frame, full metrics_config), while the
# pre-modeling cell passes (frame, config subsection, id column) to the same function —
# confirm which signature the current ind module expects.
market_data_df = ind.generate_time_series_indicators('market_data', market_data_df, metrics_config)
print(market_data_df.columns)


In [None]:
# True when time_series_df still has pandas' default RangeIndex.
# NOTE(review): time_series_df is first assigned in a later cell, so this depends on
# out-of-order execution.
isinstance(time_series_df.index, pd.RangeIndex)

In [None]:
# Preview the last rows of market_data_df
market_data_df.tail()

In [None]:
for module in modules:  # Reload all modules
    importlib.reload(module)
(config,
 metrics_config,
 modeling_config,
 experiments_config) = u.load_all_configs('../config')  # Reload all configs




# Start from the untouched snapshot and add the indicators configured for one value column
market_data_df = market_data_df_full.copy()

# Column to compute indicators for, its indicator settings, and the grouping column
value_column = 'price'
value_column_indicators_config = metrics_config['time_series']['market_data'][value_column]['indicators']
id_column = 'coin_id'

market_data_df = ind.generate_column_time_series_indicators(
    market_data_df, value_column, value_column_indicators_config, id_column)

# Show the resulting column names
market_data_df.columns

In [None]:
# Scratch check that `not None` is truthy, so the branch prints 'x'.
# NOTE: this rebinds the notebook-level id_column to None until another cell sets it again.
id_column = None
if not id_column:
    print('x')

In [None]:
# Move the current index levels back into columns.
# NOTE: not idempotent — each run resets the index of whatever time_series_df currently holds,
# and time_series_df is first assigned in a later cell.
time_series_df = time_series_df.reset_index()

In [None]:
for module in modules:  # Reload all modules
    importlib.reload(module)
config, metrics_config, modeling_config, experiments_config = u.load_all_configs('../config')  # Reload all configs


# Scratch inputs for stepping through the per-column indicator logic at cell level.
# (The original `config = config` line was a no-op and has been removed.)
# time_series_df = market_data_df[['date','coin_id','price']].copy()
time_series_df = market_data_df_full.copy()
value_column_indicators_config = metrics_config['time_series']['market_data']['price']['indicators']
value_column = 'price'
id_column='coin_id'

time_series_df = time_series_df.set_index(['coin_id','date'])

# Data Quality Checks and Formatting
if value_column not in time_series_df.columns:
    raise KeyError(f"Input DataFrame does not include column '{value_column}'.")

if time_series_df[value_column].isnull().any():
    raise ValueError(f"The '{value_column}' column contains null values, which are not allowed.")

# Indicator Calculations
# ----------------------
# If there is an id_column, group on it as an index level
if id_column:
    groupby_column = id_column
    groupby_kwargs = {'level': groupby_column}
# If there isn't, create a dummy column for grouping and remove it later. The dummy is a
# regular column, not an index level, so it must be grouped with `by=`; the original
# `groupby(level='dummy_group')` would fail in this branch.
else:
    time_series_df['dummy_group'] = 1
    groupby_column = 'dummy_group'
    groupby_kwargs = {'by': groupby_column}

# Indicators currently ported to this loop, keyed by their config name. Both take
# (series, window).
# TODO: 'rsi' (calculate_rsi(x, w)) and 'bollinger_bands_upper' / 'bollinger_bands_lower'
# (calculate_bollinger_bands(x, 'upper' | 'lower', w, num_std), with num_std read from
# indicator_config['parameters'].get('num_std', None)) were commented out in the original
# cell and are still skipped here.
indicator_functions = {
    'sma': ind.calculate_sma,
    'ema': ind.calculate_ema,
}

# For each supported indicator, add one column per configured window
for indicator, indicator_config in value_column_indicators_config.items():
    indicator_function = indicator_functions.get(indicator)
    if indicator_function is None:
        # Unsupported indicators are skipped silently, as in the original if/elif chain
        continue

    windows = indicator_config['parameters']['window']
    for w in windows:
        # The lambda is invoked inside transform() before the loop advances, so closing over
        # `w` and `indicator_function` is safe here
        ind_series = (time_series_df
                      .groupby(**groupby_kwargs, observed=True)[value_column]
                      .transform(lambda x: indicator_function(x, w)))
        time_series_df[f"{value_column}_{indicator}_{w}"] = ind_series

# Remove the dummy column if it was created
if groupby_column == 'dummy_group':
    time_series_df = time_series_df.drop('dummy_group', axis=1)

logger.info("Generated indicators for column '%s' :%s",
            value_column,
            list(value_column_indicators_config.keys()))


In [None]:
# Names of the indicators configured for the value column
list(value_column_indicators_config.keys())

In [None]:
# Log the keys view directly, to see how it renders compared with list(...) in the cell above
logger.info("%s",value_column_indicators_config.keys())

In [None]:
# Last rows for one hard-coded coin_id.
# NOTE(review): .xs(level='coin_id') requires coin_id to be an index level of market_data_df
# at this point — confirm what the preceding indicator call returns.
market_data_df.xs('9d6619f4-b44b-4ff4-9f68-1f563f57e060',level='coin_id').tail()

In [None]:
# 15 random rows; no random_state is set, so the sample changes on every run
market_data_df.sample(15)

In [None]:
# num_std setting of whichever indicator the scratch loop visited last (indicator_config is
# the leftover loop variable); None when that indicator has no 'num_std' parameter
x = indicator_config['parameters'].get('num_std', None)
x

In [None]:
# FIXME: unfinished scratch expression. The original line ended with an unclosed
# `.transform(` call, which is a SyntaxError and stops Run All at this cell. It is kept as a
# comment until the transform function is filled in.
# market_data_df.groupby(level='coin_id', observed=True)['price'].transform(

### indicators implementation

In [None]:
for module in modules:  # Reload all modules
    importlib.reload(module)
config, metrics_config, modeling_config, experiments_config = u.load_all_configs('../config')  # Reload all configs


# Work on a (coin_id, date)-indexed frame. set_index returns a new frame, so the
# `df = market_data_df.copy()` that originally preceded this line was overwritten at once and
# only cost an extra full copy; it has been removed.
# NOTE(review): this needs coin_id and date to still be columns of market_data_df — confirm
# the state left behind by earlier cells.
df = market_data_df.set_index(['coin_id','date'])


# Add Relative Strength Index (RSI), computed per coin with a 14-period window
df['rsi'] = df.groupby(level='coin_id', observed=True)['price'].transform(
    lambda x: ind.calculate_rsi(x, 14))
# Add Money Flow Index (MFI)
df = ind.add_mfi_column(df)

# Calculate MACD-style crossover from 12- and 26-period EMAs; both EMA columns are dropped
# again by add_crossover_column (drop_col1=True, drop_col2=True)
df['ema_12'] = df.groupby(level='coin_id', observed=True)['price'].transform(lambda x: ind.calculate_ema(x, 12))
df['ema_26'] = df.groupby(level='coin_id', observed=True)['price'].transform(lambda x: ind.calculate_ema(x, 26))
df = ind.add_crossover_column(df, 'ema_12', 'ema_26', drop_col1=True, drop_col2=True)

# Add Bollinger Bands
df = ind.add_bollinger_bands(df, include_middle=False)
# Add crossover for price and upper band (price is kept, the band column is dropped)
df = ind.add_crossover_column(df, 'price', 'bollinger_band_upper', drop_col1=False, drop_col2=True)
# Add crossover for price and lower band (price is kept, the band column is dropped)
df = ind.add_crossover_column(df, 'price', 'bollinger_band_lower', drop_col1=False, drop_col2=True)

# Calculate OBV
# NOTE(review): unlike RSI/EMA above, this call is not grouped by coin_id, so unless
# generalized_obv groups internally the values may run across coin boundaries — confirm.
df['obv_price_volume'] = ind.generalized_obv(df['price'],df['volume'])


df.head()

## Junkyard

In [None]:
for module in modules:  # Reload all modules
    importlib.reload(module)
(config,
 metrics_config,
 modeling_config,
 experiments_config) = u.load_all_configs('../config')  # Reload all configs

def apply_mfi(group):
    """
    Add an 'mfi' column to one coin's rows.

    Params:
        group (DataFrame): rows for a single coin, indexed by (coin_id, date), with
            'price' and 'volume' columns

    Returns:
        DataFrame: the same rows with the 'mfi' column added, indexed by (coin_id, date)
    """
    # Flatten the multi-index first to avoid issues with it during group operations,
    # compute the MFI from price and volume, then restore the (coin_id, date) index
    return (
        group
        .reset_index()
        .assign(mfi=lambda flat: ind.calculate_mfi(flat['price'], flat['volume']))
        .set_index(['coin_id', 'date'])
    )

# Run the helper once per coin_id and stitch the groups back together
df = df.groupby('coin_id', observed=True, group_keys=False).apply(apply_mfi)

# Preview the frame with the new MFI column
df.head()

In [None]:
# First 20 rows of the indicator frame
df.head(20)

In [None]:
for module in modules:  # Reload all modules
    importlib.reload(module)
(config,
 metrics_config,
 modeling_config,
 experiments_config) = u.load_all_configs('../config')  # Reload all configs

# Try add_crossover_column on a two-column copy so `df` itself is left untouched.
# NOTE(review): the indicators cell asks add_crossover_column to drop ema_12/ema_26 from df,
# so this selection only works if those columns were re-created — confirm cell order.
df2 = df[['ema_12','ema_26']].copy()

df2 = ind.add_crossover_column(
    df2,
    'ema_12',
    'ema_26',
    drop_col1=True,
    drop_col2=True,
)
df2.head()

In [None]:
def identify_crossovers(series1, series2):
    """
    Identify crossovers between two time series.

    This function calculates the points where series1 crosses over series2, based on the
    sign of (series1 - series2) at consecutive positions. NaN differences are treated as 0.

    Parameters:
    series1 (array-like): The first time series
    series2 (array-like): The second time series, same length as series1

    Returns:
    numpy.ndarray: A float array of the same length as the input series, where:
        0 indicates no crossover
        1 indicates an upward crossover (series1 crosses above series2)
        -1 indicates a downward crossover (series1 crosses below series2)
    Position 0 is always 0 because it has no predecessor. Any change of sign counts, so a
    move onto exact equality (difference 0, including NaN positions) is recorded as -1.
    """
    # Compare positionally as float arrays so lists, numpy arrays and pandas Series all work
    diff = np.asarray(series1, dtype=float) - np.asarray(series2, dtype=float)

    # Handle NaN values
    diff = np.nan_to_num(diff, nan=0.0)

    # Initialize crossovers array
    crossovers = np.zeros(len(diff))

    # A crossover is any position whose sign differs from the previous position's sign
    signs = np.sign(diff)
    crossover_indices = np.flatnonzero(signs[1:] != signs[:-1]) + 1

    # Assign 1 for upward crossovers, -1 for downward crossovers
    crossovers[crossover_indices] = np.where(signs[crossover_indices] > 0, 1, -1)

    # The original body stopped here without a return, so callers always received None
    return crossovers


In [None]:
# Display the two EMA columns (this renders the whole frame, not just a preview)
df[['ema_12','ema_26']]

In [None]:

# Assuming `df` is your DataFrame with multi-index (coin_id, date) and ema_12, ema_26 columns

def apply_crossovers(group):
    """
    Return one coin's rows with a 'crossovers' column added.

    Params:
        group (DataFrame): rows for a single coin with 'ema_12' and 'ema_26' columns

    Returns:
        DataFrame: a new frame with the result of identify_crossovers(ema_12, ema_26)
            stored in 'crossovers'
    """
    # Build a new frame instead of assigning into `group`: pandas does not support functions
    # that mutate the object passed in by groupby.apply
    return group.assign(crossovers=identify_crossovers(group['ema_12'], group['ema_26']))

# Apply the function within each 'coin_id' group. observed=True matches the other groupby
# calls in this notebook and skips unused categories when coin_id is categorical.
df = df.groupby('coin_id', observed=True, group_keys=False).apply(apply_crossovers)

# Display the resulting DataFrame with the new 'crossovers' column
df

In [None]:
# pandas is already imported as pd in the first cell; this import is redundant
import pandas as pd



# Display the resulting DataFrame with the new 'crossovers' column
df

## Tests failing

In [None]:
# Reload modules and configs so the test body below runs against the latest ind code
[importlib.reload(module) for module in modules]  # Reload all modules
config, metrics_config, modeling_config, experiments_config = u.load_all_configs('../config')  # Reload all configs



def sample_data():
    """
    Build the small two-coin price frame used by the indicator tests.

    Returns:
    - pd.DataFrame: Six rows with columns coin_id, date and price. The first three rows
      are BTC and the last three are ETH, and the dates are six consecutive days starting
      on 2023-01-01.
    """
    prices = [100, 110, 105, 200, 220, 210]
    coin_ids = ['BTC'] * 3 + ['ETH'] * 3
    dates = pd.date_range(start='2023-01-01', periods=len(prices))

    return pd.DataFrame({'coin_id': coin_ids, 'date': dates, 'price': prices})
# Call the fixture-style function right away and rebind the name to its result, so the test
# body below can use `sample_data` as a DataFrame. After this line the name no longer
# refers to the function.
sample_data=sample_data()

def sample_config():
    """
    Build the indicator configuration used by the indicator tests.

    Returns:
    - dict: Nested as time_series -> market_data -> price -> indicators, with one entry
      each for sma, ema, rsi, bollinger_bands_upper and bollinger_bands_lower. Every
      indicator uses window [2]; the two Bollinger entries also set num_std to 2.
    """
    # Indicators that only take a window
    indicators = {
        name: {'parameters': {'window': [2]}}
        for name in ('sma', 'ema', 'rsi')
    }

    # Bollinger bands additionally need the number of standard deviations
    for band in ('bollinger_bands_upper', 'bollinger_bands_lower'):
        indicators[band] = {'parameters': {'window': [2], 'num_std': 2}}

    return {'time_series': {'market_data': {'price': {'indicators': indicators}}}}
# Rebind the name to the fixture function's return value (the function is shadowed after this)
sample_config=sample_config()
# Body of the pytest test below, run at cell level for debugging; the decorator and
# signature are left commented out.
# @pytest.mark.unit
# def test_all_supported_indicators(sample_data, sample_config):
"""
Test that all supported indicators are correctly calculated and added to the DataFrame.

This test checks the calculation of SMA, EMA, RSI, and Bollinger Bands for the given sample data.
"""
# NOTE(review): this call passes (dataset name, frame, full config), while the pre-modeling
# cell passes (frame, config subsection, id column) to the same function — confirm which
# signature the current ind module expects.
result = ind.generate_time_series_indicators('market_data', sample_data, sample_config)

# Calculate expected values
# SMA (2-day)
# For BTC: [None, 105, 107.5]
# For ETH: [None, 210, 215]

# EMA (2-day)
# For BTC: [None, 106.67, 105.56]
# For ETH: [None, 213.33, 211.11]
# EMA = (Current * (2 / (1 + 2))) + (Previous EMA * (1 - (2 / (1 + 2))))

# RSI (2-day)
# For BTC: [None, 100, 33.33]
# For ETH: [None, 100, 33.33]
# RSI = 100 - (100 / (1 + (Average Gain / Average Loss)))
# NOTE(review): the values in this comment (100, 33.33) do not match expected_rsi below
# (1.0, 0.6667) in either scale or value — confirm which one ind.calculate_rsi should produce.

# Bollinger Bands (2-day, 2 std dev)
# Upper Band = SMA + (2 * std dev)
# Lower Band = SMA - (2 * std dev)
# For BTC: [None, 115, 112.5], [None, 95, 102.5]
# For ETH: [None, 230, 225], [None, 190, 205]
# NOTE(review): these numbers correspond to a population standard deviation (ddof=0), e.g.
# BTC prices 100 and 110 give mean 105 and population std 5, hence 105 +/- 10. pandas'
# rolling std defaults to ddof=1, which would give roughly 105 +/- 14.14 — confirm which
# convention ind.calculate_bollinger_bands uses.

# Exact column order expected in the result; assumes coin_id and date come back as columns
# rather than index levels
expected_columns = [
    'coin_id', 'date', 'price',
    'price_sma_2', 'price_ema_2', 'price_rsi_2',
    'price_bollinger_bands_upper_2', 'price_bollinger_bands_lower_2'
]

assert list(result.columns) == expected_columns

# Check SMA values (the first row of each coin is NaN; equal_nan=True lets NaN match NaN)
expected_sma = [np.nan, 105, 107.5, np.nan, 210, 215]
assert all(np.isclose(a, b, equal_nan=True) for a, b in zip(result['price_sma_2'], expected_sma))

# Check EMA values (1% relative tolerance because the expected values are rounded)
expected_ema = [np.nan, 106.67, 105.56, np.nan, 213.33, 211.11]
assert all(np.isclose(a, b, equal_nan=True, rtol=1e-2) for a, b in zip(result['price_ema_2'], expected_ema))

# Check RSI values (expressed here on a 0-1 scale, with 1% relative tolerance)
expected_rsi = [np.nan, 1.0, 0.6667, np.nan, 1.0, 0.6667]
assert all(np.isclose(a, b, equal_nan=True, rtol=1e-2) for a, b in zip(result['price_rsi_2'], expected_rsi))

# Check Bollinger Bands values
expected_bb_upper = [np.nan, 115, 112.5, np.nan, 230, 225]
expected_bb_lower = [np.nan, 95, 102.5, np.nan, 190, 205]
assert all(np.isclose(a, b, equal_nan=True) for a, b in zip(result['price_bollinger_bands_upper_2'], expected_bb_upper))
assert all(np.isclose(a, b, equal_nan=True) for a, b in zip(result['price_bollinger_bands_lower_2'], expected_bb_lower))


In [None]:
# Actual upper-band values, for comparison with expected_bb_upper
result['price_bollinger_bands_upper_2']

In [None]:
# pandas and numpy are already imported in the first cell; these imports are redundant
import pandas as pd
import numpy as np

# NOTE: this rebinds sample_data to a price-only frame, replacing the frame bound in the
# test cell above
sample_data = pd.DataFrame({
    'price': [100, 110, 105, 200, 220, 210]
})

# Upper Bollinger band over the single ungrouped price column (window=2, num_std=2)
upper_band = ind.calculate_bollinger_bands(sample_data['price'], 'upper', window=2, num_std=2)
print(upper_band)

In [None]:
# Expected RSI values from the test cell
expected_rsi

In [None]:
# Actual RSI values produced by the indicator call in the test cell
result['price_rsi_2']

In [None]:
# Expected RSI values again (duplicate of the inspection cell two cells above)
expected_rsi

In [None]:
# Show actual and expected RSI side by side. The original call passed expected_rsi as the
# second positional argument of pd.DataFrame, which is `index`: that relabels/reindexes the
# data by the expected values instead of adding them as a column.
pd.DataFrame({
    'actual': list(result['price_rsi_2']),
    'expected': expected_rsi,
})

In [None]:
# Column order the test expects in the result frame
expected_columns

In [None]:
# NOTE(review): result_df is not assigned in the visible part of this notebook (the test
# cell binds `result`), so this relies on leftover kernel state.
result_df

In [None]:
# Compares coin1's price_sma_2 values with the expected list, element by element.
# NOTE(review): result_df and expected_sma_2_coin1 are not assigned in the visible part of
# this notebook — leftover kernel state.
list(result_df.loc[result_df['coin_id'] == 'coin1', 'price_sma_2'].values) == list(expected_sma_2_coin1)

In [None]:
# NOTE(review): expected_sma_2_coin1 is not assigned in the visible part of this notebook
list(expected_sma_2_coin1)

In [None]:
# NOTE(review): result_mfi is not assigned in the visible part of this notebook
result_mfi

In [None]:
# NOTE(review): the test cell binds expected_rsi to a plain list, which has no `.values`
# attribute; this only works if expected_rsi was rebound to a pandas/numpy object elsewhere.
expected_rsi.values