In [None]:
# pyright: reportMissingImports=false
# pyright: reportMissingModuleSource=false

import uuid
import random
import hashlib
import os
import sys
import time
import logging
import re
import pdb
from pathlib import Path
import datetime
from datetime import datetime,timedelta
import json
import warnings
import yaml
from typing import Dict,Union,List,Any,Tuple
import pytest
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from sklearn.model_selection import ParameterGrid, ParameterSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error, r2_score, roc_auc_score
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from scipy.signal import argrelextrema
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
import seaborn as sns
import progressbar

# load_dotenv(Path("../../../Local/.env"))

# Custom format function for displaying numbers: floats are shown with up to
# 12 significant digits (the 'g' format drops trailing zeros)
pd.set_option('display.float_format', lambda x: f'{x:.12g}')
# pd.reset_option('display.float_format')

# Suppress warnings whose message starts with "MallocStackLogging"
warnings.filterwarnings("ignore", message="MallocStackLogging")

# silence pygame donation request
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
# Location of the alert sound, relative to the notebook's working directory.
# NOTE(review): presumably read by u.play_notification() — confirm against utils.
os.environ['ALERT_SOUND_FILEPATH']="../../../Local/assets/sounds/mixkit-alert-bells-echo-765.wav"

# Dark mode charts: a single dark-gray background shared by figure and axes,
# and a single muted-green foreground shared by every text element.
plt.rcParams.update({
    'figure.facecolor': '#181818',  # Custom background color (dark gray in this case)
    'axes.facecolor': '#181818',
    'text.color': '#afc6ba',
    'axes.labelcolor': '#afc6ba',
    'xtick.color': '#afc6ba',
    'ytick.color': '#afc6ba',
    'axes.titlecolor': '#afc6ba',
})

# import local modules
# pyright: reportMissingImports=false
sys.path.append('..//src')
import utils as u
import training_data.data_retrieval as dr
import training_data.profits_row_imputation as pri
import coin_wallet_metrics.coin_wallet_metrics as cwm
import coin_wallet_metrics.indicators as ind
import feature_engineering.feature_generation as fg
import feature_engineering.time_windows_orchestration as tw
import feature_engineering.flattening as flt
import feature_engineering.data_splitting as ds
import feature_engineering.target_variables as tv
import feature_engineering.preprocessing as prp
import modeling as m
import insights.analysis as ia
import insights.experiments as exp

# Wallet modeling
import wallet_modeling.wallet_orchestrator as wo
import wallet_modeling.wallet_training_data as wtd
import wallet_modeling.model_reporting as wmr
import wallet_modeling.wallet_model_experiment as wme
from wallet_modeling.wallets_config_manager import WalletsConfig

# Wallet features
import wallet_features.clustering_features as wcl
import wallet_features.market_cap_features as wmc
import wallet_features.market_timing_features as wmt
import wallet_features.performance_features as wp
import wallet_features.trading_features as wtf
import wallet_features.transfers_features as wts
import wallet_features.wallet_features as wf

# Wallet insights
import wallet_insights.wallet_model_evaluation as wime
import wallet_insights.validation_analysis as wiv
import wallet_insights.coin_forecasting as wicf


# reload all modules
# Every local module imported above is listed here so that edits made to the
# source files are picked up without restarting the kernel. Later cells reuse
# this same `modules` list for their own reloads.
modules = [u, dr, pri, cwm, ind, fg, tw, flt, ds, tv, prp, m, ia, exp,
           wo, wtd, wmr, wme,
           wcl, wmc, wmt, wp, wtf, wts, wf,
           wime, wiv, wicf]
[importlib.reload(module) for module in modules]

# load all configs
# Paths are relative to the notebook's working directory. Three loaders are used:
# the project-wide loader, the WalletsConfig class (which later cells refresh via
# wallets_config.reload()), and a plain yaml.safe_load for the features config.
config, metrics_config, modeling_config, experiments_config = u.load_all_configs('../config')
wallets_config = WalletsConfig.load_from_yaml('../config/wallets_config.yaml')
wallets_metrics_config = u.load_config('../config/wallets_metrics_config.yaml')
wallets_features_config = yaml.safe_load(Path('../config/wallets_features_config.yaml').read_text(encoding='utf-8'))

# configure logger
logger = dc.setup_logger()
logger.setLevel(logging.INFO)

logger.info("Good morning, let's get to work")

In [None]:
# Refresh local modules and the wallet configs so this cell can be re-run after edits
[importlib.reload(module) for module in modules]
wallets_config.reload()
wallets_metrics_config = u.load_config('../config/wallets_metrics_config.yaml')
wallets_features_config = yaml.safe_load(Path('../config/wallets_features_config.yaml').read_text(encoding='utf-8'))

# NOTE(review): presumably exports the source under wallet_features/ for sharing —
# confirm what u.export_code writes and where.
u.export_code(code_directories=['wallet_features'])

## Full Training Data Sequence

### retrieve datasets

In [None]:
# Refresh local modules and the wallet configs before pulling data
[importlib.reload(module) for module in modules]
wallets_config.reload()
wallets_metrics_config = u.load_config('../config/wallets_metrics_config.yaml')
wallets_features_config = yaml.safe_load(Path('../config/wallets_features_config.yaml').read_text(encoding='utf-8'))


# Retrieve datasets
profits_df,market_data_df = wo.retrieve_datasets()

# NOTE(review): the two steps below are commented out, yet later cells still read
# wallet_cohort, training_windows_profits_dfs, modeling_profits_df and
# validation_profits_df. On a fresh kernel those names are never assigned by any
# uncommented line in this notebook, so the downstream cells raise NameError.

# # Define wallet cohort after cleaning
# training_wallet_metrics_df,wallet_cohort = wo.define_wallet_cohort(profits_df,market_data_df)

# # Generate profits_df for all training windows and the modeling period
# training_profits_df, training_windows_profits_dfs, modeling_profits_df, validation_profits_df = wo.split_profits_df(profits_df,
#                                                                                market_data_df,wallet_cohort)


In [None]:
# Impute the training period end (training period start is pre-imputed into profits_df generation)
# The date is wrapped in a single-element list before being passed to the
# multi-date imputation helper.
training_period_end = [wallets_config['training_data']['training_period_end']]
imputed_profits_df = pri.impute_profits_for_multiple_dates(profits_df, market_data_df,
                                                        training_period_end, n_threads=24)

# Create a training period only profits_df
# Keeps every row dated on or before the training period end; .copy() detaches
# the result from imputed_profits_df.
training_profits_df = imputed_profits_df[
    imputed_profits_df['date']<=wallets_config['training_data']['training_period_end']
    ].copy()


# Summary statistics as a quick sanity check of the filtered frame
training_profits_df.describe()

In [None]:
# Split the training rows into those strictly before the period end ("mid")
# and those dated exactly on the period end ("end").
mid_training_profits_df = training_profits_df.loc[
    training_profits_df['date'].lt(wallets_config['training_data']['training_period_end'])
].copy()
end_training_profits_df = training_profits_df.loc[
    training_profits_df['date'].eq(wallets_config['training_data']['training_period_end'])
].copy()

# For every wallet-coin pair, find the latest date it appears on before the period end
last_mid_dates = (
    mid_training_profits_df
    .groupby(by=['wallet_address', 'coin_id'])['date']
    .max()
)
last_mid_dates_df = last_mid_dates.reset_index()
last_mid_dates_df.columns = ['wallet_address', 'coin_id', 'date']
last_mid_dates_df
# # Merge to keep only the matching rows
# last_mid_dates_df = mid_training_profits_df.merge(last_mid_dates_df, on=['wallet_address', 'coin_id', 'date'])
# last_mid_dates_df.head()

In [None]:
# identify wallets that had a balance as of their last transaction in the middle of the period
# last_mid_dates_df only carries the key columns (wallet_address, coin_id, date),
# so filtering it on 'usd_balance' directly raises KeyError. Join back to the
# mid-period profits rows to recover the balance recorded on each pair's last
# mid-period date. Selecting the key columns explicitly keeps this correct even
# if last_mid_dates_df has been enriched with extra columns elsewhere.
last_mid_rows_df = mid_training_profits_df.merge(
    last_mid_dates_df[['wallet_address', 'coin_id', 'date']],
    on=['wallet_address', 'coin_id', 'date'],
    how='inner',
)
had_mid_balance_df = last_mid_rows_df[last_mid_rows_df['usd_balance']>0]
had_mid_balance_df.shape

In [None]:
# Shape of the period-end rows that are flagged as imputed
end_training_profits_df.loc[end_training_profits_df['is_imputed'].eq(True)].shape

In [None]:
# Spot-check a single coin/wallet pair.
# c is a coin_id and w a wallet_address, as passed to the project's cw_filter_df helper.
c = '0019555b-bb44-405e-a035-e868fe113ca8'
w = 214076
test_profits_df = u.cw_filter_df(training_profits_df,c,w).copy()
# NOTE: a bare expression in the middle of a cell is not displayed; only the
# final expression of the cell is rendered.
test_profits_df

# Same pair, restricted to the rows dated on the training period end
u.cw_filter_df(end_training_profits_df,c,w).copy()

In [None]:
# Re-run the period-end imputation on the single-pair frame only, to inspect
# exactly which rows the helper adds for it.
# NOTE(review): this rebinds imputed_profits_df, which an earlier cell uses to
# build training_profits_df. Re-running that earlier filter after this cell would
# operate on the single-pair result instead of the full dataset.
training_period_end = [wallets_config['training_data']['training_period_end']]
imputed_profits_df = pri.impute_profits_for_multiple_dates(test_profits_df, market_data_df,
                                                        training_period_end, n_threads=24)
imputed_profits_df

In [None]:
# Refresh local modules and the wallet configs so this cell can be re-run after edits
[importlib.reload(module) for module in modules]
wallets_config.reload()
wallets_metrics_config = u.load_config('../config/wallets_metrics_config.yaml')
wallets_features_config = yaml.safe_load(Path('../config/wallets_features_config.yaml').read_text(encoding='utf-8'))


# NOTE(review): presumably exports the source under training_data/ — confirm
# what u.export_code writes. The list is passed positionally here, whereas the
# other calls in this notebook use the code_directories keyword.
u.export_code(['training_data'])

In [None]:
# Refresh local modules and the wallet configs before building the inputs below
[importlib.reload(module) for module in modules]
wallets_config.reload()
wallets_metrics_config = u.load_config('../config/wallets_metrics_config.yaml')
wallets_features_config = yaml.safe_load(Path('../config/wallets_features_config.yaml').read_text(encoding='utf-8'))


# Market data: add indicators
# The indicator definitions come from the time_series.market_data section of the
# wallets metrics config; 'coin_id' is passed as the third argument
# (presumably the grouping key — TODO confirm against ind.generate_time_series_indicators).
market_indicators_data_df = ind.generate_time_series_indicators(market_data_df,
                                                        wallets_metrics_config['time_series']['market_data'],
                                                        'coin_id')


# Transfers data retrieval for the wallet_ids in temp.wallet_modeling_cohort
transfers_sequencing_df = wts.retrieve_transfers_sequencing()

## Codespace

### generate features

In [None]:
# Refresh local modules and the wallets config before generating features
[importlib.reload(module) for module in modules]
wallets_config.reload()

# NOTE(review): wallet_cohort and training_windows_profits_dfs are read below but
# are only assigned in commented-out code in the "retrieve datasets" cell, so
# this cell fails on a fresh kernel unless those lines are restored.

# Generate features for the full training dataset
training_wallet_features_df = wf.calculate_wallet_features(training_profits_df, market_indicators_data_df,
                                                           transfers_sequencing_df, wallet_cohort)

# Define the full feature set by appending a suffix for each window
# (the full-period features get "_all_windows"; per-window features get "_w<i>" below)
training_data_df = training_wallet_features_df.add_suffix("_all_windows")

# Generate features for each window
# Windows are numbered from 1, so the first window's columns end in "_w1".
for i, window_profits_df in enumerate(training_windows_profits_dfs, 1):
    # Generate the features
    window_wallet_features_df = wf.calculate_wallet_features(window_profits_df, market_indicators_data_df,
                                                             transfers_sequencing_df, wallet_cohort)

    # Add column suffix and join to training_data_df
    # Left join on the index keeps every row already in training_data_df.
    window_wallet_features_df = window_wallet_features_df.add_suffix(f'_w{i}')
    training_data_df = training_data_df.join(window_wallet_features_df, how='left')


# Snapshot taken before clustering features are appended in a later cell
base_training_data_df = training_data_df.copy()
base_training_data_df.describe()

In [None]:
# Same export call as earlier in the notebook, repeated here for convenience
u.export_code(code_directories=['wallet_features'])

In [None]:
# Summary statistics of the training-period profits frame
training_profits_df.describe()

In [None]:
# Generate features for the full training dataset
# (stand-alone repeat of the first step of the feature generation cell;
# wallet_cohort must already exist in the kernel)
training_wallet_features_df = wf.calculate_wallet_features(training_profits_df, market_indicators_data_df,
                                                           transfers_sequencing_df, wallet_cohort)

In [None]:
# window_profits_df is the loop variable left over from the per-window feature
# loop, i.e. the last training window that loop processed.
window_profits_df.describe()

In [None]:
# Recompute features for the leftover window_profits_df (the last window of the
# per-window loop), without the suffixing and joining done inside that loop.
window_wallet_features_df = wf.calculate_wallet_features(window_profits_df, market_indicators_data_df,
                                                            transfers_sequencing_df, wallet_cohort)


In [None]:
# Working copy of the last window's profits, so the steps below do not modify window_profits_df
profits_df2 = window_profits_df.copy()

In [None]:
# Step-by-step build of the wallet features frame, one feature family at a time,
# so each intermediate frame can be inspected.
# NOTE(review): this appears to mirror what wf.calculate_wallet_features does
# internally — confirm before relying on the two staying in sync.
[importlib.reload(module) for module in modules]
wallets_config.reload()


# Fresh working copy (same assignment as the previous cell) so re-running this
# cell always starts from the unmodified window data
profits_df2 = window_profits_df.copy()

# Create a DataFrame with all wallets that should exist
wallet_features_df = pd.DataFrame(index=wallet_cohort)
wallet_features_df.index.name = 'wallet_address'

# Trading features (inner join, custom fill)
# profits_df2 is rebound to the output of add_cash_flow_transfers_logic, so every
# later feature family in this cell receives that transformed frame.
profits_df2 = wtf.add_cash_flow_transfers_logic(profits_df2)
trading_features = wtf.calculate_wallet_trading_features(profits_df2)
trading_features = wtf.fill_trading_features_data(trading_features, wallet_cohort)
wallet_features_df = wallet_features_df.join(trading_features, how='inner')

# Market timing features (fill zeros)
# Only the columns contributed by this join are filled; other columns keep their NaNs.
timing_features = wmt.calculate_market_timing_features(profits_df2, market_indicators_data_df)
wallet_features_df = wallet_features_df.join(timing_features, how='left')\
    .fillna({col: 0 for col in timing_features.columns})

# Market cap features (fill zeros)
market_features = wmc.calculate_market_cap_features(profits_df2, market_indicators_data_df)
wallet_features_df = wallet_features_df.join(market_features, how='left')\
    .fillna({col: 0 for col in market_features.columns})

# Transfers features (fill -1)
transfers_features = wts.calculate_transfers_sequencing_features(profits_df2, transfers_sequencing_df)
wallet_features_df = wallet_features_df.join(transfers_features, how='left')\
    .fillna({col: -1 for col in transfers_features.columns})

# Performance features (inner join, no fill)
# Computed from the joined features frame itself. 'invested' and 'net_gain' are
# dropped from the result before joining, presumably because wallet_features_df
# already has them from the trading features — TODO confirm.
performance_features = wp.calculate_performance_features(wallet_features_df)
wallet_features_df = wallet_features_df.join(
    performance_features.drop(['invested', 'net_gain'], axis=1),
    how='inner'
)

In [None]:
# Guard: no wallet may have a negative invested amount; report how many do.
if trading_features['invested'].lt(0).any():
    raise ValueError(f"Found {len(trading_features[trading_features['invested']<0])} wallets "
                     "with negative invested values.")

In [None]:
# Show every profits row belonging to one wallet address
w = 33872418
profits_df2.loc[profits_df2['wallet_address'].eq(w)]

In [None]:
# Display the trading features frame built in the step-by-step cell above
trading_features

In [None]:
# Work on a copy so the features frame itself is left untouched
wallets_df = wallet_features_df.copy()

# Tunables: the winsorization cutoff comes from config; epsilon is a small
# offset added to the invested amount before taking log10
returns_winsorization = wallets_config['modeling']['returns_winsorization']
epsilon = 1e-10

# Only the two dollar columns are needed; rounded to 6 decimals
metrics_df = wallets_df[['invested','net_gain']].copy().round(6)

# Base return: net gain per dollar invested, defined as 0 where nothing was invested
metrics_df['return'] = np.where(
    metrics_df['invested'].abs().eq(0),
    0,
    metrics_df['net_gain'].div(metrics_df['invested']),
)

# Winsorize the return only when a positive cutoff is configured
if returns_winsorization > 0:
    metrics_df['return'] = u.winsorize(metrics_df['return'],returns_winsorization)

# Risk-Adjusted Dollar Return: net gain scaled by (1 + log10 of the amount invested)
metrics_df['risk_adj_return'] = metrics_df['net_gain'].mul(
    np.log10(metrics_df['invested'].add(epsilon)).add(1)
)

# # Normalize returns
# metrics_df['norm_return'] = (metrics_df['return'] - metrics_df['return'].min()) / \
#     (metrics_df['return'].max() - metrics_df['return'].min())

# # Normalize logged investments
# log_invested = np.log10(metrics_df['invested'] + epsilon)
# metrics_df['norm_invested'] = (log_invested - log_invested.min()) / \
#     (log_invested.max() - log_invested.min())

# # Performance score
# metrics_df['performance_score'] = (0.6 * metrics_df['norm_return'] +
#                                     0.4 * metrics_df['norm_invested'])

# # Log-weighted return
# metrics_df['log_weighted_return'] = metrics_df['return'] * \
#     np.log10(metrics_df['invested'] + epsilon)

# # Hybrid score (combining absolute and relative performance)
# max_gain = metrics_df['net_gain'].abs().max()
# metrics_df['norm_gain'] = metrics_df['net_gain'] / max_gain
# metrics_df['hybrid_score'] = (metrics_df['norm_gain'] +
#                             metrics_df['norm_return']) / 2

# # Size-adjusted rank
# # Create mask for zero values
# zero_mask = metrics_df['invested'] == 0

# # Create quartiles series initialized with 'q0' for zero values
# quartiles = pd.Series('q0', index=metrics_df.index)

# # Calculate quartiles for non-zero values
# non_zero_quartiles = pd.qcut(metrics_df['invested'][~zero_mask],
#                             q=4,
#                             labels=['q1', 'q2', 'q3', 'q4'])

# # Assign the quartiles to non-zero values
# quartiles[~zero_mask] = non_zero_quartiles

# # Calculate size-adjusted rank within each quartile
# metrics_df['size_adjusted_rank'] = metrics_df.groupby(quartiles)['return'].rank(pct=True)


# # Clean up intermediate columns
# cols_to_drop = ['norm_return', 'norm_invested', 'norm_gain']
# metrics_df = metrics_df.drop(columns=[c for c in cols_to_drop
#                                     if c in metrics_df.columns])



In [None]:
# Refresh local modules and the wallets config before clustering
[importlib.reload(module) for module in modules]
wallets_config.reload()

# Append clustering features based on all numeric features in the base training data
# Starting from base_training_data_df (not training_data_df) makes this cell safe
# to re-run: cluster columns are never stacked on top of a previous run's output.
cluster_features = wcl.create_basic_cluster_features(base_training_data_df)
training_data_df = base_training_data_df.join(cluster_features, how='inner')



### join target variable to training data

In [None]:
# Refresh local modules and the wallets config before building the modeling frame
[importlib.reload(module) for module in modules]
wallets_config.reload()

# NOTE(review): modeling_profits_df is only assigned in commented-out code in the
# "retrieve datasets" cell; it must already exist in the kernel for this to run.

# Clean inactive wallets from modeling period data
modeling_wallets_df = wo.filter_modeling_period_wallets(modeling_profits_df)

# Generate target variables
target_vars_df = wp.calculate_performance_features(modeling_wallets_df)

# Merge training data and target variables
# Only the configured target column is joined; the inner join keeps wallets
# present in both the training features and the modeling-period targets.
modeling_df = training_data_df.join(target_vars_df[wallets_config['modeling']['target_variable']],
                                    how='inner')


## Wallet Modeling

### build model

In [None]:
# Refresh local modules and the wallets config before training
[importlib.reload(module) for module in modules]
wallets_config.reload()

# Create an experiment instance
experiment = wme.WalletModel(wallets_config)

# Run the experiment and get results
# model_results is a mapping; later cells read its 'pipeline', 'X_test',
# 'y_test' and 'y_pred' entries.
model_results = experiment.run_experiment(modeling_df)

# Extract the trained model
# (the pipeline step registered under the name 'regressor')
model = model_results['pipeline'].named_steps['regressor']

### assess model performance

In [None]:
### save model artifacts
[importlib.reload(module) for module in modules]
wallets_config.reload()

# Generate and save all model artifacts
# NOTE(review): validation_profits_df is only assigned in commented-out code in
# the "retrieve datasets" cell. wallet_scores_df and coin_validation_df returned
# here are reassigned by the coin-level cell further down.
model_id, evaluator, wallet_scores_df, coin_validation_df = wmr.generate_and_save_model_artifacts(
    model_results=model_results,
    validation_profits_df=validation_profits_df,
    base_path='../wallet_modeling'
)
# Audible cue that the artifact generation call has returned
u.play_notification()

# Print results
evaluator.plot_evaluation()

### Validation period assessments

In [None]:
# Refresh local modules and the wallets config before the validation analysis
[importlib.reload(module) for module in modules]
wallets_config.reload()

# Compare the model's test-set predictions with validation-period outcomes.
# Returns a per-wallet frame and a bucketed summary; only the latter is displayed.
wallet_performance_df, bucketed_performance_df = wiv.calculate_validation_metrics(
    X_test=model_results['X_test'],
    y_pred=model_results['y_pred'],
    validation_profits_df=validation_profits_df,
)

bucketed_performance_df

## coin performance predictions

### create coin_validation_df with metrics and returns

In [None]:
# Refresh local modules and the wallets config before the coin-level roll-up
[importlib.reload(module) for module in modules]
wallets_config.reload()


# Consolidate wallet scores at the coin level
# Scores are the test-set predictions, indexed by the y_test index.
# This rebinds the wallet_scores_df returned by the artifacts cell.
wallet_scores_df = pd.DataFrame({'score': model_results['y_pred']}, index=model_results['y_test'].index)
coin_wallet_metrics_df = wicf.calculate_coin_metrics_from_wallet_scores(validation_profits_df, wallet_scores_df)

# Calculate coin performance during the validation period
coin_performance_df = wicf.calculate_coin_performance(market_data_df,
                                                     wallets_config['training_data']['validation_period_start'],
                                                     wallets_config['training_data']['validation_period_end'])

# Join aggregated wallet metrics with actual coin performance
# (inner join on the index: only coins present in both frames are kept; this
# rebinds the coin_validation_df returned by the artifacts cell)
coin_validation_df = coin_wallet_metrics_df.join(coin_performance_df, how='inner')

### plotting coin feature performance vs market cap

In [None]:
# Refresh local modules and the wallets config before plotting
[importlib.reload(module) for module in modules]
wallets_config.reload()


# Get the analysis results
# NOTE(review): top_n is hard-coded to 10 here, while other cells read it from
# wallets_config['coin_forecasting']['top_n'] — confirm the difference is intended.
segment_results, summary_df = wicf.analyze_market_cap_segments(
    coin_validation_df,
    top_n=10
)

# Or create the visualizations
wicf.plot_segment_heatmap(summary_df)
# wicf.plot_metric_consistency(summary_df)  # Optional secondary visualization


### coin performance of top n for each bucket

In [None]:

# Run analysis
# Selection size and market cap bounds all come from the coin_forecasting config
# section. These three names are also read by the top-coins listing cell in the
# Junkyard section.
top_n = wallets_config['coin_forecasting']['top_n']
max_market_cap = wallets_config['coin_forecasting']['max_market_cap']
min_market_cap = wallets_config['coin_forecasting']['min_market_cap']

metric_top_coin_performance_df = wicf.validate_coin_performance(coin_validation_df,top_n,
                                                                max_market_cap, min_market_cap)

metric_top_coin_performance_df

### compare performance of high vs low score coins

In [None]:
# Refresh local modules and the wallets config, then print the project's
# performance analysis for the coin validation frame
[importlib.reload(module) for module in modules]
wallets_config.reload()

wicf.print_performance_analysis(coin_validation_df)

## Junkyard

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# Relate per-coin metrics to coin returns
def analyze_coin_metrics(df, metrics_of_interest=None):
    """
    Analyze relationships between coin metrics and returns

    Params:
    - df (DataFrame): one row per coin. Must contain a 'coin_return' column and
        every column named in metrics_of_interest.
    - metrics_of_interest (list of str, optional): metric columns to analyze.
        Defaults to the standard wallet-score and market metrics listed below.

    Returns:
    - results (dict) with keys:
        'correlations': {metric: correlation with coin_return}, ordered by
            descending absolute correlation
        'comparison_stats': {metric: {'positive_mean', 'negative_mean',
            'difference', 'p_value'}} comparing coins with coin_return > 0
            against coins with coin_return <= 0
        'success_indicators': the subset of comparison_stats whose mean
            difference exceeds 10% of the magnitude of the negative-return mean
            and whose t-test p-value is below 0.05
    """
    if metrics_of_interest is None:
        metrics_of_interest = [
            'weighted_avg_score',
            'composite_score',
            'score_confidence',
            'top_wallet_balance_pct',
            'top_wallet_count_pct',
            'total_wallets',
            'avg_wallet_balance',
            'market_cap'
        ]

    # Correlation of each metric with coin_return
    correlations = {
        metric: df[metric].corr(df['coin_return'])
        for metric in metrics_of_interest
    }

    # Sort correlations by absolute value, strongest first
    correlations_sorted = dict(sorted(correlations.items(),
                                      key=lambda item: abs(item[1]),
                                      reverse=True))

    # Split coins by the sign of their return (zero counts as negative)
    positive_returns = df[df['coin_return'] > 0]
    negative_returns = df[df['coin_return'] <= 0]

    comparison_stats = {}
    for metric in metrics_of_interest:
        # Drop missing values up front so the t-test sees the same observations
        # that .mean() uses (pandas skips NaN; ttest_ind would return NaN instead)
        pos_values = positive_returns[metric].dropna()
        neg_values = negative_returns[metric].dropna()
        pos_mean = pos_values.mean()
        neg_mean = neg_values.mean()

        # Two-sample t-test between the two groups
        _, p_value = stats.ttest_ind(pos_values, neg_values)

        comparison_stats[metric] = {
            'positive_mean': pos_mean,
            'negative_mean': neg_mean,
            'difference': pos_mean - neg_mean,
            'p_value': p_value
        }

    # Identify potential success indicators.
    # The size threshold uses the magnitude of the negative-return mean: with the
    # raw (signed) mean, a negative mean made the threshold negative and every
    # statistically significant metric passed regardless of effect size.
    success_indicators = {
        metric: metric_stats for metric, metric_stats in comparison_stats.items()
        if (abs(metric_stats['difference']) > 0.1 * abs(metric_stats['negative_mean']) and
            metric_stats['p_value'] < 0.05)
    }

    return {
        'correlations': correlations_sorted,
        'comparison_stats': comparison_stats,
        'success_indicators': success_indicators
    }

# Report the analysis results as text
def print_analysis_results(results):
    """
    Write a plain-text report of the analysis results to stdout.

    Params:
    - results (dict): must provide three mappings:
        'correlations': {metric: correlation}
        'comparison_stats': {metric: dict with 'positive_mean',
            'negative_mean', 'difference' and 'p_value'}
        'success_indicators': {metric: dict with 'difference' and 'p_value'}

    Returns:
    - None. Entries are printed in each mapping's iteration order, with numbers
        shown to four decimal places.
    """
    correlations = results['correlations']
    comparison_stats = results['comparison_stats']
    success_indicators = results['success_indicators']

    # Section 1: one aligned line per metric correlation
    print("\n=== Correlation Analysis ===")
    print("\nCorrelations with coin return (sorted by strength):")
    for metric in correlations:
        corr = correlations[metric]
        print(f"{metric:25} : {corr:0.4f}")

    # Section 2: group means, their difference and the p-value for every metric
    print("\n=== Positive vs Negative Returns Analysis ===")
    print("\nMetrics comparison for positive vs negative returns:")
    for metric in comparison_stats:
        stats = comparison_stats[metric]
        print(f"\n{metric}:")
        print(f"  Positive returns mean: {stats['positive_mean']:0.4f}")
        print(f"  Negative returns mean: {stats['negative_mean']:0.4f}")
        print(f"  Difference: {stats['difference']:0.4f}")
        print(f"  P-value: {stats['p_value']:0.4f}")

    # Section 3: only the metrics that were flagged as success indicators
    print("\n=== Strong Success Indicators ===")
    print("\nMetrics showing significant difference between positive and negative returns:")
    for metric in success_indicators:
        stats = success_indicators[metric]
        print(f"\n{metric}:")
        print(f"  Mean difference: {stats['difference']:0.4f}")
        print(f"  P-value: {stats['p_value']:0.4f}")


# Run the analysis
def main(csv_path='coin_wallet_metrics.csv'):
    """
    Load coin metrics from a CSV file, analyze them and print the report.

    Params:
    - csv_path (str or Path): location of the coin metrics CSV. Defaults to
        'coin_wallet_metrics.csv' in the working directory.

    Visualizations are produced only when a create_visualizations(df) function
    exists in the notebook namespace. None is defined in this notebook, and
    calling it unconditionally made every run end in a NameError after the
    report had been printed.
    """
    # Read the data
    df = pd.read_csv(csv_path)

    # Run analysis
    results = analyze_coin_metrics(df)

    # Print results
    print_analysis_results(results)

    # Create visualizations (optional: skipped when the plotting helper is absent)
    visualize = globals().get('create_visualizations')
    if callable(visualize):
        visualize(df)
    else:
        print("\nSkipping visualizations: create_visualizations() is not defined.")

# NOTE(review): inside a notebook kernel __name__ is normally "__main__", so this
# guard does not prevent execution — running the cell calls main() immediately,
# which reads 'coin_wallet_metrics.csv' from the working directory.
if __name__ == "__main__":
    main()

In [None]:
# NOTE(review): returns, predictions and winsorization_cutoff are not assigned
# anywhere in the visible notebook; they must already exist in the kernel.

# Winsorize the returns (apply caps to the top n % of values)
returns_winsorized = u.winsorize(returns, winsorization_cutoff)

# Calculate average return across all data
average_return = np.mean(returns_winsorized)

# Merge datasets
df = pd.DataFrame({'predictions': predictions, 'returns': returns_winsorized})

# Optimal performance: rank by the actual returns, best first
df_sorted = df.sort_values(by='returns', ascending=False)
cumulative_best_returns = np.cumsum(df_sorted['returns'])
cumulative_best_avg_returns = df_sorted['returns'].expanding().mean()

# Modeled performance: rank by the model score, highest first
df_sorted = df.sort_values(by='predictions', ascending=False)
cumulative_model_returns = np.cumsum(df_sorted['returns'])
cumulative_model_avg_returns = df_sorted['returns'].expanding().mean()

In [None]:
# Display the predictions/returns frame built in the previous cell
df

In [None]:
# Display the cumulative returns obtained when ranking by model score
cumulative_model_returns

In [None]:
# Same analysis as the "coin performance of top n for each bucket" cell, with a
# module/config reload first so edited code and config values take effect.
[importlib.reload(module) for module in modules]
wallets_config.reload()

# Run analysis
top_n = wallets_config['coin_forecasting']['top_n']
max_market_cap = wallets_config['coin_forecasting']['max_market_cap']
min_market_cap = wallets_config['coin_forecasting']['min_market_cap']

metric_top_coin_performance_df = wicf.validate_coin_performance(coin_validation_df,top_n,
                                                                max_market_cap, min_market_cap)

metric_top_coin_performance_df

In [None]:
# Pick up any code/config edits first
list(map(importlib.reload, modules))
wallets_config.reload()

# Column used to rank the coins, taken from the coin_forecasting config
sort_column = wallets_config['coin_forecasting']['sort_method']

# List the coins that would have been picked at the start of the validation period:
# keep only coins whose filled market cap lies within [min_market_cap, max_market_cap]
# (both bounds inclusive; min_market_cap, max_market_cap and top_n come from an earlier cell)
top_coins_df = coin_validation_df.loc[
    coin_validation_df['market_cap_filled'].le(max_market_cap)
    & coin_validation_df['market_cap_filled'].ge(min_market_cap)
].copy()

top_coins_df.sort_values(by=sort_column, ascending=False).head(top_n)

## Tests failing

In [None]:
"""
Test the clean_profits_df function to ensure wallets with excessive inflows
are correctly excluded and logged.
"""

# Hardcoded test data for profits_df
# Bound to its own name rather than `profits_df`: the notebook's real dataset
# lives in the global `profits_df`, and overwriting it with this 7-row fixture
# would silently corrupt every pipeline cell that is run afterwards.
inflows_test_profits_df = pd.DataFrame({
    'coin_id': ['BTC', 'ETH', 'BTC', 'ETH', 'LTC', 'BTC', 'ETH'],
    'wallet_address': ['wallet1', 'wallet1', 'wallet2', 'wallet2', 'wallet2',
                        'wallet3', 'wallet3'],
    'date': pd.date_range(start='2023-01-01', periods=7),
    'usd_inflows_cumulative': [10000, 8000, 2000, 1500, 1500, 500, 250]
})

# Hardcoded data cleaning config
data_cleaning_config = {
    'max_wallet_inflows': 15000  # Threshold for total inflows
}

# Call the function
cleaned_df, exclusions_logs_df = dr.clean_profits_df(inflows_test_profits_df, data_cleaning_config)

# Expected cleaned DataFrame
# wallet1's inflows total 18000 (10000 + 8000), above the 15000 threshold, so
# only the wallet2 and wallet3 rows should remain.
expected_cleaned_df = (
    inflows_test_profits_df[inflows_test_profits_df['wallet_address'].isin(['wallet2', 'wallet3'])]
    .reset_index(drop=True)
    .sort_values(['coin_id','wallet_address','date']))

# Expected exclusions DataFrame
expected_exclusions = pd.DataFrame({
    'wallet_address': ['wallet1'],
    'inflows_exclusion': [True]
})

# Assertions
# Values are compared positionally, so row and column order both matter.
assert len(cleaned_df) == len(expected_cleaned_df)
assert np.array_equal(cleaned_df.values, expected_cleaned_df.values)

assert len(exclusions_logs_df) == len(expected_exclusions)
assert np.array_equal(exclusions_logs_df.values, expected_exclusions.values)

# Check inflows in the cleaned DataFrame
# (2000 + 1500 + 1500 for wallet2, plus 500 + 250 for wallet3)
assert cleaned_df['usd_inflows_cumulative'].sum() == 5750

In [None]:
# Actual output of dr.clean_profits_df from the test cell, for comparison with the expectation below
cleaned_df

In [None]:
# Expected frame, in the same sort order the test cell already applied when building it
expected_cleaned_df.sort_values(['coin_id','wallet_address','date'])