# Imports

In [26]:
import pandas as pd
import statistics
#%run config.ipynb
import import_ipynb
from config import *
from data_preparation import *
from data_exploration import *
from labeling import *
from model_training import *
from label_exploration import *
from model_evaluation import *

# Data preparation

In [2]:
#Dictionary as data structure with coin name as key which stores the according dataframe 
daily_crypto_dict = {}
weekly_crypto_dict = {}

In [3]:
#reading the csv files of crypto currencies for daily and weekly directories
daily_dict = read_dir_of_csv(daily_crypto_dict, directory_daily)
weekly_dict = read_dir_of_csv(weekly_crypto_dict, directory_weekly)

In [4]:
#drop rows where no data for prices available (crypto currency is too young)
daily_dict = clean_dfs_prices(daily_dict)
weekly_dict = clean_dfs_prices(weekly_dict)

In [5]:
#drop rows where no data for prices available (crypto currency is too young)
daily_dict = convert_timestamps(daily_dict)
weekly_dict = convert_timestamps(weekly_dict)

In [6]:
#assign time horizon for each data point according to the window that was defined in the config
daily_dict = assign_horizons(daily_dict, delta)
weekly_dict = assign_horizons(weekly_dict, delta)

In [7]:
#allign data tails according to the configurated time window
daily_dict = allign_all_tails(daily_dict, window_in_days)
weekly_dict = allign_all_tails(weekly_dict, window_in_days)

# Labeling

In [8]:
# Label the currencies according to the triple barrier method
daily_dict = apply_tbm(daily_dict, volatility_delta)
weekly_dict = apply_tbm(weekly_dict, volatility_delta)

In [9]:
# Label the currencies according to the fixed horizon method
daily_dict = apply_fixed_time_horizon(daily_dict, threshold)
weekly_dict = apply_fixed_time_horizon(weekly_dict, threshold)

In [10]:
#Assign relative returns within the given horizon
daily_dict = assign_relative_returns(daily_dict)
weekly_dict = assign_relative_returns(weekly_dict)

In [11]:
# Calculating and storing the market means and median on a daily and weekly base each
daily_mean_returns, daily_median_returns = calculate_mean_median_market_return(daily_dict)
weekly_mean_returns, weekly_median_returns = calculate_mean_median_market_return(weekly_dict)

In [12]:
# Apply excess over mean and median labeling
daily_dict = assign_excess_over_mean_median_label(daily_dict, daily_mean_returns, daily_median_returns)
weekly_dict = assign_excess_over_mean_median_label(weekly_dict, weekly_mean_returns, weekly_median_returns)

In [13]:
#Assign tail sets
daily_dict = assign_tail_sets(daily_dict)
weekly_dict = assign_tail_sets(weekly_dict)

In [14]:
#Assign trend scanning
daily_dict = assign_trend_scanning(daily_dict)
weekly_dict = assign_trend_scanning(weekly_dict)

In [15]:
#Assign matrix flag labeling
daily_dict = assign_matrix_flags(daily_dict)
weekly_dict = assign_matrix_flags(weekly_dict)

In [16]:
# explore how labels are distributed
check_label_distribution(daily_dict)
check_label_distribution(weekly_dict)

Chainlink
excess over mean
excess_over_mean
-1.0    1237
 1.0    1043
 0.0       1
Name: count, dtype: int64
excess over median
excess_over_median
-1.0    1172
 1.0    1108
 0.0       1
Name: count, dtype: int64
triple Barrier
tbm_label
 1.0    1070
-1.0     876
 0.0     335
Name: count, dtype: int64
fixed time horizon
fth_label
1     1215
-1    1055
0       11
Name: count, dtype: int64
tail sets
tail_sets
 0    1141
 1     570
-1     570
Name: count, dtype: int64
trend scanning
trend
 1.0    1211
 0.0     952
-1.0     118
Name: count, dtype: int64
matrix flag
matrix_flag
 0    1935
 1     202
-1     144
Name: count, dtype: int64
------------------------------
Curve
excess over mean
excess_over_mean
-1.0    735
 1.0    486
 0.0      1
Name: count, dtype: int64
excess over median
excess_over_median
-1.0    733
 1.0    488
 0.0      1
Name: count, dtype: int64
triple Barrier
tbm_label
 1.0    529
-1.0    514
 0.0    179
Name: count, dtype: int64
fixed time horizon
fth_label
-1    638
1  

In [17]:
# Merge all currencies into one dataframe so that the model can be trained on the crypto market instead of particular currencies
crypto_market_daily = merge_currencies(daily_dict)
crypto_market_weekly = merge_currencies(weekly_dict)

In [18]:
# Train lgbm classifier based on the previously assigned labels
trained_currencies_daily = apply_training(crypto_market_daily)
trained_currencies_weekly = apply_training(crypto_market_weekly)

In [19]:
# Evaluate overall model accuracies (across all currencies)
print("Daily data")
evaluate_overall_accuracy(trained_currencies_daily)
print("Weekly data")
evaluate_overall_accuracy(trained_currencies_weekly)

Daily data
excess_over_mean
Training set score: 0.8716
Test set score: 0.5300
------------------------------
excess_over_median
Training set score: 0.8984
Test set score: 0.5150
------------------------------
fixed_time_horizon
Training set score: 0.9026
Test set score: 0.5230
------------------------------
triple_barrier
Training set score: 0.8474
Test set score: 0.4580
------------------------------
trend_scanning
Training set score: 0.8432
Test set score: 0.4915
------------------------------
tail_sets
Training set score: 0.8485
Test set score: 0.4394
------------------------------
matrix_flag
Training set score: 0.9411
Test set score: 0.9184
------------------------------
Weekly data
excess_over_mean
Training set score: 0.9886
Test set score: 0.5351
------------------------------
excess_over_median
Training set score: 0.9974
Test set score: 0.5195
------------------------------
fixed_time_horizon
Training set score: 0.9964
Test set score: 0.5304
------------------------------
tripl

In [20]:
# Seperate the training results based on currencies
currencies_results_daily = split_training_results_in_currencies(trained_currencies_daily)
currencies_results_weekly = split_training_results_in_currencies(trained_currencies_weekly)

In [21]:
# Evaluate accuracy for each currency seperately
evaluate_currency_accuracy(currencies_results_daily)
evaluate_currency_accuracy(currencies_results_weekly)

excess_over_mean
Chainlink
Training score: 0.8508170795993675
Test score: 0.4973958333333333
Polygon
Training score: 0.8688024408848207
Test score: 0.4883116883116883
Curve
Training score: 0.8770883054892601
Test score: 0.6171875
Ribbon
Training score: 0.9114832535885168
Test score: 0.4805194805194805
Uniswap
Training score: 0.8781094527363185
Test score: 0.5104166666666666
Dydx
Training score: 0.90625
Test score: 0.546875
Aave
Training score: 0.8536986301369863
Test score: 0.5376623376623376
Lido
Training score: 0.9007299270072993
Test score: 0.5755208333333334
Ethereum
Training score: 0.874835886214442
Test score: 0.5298701298701298
Bitcoin
Training score: 0.8613298337707787
Test score: 0.546875
SHIB
Training score: 0.92
Test score: 0.421875
Pepe
Training score: nan
Test score: 0.6504065040650406
------------------------------
excess_over_median
Chainlink
Training score: 0.8940432261465472
Test score: 0.5286458333333334
Polygon
Training score: 0.8985507246376812
Test score: 0.5506493

  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)


In [22]:
currencies_results_weekly

{'excess_over_mean': {'Chainlink': {'train':                Price  Daily Active Addresses  Price Volatility 1w     RSI 1d  \
   Date                                                                           
   2017-09-14  0.189165              651.571429             0.000084        NaN   
   2017-09-21  0.194586             1087.142857             0.091253  55.853535   
   2017-09-28  0.397361             1557.142857             0.262067  72.561373   
   2017-10-05  0.450463              843.000000             0.112177  76.635093   
   2017-10-12  0.366229              757.857143             0.067984  48.110946   
   ...              ...                     ...                  ...        ...   
   2022-10-27  7.427645             2069.142857             0.033640  63.714620   
   2022-11-03  6.224503             4232.571429             0.050293  44.942018   
   2022-11-10  6.187089             8174.571429             0.101420  44.604797   
   2022-11-17  6.701307             4610.5714

In [27]:
currencies_results_daily = evaluate_relative_return(currencies_results_daily)
currencies_results_weekly = evaluate_relative_return(currencies_results_weekly)

NameError: name 'evaluate_relative_return' is not defined