# Imports

In [1]:
import pandas as pd
import statistics
#%run config.ipynb
import import_ipynb
from config import *
from data_preparation import *
from data_exploration import *
from labeling import *
from model_training import *
from label_exploration import *
from model_evaluation import *

importing Jupyter notebook from config.ipynb
importing Jupyter notebook from data_preparation.ipynb
importing Jupyter notebook from data_exploration.ipynb
importing Jupyter notebook from labeling.ipynb
importing Jupyter notebook from model_training.ipynb
importing Jupyter notebook from label_exploration.ipynb
importing Jupyter notebook from model_evaluation.ipynb


# Data preparation

In [2]:
# One container per sampling frequency: the coin name is the key and the
# corresponding dataframe is the value. Both start out empty.
daily_crypto_dict, weekly_crypto_dict = {}, {}

In [3]:
# Read the cryptocurrency CSV files from the daily and the weekly directory.
# `directory_daily` / `directory_weekly` are not defined in this notebook;
# presumably they come from the star-imported config -- verify.
# NOTE(review): the empty dicts are handed in as the first argument; whether
# read_dir_of_csv fills them in place or returns a new dict is not visible here.
daily_dict = read_dir_of_csv(daily_crypto_dict, directory_daily)
weekly_dict = read_dir_of_csv(weekly_crypto_dict, directory_weekly)

In [4]:
# Drop rows for which no price data is available (the cryptocurrency is too young).
# NOTE(review): the result is rebound to the same name, so re-running this cell
# feeds already-cleaned data back into clean_dfs_prices -- confirm that is harmless.
daily_dict = clean_dfs_prices(daily_dict)
weekly_dict = clean_dfs_prices(weekly_dict)

In [5]:
# Convert the timestamps in the daily and weekly data via convert_timestamps.
# The target type/format is defined by that helper and is not visible here.
daily_dict = convert_timestamps(daily_dict)
weekly_dict = convert_timestamps(weekly_dict)

In [6]:
# Assign a time horizon to each data point according to the window defined in
# the config (`delta`; presumably star-imported from config -- verify).
# The same `delta` is used for the daily and the weekly data.
daily_dict = assign_horizons(daily_dict, delta)
weekly_dict = assign_horizons(weekly_dict, delta)

In [7]:
# Align the data tails according to the configured time window (`window_in_days`).
# The helper is spelled `allign_all_tails` where it is defined, so the call keeps
# that spelling.
daily_dict = allign_all_tails(daily_dict, window_in_days)
weekly_dict = allign_all_tails(weekly_dict, window_in_days)

# Labeling

In [8]:
# Label the currencies according to the triple barrier method.
# `volatility_delta` is passed straight through to apply_tbm; its exact meaning
# is defined by that helper (presumably a volatility look-back window -- confirm).
# The label-distribution output of this notebook reports these labels under
# `tbm_label` with the values -1, 0 and 1.
daily_dict = apply_tbm(daily_dict, volatility_delta)
weekly_dict = apply_tbm(weekly_dict, volatility_delta)

In [9]:
# Label the currencies according to the fixed time horizon method, using the
# configured `threshold` for both the daily and the weekly data.
# The label-distribution output of this notebook reports these labels under
# `fth_label` with the values -1, 0 and 1.
daily_dict = apply_fixed_time_horizon(daily_dict, threshold)
weekly_dict = apply_fixed_time_horizon(weekly_dict, threshold)

In [10]:
# Assign the relative returns within the given horizon to the daily and weekly data.
# The market mean/median calculation in the following cell takes these dicts as input.
daily_dict = assign_relative_returns(daily_dict)
weekly_dict = assign_relative_returns(weekly_dict)

In [11]:
# Calculate and store the market mean and median returns, once for the daily and
# once for the weekly data. The helper returns a (mean, median) pair, which is
# unpacked into two separate names per frequency.
daily_mean_returns, daily_median_returns = calculate_mean_median_market_return(daily_dict)
weekly_mean_returns, weekly_median_returns = calculate_mean_median_market_return(weekly_dict)

In [12]:
# Assign the excess-over-mean and excess-over-median labels, using the market
# mean and median returns calculated for the matching frequency (daily/weekly).
# The label-distribution output of this notebook reports them under
# `excess_over_mean` and `excess_over_median`.
daily_dict = assign_excess_over_mean_median_label(daily_dict, daily_mean_returns, daily_median_returns)
weekly_dict = assign_excess_over_mean_median_label(weekly_dict, weekly_mean_returns, weekly_median_returns)

In [13]:
# Train an LGBM classifier based on the previously assigned labels, separately
# for the daily and the weekly data. The results are consumed by
# evaluate_accuracy further down.
# NOTE(review): no random seed is set anywhere in this notebook; whether
# apply_training seeds the split/model itself is not visible here -- confirm,
# otherwise the reported scores are not reproducible.
trained_currencies_daily = apply_training(daily_dict)
trained_currencies_weekly = apply_training(weekly_dict)

In [15]:
# Explore how the labels are distributed for each currency and labeling method.
# A heading is printed before each call because both calls write to the same
# cell output; without it the daily and the weekly blocks cannot be told apart.
# The headings match the ones used by the accuracy-evaluation cell.
print("Daily data")
check_label_distribution(daily_dict)
print("Weekly data")
check_label_distribution(weekly_dict)

Chainlink
excess over mean
excess_over_mean
-1.0    1160
 1.0    1038
 0.0       1
Name: count, dtype: int64
excess over median
excess_over_median
-1.0    1022
 1.0    1005
 0.0     172
Name: count, dtype: int64
triple Barrier
tbm_label
 1.0    1048
-1.0     846
 0.0     305
Name: count, dtype: int64
fixed time horizon
fth_label
1     1184
-1    1004
0       11
Name: count, dtype: int64
------------------------------
Poylgon
excess over mean
excess_over_mean
-1.0    931
 1.0    741
 0.0      1
Name: count, dtype: int64
excess over median
excess_over_median
-1.0    895
 1.0    777
 0.0      1
Name: count, dtype: int64
triple Barrier
tbm_label
 1.0    720
-1.0    566
 0.0    387
Name: count, dtype: int64
fixed time horizon
fth_label
1     884
-1    781
0       8
Name: count, dtype: int64
------------------------------
Curve
excess over mean
excess_over_mean
-1.0    726
 1.0    481
 0.0      1
Name: count, dtype: int64
excess over median
excess_over_median
-1.0    729
 1.0    478
 0.0    

In [16]:
# Evaluate the train/test accuracies of the trained classifiers, first for the
# daily and then for the weekly data; the printed headings separate the two
# blocks in the shared cell output.
# NOTE(review): in the recorded output every training-set score is 1.0000 while
# the test-set scores are roughly 0.40-0.57, i.e. the classifiers appear to
# memorise the training data. Consider stronger regularisation or a proper
# validation scheme in the training step before drawing conclusions.
print("Daily data")
evaluate_accuracy(trained_currencies_daily)
print("Weekly data")
evaluate_accuracy(trained_currencies_weekly)

Chainlink
excess_over_mean
Training set score: 1.0000
Test set score: 0.4463
excess_over_median
Training set score: 1.0000
Test set score: 0.4189
fixed_time_horizon
Training set score: 1.0000
Test set score: 0.5537
triple_barrier
Training set score: 1.0000
Test set score: 0.4845
------------------------------
Poylgon
excess_over_mean
Training set score: 1.0000
Test set score: 0.4761
excess_over_median
Training set score: 1.0000
Test set score: 0.5096
fixed_time_horizon
Training set score: 1.0000
Test set score: 0.5742
triple_barrier
Training set score: 1.0000
Test set score: 0.4163
------------------------------
Curve
excess_over_mean
Training set score: 1.0000
Test set score: 0.5000
excess_over_median
Training set score: 1.0000
Test set score: 0.4868
fixed_time_horizon
Training set score: 1.0000
Test set score: 0.4570
triple_barrier
Training set score: 1.0000
Test set score: 0.3974
------------------------------
Ribbon
excess_over_mean
Training set score: 1.0000
Test set score: 0.5025