In [1]:
import pandas as pd
import statistics
import import_ipynb
from config import *
from data_preparation import *
from data_exploration import *
from labeling import *
from model_training import *
from label_exploration import *
from model_evaluation import *
from make_visualizations import *
import pickle
import numpy as np
from feature_engineering import *

importing Jupyter notebook from config.ipynb
importing Jupyter notebook from data_preparation.ipynb
importing Jupyter notebook from data_exploration.ipynb
importing Jupyter notebook from labeling.ipynb
importing Jupyter notebook from model_training.ipynb
importing Jupyter notebook from label_exploration.ipynb
importing Jupyter notebook from model_evaluation.ipynb
importing Jupyter notebook from make_visualizations.ipynb
importing Jupyter notebook from feature_engineering.ipynb


In [2]:
def main():
    
    """
    Main function to execute the script.
    """
    
    # Run time configuration
    prepare_data = False
    visualize_price_development = False
    label_data = False
    visualize_labels = False
    train_model = True
    explore_label_distribution = False
    evaluate_ml_metrics = False
    evaluate_financial_metrics = False
    
    
    
    ### Data Preparation ###
    
    if prepare_data:
        
        #Dictionary as data structure with coin name as key which stores the according dataframe 
        daily_crypto_dict = {}
        weekly_crypto_dict = {}
    
        #reading the csv files of crypto currencies for daily and weekly directories
        daily_dict = read_dir_of_csv(daily_crypto_dict, directory_daily)
        weekly_dict = read_dir_of_csv(weekly_crypto_dict, directory_weekly)
        
        #drop rows where no data for prices available (crypto currency is too young)
        daily_dict = clean_dfs_prices(daily_dict)
        weekly_dict = clean_dfs_prices(weekly_dict)
        
        describe_dfs(daily_dict)
        print("-"*50)
        describe_dfs(weekly_dict)
        
        #drop rows where no data for prices available (crypto currency is too young)
        daily_dict = convert_timestamps(daily_dict)
        weekly_dict = convert_timestamps(weekly_dict)
    
        #assign time horizon for each data point according to the window that was defined in the config
        daily_dict = assign_horizons(daily_dict, delta)
        weekly_dict = assign_horizons(weekly_dict, delta)
        
        #allign data tails according to the configurated time window
        daily_dict = allign_all_tails(daily_dict, window_in_days)
        weekly_dict = allign_all_tails(weekly_dict, window_in_days)
        
        if visualize_price_development:
            visualize_price_development_for_timeframe(daily_dict["Ethereum"], 30, "visualizations/price_development_ethereum.png")
        
        save_dictionary("dump_dictionaries/daily_dict.pkl", daily_dict)
        save_dictionary("dump_dictionaries/weekly_dict.pkl", weekly_dict)
        
    if label_data:
        
        if prepare_data == False:
            
            daily_dict = load_dictionary("dump_dictionaries/daily_dict.pkl")
            weekly_dict = load_dictionary("dump_dictionaries/weekly_dict.pkl")
        
        # Label the currencies according to the triple barrier method
        daily_dict = apply_tbm(daily_dict, volatility_delta)
        weekly_dict = apply_tbm(weekly_dict, volatility_delta)
        
        # Label the currencies according to the fixed horizon method
        daily_dict = apply_fixed_time_horizon(daily_dict, threshold)
        weekly_dict = apply_fixed_time_horizon(weekly_dict, threshold)
        
        #Assign relative returns within the given horizon
        daily_dict = assign_relative_returns(daily_dict)
        weekly_dict = assign_relative_returns(weekly_dict)

        # Calculating and storing the market means and median on a daily and weekly base each
        daily_mean_returns, daily_median_returns = calculate_mean_median_market_return(daily_dict)
        weekly_mean_returns, weekly_median_returns = calculate_mean_median_market_return(weekly_dict)
        
        # Apply excess over mean and median labeling
        daily_dict = assign_excess_over_mean_median_label(daily_dict, daily_mean_returns, daily_median_returns)
        weekly_dict = assign_excess_over_mean_median_label(weekly_dict, weekly_mean_returns, weekly_median_returns)
        
        #Assign tail sets
        daily_dict = assign_tail_sets(daily_dict)
        weekly_dict = assign_tail_sets(weekly_dict)
        
        #Assign trend scanning
        daily_dict = assign_trend_scanning(daily_dict)
        weekly_dict = assign_trend_scanning(weekly_dict)
        
        #Assign matrix flag labeling
        daily_dict = assign_matrix_flags(daily_dict)
        weekly_dict = assign_matrix_flags(weekly_dict)
        
        save_dictionary("dump_dictionaries/daily_dict.pkl", daily_dict)
        save_dictionary("dump_dictionaries/weekly_dict.pkl", weekly_dict)
        
    if explore_label_distribution:
        
        if label_data == False:
            daily_dict = load_dictionary("dump_dictionaries/daily_dict.pkl")
            weekly_dict = load_dictionary("dump_dictionaries/weekly_dict.pkl")
        
        # explore how labels are distributed
        label_distibution_daily = check_label_distribution(daily_dict)
        label_distibution_weekly = check_label_distribution(weekly_dict)
        label_distibution_daily.to_excel("raw_results/label_distribution_daily.xlsx")
        label_distibution_weekly.to_excel("raw_results/label_distribution_weekly.xlsx")
        
        explore_and_visualize_overlap(daily_dict, label_columns,"visualizations/label_overlap", "daily")
        explore_and_visualize_overlap(weekly_dict, label_columns,"visualizations/label_overlap", "weekly")
        
        # Merge all currencies into one dataframe so that the model can be trained on the crypto market instead of particular currencies
        explore_crypto_market_daily = merge_currencies(daily_dict)
        explore_crypto_market_weekly = merge_currencies(weekly_dict)
        
        daily_overlaps_df = explore_label_overlap(explore_crypto_market_daily, label_columns)
        weekly_overlaps_df = explore_label_overlap(explore_crypto_market_weekly, label_columns)
        
        visualize_label_overlap(daily_overlaps_df, "market", "visualizations/label_overlap", "daily")
        visualize_label_overlap(daily_overlaps_df, "market", "visualizations/label_overlap", "weekly")     
        
        
    if visualize_labels:
        if label_data == False:
            daily_dict = load_dictionary("dump_dictionaries/daily_dict.pkl")
            weekly_dict = load_dictionary("dump_dictionaries/weekly_dict.pkl")
        
        plot_trend_labels(daily_dict["Ethereum"], label_columns)
        
        
    if train_model:
        
        if label_data == False:
            daily_dict = load_dictionary("dump_dictionaries/daily_dict.pkl")
            weekly_dict = load_dictionary("dump_dictionaries/weekly_dict.pkl")
            
        # Feature engineering
        daily_dict = add_features(daily_dict)
        weekly_dict = add_features(weekly_dict)
        check_stationarity(daily_dict["Bitcoin"])

        sdgsf
        
        # Merge all currencies into one dataframe so that the model can be trained on the crypto market instead of particular currencies
        crypto_market_daily = merge_currencies(daily_dict)
        crypto_market_weekly = merge_currencies(weekly_dict)
        
        
        # Train lgbm classifier based on the previously assigned labels
        trained_currencies_daily = apply_training(crypto_market_daily)
        trained_currencies_weekly = apply_training(crypto_market_weekly)
        
        
        # Seperate the training results based on currencies
        currencies_results_daily = split_training_results_in_currencies(trained_currencies_daily)
        currencies_results_weekly = split_training_results_in_currencies(trained_currencies_weekly)
        
        
        save_dictionary("dump_dictionaries/trained_currencies_daily.pkl", trained_currencies_daily)
        save_dictionary("dump_dictionaries/trained_currencies_weekly.pkl", trained_currencies_weekly)
        save_dictionary("dump_dictionaries/currencies_results_daily.pkl", currencies_results_daily)
        save_dictionary("dump_dictionaries/currencies_results_weekly.pkl", currencies_results_weekly)
        
    if evaluate_ml_metrics:
        
        if train_model == False:
            
            trained_currencies_daily = load_dictionary("dump_dictionaries/trained_currencies_daily.pkl")
            trained_currencies_weekly = load_dictionary("dump_dictionaries/trained_currencies_weekly.pkl")
            currencies_results_daily = load_dictionary("dump_dictionaries/currencies_results_daily.pkl")
            currencies_results_weekly = load_dictionary("dump_dictionaries/currencies_results_weekly.pkl")
        
        
        # Evaluate overall model accuracies (across all currencies)
        print("Daily data")
        evaluate_overall_accuracy(trained_currencies_daily)
        print("Weekly data")
        evaluate_overall_accuracy(trained_currencies_weekly)
        
        # Evaluate accuracy for each currency seperately
        evaluate_currency_accuracy(currencies_results_daily)
        evaluate_currency_accuracy(currencies_results_weekly)
        
        evaluate_and_visualize_all_approaches(trained_currencies_daily)

    if evaluate_financial_metrics:
        
        if train_model == False:
            
            trained_currencies_daily = load_dictionary("dump_dictionaries/trained_currencies_daily.pkl")
            trained_currencies_weekly = load_dictionary("dump_dictionaries/trained_currencies_weekly.pkl")
            currencies_results_daily = load_dictionary("dump_dictionaries/currencies_results_daily.pkl")
            currencies_results_weekly = load_dictionary("dump_dictionaries/currencies_results_weekly.pkl")
            
        currencies_results_daily, evaluation_df_daily = evaluate_strategy_financially(currencies_results_daily, periods = "daily")
        currencies_results_weekly, evaluation_df_weekly = evaluate_strategy_financially(currencies_results_weekly, periods = "weekly")
        
        evaluation_df_daily.to_excel("raw_results/daily.xlsx", index=False)
        evaluation_df_weekly.to_excel("raw_results/weekly.xlsx", index=False)
        
        

In [3]:
if __name__ == "__main__":
    main()

Price: non-stationary
Daily Active Addresses: non-stationary
Price Volatility 1w: non-stationary
RSI 1d: non-stationary
Exchange Flow Balance: non-stationary
Percent of Stablecoin Total Supply held by Whales with more than 5 million USD: non-stationary
Whale Transaction Count (>1m USD): non-stationary
Age Consumed: non-stationary
Circulation (90d): non-stationary
The Ratio of Daily On-Chain Transaction Volume in Profit to Loss: non-stationary
Mean Coin Age: non-stationary
Mean Dollar Invested Age: non-stationary
MVRV Long/Short Difference: non-stationary
MVRV Ratio (Z score): non-stationary
Percent of Total Supply in Profit: non-stationary
threshold: non-stationary
tbm_label: non-stationary
excess_over_mean: non-stationary
excess_over_median: non-stationary
tail_sets: non-stationary
trend: non-stationary
matrix_flag: non-stationary
Bitcoin_orientation: non-stationary
Previous_period_change: non-stationary
Ethereum_orientation: non-stationary
Volatility: non-stationary


NameError: name 'sdgsf' is not defined