In [1]:
import lightgbm as lgbm
from sklearn.model_selection import TimeSeriesSplit

In [2]:
# Module-level splitter configured for 3 splits; train_model() reads this
# global to obtain its train/test indices.
tss = TimeSeriesSplit(n_splits = 3)

In [3]:
def train_model(df, label_name):
    """Fit a LightGBM classifier on the final fold produced by ``tss``.

    The feature columns listed in ``train_feature_list`` are selected from
    ``df`` and the ``label_name`` column, cast to ``int``, is used as the
    target. The module-level splitter ``tss`` is asked for its splits and only
    the last one is used: an ``LGBMClassifier`` with default parameters is
    fitted on that fold's training rows and then predicts its test rows.

    Parameters
    ----------
    df : DataFrame-like
        Must provide every column in ``train_feature_list`` plus
        ``label_name``. The split is positional (``iloc``), so rows are
        presumably expected in chronological order -- TODO confirm against
        the code that builds ``df``.
    label_name : str
        Name of the target column; its values must be castable to ``int``.

    Returns
    -------
    dict
        Keys ``"X_train"``, ``"X_test"``, ``"y_train"``, ``"y_test"``,
        ``"model"`` (the fitted classifier) and ``"y_pred"`` (the predictions
        for ``X_test``).
    """
    train_feature_list = [
        'Price',
        'Daily Active Addresses',
        'Price Volatility 1w',
        'RSI 1d',
        'Exchange Flow Balance',
        'Percent of Stablecoin Total Supply held by Whales with more than 5 million USD',
        'Whale Transaction Count (>1m USD)',
        'Age Consumed',
        'Circulation (90d)',
        'The Ratio of Daily On-Chain Transaction Volume in Profit to Loss',
        'Mean Coin Age',
        'Mean Dollar Invested Age',
        'MVRV Long/Short Difference',
        'MVRV Ratio (Z score)',
        'Percent of Total Supply in Profit'
    ]

    # define features and target variable
    y = df[label_name].astype('int')
    X = df[train_feature_list]

    # Only the final fold is ever trained on, so pick it out explicitly and
    # slice once instead of slicing every fold and discarding all but the last.
    *_, (train_index, test_index) = tss.split(X)
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    clf = lgbm.LGBMClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    return {
        "X_train": X_train,
        "X_test": X_test,
        "y_train": y_train,
        "y_test": y_test,
        "model": clf,
        "y_pred": y_pred,
    }

In [4]:
def apply_training(dictionary):
    """Run ``train_model`` once per labeling method for every currency.

    ``dictionary`` maps each currency key to the data handed to
    ``train_model``. The result is a two-level dictionary:

    * outer level: the same currency keys as ``dictionary``;
    * inner level: one entry per labeling method, whose value is the
      ``train_model`` output for that method's label column.
    """
    # labeling-method name -> label column passed to train_model
    label_columns = {
        "excess_over_mean": "excess_over_mean",
        "excess_over_median": "excess_over_median",
        "fixed_time_horizon": "fth_label",
        "triple_barrier": "tbm_label",
    }

    return {
        currency: {
            method: train_model(dictionary[currency], column)
            for method, column in label_columns.items()
        }
        for currency in dictionary
    }