In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from numpy import mean
from numpy import std
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from xgboost import XGBRFRegressor

import scipy
from sklearn.model_selection import train_test_split
import ipywidgets as widgets
from sklearn.preprocessing import StandardScaler

import pyarrow.feather as feather

from pickle import dump

# Set up data for model

In [2]:
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Read the training file.
result = pd.read_feather("data/combined_data.feather")

# Add time-series features derived from the epoch-seconds `timestamp` column.
# 'Unnamed: 0' is only a scratch datetime column; it is dropped again below.
result['Unnamed: 0'] = pd.to_datetime(result.timestamp, unit='s')

# `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0;
# `dt.isocalendar().week` is its replacement. It returns a nullable UInt32
# column, so cast back to the plain int64 dtype that `.dt.week` produced.
# NOTE(review): the cast assumes no row has a missing timestamp - confirm.
result['week'] = result['Unnamed: 0'].dt.isocalendar().week.astype('int64')
result['year'] = result['Unnamed: 0'].dt.year
result['month'] = result['Unnamed: 0'].dt.month

result['minute'] = result['Unnamed: 0'].dt.minute
result['dayofweek'] = result['Unnamed: 0'].dt.dayofweek
result['day'] = result['Unnamed: 0'].dt.day
result = result.drop(['Unnamed: 0', 'index'], axis=1)

# Read the asset file.
asset_details = pd.read_csv('data/asset_details.csv')

# Asset id dictionary: Asset_ID -> Asset_Name.
asset_info = dict(zip(asset_details.Asset_ID, asset_details.Asset_Name))

# Coin drop dictionary: for each coin being modelled, the coins whose columns
# are removed from the feature frame before training. The string 'DEFAULT'
# means "use the frame as-is, drop nothing".
coin_dict = {
    'Bitcoin Cash': 'DEFAULT',
    'Binance Coin': ['Bitcoin', 'EOS.IO', 'Dogecoin'],
    'Bitcoin': ['Dogecoin', 'Ethereum Classic', 'Cardano'],
    'EOS.IO': ['Bitcoin Cash', 'Bitcoin', 'Ethereum'],
    'Ethereum Classic': ['Bitcoin Cash'],
    'Ethereum': ['Stellar', 'Ethereum Classic'],
    'Litecoin': ['Dogecoin', 'Bitcoin Cash'],
    'Monero': ['TRON', 'Cardano'],
    'TRON': ['Stellar'],
    'Stellar': ['TRON'],
    'Cardano': ['IOTA', 'Bitcoin Cash', 'Monero'],
    'IOTA': 'DEFAULT',
    'Maker': ['Dogecoin', 'Stellar'],
    'Dogecoin': ['Ethereum Classic'],
}

#create custom dataframe function
def custom_dataframe(df, coin, param_list):
    """Return ``df`` without the columns that belong to the coins in ``param_list``.

    A column belongs to a coin when the coin's name is a substring of the
    column name. 'Bitcoin' and 'Ethereum' are special-cased: for them, column
    names containing a space are kept, which preserves e.g. 'Bitcoin Cash'
    columns when 'Bitcoin' is dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. It is never modified.
    coin : str
        Coin being modelled. Unused; kept so existing calls keep working.
    param_list : list of str, or str
        Names of the coins whose columns are dropped. A bare string is treated
        as a single coin name, except the 'DEFAULT' sentinel, which means
        "drop nothing".

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when there is nothing to process, otherwise a new frame
        without the matched columns.
    """
    if isinstance(param_list, str):
        # Iterating a string would match single characters against the column
        # names and silently drop unrelated columns.
        if param_list == 'DEFAULT':
            return df
        param_list = [param_list]

    custom_dataset = df
    for c in param_list:
        # Match against the columns that are still present. Matching against
        # the original frame raised KeyError whenever two entries selected the
        # same column (duplicate or overlapping coin names).
        dropped_coin = [k for k in custom_dataset.columns if c in k]
        if c in ('Bitcoin', 'Ethereum'):
            dropped_coin = [k for k in dropped_coin if ' ' not in k]
        custom_dataset = custom_dataset.drop(dropped_coin, axis=1)
    return custom_dataset

In [3]:
# Display the asset table; its 'Asset_Name' column lists the names to copy into `coin`.
asset_details

Unnamed: 0,Asset_ID,Weight,Asset_Name
0,2,2.397895,Bitcoin Cash
1,0,4.304065,Binance Coin
2,1,6.779922,Bitcoin
3,5,1.386294,EOS.IO
4,7,2.079442,Ethereum Classic
5,6,5.894403,Ethereum
6,9,2.397895,Litecoin
7,11,1.609438,Monero
8,13,1.791759,TRON
9,12,2.079442,Stellar


# MAKE SURE 'coin' IS THE COIN YOU WANT
Copy the name exactly as it appears in the 'Asset_Name' column above.

In [4]:
# Coin to train a model for: selects the 'Target_<coin>' column and is the key looked up in coin_dict.
coin = 'IOTA'

# MAKE SURE 'coin' IS THE COIN YOU WANT 

In [5]:
%%time

# Name of the target column for the selected coin.
target_col = 'Target_{}'.format(coin)

#drop all rows that have no target value for the specific coin
refined_dataset = result.dropna(subset=[target_col])

#fill missing values down
# (`fillna(method="ffill")` is deprecated in recent pandas; `.ffill()` is the equivalent call)
refined_dataset = refined_dataset.ffill()

#drop any rows that still contain missing values after the forward fill
refined_dataset = refined_dataset.dropna()


if coin_dict[coin] == "DEFAULT":
    #use default dataframe
    sample_x = refined_dataset
else:
    #generate custom dataframe (drops the columns of the coins listed for this coin)
    sample_x = custom_dataframe(refined_dataset, coin, coin_dict[coin])


#create X and y values
# Every 'Target*' column is removed from the features; only this coin's target is kept as y.
Targets = [k for k in sample_x.columns if 'Target' in k]
X_values = sample_x.drop(Targets, axis=1)
y_values = sample_x[[target_col]]
# Replace +/-inf with 0 so the scaler does not choke on infinite values.
X_values = X_values.replace([np.inf, -np.inf], 0)

#train test split
# NOTE(review): train_test_split shuffles rows by default; if the rows are
# time-ordered, forward-filled neighbours can land on both sides of the split.
# Confirm this is intended before trusting the score below.
X_train, X_test, y_train, y_test = train_test_split(X_values, y_values, test_size=0.33, random_state=42)

#scale values (fit on the training split only, then apply to the test split)
scaler = StandardScaler()
x_trainScaled = scaler.fit_transform(X_train)
x_testScaled = scaler.transform(X_test)

#save scaler
# Use a context manager so the file handle is flushed and closed deterministically.
with open("scaler_{}.pkl".format(coin), 'wb') as scaler_file:
    dump(scaler, scaler_file)

#initialize model
model = XGBRFRegressor(
    n_estimators=3000,
    max_depth=10,
    verbosity=0,
    booster='dart',
    tree_method='approx',
    subsample=0.52,
    colsample_bytree=0.05,
    min_child_weight=2,
)

#fit model
model.fit(x_trainScaled, y_train)
y_pred = model.predict(x_testScaled)

# Score = Pearson correlation between held-out targets and predictions.
corr, p_val = scipy.stats.pearsonr(y_test[target_col], y_pred)
print("Default Score for {}: ".format(coin), corr, "\n(goal is to get as close to 1.0 as possible)")

#save model
model.save_model("model_{}.json".format(coin))

Default Score for IOTA:  0.19154620022524468 
(goal is to get as close to 1.0 as possible)
CPU times: user 4h 13min 39s, sys: 6min 32s, total: 4h 20min 11s
Wall time: 17min 54s


## Check to make sure the model is in the folder