In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import time 
import sys 
from hmmlearn import hmm
import pickle
import os
import glob

## Coding MAPE and R^2 Functions

In [2]:
# Function to calculate Mean Absolute Percentage Error 

def mape(predictions, truth): 
    #calculate the absolute value of the MAPE 
    abs_val = np.absolute(predictions - truth)
    div_op = np.divide(abs_val, truth)
    mape_val = np.divide(np.sum(div_op, 0), truth.shape[0])
    return mape_val

## Data Read in Through Separate csv's

In [3]:
import os
import glob

path = '/Users/cho/Desktop/crypto-prediction/individual_series/data'
extension = 'csv'
os.chdir(path)
tickers = glob.glob('*.{}'.format(extension))
print(tickers[:3])

['LTCBUSD.csv', 'EGLDBUSD.csv', 'GALAUSD.csv']


## Testing

## HMM Tuning 

In [4]:
names = []
predicted = []
r = []
mape_vals = []
series_skipped = []

#for t in tickers: 

for t in tickers:
    try:

        table = np.genfromtxt(t, delimiter=',')
        table = table[1:,1:]
        #print(table)
        #table = table[:,:-1]
        #print(table)
        #table = np.reshape(table)
        table = np.flipud(table)


        #establish vectors for calculation 
        predicted_value = np.empty([0,table.shape[1]])

        #likelihood needed for HMM model 
        likelihood = np.empty([0,1])

        #Model metrics for evaluation 
        bic = np.empty([0,1])
        aic = np.empty([0,1])



        #metrics for evaluation
        for i in range(2,15):

            param_vals = i + (i**2)
            dirichlet_params = np.random.randint(1,50,i)

            #initialize model 
            mod = hmm.GaussianHMM(covariance_type = 'full', n_components = i, n_iter = 10**3, tol = 1e-4)
            mod.fit(table[100:,:])

            #sanity check to recount number of epochs
            if mod.monitor_.iter == 10**4:
                sys.exit(1)
                print("Epoch Increased by 1")

            #append values to metrics array of vectors 
            likelihood = np.vstack((likelihood, mod.score(table)))
            bic = np.vstack((bic, -2*mod.score(table) + param_vals * np.log(table.shape[0])))
            aic = np.vstack((aic, -2*mod.score(table) + 2* param_vals))

        #calculate the number of states in the model 
        #opt_states_aic = np.argmin(aic) + 2
        opt_states_bic = np.argmin(bic) + 2
        print(opt_states_bic)

        #print(table)

        pred_val_array = []

        for i in reversed(range(100)): 
            train = table[i+2:,:]
            #print(train.shape)
            test = table[i,:]
            #print(test.shape)
            tot_vals = train.shape[0]

            #model tuning 
            if i == 99: 
                mod = hmm.GaussianHMM(n_components = opt_states_bic, covariance_type = 'full', tol = 1e-4, n_iter = 10**3,  init_params = 'stmc')
            #refactor for prior distribution and retune
            else: 
                mod = hmm.GaussianHMM(n_components = opt_states_bic, covariance_type = 'full', tol = 1e-4, n_iter = 10**3,  init_params = '')
                mod.transmat_ = transmat_retune_prior
                mod.startprob_ = startprob_retune_prior
                mod.means_ = means_retune_prior
                mod.covars_ = covars_retune_prior


            mod.fit(np.flipud(train))

            #values tuned parameters updated
            transmat_retune_prior = mod.transmat_
            startprob_retune_prior = mod.startprob_
            means_retune_prior = mod.means_
            covars_retune_prior = mod.covars_

            #sanity check to recount number of epochs
            if mod.monitor_.iter == 10**4: 
                sys.exit(1)
                print("Epoch Increased by 1")

            #compute likehoods using model params such that we can generate predictions
            counter = 1 
            former_lkhd = []
            current_lkhd = mod.score(np.flipud(train[0:48,:]))

            while counter < tot_vals/48: 
                scoring = mod.score(np.flipud(train[counter:counter + 48,:]))
                former_lkhd = np.append(former_lkhd,scoring)
                counter += 1

            abs_diffs = np.absolute(former_lkhd - current_lkhd)
            min_i = np.argmin(abs_diffs)
            pred_change = train[min_i,:] - train[2 + min_i,:]
            predicted_value = np.vstack((predicted_value, table[i + 2,:] + pred_change))
            #print(predicted_value)
        predicted.append(predicted_value)


        #calculate mape
        mean_average = mape(predicted_value, np.flipud(table[range(100), :]))
        mape_vals.append(mean_average)
        
        names.append(t)
        
    except:
        print("failed csv:", t)
        series_skipped.append(t)
        continue
        

13
14
12
14
10
14
14
13
12
failed csv: HARDBUSD.csv
14
14
13
14
14
failed csv: CHZUSD.csv
14
13
failed csv: BTCSTBUSD.csv
14
14
14
13
13
failed csv: TUSDBUSD.csv
11
12
13
failed csv: GBPBUSD.csv
12
14
14
14
12
failed csv: PSGBUSD.csv
13
8
14
10
14
14
14
failed csv: GHSTBUSD.csv
14
14
14
13
10
14
13
failed csv: BTGBUSD.csv
14
13
14
13
13
failed csv: FIOBUSD.csv
14
13
14
14
failed csv: NMRBUSD.csv
14
failed csv: SPELLUSD.csv
11
14
14
failed csv: STMXBUSD.csv
14
13
14
13
14
7
14
14
11
failed csv: AUCTIONBUSD.csv
13
14
13
6
failed csv: USDPBUSD.csv
14
8
failed csv: HOTBUSD.csv
14
14
14
12
14
13
14
failed csv: DOCKBUSD.csv
14
failed csv: DATABUSD.csv
14
10
failed csv: JUVBUSD.csv
14
failed csv: DEXEBUSD.csv
14
14
13
14
14
13
14
10
failed csv: XVGBUSD.csv
14
14
14
14
14
14
14
13
13
11
12
failed csv: BARBUSD.csv
14
14
14
10
failed csv: QUICKBUSD.csv
14
failed csv: LSKBUSD.csv
14
14
13
12
14
13
14
13
failed csv: DEGOBUSD.csv
14
14
9
failed csv: TRBBUSD.csv
14
14
14
13
14
12
14
11
failed csv: P

In [5]:
names



['LTCBUSD.csv',
 'EGLDBUSD.csv',
 'GALAUSD.csv',
 'MIRBUSD.csv',
 'CELRBUSD.csv',
 'PYRBUSD.csv',
 'POWRUSD.csv',
 'XVSBUSD.csv',
 'BNBBUSD.csv',
 'YFIBUSD.csv',
 'ACMBUSD.csv',
 'EOSUSD.csv',
 'BAKEBUSD.csv',
 'QTUMBUSD.csv',
 'FISBUSD.csv',
 'ARBUSD.csv',
 'MLNUSD.csv',
 'COVALUSD.csv',
 'VETBUSD.csv',
 'ERNBUSD.csv',
 'IOTXBUSD.csv',
 'ELFBUSD.csv',
 'DOTUSDT.csv',
 'LAZIOBUSD.csv',
 'MDTUSD.csv',
 'LINABUSD.csv',
 'SNXBUSD.csv',
 'ANKRBUSD.csv',
 'HIVEBUSD.csv',
 'RENBUSD.csv',
 'BICOBUSD.csv',
 'ATOMBUSD.csv',
 'DARBUSD.csv',
 'POLYBUSD.csv',
 'MATICUSD.csv',
 'DOGEUSD.csv',
 'RARIUSD.csv',
 'MANAUSD.csv',
 'COMPBUSD.csv',
 'INJBUSD.csv',
 'THETABUSD.csv',
 'ZECUSD.csv',
 'DIABUSD.csv',
 'ALGOUSD.csv',
 'AMPUSD.csv',
 'CTSIBUSD.csv',
 'FTTBUSD.csv',
 'CELOBUSD.csv',
 'BADGERBUSD.csv',
 'RENUSD.csv',
 'BALUSD.csv',
 'NKNUSD.csv',
 'OOKIBUSD.csv',
 'BADGERUSD.csv',
 'DYDXBUSD.csv',
 'SOLBUSD.csv',
 'LPTUSD.csv',
 'CROUSD.csv',
 'ZRXBUSD.csv',
 'JSTBUSD.csv',
 'HNTBUSD.csv',
 'BCHUSD

In [6]:
#pwd

In [7]:
from sklearn.metrics import r2_score 

ex_object = [names, predicted, r, mape_vals]
name_array = np.array(ex_object[0])
print(name_array)
index_array = []
r2_array = []
opening_mape = []

for i in name_array: 
    #load indices for the coin pairs that we need 
    idx = np.where(name_array == i)[0][0]
    index_array.append(idx)
    print(i)
    
    #get data frame that we need for coin pairs 
    df = pd.read_csv('{}'.format(i))

    
    #predicted values 
    values = pd.DataFrame(ex_object[1][idx], columns = ['Open', 'High', 'Low', 'Close'])
    predicted_values = values.iloc[:,0]
    
    #actual values
    actual_values = df.iloc[-100:]
    actual_values = actual_values.iloc[:,1]
    
    r2 = r2_score(actual_values,predicted_values)
    r2_array.append(r2)
    
    mape_score = ex_object[3][idx][1]
    opening_mape.append(mape_score)
    
    #r2 scores
    print("name:{}, \n MAPE Open:{}, \n r2 score:{}".format(ex_object[0][idx], ex_object[3][idx][1], r2))

['LTCBUSD.csv' 'EGLDBUSD.csv' 'GALAUSD.csv' 'MIRBUSD.csv' 'CELRBUSD.csv'
 'PYRBUSD.csv' 'POWRUSD.csv' 'XVSBUSD.csv' 'BNBBUSD.csv' 'YFIBUSD.csv'
 'ACMBUSD.csv' 'EOSUSD.csv' 'BAKEBUSD.csv' 'QTUMBUSD.csv' 'FISBUSD.csv'
 'ARBUSD.csv' 'MLNUSD.csv' 'COVALUSD.csv' 'VETBUSD.csv' 'ERNBUSD.csv'
 'IOTXBUSD.csv' 'ELFBUSD.csv' 'DOTUSDT.csv' 'LAZIOBUSD.csv' 'MDTUSD.csv'
 'LINABUSD.csv' 'SNXBUSD.csv' 'ANKRBUSD.csv' 'HIVEBUSD.csv' 'RENBUSD.csv'
 'BICOBUSD.csv' 'ATOMBUSD.csv' 'DARBUSD.csv' 'POLYBUSD.csv' 'MATICUSD.csv'
 'DOGEUSD.csv' 'RARIUSD.csv' 'MANAUSD.csv' 'COMPBUSD.csv' 'INJBUSD.csv'
 'THETABUSD.csv' 'ZECUSD.csv' 'DIABUSD.csv' 'ALGOUSD.csv' 'AMPUSD.csv'
 'CTSIBUSD.csv' 'FTTBUSD.csv' 'CELOBUSD.csv' 'BADGERBUSD.csv' 'RENUSD.csv'
 'BALUSD.csv' 'NKNUSD.csv' 'OOKIBUSD.csv' 'BADGERUSD.csv' 'DYDXBUSD.csv'
 'SOLBUSD.csv' 'LPTUSD.csv' 'CROUSD.csv' 'ZRXBUSD.csv' 'JSTBUSD.csv'
 'HNTBUSD.csv' 'BCHUSD.csv' 'ETHUSD.csv' 'KRLUSD.csv' 'BATUSD.csv'
 'AERGOBUSD.csv' 'PLAUSD.csv' 'AVAXUSDT.csv' 'HIGHBUSD.csv' 'BCHB

name:BNBTUSD.csv, 
 MAPE Open:0.00957511283808019, 
 r2 score:0.757858313265698
GRTBUSD.csv
name:GRTBUSD.csv, 
 MAPE Open:0.019237842702921636, 
 r2 score:0.7785221080535218
WRXBUSD.csv
name:WRXBUSD.csv, 
 MAPE Open:0.015405423870248165, 
 r2 score:0.8907542247586753
ZECBUSD.csv
name:ZECBUSD.csv, 
 MAPE Open:0.013846216888122183, 
 r2 score:0.777514378983367
GTCBUSD.csv
name:GTCBUSD.csv, 
 MAPE Open:0.03414901264300657, 
 r2 score:0.7363412402837863
BONDBUSD.csv
name:BONDBUSD.csv, 
 MAPE Open:0.01809484416072953, 
 r2 score:0.7071137114658247
AUCTIONUSD.csv
name:AUCTIONUSD.csv, 
 MAPE Open:0.03650434586517141, 
 r2 score:0.5442494276950052
MANABUSD.csv
name:MANABUSD.csv, 
 MAPE Open:0.016543776878464714, 
 r2 score:0.8382412363394253
CHESSBUSD.csv
name:CHESSBUSD.csv, 
 MAPE Open:0.036635051728215684, 
 r2 score:0.7838172037440391
PERPBUSD.csv
name:PERPBUSD.csv, 
 MAPE Open:0.03492765918980985, 
 r2 score:0.5211939313359639
ONTBUSD.csv
name:ONTBUSD.csv, 
 MAPE Open:0.013139058719599483,

name:LCXUSD.csv, 
 MAPE Open:0.02256731573645253, 
 r2 score:0.5223382397140903
MASKBUSD.csv
name:MASKBUSD.csv, 
 MAPE Open:0.024651137147219674, 
 r2 score:0.8326127761722294
WAXPBUSD.csv
name:WAXPBUSD.csv, 
 MAPE Open:0.017441382109302312, 
 r2 score:0.8792520577796736
TVKBUSD.csv
name:TVKBUSD.csv, 
 MAPE Open:0.020508821440393444, 
 r2 score:0.7472036530146342
FXUSD.csv
name:FXUSD.csv, 
 MAPE Open:0.0363409716113638, 
 r2 score:0.5243019810618295
YGGBUSD.csv
name:YGGBUSD.csv, 
 MAPE Open:0.03920278077595882, 
 r2 score:0.6430207366140513
AUDBUSD.csv
name:AUDBUSD.csv, 
 MAPE Open:0.0011795501653910046, 
 r2 score:0.6302771146186916
UNFIBUSD.csv
name:UNFIBUSD.csv, 
 MAPE Open:0.053324185368022636, 
 r2 score:0.8971138037380907
COSBUSD.csv
name:COSBUSD.csv, 
 MAPE Open:0.01885386517281096, 
 r2 score:0.8278802093821696
IDEXUSD.csv
name:IDEXUSD.csv, 
 MAPE Open:0.02833204548817294, 
 r2 score:0.00858137780727286
XLMUSD.csv
name:XLMUSD.csv, 
 MAPE Open:0.010710133725973426, 
 r2 score:0.

name:ORNUSD.csv, 
 MAPE Open:0.0309501011051539, 
 r2 score:-0.9085261570222487
1INCHBUSD.csv
name:1INCHBUSD.csv, 
 MAPE Open:0.011153189864536804, 
 r2 score:0.8176069919929074
BICOUSD.csv
name:BICOUSD.csv, 
 MAPE Open:0.028950588328491685, 
 r2 score:0.7768244566810809
IDEXBUSD.csv
name:IDEXBUSD.csv, 
 MAPE Open:0.025881090664340287, 
 r2 score:-0.285874813171477
TRXTUSD.csv
name:TRXTUSD.csv, 
 MAPE Open:0.011059562853798905, 
 r2 score:-0.14004147636778663
TWTBUSD.csv
name:TWTBUSD.csv, 
 MAPE Open:0.0167393087689025, 
 r2 score:0.784129707479364
SHIBBUSD.csv
name:SHIBBUSD.csv, 
 MAPE Open:0.016879254473570426, 
 r2 score:0.7528099061418139
RNDRBUSD.csv
name:RNDRBUSD.csv, 
 MAPE Open:0.027054675101443406, 
 r2 score:0.8266702761109563
OGNUSD.csv
name:OGNUSD.csv, 
 MAPE Open:0.04379718952679978, 
 r2 score:0.8179668963069605
FLOWBUSD.csv
name:FLOWBUSD.csv, 
 MAPE Open:0.017216280380228544, 
 r2 score:0.8650749082648476
ALGOBUSD.csv
name:ALGOBUSD.csv, 
 MAPE Open:0.016200184398395674, 

## Save Data

In [8]:
import pickle

some_obj = [names, predicted, r, mape_vals]

with open('update2_pickle.pickle', 'wb') as f:
    pickle.dump(some_obj, f)
with open('update2_pickle.pickle', 'rb') as f:
    loaded_obj = pickle.load(f)
    
loaded_obj

[['LTCBUSD.csv',
  'EGLDBUSD.csv',
  'GALAUSD.csv',
  'MIRBUSD.csv',
  'CELRBUSD.csv',
  'PYRBUSD.csv',
  'POWRUSD.csv',
  'XVSBUSD.csv',
  'BNBBUSD.csv',
  'YFIBUSD.csv',
  'ACMBUSD.csv',
  'EOSUSD.csv',
  'BAKEBUSD.csv',
  'QTUMBUSD.csv',
  'FISBUSD.csv',
  'ARBUSD.csv',
  'MLNUSD.csv',
  'COVALUSD.csv',
  'VETBUSD.csv',
  'ERNBUSD.csv',
  'IOTXBUSD.csv',
  'ELFBUSD.csv',
  'DOTUSDT.csv',
  'LAZIOBUSD.csv',
  'MDTUSD.csv',
  'LINABUSD.csv',
  'SNXBUSD.csv',
  'ANKRBUSD.csv',
  'HIVEBUSD.csv',
  'RENBUSD.csv',
  'BICOBUSD.csv',
  'ATOMBUSD.csv',
  'DARBUSD.csv',
  'POLYBUSD.csv',
  'MATICUSD.csv',
  'DOGEUSD.csv',
  'RARIUSD.csv',
  'MANAUSD.csv',
  'COMPBUSD.csv',
  'INJBUSD.csv',
  'THETABUSD.csv',
  'ZECUSD.csv',
  'DIABUSD.csv',
  'ALGOUSD.csv',
  'AMPUSD.csv',
  'CTSIBUSD.csv',
  'FTTBUSD.csv',
  'CELOBUSD.csv',
  'BADGERBUSD.csv',
  'RENUSD.csv',
  'BALUSD.csv',
  'NKNUSD.csv',
  'OOKIBUSD.csv',
  'BADGERUSD.csv',
  'DYDXBUSD.csv',
  'SOLBUSD.csv',
  'LPTUSD.csv',
  'CROUSD.csv'

## Visualizations

In [9]:
# example = dfs[0]
# cols = ['open', 'high', 'low', 'close']

# for i in range(example.shape[1]):
#     plt.plot(range(example.shape[0]), example.iloc[:,i], label = '{}'.format(example.columns[i]), linewidth = 0.5)
#     plt.legend(bbox_to_anchor=(1.43,1))
#     plt.xlabel("Date")
#     plt.ylabel(names[0])
#     plt.title('{} Versus Predictions'.format(names[0]))
# plt.show()
    
