In [None]:
import pandas as pd
import numpy as np

In [None]:
def prepare_data(lags, sub_lags, vol_lags,
                 target_csv="ETHUSDT-1h.csv", leading_csv="BTCUSDT-1h.csv"):
    """Build the lagged-feature table used to predict the target's direction.

    Reads two CSV files indexed by their "Open Time" column: the target
    series (needs "Close" and "Volumn" columns) and a leading/correlated
    series (needs "Close"). For the target it derives the simple return, the
    log return and its sign ("direction"), plus lagged log returns and lagged
    log volume changes; for the leading series it derives lagged log returns.
    Both frames are joined on the index and every row holding a NaN or an
    infinite value in any column is dropped.

    Parameters
    ----------
    lags : int
        Number of lagged target log-return columns ("lag1".."lagN").
    sub_lags : int
        Number of lagged leading-series log-return columns
        ("btc lag1".."btc lagN").
    vol_lags : int
        Number of lagged target log-volume-change columns
        ("volumn lag1".."volumn lagN").
    target_csv : str or path-like, optional
        CSV of the target series. Defaults to the originally hard-coded file.
    leading_csv : str or path-like, optional
        CSV of the leading series. Defaults to the originally hard-coded file.

    Returns
    -------
    (pandas.DataFrame, list of str)
        The cleaned, merged frame and the names of the feature columns, in
        the order: target lags, volume lags, leading-series lags.
    """
    # --- Target currency -------------------------------------------------
    # Explicit copy: the columns added below are written to an independent
    # frame rather than to the result of a column selection.
    data = pd.read_csv(target_csv, index_col="Open Time")[["Close", "Volumn"]].copy()
    data["return"] = data["Close"] / data["Close"].shift(1)
    data["log_return"] = np.log(data["return"])
    data["direction"] = np.sign(data["log_return"])

    cols = []

    for lag in range(1, lags + 1):
        col = "lag{}".format(lag)
        data[col] = data["log_return"].shift(lag)
        cols.append(col)

    # --- Volume information ----------------------------------------------
    # A zero volume yields +/-inf (or NaN) here; those rows are removed by the
    # inf -> NaN replacement and dropna at the end.
    data["log_volumn_change"] = np.log(data["Volumn"].div(data["Volumn"].shift(1)))

    for lag in range(1, vol_lags + 1):
        col = "volumn lag{}".format(lag)
        data[col] = data["log_volumn_change"].shift(lag)
        cols.append(col)

    # --- Leading / correlated currency -----------------------------------
    data_btc = (
        pd.read_csv(leading_csv, index_col="Open Time")[["Close"]]
        .rename(columns={"Close": "BTC Close"})
    )
    data_btc["BTC return"] = data_btc["BTC Close"] / data_btc["BTC Close"].shift(1)
    data_btc["BTC log_return"] = np.log(data_btc["BTC return"])

    for lag in range(1, sub_lags + 1):
        col = "btc lag{}".format(lag)
        data_btc[col] = data_btc["BTC log_return"].shift(lag)
        cols.append(col)

    # --- Merge and clean ---------------------------------------------------
    # Outer join on the index; timestamps missing from either source end up
    # with NaNs and are dropped together with the warm-up rows of the lags.
    merged_df = pd.concat([data, data_btc], join="outer", axis=1)
    merged_df = merged_df.replace([np.inf, -np.inf], np.nan).dropna()

    return merged_df, cols



In [None]:
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Module-level SVC built with scikit-learn's default hyperparameters.
# test_svm() below refits this same shared instance on every lap.
clf = svm.SVC()

def test_svm(data, cols, lap_number, random_state=None):
    """Fit and score the module-level SVC on repeated random 70/30 splits.

    Each lap draws a fresh train/test split of ``data[cols]`` against
    ``data["direction"]``, refits the shared ``clf`` on the training part and
    prints the accuracy on the held-out part. After the last lap the mean and
    standard deviation of the accuracies are printed.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the feature columns and a "direction" column.
    cols : list of str
        Names of the feature columns.
    lap_number : int
        Number of split/fit/score repetitions; must be at least 1.
    random_state : int, optional
        Base seed for the splits. Lap ``i`` (0-based) uses
        ``random_state + i`` so laps differ but a rerun gives the same
        splits. ``None`` (default) keeps the original unseeded behaviour.

    Raises
    ------
    ValueError
        If ``lap_number`` is smaller than 1 (the summary statistics would
        otherwise be computed on an empty array).
    """
    if lap_number < 1:
        raise ValueError("lap_number must be at least 1, got {}".format(lap_number))

    features = data[cols]
    target = data["direction"]

    # NOTE(review): the split shuffles rows; if the rows are time-ordered
    # observations with overlapping lag windows, neighbouring rows land on
    # both sides of the split and may inflate the score -- confirm whether a
    # chronological split is wanted instead.
    accuracy_scores = []
    for i in range(lap_number):
        print("Lap {}: ".format(i+1))
        lap_seed = None if random_state is None else random_state + i
        x_train, x_test, y_train, y_test = train_test_split(
            features, target, test_size=0.3, random_state=lap_seed
        )
        clf.fit(X=x_train, y=y_train)
        print("Trained. Testing... \n")
        y_pred = clf.predict(X=x_test)
        accuracy = accuracy_score(y_test, y_pred)
        accuracy_scores.append(accuracy)
        print("Accuracy Score: {} \n".format(accuracy))

    score_array = np.array(accuracy_scores)
    print("Average: {}, Std: {}".format(score_array.mean(),score_array.std()))

In [None]:
# Build the feature table: 9 target-return lags, 3 leading-series lags and
# 4 volume-change lags.
data, cols = prepare_data(lags=9, sub_lags=3, vol_lags=4)

In [8]:
# Preview the merged feature table (the recorded output below is truncated to
# the first and last rows and elides the middle columns).
data

Unnamed: 0_level_0,Close,Volumn,return,log_return,direction,lag1,lag2,lag3,lag4,lag5,...,volumn lag1,volumn lag2,volumn lag3,volumn lag4,BTC Close,BTC return,BTC log_return,btc lag1,btc lag2,btc lag3
Open Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1502978400000,306.50,710.25965,0.990627,-0.009417,-1.0,0.002524,-0.001069,-0.001132,-0.002261,0.004462,...,-0.257494,0.385402,-0.039816,-0.500846,4470.82,1.002651,0.002647,0.010823,-0.003688,-0.007359
1502982000000,307.06,459.41982,1.001827,0.001825,1.0,-0.009417,0.002524,-0.001069,-0.001132,-0.002261,...,0.827188,-0.257494,0.385402,-0.039816,4352.34,0.973499,-0.026858,0.002647,0.010823,-0.003688
1502985600000,308.00,596.81077,1.003061,0.003057,1.0,0.001825,-0.009417,0.002524,-0.001069,-0.001132,...,-0.435666,0.827188,-0.257494,0.385402,4354.18,1.000423,0.000423,-0.026858,0.002647,0.010823
1502989200000,308.33,191.94856,1.001071,0.001071,1.0,0.003057,0.001825,-0.009417,0.002524,-0.001069,...,0.261636,-0.435666,0.827188,-0.257494,4289.24,0.985086,-0.015027,0.000423,-0.026858,0.002647
1502992800000,309.10,203.65480,1.002497,0.002494,1.0,0.001071,0.003057,0.001825,-0.009417,0.002524,...,-1.134373,0.261636,-0.435666,0.827188,4256.97,0.992477,-0.007552,-0.015027,0.000423,-0.026858
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1672513200000,1202.65,6435.02040,1.002543,0.002539,1.0,-0.000983,-0.001673,-0.000058,-0.000482,0.001946,...,-0.840356,0.491008,-0.468410,-0.800908,16570.14,0.999539,-0.000462,-0.000259,-0.001104,0.000620
1672516800000,1202.34,3557.34730,0.999742,-0.000258,-1.0,0.002539,-0.000983,-0.001673,-0.000058,-0.000482,...,0.632884,-0.840356,0.491008,-0.468410,16568.60,0.999907,-0.000093,-0.000462,-0.000259,-0.001104
1672520400000,1200.10,3865.48620,0.998137,-0.001865,-1.0,-0.000258,0.002539,-0.000983,-0.001673,-0.000058,...,-0.592740,0.632884,-0.840356,0.491008,16548.28,0.998774,-0.001227,-0.000093,-0.000462,-0.000259
1672524000000,1196.19,9549.47930,0.996742,-0.003263,-1.0,-0.001865,-0.000258,0.002539,-0.000983,-0.001673,...,0.083072,-0.592740,0.632884,-0.840356,16520.81,0.998340,-0.001661,-0.001227,-0.000093,-0.000462


In [9]:
# Evaluate the SVC over 5 independent random train/test laps and print the
# per-lap accuracy followed by the mean and standard deviation.
test_svm(data=data,cols=cols,lap_number=5)

Lap 1: 
Trained. Testing... 

Accuracy Score: 0.493357804579619 

Lap 2: 
Trained. Testing... 

Accuracy Score: 0.5013109596224437 

Lap 3: 
Trained. Testing... 

Accuracy Score: 0.5017479461632581 

Lap 4: 
Trained. Testing... 

Accuracy Score: 0.5001747946163259 

Lap 5: 
Trained. Testing... 

Accuracy Score: 0.5021849327040727 

Average: 0.49975528753714393, Std: 0.0032679582021713353
