In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MaxAbsScaler
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


In [8]:
def _make_scaler(scaler):
    """Return a fresh scikit-learn scaler for the integer code ``scaler``."""
    # 0 -> MaxAbsScaler, 1 -> MinMaxScaler, 2 -> StandardScaler.
    for code, scaler_cls in ((0, MaxAbsScaler), (1, MinMaxScaler), (2, StandardScaler)):
        if scaler == code:
            return scaler_cls()
    # Any unrecognised code falls back to MaxAbsScaler (historical behaviour).
    return MaxAbsScaler()


def _add_lag_features(frame, series, name_template, n_lags):
    """Store ``series`` shifted by 1..n_lags rows in ``frame``; return the new column names."""
    names = []
    for lag in range(1, n_lags + 1):
        name = name_template.format(lag)
        # Assignment aligns on the index, so ``series`` may come from another frame.
        frame[name] = series.shift(lag)
        names.append(name)
    return names


def prepare_data(lags, sub_lags, vol_lags, scaler,
                 target_csv="ETHUSDT-1d.csv", btc_csv="BTCUSDT-1d.csv",
                 output_csv="merged.csv"):
    """Build the lagged, scaled feature table used to predict the target's direction.

    Parameters
    ----------
    lags : int
        Number of lagged log-return columns of the target ("lag1", "lag2", ...).
    sub_lags : int
        Number of lagged BTC log-return columns ("btc lag1", ...).
    vol_lags : int
        Number of lagged log-volume-change columns ("volumn lag1", ...).
    scaler : int
        0 -> MaxAbsScaler, 1 -> MinMaxScaler, 2 -> StandardScaler; any other
        value falls back to MaxAbsScaler.
    target_csv : str, optional
        CSV of the target currency; must contain "Open Time", "Close" and
        "Volumn" columns.
    btc_csv : str, optional
        CSV of the leading/correlated currency; must contain "Open Time" and
        "Close" columns.
    output_csv : str or None, optional
        Where the merged (unscaled) table is written. ``None`` skips the write.

    Returns
    -------
    (pandas.DataFrame, list of str)
        The merged frame with one extra "nom <col>" column per feature, and
        the list of those scaled column names.

    Notes
    -----
    "direction" is ``np.sign`` of the log return, so it is 0 (a third class)
    on rows where the close price did not change.
    """
    my_scaler = _make_scaler(scaler)

    # Import target currency. The explicit copy avoids pandas'
    # SettingWithCopyWarning when new columns are added below.
    data = pd.read_csv(target_csv, index_col="Open Time")
    data = data[["Close", "Volumn"]].copy()
    data["return"] = data["Close"].div(data["Close"].shift(1))
    data["log_return"] = np.log(data["return"])
    data["direction"] = np.sign(data["log_return"])

    cols = _add_lag_features(data, data["log_return"], "lag{}", lags)

    # Add volume info
    data["log_volumn_change"] = np.log(data["Volumn"].div(data["Volumn"].shift(1)))
    cols += _add_lag_features(data, data["log_volumn_change"], "volumn lag{}", vol_lags)

    # Import leading/correlated currency
    data_btc = pd.read_csv(btc_csv, index_col="Open Time")
    data_btc = data_btc[["Close"]].copy()
    data_btc.columns = ["BTC Close"]
    data_btc["BTC return"] = data_btc["BTC Close"].div(data_btc["BTC Close"].shift(1))
    data_btc["BTC log_return"] = np.log(data_btc["BTC return"])

    cols += _add_lag_features(data, data_btc["BTC log_return"], "btc lag{}", sub_lags)

    # Merge target and correlated currency info into one dataframe. Infinite
    # values (e.g. log of a zero ratio) are treated as missing, then every
    # incomplete row is dropped.
    merged_df = (
        pd.concat([data, data_btc], join="inner", axis=1)
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
    )

    if output_csv is not None:
        merged_df.to_csv(output_csv)

    # Normalize data, one column at a time (the scaler is refitted per column).
    # NOTE(review): the scaler is fitted on every row, i.e. before any
    # train/test split done by the caller, so test-set statistics influence
    # the scaled training features. Confirm whether that is acceptable.
    normalized_cols = []
    for col in cols:
        normalized_col = "nom " + col
        try:
            scaled = my_scaler.fit_transform(merged_df[[col]])
        except ValueError as exc:
            # scikit-learn rejects unusable input (e.g. zero rows) with
            # ValueError; skip that feature but say which one and why.
            # Anything else is a real bug and is allowed to propagate.
            print("Skipping normalization of {!r}: {}".format(col, exc))
            continue
        merged_df[normalized_col] = np.asarray(scaled).ravel()
        normalized_cols.append(normalized_col)

    return merged_df, normalized_cols



In [9]:

def test_svm(data, cols, lap_number, test_size=0.3, random_state=None):
    """Train and score an SVM classifier on repeated random train/test splits.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the feature columns and a "direction" target column.
    cols : list of str
        Names of the feature columns to train on.
    lap_number : int
        How many independent split/fit/score rounds to run.
    test_size : float, optional
        Passed to ``train_test_split``; defaults to the original 0.3.
    random_state : int or None, optional
        Base seed for the splits. Lap ``i`` uses ``random_state + i`` so laps
        differ from each other but the whole run is repeatable. ``None``
        (default) keeps the original unseeded behaviour.

    Returns
    -------
    (float, float)
        Mean and (population) standard deviation of the per-lap accuracies.
        With ``lap_number == 0`` no scores are collected and both are NaN.
    """
    # NOTE(review): train_test_split shuffles rows by default; if ``data`` is
    # a time series of lagged features, test rows are interleaved with
    # training rows. Confirm that this is the intended evaluation protocol.
    accuracy_scores = []
    for i in range(lap_number):
        print("Lap {}: ".format(i+1))
        lap_seed = None if random_state is None else random_state + i
        x_train, x_test, y_train, y_test = train_test_split(
            data[cols],
            data["direction"],
            test_size=test_size,
            random_state=lap_seed,
        )
        # A fresh estimator per lap makes it explicit that laps share no state.
        clf = svm.SVC()
        clf.fit(X=x_train, y=y_train)
        print("Trained. Testing...")
        y_pred = clf.predict(X=x_test)
        accuracy = accuracy_score(y_test, y_pred)
        accuracy_scores.append(accuracy)
        print("Accuracy Score: {}".format(accuracy))

    score_array = np.array(accuracy_scores)
    mean = score_array.mean()
    std = score_array.std()
    print("Average: {}, Std: {}".format(mean, std))
    return mean, std

In [10]:
from itertools import product

def prepare_and_run_svm(lags,sub_lags=0,vol_lags=0,scaler=0,lap_number=5):
    """Build the feature table for one configuration and evaluate an SVM on it.

    Prints the configuration, delegates to ``prepare_data`` and ``test_svm``,
    and returns the ``(mean, std)`` accuracy pair reported by ``test_svm``.
    """
    banner = "Lags={}, Sub_lags={}, Vol_lags={}, Scaler={}"
    print(banner.format(lags, sub_lags, vol_lags, scaler))

    feature_frame, feature_cols = prepare_data(
        lags=lags,
        sub_lags=sub_lags,
        vol_lags=vol_lags,
        scaler=scaler,
    )
    avg_accuracy, accuracy_std = test_svm(
        data=feature_frame,
        cols=feature_cols,
        lap_number=lap_number,
    )
    return (avg_accuracy, accuracy_std)

def optimize_svm(args):
    """Grid-search the SVM pipeline over lag counts and scaler codes.

    ``args`` is indexed as ``[max_lags, max_sub_lags, max_vol_lags,
    max_scaler, lap_number]``. Every bound is exclusive: lags run over
    ``1 .. max_lags - 1`` and the other three over ``0 .. max - 1``.

    Returns a list with one tuple per combination, in ``itertools.product``
    order: ``(lags, sub_lags, vol_lags, scaler, lap_number, mean, std)``.
    """
    lag_values = range(1, args[0])
    sub_lag_values = range(args[1])
    vol_lag_values = range(args[2])
    scaler_codes = range(args[3])
    laps = args[4]

    outcomes = []
    for n_lags, n_sub, n_vol, scaler_code in product(
        lag_values, sub_lag_values, vol_lag_values, scaler_codes
    ):
        avg, spread = prepare_and_run_svm(
            lags=n_lags,
            sub_lags=n_sub,
            vol_lags=n_vol,
            scaler=scaler_code,
            lap_number=laps,
        )
        outcomes.append((n_lags, n_sub, n_vol, scaler_code, laps, avg, spread))

    return outcomes
    




In [None]:
# Grid-search configuration. optimize_svm feeds each bound into range(), so
# every value below is an EXCLUSIVE upper limit:
#   lags     -> 1..11
#   sub_lags -> 0..5   (BTC return lags)
#   vol_lags -> 0..2   (volume-change lags)
#   scaler   -> 0..2   (0 MaxAbsScaler, 1 MinMaxScaler, 2 StandardScaler)
max_lags = 12
max_sub_lags = 6
max_vol_lags = 3
max_scaler = 3
# Number of random train/test splits evaluated per combination.
lap_number = 3

# Positional order expected by optimize_svm.
args = [max_lags,max_sub_lags,max_vol_lags,max_scaler,lap_number]

# 11 * 6 * 3 * 3 = 594 combinations, each fitted lap_number times. Every
# combination re-reads both CSV files and rewrites merged.csv, so this cell
# is slow. Each entry of `results` is
# (lags, sub_lags, vol_lags, scaler, lap_number, mean, std).
results = optimize_svm(args=args)



In [11]:
# Baseline run: 16 lagged returns of the target only; the defaults leave out
# BTC and volume lags, use scaler code 0 (MaxAbsScaler) and run 5 laps.
prepare_and_run_svm(lags = 16)

Lags=16, Sub_lags=0, Vol_lags=0, Scaler=0
Lap 1: 
Trained. Testing...
Accuracy Score: 0.5034246575342466
Lap 2: 
Trained. Testing...
Accuracy Score: 0.535958904109589
Lap 3: 
Trained. Testing...
Accuracy Score: 0.5136986301369864
Lap 4: 
Trained. Testing...
Accuracy Score: 0.4828767123287671
Lap 5: 
Trained. Testing...
Accuracy Score: 0.5154109589041096
Average: 0.5102739726027397, Std: 0.01729367283966109


(0.5102739726027397, 0.01729367283966109)