In [1]:
# Imports
import os
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt

In [2]:
# Read 'hourly' csv files
DATA_DIR = "data"
# os.listdir() returns names in arbitrary order; sort them so that `hourlies`
# (and therefore the frame the DATE column is later taken from) is the same on
# every run and every machine.
csv_files = sorted(os.listdir(DATA_DIR))
hourly_csv_files = [f for f in csv_files if '_1h.csv' in f]
# header=1: the column names are read from the second line of each file.
hourlies = [pd.read_csv(os.path.join(DATA_DIR, name), header=1) for name in hourly_csv_files]

In [3]:
# Train Data: hourly closes with 2021-10-15 00:00 <= date < 2021-12-01 00:00
closes = {}
for i, hourly in enumerate(hourlies):
    in_window = (hourly['date'] >= '2021-10-15 00:00:00') & (hourly['date'] < '2021-12-01 00:00:00')
    temp = hourly[in_window]
    # np.flip reverses the row order (the displayed result is in ascending date
    # order, so the files are presumably stored newest-first — TODO confirm).
    if i == 0:
        # The DATE column is taken from the first frame only.
        closes['DATE'] = np.flip(temp['date'].values)
    closes[temp['symbol'].values[0]] = np.flip(temp['close'].values)

# Create the dataframe: DATE first, then the symbols in alphabetical order
df = pd.DataFrame(closes)
cols = sorted(df.columns)
cols.remove('DATE')
cols.insert(0, 'DATE')
df = df[cols]

# Normalize: divide every price column by its first value
train_df = df.copy()
norm_factors = {} # to remember normalizing factors (slot 0 = train)
for c in cols[1:]:
    first_close = train_df[c][0]
    norm_factors[c] = [first_close]
    train_df[c] = train_df[c] / first_close
train_df

Unnamed: 0,DATE,ADA/USDT,BNB/USDT,BTC/USDT,BTT/USDT,DASH/USDT,DOGE/USDT,EOS/USDT,ETC/USDT,ETH/USDT,LINK/USDT,LTC/USDT,NEO/USDT,QTUM/USDT,TRX/USDT,XLM/USDT,XMR/USDT,XRP/USDT,ZEC/USDT
0,2021-10-15 00:00:00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
1,2021-10-15 01:00:00,0.999535,0.993143,1.003650,0.998375,0.997287,0.998258,0.998242,0.996647,0.995459,0.996575,0.996641,0.997740,1.005824,1.000208,0.999170,0.998101,1.000628,0.993377
2,2021-10-15 02:00:00,1.012082,1.016070,1.035697,1.015980,1.018991,1.016986,1.019117,1.022541,1.017132,1.017504,1.027996,1.020570,1.027646,1.016460,1.018534,1.016711,1.019483,1.009106
3,2021-10-15 03:00:00,1.010688,0.997643,1.041741,1.014626,1.020076,1.016551,1.021094,1.036326,1.014583,1.020928,1.033035,1.021926,1.030520,1.018023,1.022130,1.021269,1.019573,1.012417
4,2021-10-15 04:00:00,1.020911,0.998714,1.044650,1.015710,1.026044,1.012195,1.030762,1.040611,1.015013,1.018265,1.036954,1.025542,1.031840,1.017919,1.017427,1.023547,1.019752,1.017384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1123,2021-11-30 19:00:00,0.730483,1.335547,1.010160,0.859697,0.985350,0.943380,0.887278,0.909277,1.238245,0.988965,1.182531,0.860081,1.281898,1.016981,0.946058,0.932017,0.903843,1.855132
1124,2021-11-30 20:00:00,0.726301,1.333190,1.006136,0.860238,0.980467,0.939460,0.886838,0.903502,1.233149,0.985921,1.174692,0.855561,1.279491,1.013751,0.943015,0.922522,0.901508,1.855132
1125,2021-11-30 21:00:00,0.727230,1.328477,1.000831,0.857259,0.975583,0.938589,0.882443,0.906297,1.228306,0.977169,1.170773,0.852170,1.297119,1.009063,0.936653,0.907330,0.897378,1.834437
1126,2021-11-30 22:00:00,0.725372,1.341547,1.003813,0.859697,0.982094,0.942509,0.887278,0.908905,1.235473,0.976408,1.172452,0.853752,1.273200,1.011876,0.938313,0.912647,0.902047,1.846854


In [4]:
# Test Data: hourly closes with 2021-12-01 00:00 <= date < 2022-01-16 00:00
closes = {}
for i, hourly in enumerate(hourlies):
    after_start = hourly['date'] >= '2021-12-01 00:00:00'
    before_end = hourly['date'] < '2022-01-16 00:00:00'
    temp = hourly[after_start & before_end]
    # np.flip reverses the row order of the selected slice.
    if i == 0:
        # The DATE column is taken from the first frame only.
        closes['DATE'] = np.flip(temp['date'].values)
    closes[temp['symbol'].values[0]] = np.flip(temp['close'].values)

# Create the dataframe: DATE first, then the symbols in alphabetical order
df = pd.DataFrame(closes)
cols = sorted(df.columns)
cols.remove('DATE')
cols.insert(0, 'DATE')
df = df[cols]

# Normalize: divide every price column by its first value and remember that
# value as the next entry of norm_factors[symbol]
test_df = df.copy()
for c in cols[1:]:
    first_close = test_df[c][0]
    norm_factors[c].append(first_close)
    test_df[c] = test_df[c] / first_close
test_df

Unnamed: 0,DATE,ADA/USDT,BNB/USDT,BTC/USDT,BTT/USDT,DASH/USDT,DOGE/USDT,EOS/USDT,ETC/USDT,ETH/USDT,LINK/USDT,LTC/USDT,NEO/USDT,QTUM/USDT,TRX/USDT,XLM/USDT,XMR/USDT,XRP/USDT,ZEC/USDT
0,2021-12-01 00:00:00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
1,2021-12-01 01:00:00,0.994278,0.998412,0.989883,0.994984,0.990675,0.996295,0.995799,0.999183,0.992940,1.002333,0.992877,0.997361,1.000434,0.998871,1.002349,1.006276,0.992957,0.992088
2,2021-12-01 02:00:00,1.004450,1.005083,0.994520,0.996865,0.989578,1.001853,1.002965,1.008786,1.005290,1.006610,1.001899,1.001320,1.013026,1.002258,1.010276,1.008368,0.998710,0.981538
3,2021-12-01 03:00:00,1.007629,1.003971,0.996265,0.998746,0.993417,1.000926,1.004695,1.009399,1.007504,1.012830,1.009022,1.004751,1.018732,1.004413,1.012038,1.012552,1.003769,0.985934
4,2021-12-01 04:00:00,1.002543,1.001271,0.990922,0.998119,0.988481,0.996295,0.997776,0.999591,1.000620,1.009331,1.002849,1.001056,1.000496,1.006157,1.008514,1.007531,0.998810,0.978901
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,2022-01-15 19:00:00,0.825811,0.791296,0.756001,0.799060,0.774547,0.867068,0.725229,0.677973,0.709019,0.998056,0.708927,0.672473,0.512219,0.709082,0.770112,0.914226,0.778097,0.638681
1100,2022-01-15 20:00:00,0.829625,0.790343,0.755249,0.799373,0.774547,0.865215,0.723005,0.676951,0.709377,0.990669,0.704653,0.671945,0.512033,0.707440,0.769524,0.912134,0.779883,0.637363
1101,2022-01-15 21:00:00,0.831532,0.785419,0.751979,0.794984,0.774547,0.862436,0.719298,0.673478,0.705628,0.986781,0.701804,0.671681,0.511289,0.704156,0.765414,0.910042,0.776312,0.634725
1102,2022-01-15 22:00:00,0.824539,0.787643,0.750948,0.793103,0.776193,0.855489,0.721522,0.673682,0.706532,0.987947,0.701804,0.674056,0.511413,0.704977,0.763359,0.911715,0.775221,0.635165


In [5]:
# Settings
INITIAL_BUDGET = 10_000   # budget each simulation starts from
COMMISSION_RATE = 1e-3    # 0.1 %, expressed as a fraction of the traded value
PARAMETER = "SPREAD"      # NOTE(review): not referenced in the visible cells — confirm where it is used

In [6]:
# Log Book: one (initially empty) list per column; each executed trade appends
# one entry to every list.
logs = {
    column: []
    for column in (
        'date',
        'long',
        'short',
        'long_price',
        'short_price',
        'long_amount',
        'short_amount',
        'budget',
    )
}

In [7]:
# Define trade function (buy&sell)
def trade(idx:int, long:str, short:str, long_amount:float, short_amount:float, is_train:bool):
    """Execute one pairs trade (buy `long`, sell `short`) and record it in the log book.

    Prices are recovered by multiplying the normalized close at row `idx` by the
    normalizing factor stored in `norm_factors` (entry 0 when reading `train_df`,
    entry 1 when reading `test_df`).

    Parameters
    ----------
    idx : int
        Positional row (used with `.iloc`) in `train_df` or `test_df`.
    long : str
        Column name of the coin that is bought.
    short : str
        Column name of the coin that is sold.
    long_amount : float
        Quantity of `long` that is bought.
    short_amount : float
        Quantity of `short` that is sold.
    is_train : bool
        Read the date and prices from `train_df` when True, from `test_df` otherwise.

    Side effects
    ------------
    Appends one entry to every list of the global `logs`, updates the global
    `tracker` in place, rebinds the global `budget`, and prints the tracker and
    its current value.

    Raises
    ------
    KeyError
        If `long` or `short` is not a key of `tracker`, `norm_factors` or the
        selected frame.
    IndexError
        If `idx` is outside the selected frame.
    """
    global logs, budget, tracker

    # Take the date from the same frame the prices come from. Always reading it
    # from test_df logged a wrong date (or raised IndexError) for train trades.
    frame, factor_pos = (train_df, 0) if is_train else (test_df, 1)
    row = frame.iloc[idx]
    date = row['DATE']
    long_price = row[long] * norm_factors[long][factor_pos]
    short_price = row[short] * norm_factors[short][factor_pos]

    logs['date'].append(date)
    logs['long'].append(long)
    logs['short'].append(short)
    logs['long_price'].append(long_price)
    logs['short_price'].append(short_price)
    logs['long_amount'].append(long_amount)
    logs['short_amount'].append(short_amount)

    # Net holdings per coin: positive = owned, negative = owed.
    tracker[long] += long_amount
    tracker[short] -= short_amount

    print("tracker: ", tracker)
    # Value of the net holdings of the two traded coins at the current prices.
    tracker_value = (tracker[long] * long_price) + (tracker[short] * short_price)
    print("tracker_value:",tracker_value)

    long_notional = long_amount * long_price
    short_notional = short_amount * short_price
    # Commission is charged on the traded value of both legs.
    # NOTE(review): tracker_value is the value of the whole net position and is
    # added to the running `budget` on every call, so a position carried over
    # several trades appears to be counted again each time — confirm the intended
    # accounting (cash vs. cash + holdings) before relying on the logged budget.
    budget = budget - long_notional + short_notional - (COMMISSION_RATE * long_notional) - (COMMISSION_RATE * short_notional) + tracker_value
    logs['budget'].append(budget)

In [8]:
# ETH-EOS Simulation as in slides
budget = INITIAL_BUDGET
tracker = {"ETH/USDT": 0, "EOS/USDT": 0}

# Start from an empty log book so that re-running this cell does not append a
# second copy of the same trades to `logs`.
for entries in logs.values():
    entries.clear()

# (timestamp, long coin, short coin, long amount, short amount)
slide_trades = [
    ('2021-12-01 20:00:00', 'ETH/USDT', 'EOS/USDT', 2.187671083, 2511.312217),
    ('2021-12-04 04:00:00', 'EOS/USDT', 'ETH/USDT', 3164.003605, 2.600570322),
    ('2021-12-08 13:00:00', 'ETH/USDT', 'EOS/USDT', 2.611503573, 2963.459424),
    ('2021-12-08 16:00:00', 'EOS/USDT', 'ETH/USDT', 3143.556816, 2.649008604),
]

for when, long_coin, short_coin, long_qty, short_qty in slide_trades:
    # Row of test_df whose DATE equals the trade timestamp.
    row_idx = test_df[test_df['DATE'] == when].index[0]
    trade(idx=row_idx, long=long_coin, short=short_coin, long_amount=long_qty, short_amount=short_qty, is_train=False)

log_df = pd.DataFrame(logs)
log_df

tracker:  {'ETH/USDT': 2.187671083, 'EOS/USDT': -2511.312217}
tracker_value: 1.293497916776687e-06
tracker:  {'ETH/USDT': -0.4128992389999997, 'EOS/USDT': 652.6913879999997}
tracker_value: 496.55451473382027
tracker:  {'ETH/USDT': 2.198604334, 'EOS/USDT': -2310.7680360000004}
tracker_value: 696.621148980561
tracker:  {'ETH/USDT': -0.45040426999999994, 'EOS/USDT': 832.7887799999994}
tracker_value: 1107.2811420099983


Unnamed: 0,date,long,short,long_price,short_price,long_amount,short_amount,budget
0,2021-12-01 20:00:00,ETH/USDT,EOS/USDT,4566.5,3.978,2.187671,2511.312217,9980.02
1,2021-12-04 04:00:00,EOS/USDT,ETH/USDT,3.303,4018.62,3164.003605,2.60057,10455.673107
2,2021-12-08 13:00:00,ETH/USDT,EOS/USDT,4292.84,3.783,2.611504,2963.459424,11129.872725
3,2021-12-08 16:00:00,EOS/USDT,ETH/USDT,3.712,4405.0,3143.556816,2.649009,12213.8161


^^^ The results in the table above do not match the results on page 62 of the slides ^^^

- I tried both with and without commission; neither matched.


---

TODO: Genetic Algorithm

In [9]:
# TODO: A genetic algorithm trained on the train dataset is still needed; it should signal --->
# "0) Do Nothing"   OR   "1) Long first coin, short second coin"   OR   "2) Long second coin, short first coin"

# Then,

# For each pair:
#   Pairs Trading (Buy & Sell)

if False: # When Completed: ===>>> Delete if False & Untab
    coins = test_df.columns.tolist()
    coins.remove('DATE')
    # Visit every unordered pair of coins exactly once.
    for first_pos, first_coin in enumerate(coins[:-1]):
        for second_coin in coins[first_pos + 1:]:
            budget = INITIAL_BUDGET # Initialize budget
            tracker = {first_coin: 0, second_coin: 0} # Keep track of the coins in the account
            for step in range(len(test_df)):

                signal = None # TODO: Signal of the genetic algorithm

                if signal == 1: # Long first coin, Short second coin
                    trade(idx=step, long=first_coin, short=second_coin, long_amount=None, short_amount=None, is_train=False) # TODO: long&short_amount from <-- genetic alg.
                elif signal == 2: # Long second coin, Short first coin
                    trade(idx=step, long=second_coin, short=first_coin, long_amount=None, short_amount=None, is_train=False) # TODO: long&short_amount from <-- genetic alg.