# Data Collection

In [5]:
import datetime as dt
import os
import statistics

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from binance.client import Client

# client configuration - keys not needed here since using free api
# Credentials are looked up in the environment so that real keys never have to
# be typed into (and saved with) the notebook. When the variables are unset the
# original placeholder strings are used, so behavior is unchanged by default.
api_key = os.environ.get('BINANCE_API_KEY', 'API HERE')
api_secret = os.environ.get('BINANCE_API_SECRET', 'SECRET API HERE')
client = Client(api_key, api_secret, tld='us')

In [6]:
# get dataframe of many metrics for a crypto
def get_crypto_data(symbol, interval, data_start_date, data_end_date):
  """Download klines for one symbol and return the main candle metrics as floats.

  Args:
    symbol: trading pair passed straight to the client, e.g. 'BTCUSDT'.
    interval: candle interval string passed straight to the client, e.g. '1m'.
    data_start_date: start of the range, in whatever form the client accepts.
    data_end_date: end of the range, in whatever form the client accepts.

  Returns:
    DataFrame with float columns open, high, low, close, volume, num_trades,
    indexed by the candle open time. When the client returns no klines the
    frame has these columns and zero rows.
  """
  kline_columns = ['open_time','open', 'high', 'low', 'close', 'volume','close_time', 'qav','num_trades','taker_base_vol','taker_quote_vol','ignore']
  klines = client.get_historical_klines(symbol, interval, data_start_date, data_end_date)
  # Name the columns in the constructor: assigning `data.columns` afterwards
  # raises a length-mismatch error when `klines` is empty (a 0-column frame),
  # which would crash callers that only want to skip symbols with no data.
  data = pd.DataFrame(klines, columns=kline_columns)
  # open_time is divided by 1000 to get seconds, i.e. it is treated as
  # milliseconds. fromtimestamp() without a tz argument yields naive datetimes
  # in the machine's local timezone.
  data.index = [dt.datetime.fromtimestamp(x/1000.0) for x in data.open_time] # date formatting
  df = data.astype(float)
  return df[['open','high','low','close','volume','num_trades']]

# get dataframe of closing prices for a crypto
def get_closing_prices(symbol, interval, data_start_date, data_end_date):
  """Return only the 'close' column of the candle data for one symbol.

  Takes the same arguments as get_crypto_data and forwards them unchanged.

  Returns:
    Single-column DataFrame ('close', float) indexed by candle open time.
  """
  # The download, column naming, indexing and float cast were a copy of
  # get_crypto_data; delegating keeps the two functions from drifting apart.
  return get_crypto_data(symbol, interval, data_start_date, data_end_date)[['close']]

# Basic Model

In [None]:
# return dict mapping each token to its list of closing prices
def get_prices(tokens, interval='1m', data_start_date="26 Aug, 2022", data_end_date="26 Nov, 2022"):
  """Fetch closing prices for every token over one shared date range.

  Args:
    tokens: sequence of symbols; the first one sets the reference row count.
    interval: candle interval forwarded to get_closing_prices.
    data_start_date: start of the range forwarded to get_closing_prices.
    data_end_date: end of the range forwarded to get_closing_prices.

  Returns:
    dict of token -> list of closing prices. Tokens whose row count differs
    from the first token's are reported with a print and left out. An empty
    `tokens` yields an empty dict.
  """
  # all tokens must have equal amount of price data
  out = {}
  amount_data = None
  # Each token is downloaded exactly once. The previous version fetched
  # tokens[0] up front and then again inside a loop that restarted at index 0.
  for token in tokens:
    df = get_closing_prices(token, interval, data_start_date, data_end_date)
    if amount_data is None:
      amount_data = len(df.index) # first token defines the expected length
    if amount_data == len(df.index):
      out[token] = df['close'].tolist()
    else:
      print(f"Not enought data, excluding {token}")
  return out

# get prices data
# The first list is the full universe; it is immediately narrowed to the single
# pair that the basic model below trades.
tokens = ['BTCUSDT','ETHUSDT','DOTUSDT','DOGEUSDT','XRPUSDT','BNBUSDT','MATICUSDT','ADAUSDT','TRXUSDT','AVAXUSDT','NEARUSDT']
tokens = ['BTCUSDT','ETHUSDT']
prices = get_prices(tokens)

# find correlations of prices between many different tokens
# get_prices() leaves out tokens with a mismatched amount of data, so build the
# matrix only from tokens that were actually loaded; indexing `prices` with the
# requested list would raise KeyError for an excluded token.
loaded_tokens = [t for t in tokens if t in prices]
corrs = [
  [np.corrcoef(prices[row], prices[col])[0][1] for col in loaded_tokens]
  for row in loaded_tokens
]
corrs_df = pd.DataFrame(corrs, index=loaded_tokens, columns=loaded_tokens)
corrs_df

In [39]:
# functions

# given: past data for token1 and token2 in 1 min increments
# returns: True if should buy token1, False to buy nothing
def get_signal(t1, t2, a):
  """Decide whether the lookback window is a buy signal for token1.

  The signal fires when token1's relative change from the first to the last
  element of its window is below -a while token2's is above a.

  Args:
    t1: price window for token1.
    t2: price window for token2; must have the same length as t1.
    a: threshold on the relative change.

  Returns:
    True when the signal fires, otherwise False.
  """
  assert len(t1) == len(t2)

  def relative_change(window):
    # change from first to last element, as a fraction of the first element
    return (window[-1] - window[0]) / window[0]

  token1_move = relative_change(t1)
  token2_move = relative_change(t2)
  return bool(token1_move < -a and token2_move > a)

# list of returns obtained using signal
def get_returns(token1, token2, a, lookback, hold):
  """Collect the per-trade returns of token1 for every window where the signal fires.

  Reads both price series from the module-level `prices` dict.

  Args:
    token1: key in `prices` for the token that is bought.
    token2: key in `prices` for the token it is compared against.
    a: threshold forwarded to get_signal.
    lookback: number of consecutive prices in each signal window.
    hold: number of steps between the entry price and the exit price.

  Returns:
    List with one entry per fired signal: relative price change of token1 from
    the last price of the window to `hold` steps later, minus 0.001.
  """
  series1 = prices[token1]
  series2 = prices[token2]
  assert len(series1) == len(series2), "list of price data must be equal length"

  fee = 0.001 # binance fee
  trade_returns = []
  # `end` is one past the last index of the lookback window, so the entry
  # price sits at end-1 and the exit price at end+hold-1.
  for end in range(lookback, len(series1) - hold + 1):
    start = end - lookback
    if get_signal(series1[start:end], series2[start:end], a):
      entry_price = series1[end-1]
      exit_price = series1[end+hold-1]
      trade_returns.append((exit_price - entry_price) / entry_price - fee)

  return trade_returns

# total return using signal
def total_return(token1, token2, a, lookback, hold):
  """Sum of the per-trade returns produced by get_returns for these settings.

  Takes the same arguments as get_returns and forwards them unchanged.

  Returns:
    The sum of all per-trade returns (0 when no signal fires).
  """
  # The loop here used to be a copy of the one in get_returns; summing its
  # result adds the same values in the same order, so the total is unchanged.
  return sum(get_returns(token1, token2, a, lookback, hold))

# get number of signals present
def num_signals(token1, token2, a, lookback, hold):
  """Count how many windows fire the signal for these settings.

  Takes the same arguments as get_returns and forwards them unchanged.

  Returns:
    Number of fired signals, i.e. the number of entries get_returns produces.
  """
  # get_returns appends exactly one entry per fired signal, so its length is
  # the signal count; this replaces a third copy of the same loop.
  return len(get_returns(token1, token2, a, lookback, hold))

In [24]:
# to decide: optimal a, time to look back, time to hold in the future

alphas = list(np.arange(0., 0.01, 0.0005))
lookbacks = list(range(5, 120, 5))
holds = list(range(5, 120, 5))

# Candidates are (total_return, a, lookback, hold) tuples compared
# lexicographically: the total decides, and ties fall through to the larger a,
# then lookback, then hold. The starting value means a setting must beat a
# total return of 0 to be selected.
# NOTE(review): the parameters are chosen on the same data they are later
# evaluated on, so the selected total is an in-sample figure.
best = (0, -1, -1, -1)
for a in alphas:
  for lookback in lookbacks:
    for hold in holds:
      check = (total_return('BTCUSDT', 'ETHUSDT', a, lookback, hold), a, lookback, hold)
      if check > best:
        best = check

print(best) # a previous run printed (0.10995056448839616, 0.0015, 60, 65)

(0.10995056448839616, 0.0015, 60, 65)


In [42]:
# use optimal weights to inspect variance
ret, alpha, l, h = best # (0.10995056448839616, 0.0015, 60, 65)

# Run the backtest once and derive every statistic from the same list of
# per-trade returns; calling num_signals / get_returns / total_return
# separately repeated the identical pass over the price data three times.
returns = get_returns('BTCUSDT', 'ETHUSDT', alpha, l, h)
num_signals_best = len(returns)
total_ret = sum(returns)
positive = sum(1 for x in returns if x >= 0) # trades that at least break even after the fee
sd = np.var(returns) ** 0.5 # population standard deviation of per-trade returns

print(f"Num signals. :  {num_signals_best}")
print(f"Num pos sign :  {positive}")
print(f"Tot return   :  {total_ret}")
print(f"Standard dev :  {sd}")

Num signals. :  1402
Num pos sign :  658
Tot return   :  0.011602181836971207
Standard dev :  0.007934591991312384


# Multi Model


In [13]:
# return dict mapping each token to its list of closing prices
def get_prices(tokens, interval='15m', data_start_date="26 Nov, 2021", data_end_date="26 Nov, 2022"):
  """Fetch closing prices for every token over one shared date range.

  Args:
    tokens: sequence of symbols; the first one sets the reference row count.
    interval: candle interval forwarded to get_closing_prices.
    data_start_date: start of the range forwarded to get_closing_prices.
    data_end_date: end of the range forwarded to get_closing_prices.

  Returns:
    dict of token -> list of closing prices. Tokens whose row count differs
    from the first token's are reported with a print and left out. An empty
    `tokens` yields an empty dict.
  """
  # all tokens must have equal amount of price data
  out = {}
  amount_data = None
  # Each token is downloaded exactly once. The previous version fetched
  # tokens[0] up front and then again inside a loop that restarted at index 0.
  for token in tokens:
    df = get_closing_prices(token, interval, data_start_date, data_end_date)
    if amount_data is None:
      amount_data = len(df.index) # first token defines the expected length
    if amount_data == len(df.index):
      out[token] = df['close'].tolist()
    else:
      print(f"Not enought data, excluding {token}")
  return out

# get prices data
# Universe for the multi model: the traded token first, then the comparison basket.
tokens = [
  'BTCUSDT',
  'ETHUSDT',
  'DOTUSDT',
  'DOGEUSDT',
  'XRPUSDT',
  'BNBUSDT',
  'MATICUSDT',
  'ADAUSDT',
  'TRXUSDT',
  'AVAXUSDT',
  'NEARUSDT',
]
prices = get_prices(tokens)

# given: past data for token1 and for every token in a comparison basket, in 15 min increments
# returns: True if should buy token1, False to buy nothing
def get_signal(t1, t_others, a):
  """Decide whether the lookback window is a buy signal for token1.

  The signal fires when token1's relative change from the first to the last
  element of its window is below -a while the mean relative change of the
  basket windows is above a.

  Args:
    t1: price window for token1.
    t_others: sequence of price windows, one per basket token.
    a: threshold on the relative change.

  Returns:
    True when the signal fires, otherwise False. An empty basket never fires.
  """
  if len(t_others) == 0:
    # With no basket there is no average move to compare against; report
    # "no signal" instead of dividing by zero below.
    return False

  change1 = (t1[-1] - t1[0]) / t1[0]
  basket_changes = [(t2[-1] - t2[0]) / t2[0] for t2 in t_others]
  change2 = sum(basket_changes) / len(basket_changes)

  return bool(change1 < -a and change2 > a)

# list of returns obtained using signal
def get_returns(token1, others, a, lookback, hold):
  """Collect the per-trade returns of token1 for every window where the signal fires.

  Reads all price series from the module-level `prices` dict.

  Args:
    token1: key in `prices` for the token that is bought.
    others: keys in `prices` for the comparison basket.
    a: threshold forwarded to get_signal.
    lookback: number of consecutive prices in each signal window.
    hold: number of steps between the entry price and the exit price.

  Returns:
    List with one entry per fired signal: relative price change of token1 from
    the last price of the window to `hold` steps later, minus 0.001.
  """
  series1 = prices[token1]
  basket_series = []
  for other in others:
    series = prices[other]
    basket_series.append(series)
    assert len(series1) == len(series), "list of price data must be equal length"

  fee = 0.001 # binance fee
  trade_returns = []
  # `end` is one past the last index of the lookback window, so the entry
  # price sits at end-1 and the exit price at end+hold-1.
  for end in range(lookback, len(series1) - hold + 1):
    start = end - lookback
    basket_windows = [series[start:end] for series in basket_series]
    if get_signal(series1[start:end], basket_windows, a):
      entry_price = series1[end-1]
      exit_price = series1[end+hold-1]
      trade_returns.append((exit_price - entry_price) / entry_price - fee)

  return trade_returns

# total return using signal
def total_return(token1, others, a, lookback, hold):
  """Sum of the per-trade returns that get_returns yields for these settings."""
  trade_returns = get_returns(token1, others, a, lookback, hold)
  return sum(trade_returns)

# get number of signals present
def num_signals(token1, others, a, lookback, hold):
  """Number of fired signals, i.e. how many entries get_returns yields for these settings."""
  trade_returns = get_returns(token1, others, a, lookback, hold)
  return len(trade_returns)

In [10]:
# to decide: optimal a, time to look back, time to hold in the future

alphas = list(np.arange(0., 0.01, 0.0005))
lookbacks = list(range(1, 9))
holds = list(range(1, 9))
others = ['ETHUSDT','DOTUSDT','DOGEUSDT','XRPUSDT','BNBUSDT','MATICUSDT','ADAUSDT','TRXUSDT','AVAXUSDT','NEARUSDT']

# Candidates are (total_return, a, lookback, hold) tuples compared
# lexicographically: the total decides, and ties fall through to the larger a,
# then lookback, then hold. The starting value means a setting must beat a
# total return of 0 to be selected.
# NOTE(review): the parameters are chosen on the same data they are later
# evaluated on, so the selected total is an in-sample figure.
best = (0, -1, -1, -1)
for a in alphas:
  for lookback in lookbacks:
    for hold in holds:
      check = (total_return('BTCUSDT', others, a, lookback, hold), a, lookback, hold)
      if check > best:
        best = check

print(best) # a previous run printed (0.09179482926122871, 0.0035, 7, 3)

(0.09179482926122871, 0.0035, 7, 3)


In [15]:
# use optimal weights to inspect variance
# NOTE(review): lookback/hold are hard-coded to (4, 4) here, while the grid
# search comment above reports (7, 3) as best; `ret` is the total from that
# grid-search result, so it does not describe this configuration (compare
# total_ret below). Confirm which parameters are intended.
ret, alpha, l, h = (0.09179482926122871, 0.0035, 4, 4)

# Run the backtest once and derive every statistic from the same list of
# per-trade returns; num_signals and total_return each re-ran get_returns.
returns = get_returns('BTCUSDT', others, alpha, l, h)
num_signals_best = len(returns)
total_ret = sum(returns)
positive = sum(1 for x in returns if x >= 0) # trades that at least break even after the fee
sd = np.var(returns) ** 0.5 # population standard deviation of per-trade returns

print(f"Num signals. :  {num_signals_best}")
print(f"Num pos sign :  {positive}")
print(f"Tot return   :  {total_ret}")
print(f"Standard dev :  {sd}")
print(returns)

Num signals. :  41
Num pos sign :  24
Tot return   :  0.014577789699355425
Standard dev :  0.011908985315088232
[-0.011071428312967427, 0.006569294000119456, 0.011929946996447657, 0.007144231628427433, 0.008543145576396623, 0.0011358771474355924, 0.0028321770325903336, 0.0007402311073360581, 0.00038921813519718807, -0.0003053285071038539, -0.020849582710340243, -0.008212768230088993, -0.014670286680217332, 0.005501593072982752, 0.007848675114192825, -0.004218705126785639, 0.006419833797130423, -0.004066623884254051, -0.023580878057331323, -0.01638692789136529, 0.02085874533341953, 0.0015287773528304717, 0.0073407974532613265, -0.004273836996648434, 0.009911624493330634, 0.040766132150019084, 0.0019446423382800029, 0.010254009909753822, 0.009991595492924985, -0.0039348478275362635, 0.004217533331720183, 0.00758885501396971, -0.007639854057000398, 0.002115439317914311, -0.0015148626474823222, -0.0004489103384317097, -0.03030715780977449, 0.0008503306862840537, -0.010619053389512773, 0.00