In [86]:
import pandas as pd
from pandas import DataFrame
import requests
import os
import os.path
import datetime as dt
import json
import numpy as np
from datetime import timedelta, datetime
from dateutil import parser
import math
%matplotlib inline
from matplotlib import pyplot as plt

from pathlib import Path
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.arima_model import ARMA

from dotenv import load_dotenv
load_dotenv()

import warnings
warnings.filterwarnings('ignore')

In [87]:
# Read the Binance credentials from the process environment
# (populated from a .env file by load_dotenv() in the import cell).
binance_api_key = os.environ.get("BINANCE_API_KEY")
binance_secret_key = os.environ.get("BINANCE_SECRET_KEY")


In [88]:
# Sanity check: shows only the type of the loaded value (NoneType would mean
# the variable is missing from the environment), never the key itself.
type(binance_api_key)

str

In [89]:
# Same sanity check for the secret key: displays the type only, not the value.
type(binance_secret_key)

str

In [90]:
# Set up the Binance API client with the credentials loaded above
from binance.client import Client

binance_client = Client(
    api_key=binance_api_key,
    api_secret=binance_secret_key,
)

# Constants
# Length of each supported kline interval, expressed in minutes
binsizes = {
    "1m": 1,
    "5m": 5,
    "1h": 60,
    "1d": 1440,
}
# NOTE(review): not referenced by any cell visible in this notebook
batch_size = 750

# Work out the time range of new data that still has to be collected
def minutes_of_new_data(symbol, kline_size, data, source):
    """Return the ``(old, new)`` time window that still has to be downloaded.

    Parameters
    ----------
    symbol : str
        Trading pair forwarded to the exchange client, e.g. ``'ETHUSDT'``.
    kline_size : str
        Candle interval forwarded to the exchange client, e.g. ``'1m'``.
    data : pandas.DataFrame
        Previously stored candles; may be empty. When it has rows, the last
        value of its ``'timestamp'`` column is used as the starting point.
    source : str
        Data source identifier; only ``"binance"`` is supported.

    Returns
    -------
    tuple
        ``old`` is the last stored timestamp, or 28 Aug 2020 when ``data`` is
        empty. ``new`` is the first field of the most recent kline returned by
        the client, interpreted as a millisecond epoch.

    Raises
    ------
    ValueError
        If ``source`` is anything other than ``"binance"``.
    """
    # Fail with a clear message instead of an UnboundLocalError further down.
    if source != "binance":
        raise ValueError('Unsupported data source: %r (only "binance" is supported)' % (source,))

    if len(data) > 0:
        # str() lets this work whether the cached value is a CSV string or an
        # already-parsed Timestamp.
        old = parser.parse(str(data["timestamp"].iloc[-1]))
    else:
        old = datetime.strptime('28 Aug 2020', '%d %b %Y')  # <-- Can change start date here

    latest_kline = binance_client.get_klines(symbol=symbol, interval=kline_size)[-1]
    new = pd.to_datetime(latest_kline[0], unit='ms')
    return old, new

# Create function to retrieve tickers and timestamps
def get_all_binance(symbol, kline_size, save = False):
    """Download klines for ``symbol`` and merge them with any cached CSV.

    The cache file is ``'<symbol>-<kline_size>-data.csv'`` in the working
    directory. If it exists, only the window after its last timestamp is
    requested; otherwise the download starts at the default start date chosen
    by ``minutes_of_new_data``.

    Parameters
    ----------
    symbol : str
        Trading pair, e.g. ``'ETHUSDT'``.
    kline_size : str
        Candle interval; must be a key of ``binsizes``.
    save : bool, default False
        When true, write the merged table back to the cache file.

    Returns
    -------
    pandas.DataFrame
        Candles indexed by ``'timestamp'`` with the columns ``open``, ``high``,
        ``low``, ``close``, ``volume``, ``close_time``, ``quote_av``,
        ``trades``, ``tb_base_av``, ``tb_quote_av`` and ``ignore``.
    """
    filename = '%s-%s-data.csv' % (symbol, kline_size)
    if os.path.isfile(filename):
        data_df = pd.read_csv(filename)
    else:
        data_df = pd.DataFrame()
    has_cache = len(data_df) > 0

    oldest_point, newest_point = minutes_of_new_data(symbol, kline_size, data_df, source = "binance")
    delta_min = (newest_point - oldest_point).total_seconds()/60
    available_data = math.ceil(delta_min/binsizes[kline_size])

    # The original compared against a fixed sentinel date that never matched
    # the default start, so the first-download message was unreachable.
    if not has_cache:
        print('Downloading all available %s data for %s. Be patient..!' % (kline_size, symbol))
    else:
        print('Downloading %d minutes of new data available for %s, i.e. %d instances of %s data.' % (delta_min, symbol, available_data, kline_size))

    klines = binance_client.get_historical_klines(symbol, kline_size, oldest_point.strftime("%d %b %Y %H:%M:%S"), newest_point.strftime("%d %b %Y %H:%M:%S"))
    data = pd.DataFrame(klines, columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore' ])
    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')

    if has_cache:
        # Cached timestamps come back from CSV as strings; convert them so the
        # merged index has a single dtype.
        data_df['timestamp'] = pd.to_datetime(data_df['timestamp'])
        # DataFrame.append no longer exists in pandas 2.x; concat is the replacement.
        data_df = pd.concat([data_df, data], ignore_index=True)
        # The request starts at the last cached timestamp, so that candle is
        # fetched again; keep the freshly downloaded copy.
        data_df = data_df.drop_duplicates(subset='timestamp', keep='last')
    else:
        data_df = data

    data_df = data_df.set_index('timestamp')
    if save:
        data_df.to_csv(filename)
    print('All caught up..!')
    return data_df

In [91]:
# Trading pairs to download
symbols = [
    'ETHUSDT',
    'LINKUSDT',
    'LTCUSDT',
    'EOSUSDT',
]

In [92]:
# Fetch the 1-minute candles of every symbol, keep only the close price and
# tag each row with the ticker it belongs to.
price_data = []
for symbol in symbols:
    prices = get_all_binance(symbol, kline_size='1m')[['close']]
    price_data.append(prices.assign(coin=symbol))

# Stack the per-coin frames into one long-format table and display it
coins = pd.concat(price_data)
coins

Downloading 986 minutes of new data available for ETHUSDT, i.e. 986 instances of 1m data.
All caught up..!
Downloading 986 minutes of new data available for LINKUSDT, i.e. 986 instances of 1m data.
All caught up..!
Downloading 986 minutes of new data available for LTCUSDT, i.e. 986 instances of 1m data.
All caught up..!
Downloading 986 minutes of new data available for EOSUSDT, i.e. 986 instances of 1m data.
All caught up..!


Unnamed: 0_level_0,close,coin
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-08-28 00:00:00,383.69000000,ETHUSDT
2020-08-28 00:01:00,382.90000000,ETHUSDT
2020-08-28 00:02:00,381.72000000,ETHUSDT
2020-08-28 00:03:00,381.75000000,ETHUSDT
2020-08-28 00:04:00,381.45000000,ETHUSDT
2020-08-28 00:05:00,381.38000000,ETHUSDT
2020-08-28 00:06:00,381.57000000,ETHUSDT
2020-08-28 00:07:00,381.66000000,ETHUSDT
2020-08-28 00:08:00,381.25000000,ETHUSDT
2020-08-28 00:09:00,381.06000000,ETHUSDT


In [93]:
# Drop the 'timestamp' label from the index (mutates `coins` in place)
coins.index.name = None 

In [94]:
# Preview the long-format table (one row per timestamp and coin)
coins.head()

Unnamed: 0,close,coin
2020-08-28 00:00:00,383.69,ETHUSDT
2020-08-28 00:01:00,382.9,ETHUSDT
2020-08-28 00:02:00,381.72,ETHUSDT
2020-08-28 00:03:00,381.75,ETHUSDT
2020-08-28 00:04:00,381.45,ETHUSDT


In [95]:
# Reshape from long to wide: one column per value of 'coin', rows keyed by the
# existing index.
# NOTE: this rebinds `coins` to the pivoted frame, so the cell is not meant to
# be re-run on its own without re-running the cells that build `coins`.
coins = coins.pivot(columns='coin')

In [96]:
# Preview the wide table; the columns are now a two-level ('close', coin) index
coins.head()

Unnamed: 0_level_0,close,close,close,close
coin,EOSUSDT,ETHUSDT,LINKUSDT,LTCUSDT
2020-08-28 00:00:00,2.9813,383.69,14.5358,56.17
2020-08-28 00:01:00,2.9759,382.9,14.5214,56.07
2020-08-28 00:02:00,2.9668,381.72,14.4291,55.93
2020-08-28 00:03:00,2.9674,381.75,14.3985,55.88
2020-08-28 00:04:00,2.9659,381.45,14.3945,55.81


In [97]:
# Flatten the two-level ('close', <coin>) column index left by the pivot into
# plain coin names. The labels are read from the frame itself rather than
# hard-coded, so they stay correct if the list of symbols changes.
coins.columns = list(coins.columns.get_level_values(-1))

In [98]:
# Inspect the column dtypes; `object` here presumably means the prices are
# still strings and need converting before any arithmetic.
coins.dtypes

EOSUSDT     object
ETHUSDT     object
LINKUSDT    object
LTCUSDT     object
dtype: object

In [99]:
# Convert every column to a numeric dtype, column by column
coins=coins.apply(pd.to_numeric)

In [100]:
# `df1` is a second name for the same object as `coins` (no copy is made)
df1 = coins

In [101]:
# First rows of the numeric wide price table
df1.head()

Unnamed: 0,EOSUSDT,ETHUSDT,LINKUSDT,LTCUSDT
2020-08-28 00:00:00,2.9813,383.69,14.5358,56.17
2020-08-28 00:01:00,2.9759,382.9,14.5214,56.07
2020-08-28 00:02:00,2.9668,381.72,14.4291,55.93
2020-08-28 00:03:00,2.9674,381.75,14.3985,55.88
2020-08-28 00:04:00,2.9659,381.45,14.3945,55.81


In [102]:
# Last rows of the numeric wide price table (most recent candles)
df1.tail()

Unnamed: 0,EOSUSDT,ETHUSDT,LINKUSDT,LTCUSDT
2020-08-28 16:22:00,3.1553,396.1,15.4188,58.07
2020-08-28 16:23:00,3.1519,395.86,15.3982,58.0
2020-08-28 16:24:00,3.1491,395.67,15.3896,57.97
2020-08-28 16:25:00,3.1451,395.85,15.4035,58.03
2020-08-28 16:26:00,3.1442,395.68,15.442,58.0


In [103]:
# Pair used for the regression: the independent column is the regressor and
# the dependent column is the series being explained.
independent, dependent = 'ETHUSDT', 'LTCUSDT'

In [104]:
def generate_signals(df,independent,dependent,multiplier):
    """Return the latest trading signal for a pair of price series.

    Steps:
      1. Convert prices to percentage returns and drop the leading NaN row.
      2. Regress the dependent returns on the independent returns
         (ordinary least squares with an intercept).
      3. Fit an AR(1) model to the regression residuals.
      4. Compare the AR(1) one-step error of each residual with a band of
         ``multiplier`` standard deviations of those errors.

    Parameters
    ----------
    df : pandas.DataFrame
        Price table containing at least the ``independent`` and ``dependent``
        columns.
    independent : str
        Column used as the regressor.
    dependent : str
        Column being explained by the regression.
    multiplier : float
        Width of the no-trade band, in standard deviations.

    Returns
    -------
    numpy.float64
        Signal for the last row: ``1.0`` when the error is below the lower
        band, ``-1.0`` when it is above the upper band, ``0.0`` otherwise.
    """
    returns = df.pct_change().dropna()

    X = returns[independent].values.reshape(-1, 1)
    y = returns[dependent]
    model = LinearRegression()
    model.fit(X, y)
    predicted_y_values = model.predict(X)

    # Residual = observed dependent return minus the model's prediction.
    # (The original subtracted the prediction from a hard-coded 'ETHUSDT'
    # column, i.e. from the regressor, and ignored `dependent`.)
    residuals = y - predicted_y_values

    # NOTE(review): `ARMA` is imported from `statsmodels.tsa.arima_model`,
    # which recent statsmodels releases no longer ship - confirm the pinned
    # version, or migrate to `statsmodels.tsa.arima.model.ARIMA`.
    model_AR = ARMA(residuals.values, order = (1,0))
    results = model_AR.fit()
    a = results.params[0]
    b = results.params[1]

    # One-step AR(1) error; the first element is NaN because of the shift and
    # therefore never triggers a signal.
    # NOTE(review): `a` is used as an intercept here - confirm that this
    # matches how the fitted model reports its constant term.
    residuals2 = residuals - (a + b*residuals.shift())
    sigma_epsilon = residuals2.std()
    threshold = multiplier*sigma_epsilon

    # Both sides of the band are evaluated in one expression; previously the
    # second assignment overwrote the first, so -1.0 could never be returned.
    signal = np.where(residuals2 > threshold, -1.0,
                      np.where(residuals2 < -threshold, 1.0, 0.0))
    return signal[-1]

In [112]:
# Latest signal for the chosen pair with a 1.2-sigma band. Uses `df1`, the
# numeric wide price table built above; the name `df` is never defined in this
# notebook, so the call failed on a fresh kernel.
generate_signals(df1,independent,dependent,1.2)

0.0

In [106]:
# Translate the latest signal into a trading instruction. Uses `df1`, the
# numeric wide price table built above; `df` is never defined in this notebook.
# NOTE(review): confirm that these trade directions match the sign convention
# of generate_signals before acting on them.
position = generate_signals(df1,independent,dependent,1.2)
if position == 1:
    print('sell dependent, buy independent')
elif position == -1:
    print('buy dependent, sell independent')
else:
    print('hold')

hold
