In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("/Users/Jimo/Python Scripts/Crypto_Currency_Trading")

import re
import numpy as np
import pandas as pd
import scipy.stats as scs

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

from Utils.Data import Feeder
from Utils.Metrics import Metrics
from Utils.Trading_Slm import calc_pattern_freq, create_unique_price_data_df, tune_execute_slm

  from numpy.core.umath_tests import inner1d


## Trading Strategy Based on Statistical Language Modeling (SLM)

Use depth data to predict, since it is bid-ask market data. 
Trade data is decision or actions of other competitiors. It is decisions rather than reasons leading to the decisions. 

In our stragegy, we focus on bid 1 (ask 1) price. 
Our trading size should also be within ask 1 (bid 1) size. Demand size must be within offer size. 

Reason: 
    1. In actual trading, bid 1 and ask 1 are closest to demand of the opposite side. 
    2. They are easiest and earliest to be absorbed.
    3. We don't know when bid 2 or ask 2 will be exposed to us. 
    4. If prediction is based on level 1 price, then decision should be within based on the same level. 

Series length will be between 5 to 8. 
This could be set as a parameter in our eventual code. 

Reason: 
    1. If too short, then it doesn't contain enough historical information and thus is not convincing enough. 
    2. If too long, then there are too many scenarios to store and the chance that a series in not recored in our "Database" will be larger. 
    It is possible that one series never showed up in history. Reducing length of series to increase the frequency of each series. 

BIT coin markets disallow short operations. 
We will not short in our strategy. 

Codes for directions: 
    1. P(i) < P(i-1), s(i) = -1
    2. P(i) > P(i-1), s(i) = 1
    3. P(i)==P(i-1), s(i) = 1 (if according to paper) or 0 (if according to our intuition). 

Since most adjacent seconds have the same mid price, just much fewer adjacent seconds have different mid prices, it is better to combine equal prices to either 1 or -1. 
For now, let's combine equal price with lower price. The reason is: 
    1. When we hold no position, prediction of equal price at next second will not lead to buy. This is the same as prediction of lower price.
    2. When we hold long position, if next second price will be equal or lower, we can sell and make money. 
    3. Bitcoin disallow short position. 

Basic trading strategy is described below. 
    1. We hold no position: 
        - If predicting higher price: buy 
        - If predicting lower price: no action
    2. We hold long positions
        - If predicting higher price: no action
        - If predicting lower price: sell all our position
    3. There is no short scenario since BIT disallows short operation. 

After googling, I think shorting operation exists for BIT coins. So refined strategy is described below. 
    1. We hold no position: 
        - If predicting higher price: buy 
        - If predicting lower price: short
    2. We hold long positions
        - If predicting higher price: no action
        - If predicting lower price: close long position, enter short position
    3. We hold short positions
        - If predicting higher price: close short position, enter long position
        - If predicting lower price: no action

### Step 1: Load Data

In [2]:
Depth_data = Feeder("../Data/BINANCE_SPOT_BTC_USDT_01012018_depth.csv").get()
Quote_data = Feeder("../Data/BINANCE_SPOT_BTC_USDT_01012018_quote.csv").get()
Trade_data = Feeder("../Data/BINANCE_SPOT_BTC_USDT_01012018_trade.csv").get()

In [3]:
bid_ask_df = Depth_data.filter(regex="(ask|bid)[1]_price").reset_index()
bid_ask_df.head()

Unnamed: 0,time_exchange,bid1_price,ask1_price
0,2018-01-01 00:00:00,13681.02,13716.36
1,2018-01-01 00:00:01,13681.02,13716.36
2,2018-01-01 00:00:02,13681.02,13716.36
3,2018-01-01 00:00:03,13681.02,13715.67
4,2018-01-01 00:00:04,13681.02,13715.67


In [4]:
analysis = Metrics(Depth_data, Quote_data, Trade_data)
mid_prices = analysis.depth_data["mid_price"]
mid_prices.head()

time_exchange
2018-01-01 00:00:00    13698.690
2018-01-01 00:00:01    13698.690
2018-01-01 00:00:02    13698.690
2018-01-01 00:00:03    13698.345
2018-01-01 00:00:04    13698.345
Name: mid_price, dtype: float64

In [5]:
mid_price_df = mid_prices.to_frame().reset_index()
mid_price_df.head()

Unnamed: 0,time_exchange,mid_price
0,2018-01-01 00:00:00,13698.69
1,2018-01-01 00:00:01,13698.69
2,2018-01-01 00:00:02,13698.69
3,2018-01-01 00:00:03,13698.345
4,2018-01-01 00:00:04,13698.345


In [6]:
price_df = pd.merge(bid_ask_df, mid_price_df, how='inner', on='time_exchange')
price_df.head()

Unnamed: 0,time_exchange,bid1_price,ask1_price,mid_price
0,2018-01-01 00:00:00,13681.02,13716.36,13698.69
1,2018-01-01 00:00:01,13681.02,13716.36,13698.69
2,2018-01-01 00:00:02,13681.02,13716.36,13698.69
3,2018-01-01 00:00:03,13681.02,13715.67,13698.345
4,2018-01-01 00:00:04,13681.02,13715.67,13698.345


### Step 2: Calculate Historical Directions

In [7]:
price_increments = mid_prices - mid_prices.shift(1)
price_increments_df = price_increments.dropna().to_frame().reset_index()
price_increments_df = price_increments_df.rename(columns={'mid_price': 'mid_price_increments'})
price_increments_df["mid_price_direction"] = -1

In [8]:
price_increments_df[price_increments_df["mid_price_increments"] > 0].head()

Unnamed: 0,time_exchange,mid_price_increments,mid_price_direction
13,2018-01-01 00:00:14,0.045,-1
24,2018-01-01 00:00:25,0.035,-1
26,2018-01-01 00:00:27,0.01,-1
45,2018-01-01 00:00:46,14.115,-1
51,2018-01-01 00:00:52,0.01,-1


In [9]:
price_increments_df[price_increments_df["mid_price_increments"] < 0].head()

Unnamed: 0,time_exchange,mid_price_increments,mid_price_direction
2,2018-01-01 00:00:03,-0.345,-1
11,2018-01-01 00:00:12,-0.305,-1
22,2018-01-01 00:00:23,-2.335,-1
33,2018-01-01 00:00:34,-0.225,-1
34,2018-01-01 00:00:35,-13.805,-1


In [10]:
price_increments_df["mid_price_direction"].loc[price_increments_df["mid_price_increments"] > 0] = 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


### Step 3: Calculate Historical Frequency

In [11]:
window_len = 6

In [12]:
series_freq_dict = calc_pattern_freq(price_increments_df["mid_price_direction"].tolist(), window_len)

### Step 4: Pre-Trade Analysis

In [13]:
price_data_df = pd.merge(price_df, price_increments_df, how='inner', on='time_exchange')
price_data_df.head(n=30)

Unnamed: 0,time_exchange,bid1_price,ask1_price,mid_price,mid_price_increments,mid_price_direction
0,2018-01-01 00:00:01,13681.02,13716.36,13698.69,0.0,-1
1,2018-01-01 00:00:02,13681.02,13716.36,13698.69,0.0,-1
2,2018-01-01 00:00:03,13681.02,13715.67,13698.345,-0.345,-1
3,2018-01-01 00:00:04,13681.02,13715.67,13698.345,0.0,-1
4,2018-01-01 00:00:05,13681.02,13715.67,13698.345,0.0,-1
5,2018-01-01 00:00:06,13681.02,13715.67,13698.345,0.0,-1
6,2018-01-01 00:00:07,13681.02,13715.67,13698.345,0.0,-1
7,2018-01-01 00:00:08,13681.02,13715.67,13698.345,0.0,-1
8,2018-01-01 00:00:09,13681.02,13715.67,13698.345,0.0,-1
9,2018-01-01 00:00:10,13681.02,13715.67,13698.345,0.0,-1


In [14]:
unique_price_data_df = price_data_df.drop_duplicates(subset=["mid_price"], keep='first')
unique_price_data_df.head(n=20)

Unnamed: 0,time_exchange,bid1_price,ask1_price,mid_price,mid_price_increments,mid_price_direction
0,2018-01-01 00:00:01,13681.02,13716.36,13698.69,0.0,-1
2,2018-01-01 00:00:03,13681.02,13715.67,13698.345,-0.345,-1
11,2018-01-01 00:00:12,13681.14,13714.94,13698.04,-0.305,-1
13,2018-01-01 00:00:14,13681.23,13714.94,13698.085,0.045,1
22,2018-01-01 00:00:23,13681.5,13710.0,13695.75,-2.335,-1
24,2018-01-01 00:00:25,13681.57,13710.0,13695.785,0.035,1
26,2018-01-01 00:00:27,13681.59,13710.0,13695.795,0.01,1
33,2018-01-01 00:00:34,13681.83,13709.31,13695.57,-0.225,-1
34,2018-01-01 00:00:35,13681.76,13681.77,13681.765,-13.805,-1
45,2018-01-01 00:00:46,13681.79,13709.97,13695.88,14.115,1


In [15]:
unique_price_data_df.describe()

Unnamed: 0,bid1_price,ask1_price,mid_price,mid_price_increments,mid_price_direction
count,12955.0,12955.0,12955.0,12955.0,12955.0
mean,13348.82578,13364.948144,13356.886962,-0.021545,0.011038
std,224.879156,225.566787,225.139627,8.306653,0.999978
min,12750.0,12760.97,12755.815,-139.505,-1.0
25%,13165.775,13179.965,13171.7,-2.4725,-1.0
50%,13400.0,13410.0,13404.45,0.005,1.0
75%,13531.0,13548.235,13539.54,2.3875,1.0
max,13811.36,13817.53,13812.08,143.875,1.0


In [16]:
max(unique_price_data_df['bid1_price'].tolist())

13811.36

In [17]:
min(unique_price_data_df['ask1_price'].tolist())

12760.97

### Step 5: Run Basic Strategy

In [18]:
unique_price_df = unique_price_data_df.reset_index()
unique_price_df.head(n=20)

Unnamed: 0,index,time_exchange,bid1_price,ask1_price,mid_price,mid_price_increments,mid_price_direction
0,0,2018-01-01 00:00:01,13681.02,13716.36,13698.69,0.0,-1
1,2,2018-01-01 00:00:03,13681.02,13715.67,13698.345,-0.345,-1
2,11,2018-01-01 00:00:12,13681.14,13714.94,13698.04,-0.305,-1
3,13,2018-01-01 00:00:14,13681.23,13714.94,13698.085,0.045,1
4,22,2018-01-01 00:00:23,13681.5,13710.0,13695.75,-2.335,-1
5,24,2018-01-01 00:00:25,13681.57,13710.0,13695.785,0.035,1
6,26,2018-01-01 00:00:27,13681.59,13710.0,13695.795,0.01,1
7,33,2018-01-01 00:00:34,13681.83,13709.31,13695.57,-0.225,-1
8,34,2018-01-01 00:00:35,13681.76,13681.77,13681.765,-13.805,-1
9,45,2018-01-01 00:00:46,13681.79,13709.97,13695.88,14.115,1


In [19]:
train_percent = 0.8
pattern_len = 6
profit, buy_order_count, sell_order_count = tune_execute_slm(unique_price_df, train_percent, pattern_len)

Profit is: -2733.9299999998184...
Buy order counts: 246...
Sell order counts: 246...


### Step 6: Run Revised Strategy

In [20]:
train_percent = 0.8
pattern_len = 6
profit, buy_order_count, sell_order_count = tune_execute_slm(unique_price_df, train_percent, pattern_len, allow_short=True)

Profit is: 7717.560000000871...
Buy order counts: 246...
Sell order counts: 246...
