## Logical predicates defined on financial (stock) time-series

In [None]:
import import_ipynb
import pandas as pd
import numpy as np
import torch
from datetime import datetime
from datetime import datetime as dt
# from tqdm.notebook import tqdm
import pickle

In [None]:
from pyswip import Prolog, registerForeign

In [None]:
FEAT_COLS=['row_num','Open_n','High_n','Low_n','Close_n','Volume_n','VWAP_D',
           'MOM_30','CMO_14','RSI_14']
TO_SCALE=['Open_n','High_n','Low_n','Close_n','VWAP_D']

In [None]:
from logicstrat import LogicStrat

In [None]:
logicStrat=LogicStrat()

In [None]:
logicStrat.load_log()

In [None]:
df=logicStrat.logL[60]['SBIN.NS']

In [None]:
# df.to_csv('./data.csv',index=False)

At any point in time, the time-series for a particular stock (`df` above) is used to compute logical predicates.

In [None]:
df[FEAT_COLS].head()

In [None]:
import plotly.graph_objects as go

# Candlestick trace over the normalized OHLC columns of the current window.
candles = go.Candlestick(
    x=df.index,
    open=df["Open_n"],
    high=df["High_n"],
    low=df["Low_n"],
    close=df["Close_n"],
)
fig = go.Figure(data=[candles])
fig.update_layout(title='Incoming Window', yaxis_title="Normalized Price")
fig.show()

In [None]:
p=Prolog()

In [None]:
factsD={'tick':'tick(T,O,H,L,C,V,W,B,R)'}

The base fact is a 'tick' at time T, with the open, high, low and close prices (O, H, L, C) and the trading volume (V) as attributes. The body of the 'candle' is B=|open-close|, and the range beyond the body is R=(high-low)-B. Additionally, W (the volume-weighted average price) is included, though it may not be used. Facts are asserted below.

In [None]:
# One binary fact template per feature column: <lowercased column>(R,V).
factsD.update((col.lower(), col.lower()+'(R,V)') for col in FEAT_COLS)

In [None]:
def clean_kb(factsD):
    """Retract from the Prolog engine ``p`` all facts matching ``factsD``.

    Args:
        factsD: mapping from a predicate name to a fact template string
            (e.g. ``'tick(T,O,H,L,C,V,W,B,R)'``); only the values are used.
    """
    for template in factsD.values():
        goal = "retract(" + template + "),fail"
        # Failure-driven loop on the Prolog side; the query must still be
        # iterated for it to run at all.
        for _ in p.query(goal):
            pass

In [None]:
clean_kb(factsD)

In [None]:
def retract_rules(predsL):
    """Abolish every predicate named in ``predsL`` from the Prolog engine ``p``.

    Args:
        predsL: iterable of predicate indicators as strings, e.g. ``'red/1'``.
    """
    for indicator in predsL:
        goal = "abolish(" + indicator + "),fail"
        # The query is lazy, so it has to be drained to take effect.
        for _ in p.query(goal):
            pass

In [None]:
predsL=[]

In [None]:
retract_rules(predsL)

In [None]:
def assert_rules():
    """Assert the base rule set into the Prolog engine ``p``.

    The rules cover candle colour (``red/1``, ``green/1``), the ``doji/1``
    pattern with its ``small/1`` threshold, list min/max helpers, the
    ``low/2`` and ``high/2`` extremes over ``close_n/2``, and ``len/2``.

    Returns:
        list[str]: the predicate indicator (``name/arity``) of every rule
        asserted here, in assertion order, in the form ``retract_rules``
        expects.
    """
    # (indicator, clauses) pairs; clauses are asserted in this exact order.
    rules = [
        ('red/1', ["red(T):-tick(T,O,H,L,C,V,W,B,R),O>C"]),
        ('green/1', ["green(T):-tick(T,O,H,L,C,V,W,B,R),O=<C"]),
        ('doji/1', ["doji(T):-tick(T,O,H,L,C,V,W,B,R),R>B,small(B)"]),
        ('small/1', ["small(V):-V<2"]),
        ('list_min/2',
         ["list_min([L|Ls], Min) :- foldl(num_num_min, Ls, L, Min)"]),
        # Indicator was previously misspelled 'num_num_mun/3', so
        # retract_rules() never abolished this helper.
        ('num_num_min/3', ["num_num_min(X, Y, Min) :- Min is min(X, Y)"]),
        ('list_max/2',
         ["list_max([L|Ls], Max) :- foldl(num_num_max, Ls, L, Max)"]),
        ('num_num_max/3', ["num_num_max(X, Y, Max) :- Max is max(X, Y)"]),
        ('low/2',
         ["low(LT,Min):-findall(X, close_n(T,X),L),list_min(L,Min),close_n(LT,Min)"]),
        ('high/2',
         ["high(LT,Max):-findall(X, close_n(T,X),L),list_max(L,Max),close_n(LT,Max)"]),
        ('len/2',
         ["len([], LenResult):-LenResult is 0",
          "len([X|Y], LenResult):-len(Y, L),LenResult is L + 1"]),
    ]
    predsL = []
    for indicator, clauses in rules:
        for clause in clauses:
            p.assertz(clause)
        predsL.append(indicator)
    return predsL

Additional rules above include:
 - 'red(T)' and 'green(T)', indicating whether open>close or open<=close at time T
 - 'low(T,L)' indicates that the lowest closing value in the history was L at time T, and 'high(T,H)' similarly for the highest.

Further, a 'doji' denotes a tick where the body is small (see below) and smaller than the extended range.
                                                                    
A few utility predicates are also defined and used in the definition of the above predicates, including hyper-parameters such as what counts as 'small' (above, this is defined as anything <2).

In [None]:
predsL=assert_rules()

Facts are asserted below; note that some values are scaled so that Prolog is able to handle them more easily.

In [None]:
def assert_row(row, factor=10000):
    """Assert one time-series row into the Prolog engine ``p`` as facts.

    Asserts a ``tick/9`` fact (row number, open, high, low, close, volume,
    VWAP, body size, wick) followed by one binary fact
    ``<feature>(row_num, value)`` per column in ``FEAT_COLS``.

    Args:
        row: mapping-like record (a DataFrame row when used with
            ``df.apply(..., axis=1)``) holding the ``FEAT_COLS`` columns.
        factor: multiplier applied to the ``TO_SCALE`` columns; the default
            of 10000 puts everything in 'basis points'.
    """
    # Work on a copy: mutating the object handed over by DataFrame.apply is
    # not supported by pandas and can leak the scaling into the caller's data.
    row = row.copy()
    for key in TO_SCALE:
        row[key] = factor * row[key]

    # Candle body and the range beyond the body, both in scaled units.
    size = abs(row['Open_n'] - row['Close_n'])
    wick = abs(row['High_n'] - row['Low_n']) - size

    tick_args = [row['row_num'], row['Open_n'], row['High_n'], row['Low_n'],
                 row['Close_n'], row['Volume_n'], row['VWAP_D'], size, wick]
    p.assertz('tick(' + ','.join(str(arg) for arg in tick_args) + ')')

    # Per-feature facts use the scaled values for the TO_SCALE columns.
    for key in FEAT_COLS:
        p.assertz(key.lower() + '(' + str(row['row_num']) + ','
                  + str(row[key]) + ')')

In [None]:
_=df.apply(assert_row,axis=1)

- 'now(T)' indicates the 'current' time, i.e., last element of the time-series

In [None]:
p.assertz("now("+str(df.iloc[-1]['row_num'])+")")

some test queries

In [None]:
list(p.query('now(T)'))

In [None]:
list(p.query('tick(T,O,H,L,C,V,W,B,R)'))

In [None]:
list(p.query('open_n(R,V)'))

In [None]:
list(p.query('rsi_14(R,V)'))

Above: a number of technical-analysis features, such as rsi_14 (relative strength index), mom_30 and cmo_14 (two different momentum measures), etc., are also asserted, according to those listed in FEAT_COLS at the top of the notebook.

In [None]:
list(p.query('low(T,L)'))

In [None]:
list(p.query('doji(T)'))

In [None]:
# p.query() is lazy: without list() the goal is never executed and the cell
# only shows a generator object.
list(p.query('row_num(T,T),not(doji(T))'))

In [None]:
list(p.query('green(T)'))

## More Complex Logical Patterns

In [None]:
for f in p.query("abolish("+"bearish/2"+"),fail"): pass
for f in p.query("abolish("+"bullish/2"+"),fail"): pass

'bullish' pattern is a subsequence where the number of greens is delta more than reds, and vice versa for 'bearish'. Value of delta is a hyperparameter asserted also.

In [None]:
p.assertz("delta(3)")

In [None]:
p.assertz("bearish(TB,TN):-findall(TZ,(red(TZ),TZ>=TB,TZ=<TN),RL),\
                            findall(TY,(green(TY),TY>=TB,TY=<TN),GL),\
                            len(RL,LRL),len(GL,LGL),delta(D),LRL>=LGL+D")

In [None]:
# bullish(TB,TN): within [TB,TN] the greens outnumber the reds by at least
# delta. Uses the same non-strict comparison as the mirrored bearish/2 rule
# (previously the strict LRL+D<LGL, i.e. off by one relative to bearish).
p.assertz("bullish(TB,TN):-findall(TZ,(red(TZ),TZ>=TB,TZ=<TN),RL),\
                            findall(TY,(green(TY),TY>=TB,TY=<TN),GL),\
                            len(RL,LRL),len(GL,LGL),delta(D),LGL>=LRL+D")

there are many 'candlestick patterns' that can be asserted as rules, e.g. three-horsemen and three-crows:

In [None]:
p.assertz("succ(T2,T1):-T2>T1,T2=<T1+1")

In [None]:
for f in p.query("abolish("+"threehorsemen/1"+"),fail"): pass
for f in p.query("abolish("+"threecrows/1"+"),fail"): pass

In [None]:
threehorsemen="threehorsemen(T3):-green(T3),green(T2),green(T1),succ(T3,T2),succ(T2,T1),"
threehorsemen+="open_n(T3,O3),close_n(T3,C3),open_n(T2,O2),close_n(T2,C2),open_n(T1,O1),close_n(T1,C1),"
threehorsemen+="O2>O1,C2>C1,O3>O2,C3>C2"
p.assertz(threehorsemen)

In [None]:
threecrows="threecrows(T3):-red(T3),red(T2),red(T1),succ(T3,T2),succ(T2,T1),"
threecrows+="open_n(T3,O3),close_n(T3,C3),open_n(T2,O2),close_n(T2,C2),open_n(T1,O1),close_n(T1,C1),"
threecrows+="O2<O1,C2<C1,O3<O2,C3<C2"
p.assertz(threecrows)

In [None]:
list(p.query("threehorsemen(T)"))

In [None]:
list(p.query("threecrows(T)"))

Similarly we assert thresholds and associated predicates for various technical indicators:

In [None]:
p.assertz("cmohighval(50)")
p.assertz("cmolowval(-50)")

In [None]:
# The CMO facts are asserted as cmo_14/2 (from the 'CMO_14' feature column);
# no cmo/2 predicate exists, so the rules must reference cmo_14/2.
# highcmo(H): some tick has a CMO value above the high threshold H.
p.assertz("highcmo(H):-cmo_14(T,V),cmohighval(H),V>H")
# lowcmo(L): some tick has a CMO value below the low threshold L
# (the head previously used an unrelated, never-bound variable H).
p.assertz("lowcmo(L):-cmo_14(T,V),cmolowval(L),V<L")

We also define when an interval follows another:

In [None]:
p.assertz("follows([AS,AE],[BS,BE]):-BS>AE,BS<BE,AS<AE")

## Goal is to use the above logical predicates to learn a policy

In [None]:
policy_predicates=['policy_buy','policy_sell','policy_hold'] 

## in terms of the above background knowledge

For example, we assert all the facts as above as well as all that are true at time T where now(T), including bearish or bullish trends ending at T, values of technical indicators at T and whether they are above or below thresholds, as well as above or below zero.

Now, if we were doing pure ILP, we might learn policies such as:

In [None]:
"policy_buy:-now(N),bullish(TS,N),cmo_14(TS,C),C>0"

In [None]:
"policy_sell:-now(N),bullish(TS,N),follows([TS,N],[BS,TS]),bearish(BS,TS),N>BS-TS-2,cmo_14(N,C),C>0"

## Questions (for Ashwin):
(a) can we learn such policies without labels in pure ILP, i.e., in an RL setting?
(b) which differentiable logical neural network formulation might be best to learn a policy using logical background knowledge as above. (c) ideally one would also like to learn various thresholds (hyperparameters in the logical theory above) during the process of learning a policy.

## Experiments: 
various complex queries just to test

In [None]:
list(p.query("findall(TS,(row_num(TS,TS),now(N),bullish(TS,N),cmo_14(TS,C),C>0),BL)"))

In [None]:
list(p.query("findall([TS,C],(row_num(TS,TS),now(N),bearish(TS,N),cmo_14(TS,C),C<0),BL)"))

In [None]:
list(p.query("findall([TS,TE,BS,BE],(row_num(TS,TS),row_num(TE,TE),row_num(BS,BS),row_num(BE,BE),follows([TS,TE],[BS,BE]),bearish(TS,TE),bearish(BS,BE)),BL)"))

# Saving Data as csv for Ashwin

In [None]:
L=logicStrat.logL

In [None]:
#!mkdir ../temp_data/for_ashwin

In [None]:
# Dump each symbol's frame from log entry i to its own file.
for i in range(1):#range(len(L)):
    for s in L[i]:
        # Index with i (was hard-coded L[0]) so that widening the range
        # exports the matching entry rather than entry 0 every time.
        L[i][s].to_csv('../temp_data/for_ashwin/'+str(s)+'_'+str(i))

In [None]:
# Directly from Feed:
with open('../temp_data/feed_sim.pickle','rb') as f: feed_sim=pickle.load(f)

In [None]:
feed_sim.data[list(feed_sim.data.keys())[0]].columns

In [None]:
for s in feed_sim.data:
    feed_sim.data[s].to_csv('../temp_data/for_ashwin/'+str(s)+'.csv')

# ADF Test

In [None]:
from statsmodels.tsa.stattools import adfuller

In [None]:
X=df['Close'].values

In [None]:
# Augmented Dickey-Fuller test on a slice of the closing-price series.
result = adfuller(X[10:50])
adf_stat, p_value, critical_values = result[0], result[1], result[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
print('Critical Values:')
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

In [None]:
from matplotlib import pyplot as plt

In [None]:
plt.plot(X)