In [1]:
import import_ipynb
import pandas as pd
import numpy as np
import torch
from datetime import datetime
from datetime import datetime as dt
from torch.utils.data import Dataset
from sklearn.preprocessing import StandardScaler
import pickle
from statsmodels.tsa.stattools import adfuller

In [2]:
from aiagentbase import AIAgent,Controller,Memory,Perception,Actor

importing Jupyter notebook from aiagentbase.ipynb


In [3]:
# Price/volume feature columns (the `_n`-suffixed OHLCV fields).
OHLCV_COLS=[
    'Open_n',
    'High_n',
    'Low_n',
    'Close_n',
    'Volume_n',
]
# Indicator feature columns, in the order they are fed to the agents.
TA_COLS=[
    'SMA_10',
    'SMA_20',
    'VOL_SMA_20',
    'RSI_14',
    'BBL_5_2.0',
    'BBM_5_2.0',
    'BBU_5_2.0',
    'BBB_5_2.0',
    'BBP_5_2.0',
    'MACD_12_26_9',
    'MACDh_12_26_9',
    'MACDs_12_26_9',
    'VWAP_D',
    'MOM_30',
    'CMO_14',
]
# Full feature set: OHLCV columns first, then the indicator columns.
COLS=[*OHLCV_COLS,*TA_COLS]

### Rule-based strategies as agents
See `ruleagents_dev`. Tested with backtest and also with tradeserver. TBD: a common rewards format.
Note: this currently only works for local strategies; remote strategies are not supported yet (TBD).

In [4]:
class RuleAgent(AIAgent):
    """Base class that wraps a rule-based trading strategy as an AIAgent.

    Subclasses provide the actual rules through `check_entry(dfD)` and
    `exit_func(row,df)`; both are called from this class but not defined in it.

    `self.scantickers` is read by most methods but is never assigned here, so it
    must be set on the instance before those methods are used. `self.tidx` is
    the index into `self.scantickers` of the ticker this agent acts on.
    """
    def __init__(self):
        self.agent=True
        self.tidx=0
        # Optional object exposing a per-ticker `status` mapping (see act_on_entry/act_on_exit).
        self.owner=None
        super().__init__()
        # Replace the memory with SplMemory, which calls back into compute_state.
        self.memory=SplMemory()
        self.memory.parent=self
        self.use_memory=False
        ## Augmentations of AIAgent: point the actor/perception hooks at this class' methods
        self.actor.call_model=self.call_model
        self.perception.perceive_state=self.perceive_state
        self.perception.perceive_reward=self.perceive_reward
        self.perception.action_perceptual=self.action_perceptual
        self.actor.compute_reward=self.compute_reward
        self.logL=[]
##Interface to tradeserver
    def set_alt_data(self,alt_data_func,remote=False):
        """Load alternative data into `self.gdata`.

        With `remote=False`, `alt_data_func` is called directly; with
        `remote=True` it is passed to `anvil.server.call`. Either way the
        'gdata' entry of the result is stored.
        """
        # NOTE(review): `anvil` is not imported in the visible notebook cells, so the
        # remote path needs `anvil.server` to be in scope by some other means - confirm.
        if remote: self.gdata=anvil.server.call(alt_data_func)['gdata']
        else: self.gdata=alt_data_func()['gdata']
    def act_on_entry(self):
        """Return False only when an owner reports the current ticker as 'deployed'."""
        if self.owner is None: return True
        return self.owner.status[self.scantickers[self.tidx]]!='deployed'
    def act_on_exit(self):
        """Return False only when an owner reports the current ticker as 'active'."""
        if self.owner is None: return True
        return self.owner.status[self.scantickers[self.tidx]]!='active'
    def check_entry_batch(self,dfD):
        """Entry decision for `dfD`: via `act` when act_on_entry() holds, else the raw rule."""
        if self.act_on_entry(): return self.act(('entry',dfD))
        else: return self.check_entry(dfD)
    def save_func(self,episode_state):
        """Default hook: nothing to save. Subclasses may override."""
        return ()
    def check_exit_batch(self,dfD,posf):
        """Fill `posf['to_exit']` with a per-row exit decision and return `posf`.

        `posf` is modified in place. Rows are read through `row.ticker`, and
        `dfD[row.ticker]` is handed to the exit rule as that ticker's data.
        """
        def exit_fn(row):
            # Only the agent's own ticker goes through `act`; others use the raw rule.
            if self.act_on_exit() and row.ticker==self.scantickers[self.tidx]:
                return self.act(('exit',row,dfD[row.ticker]))
            else: return self.exit_func(row,dfD[row.ticker])
        posf['to_exit']=posf.apply(exit_fn,axis=1).values
        return posf
    def exit_predicate(self,row,df):
        """Exit decision for a single position row, always routed through `act`."""
        return self.act(('exit',row,df))
    def Check(strat,dfD):
        """Alias for check_entry_batch; the instance is named `strat` instead of `self`."""
        return strat.check_entry_batch(dfD)
    def Exit(strat,dfD,posf):
        """Alias for check_exit_batch; the instance is named `strat` instead of `self`."""
        return strat.check_exit_batch(dfD,posf)
## Augmentations of AIAgent for trade sim world
    # def reward(self,reward):
    #     ##Augmenting AIAgent
    #     if self.owner!=None: 
    #         ticknow=self.scantickers[self.tidx]
    #         reward+=[{'ticknow':ticknow,'status':self.owner.status[ticknow]}]
    #     return super().reward(reward)
    # def begin(self,state):
    #     ##Augmenting AIAgent
    #     super().begin(state)
    def call_model(self,state):
        """Policy hook installed on the actor; returns None here, override in subclasses."""
        ##Overriding AIAgent.Model
        #override with actual policy in subclass
        return None
    def perceive_state(self,world_state):
        """Pass the world state through unchanged, tagged 'default' with the current ticker."""
        percept=world_state
        return percept,'default',{'ticker':self.scantickers[self.tidx]}
    def compute_state(self,percept):
        """Convert a percept tuple into a tensor of the COLS feature columns.

        ('entry', dfD) uses dfD[current ticker]; ('exit', row, df) uses df.
        The percept and the resulting tensor are appended to `self.logL` as two
        separate list items.

        Raises:
            ValueError: if percept[0] is neither 'entry' nor 'exit'. Without
                this check the method failed later with an UnboundLocalError.
        """
        trt=self.scantickers[self.tidx]
        kind=percept[0]
        if kind=='entry':
            state=torch.tensor(percept[1][trt][COLS].values)
        elif kind=='exit':
            state=torch.tensor(percept[2][COLS].values)
        else:
            raise ValueError(f"unknown percept kind: {kind!r}")
        self.logL+=[percept,state]
        return state
    def perceive_reward(self,reward):
        """Pass the reward through unchanged, tagged 'default' with the current ticker."""
        #Override AIAgent
        actor_reward=reward
        return actor_reward,'default',{'ticker':self.scantickers[self.tidx]}
    def action_perceptual(self,action):
        """Reduce an action to the value that is stored.

        A tuple yields its first element indexed by the current ticker; a bool
        yields 0/1; anything else yields the *type* of the action.
        """
        trt=self.scantickers[self.tidx]
        if isinstance(action,tuple): return action[0][trt]
        if isinstance(action,bool): return int(action)
        # NOTE(review): stores the type object itself for unrecognised actions - confirm intent.
        return type(action)
    def compute_reward(self,reward):
        """Reduce a reward to a single value.

        A list yields the 'ppnl' entry of its first item (0 if the list is
        empty); a tuple yields its first element; None yields 0.
        """
        if isinstance(reward,list):
            if len(reward)==0: return 0
            return reward[0]['ppnl']
        if isinstance(reward,tuple): return reward[0]
        if reward is None: return 0
        # NOTE(review): any other reward (including a plain number) yields its type
        # object rather than a value - confirm this fallback is intended.
        return type(reward)

In [5]:
class SplMemory(Memory):
    """Memory that stores computed states and skips storing next-states.

    `self.parent` is not set here; it must be assigned by the owning agent and
    must provide `compute_state`.
    """
    def __init__(self):
        super(SplMemory,self).__init__()
    def add_state_action(self,actor_state,action,time):
        """Store the parent's computed form of `actor_state` together with `action`."""
        computed=self.parent.compute_state(actor_state)
        super(SplMemory,self).add_state_action(computed,action,time)
    def update_next_state(self,actor_state,time):
        """Only make sure an entry for `time` exists; `actor_state` is not stored."""
        if time in self.sar_memory: return
        self.sar_memory[time]={}

In [6]:
class AdaMomCMOAgent(RuleAgent):
    """Adaptive momentum strategy using CMO, with an ADF test to pick the regime.

    For each ticker the ADF statistic of 'Close_n' is compared with its 1%
    critical value: above it the ticker is tagged 'tr', otherwise 'mr'. Entry
    and exit thresholds on 'CMO_14' then depend on that tag.

    Args:
        high, low, mid: CMO_14 thresholds used by the entry and exit rules.
    """
    def __init__(self,high=80,low=20,mid=50):
        super(AdaMomCMOAgent,self).__init__()
        self.high=high
        self.low=low
        self.mid=mid
        self.model_type='rule_based'
        self.data_cols=['datetime']+COLS
        self.regime={}      # ticker -> 'tr' or 'mr', set by check_entry
        self.entry_val={}   # ticker -> CMO_14 seen at the last entry check
        self.exit_val={}    # ticker -> CMO_14 seen at the last exit check, or 'not_set'
        self.logL=[]
        self.rewL=[]        # every reward passed to reward(), in order
    def call_model(self,state):
        """Dispatch ('entry', dfD) to check_entry and ('exit', row, df) to exit_func.

        Any other state kind returns None.
        """
        if state[0]=='entry': return self.check_entry(state[1])
        elif state[0]=='exit': return self.exit_func(state[1],state[2])
        return None
    def check_entry(self,dfD):
        """Return (decisions, stops, targets), each a dict keyed by ticker.

        Decisions are 1, -1 or 0 (the same values `always_buy` / `always_sell`
        in this file use). Stops and targets are fixed at 5 for every ticker.
        All decisions stay 0 when the latest bar is at 09:00-09:35, and a
        ticker with fewer than 65 rows is left at 0 without touching its regime.
        An empty `dfD` returns three empty dicts.
        """
        decisionsD={t:0 for t in dfD}
        stopD={t:5 for t in dfD}
        targetD={t:5 for t in dfD}
        # No tickers means no timestamp to read; previously this raised IndexError.
        if len(dfD)==0: return decisionsD,stopD,targetD
        # The time gate uses the latest bar of the first ticker only.
        first=next(iter(dfD))
        timenow=dfD[first].iloc[-1]['datetime']
        hour,minute=timenow.hour,timenow.minute
        if hour==9 and minute<=35: return decisionsD,stopD,targetD
        high=self.high
        low=self.low
        mid=self.mid
        for t in dfD.keys():
            data=dfD[t]
            if data.shape[0]<65: continue
            row=data.iloc[-1]
            # adfuller returns (statistic, pvalue, usedlag, nobs, critical values) with autolag=None.
            adf=adfuller(data['Close_n'],maxlag=30,autolag=None)
            if adf[0]>adf[4]['1%']: self.regime[t]='tr'
            else: self.regime[t]='mr'
            regime=self.regime[t]
            cmo=row['CMO_14']
            if regime=='tr' and low<cmo<mid: decisionsD[t]=1
            elif regime=='tr' and -mid<cmo<-low: decisionsD[t]=-1
            elif regime=='mr' and cmo>high: decisionsD[t]=-1
            elif regime=='mr' and cmo<-high: decisionsD[t]=1
            else: decisionsD[t]=0
            self.entry_val[t]=cmo
            self.exit_val[t]='not_set'
        return decisionsD,stopD,targetD
    def exit_func(self,row,df):
        """Return True when the position in `row` should be closed.

        Reads `row['ticker']` and `row['quant']` (sign selects the branch) and
        the latest 'CMO_14' in `df`. Raises KeyError if check_entry has not yet
        recorded a regime for the ticker.
        """
        dfrow=df.iloc[-1]
        high=self.high
        low=self.low
        regime=self.regime[row['ticker']]
        cmo=dfrow['CMO_14']
        self.exit_val[row['ticker']]=cmo
        if regime=='tr' and row['quant']>0 and cmo>high: return True
        elif regime=='tr' and row['quant']<0 and cmo<-high: return True
        elif regime=='mr' and row['quant']>0 and cmo>-low: return True
        elif regime=='mr' and row['quant']<0 and cmo<low: return True
        # exit cases for detecting a trade on incorrect trend direction
        # elif regime=='tr' and row['quant']>0 and dfrow['CMO_14']<=-mid: return True
        # elif regime=='tr' and row['quant']<0 and dfrow['CMO_14']>=mid: return True
        else: return False
    def save_func(self,episode_state):
        """Return (ticker, entry CMO, exit CMO) for the first key of `episode_state`."""
        ticker=next(iter(episode_state))
        return ticker,self.entry_val[ticker],self.exit_val[ticker]
    def reward(self,reward):
        """Record the reward in `self.rewL`, then forward it to the parent class.

        The parent's result is returned instead of being dropped.
        """
        self.rewL+=[reward]
        return super().reward(reward)

In [7]:
class GapBetAgent(RuleAgent):
    """Rule agent that bets on an extreme 'CMO_14' reading early in the session.

    The trade direction is recomputed on every entry check from
    `global_direction` of that day's alternative data, so `self.gdata` must be
    loaded (see `set_alt_data`) before `check_entry` is called.

    Args:
        entry: absolute CMO_14 level that triggers an entry.
        exit: stored as `self.exit`; not used by any rule in this class.
        direction: accepted for interface compatibility but not used.
    """
    def __init__(self,entry=50,exit=75,direction=-1):
        super(GapBetAgent,self).__init__()
        self.entry=entry
        self.exit=exit
        self.data_cols=['CMO_14','datetime']
        self.model_type='rule-based'
        # `direction` is deliberately not stored: check_entry sets self.direction itself.
    def act(self,state):
        """Dispatch ('entry', dfD) to check_entry and ('exit', row, df) to exit_func.

        Any other state kind returns None.
        """
        # NOTE(review): this overrides `act` itself rather than `call_model` - confirm intended.
        if state[0]=='entry': return self.check_entry(state[1])
        elif state[0]=='exit': return self.exit_func(state[1],state[2])
        return None
    def check_entry(self,dfD):
        """Return (decisions, stops, targets), each a dict keyed by ticker.

        Stops are fixed at 0.25 and targets at 2. Decisions stay 0 once the
        latest bar is later than 09:35. Before that, CMO_14 above `entry`
        yields `self.direction` and CMO_14 below `-entry` yields the opposite.
        An empty `dfD` returns three empty dicts.
        """
        decisionsD={t:0 for t in dfD}
        stopD={t:0.25 for t in dfD}
        targetD={t:2 for t in dfD}
        # No tickers means no timestamp to read; previously this raised IndexError.
        if len(dfD)==0: return decisionsD,stopD,targetD
        # The date and the time gate come from the latest bar of the first ticker only.
        first=next(iter(dfD))
        timenow=dfD[first].iloc[-1]['datetime']
        date=timenow.strftime('%d-%b-%Y')
        gdir=global_direction(self.gdata[date][0])
        # Updated on every call, even when the time gate below blocks entries.
        if abs(gdir)>.5: self.direction=1
        else: self.direction=-1
        hour,minute=timenow.hour,timenow.minute
        if hour>9 or (hour==9 and minute>35): return decisionsD,stopD,targetD
        for t in dfD:
            cmo=dfD[t].iloc[-1]['CMO_14']
            if cmo>self.entry: decisionsD[t]=self.direction
            elif cmo<-self.entry: decisionsD[t]=-self.direction
        return decisionsD,stopD,targetD
    def exit_func(self,row,posf):
        """Never signals an exit."""
        return False

In [8]:
def do_nothing(dfD):
    """Return (decisions, stops, targets) with 0 for every key of `dfD`.

    Each of the three results is its own dict, so a caller that edits one of
    them does not change the others.
    """
    tickers=list(dfD)
    return dict.fromkeys(tickers,0),dict.fromkeys(tickers,0),dict.fromkeys(tickers,0)
def always_buy(dfD):
    """Return (decisions, stops, targets): decision 1 and stop/target 0 for every key of `dfD`.

    The stop and target dicts are separate objects, so a caller that edits one
    does not change the other.
    """
    tickers=list(dfD)
    return dict.fromkeys(tickers,1),dict.fromkeys(tickers,0),dict.fromkeys(tickers,0)
def always_sell(dfD):
    """Return (decisions, stops, targets): decision -1 and stop/target 0 for every key of `dfD`.

    The stop and target dicts are separate objects, so a caller that edits one
    does not change the other.
    """
    tickers=list(dfD)
    return dict.fromkeys(tickers,-1),dict.fromkeys(tickers,0),dict.fromkeys(tickers,0)

In [9]:
def global_direction(gdata):
    """Mean of (Close - Open) over '^NYA', 'LSEG.L' and '^IXIC', multiplied by 100.

    `gdata` is indexed by 'Close_<ticker>' and 'Open_<ticker>' keys.
    """
    moves=[gdata['Close_'+sym]-gdata['Open_'+sym] for sym in ('^NYA','LSEG.L','^IXIC')]
    return 100*sum(moves)/len(moves)
def domestic_direction(gdata):
    """(Close - Open) for '^NSEI', averaged over that single ticker and multiplied by 100.

    `gdata` is indexed by 'Close_<ticker>' and 'Open_<ticker>' keys.
    """
    moves=[gdata['Close_'+sym]-gdata['Open_'+sym] for sym in ('^NSEI',)]
    return 100*sum(moves)/len(moves)