In [1]:
import pandas as pd
import numpy as np
import time
import datetime
import os

# --- Q-learning hyper-parameters and shared trading state -------------------
# Number of state features: ['action','close-avg','deal','amp_cover','vol_cover']
N_STATES = 5
# Actions: flat, buy, sell, hold, close long position, close short position
ACTIONS = [
    'Wait', 'Buy', 'Sell', 'OpenInteres',
    'close-avg', 'deal', 'amp_cover', 'vol_cover',
]
EPSILON = 0.9       # greediness (greedy policy)
ALPHA = 0.1         # learning rate
GAMMA = 0.9         # reward discount factor
MAX_EPISODES = 13   # maximum number of episodes
FRESH_TIME = 0.3    # time interval between moves

# Module-level trading state. A `global` statement is a no-op at module scope,
# so plain assignments are enough to create these names.
OpenInteres = 0       # whether a position is currently held
priceAtPurchase = 0   # profit and loss

In [2]:
def get_data(data_dir='../data'):
    """Load the last tick file in ``data_dir`` and aggregate it into 1-minute candles.

    The file is read as a header-less CSV with the columns
    ``ndatetime, nbid, nask, close, volume, deal``. Ticks are sorted by time,
    restricted to hours 8 <= hour < 15, and resampled per minute.

    Parameters
    ----------
    data_dir : str, optional
        Directory holding the tick files. The lexicographically last file name
        is used. Defaults to ``'../data'``.

    Returns
    -------
    pandas.DataFrame
        One row per minute with the columns
        ``ndatetime`` (bin label), ``close_Avg`` (close minus the running mean
        of close), ``deal`` (cumulative sum of the per-minute ``deal`` totals),
        ``amplitude`` (high - low), ``volume`` (per-minute sum),
        ``close_diff`` (next candle's close minus this candle's close, 0 for
        the last row) and ``close``.

    Raises
    ------
    FileNotFoundError
        If ``data_dir`` contains no entries.

    Side effects
    ------------
    Prints the chosen file name and the tail of the result, and stores the
    result in the module-level name ``CandleData``.
    """
    global CandleData

    directory = os.path.abspath(data_dir)
    # os.listdir() returns entries in arbitrary order; sort so that [-1] is
    # deterministic (the lexicographically last name).
    filelist = sorted(os.listdir(directory))
    if not filelist:
        raise FileNotFoundError('no tick files found in ' + directory)
    file = filelist[-1]
    print(file)

    # os.path.join keeps the path valid on every platform (the previous
    # hard-coded '\\' separator only worked on Windows).
    dayticks = pd.read_csv(
        os.path.join(directory, file),
        header=None,
        names=['ndatetime', 'nbid', 'nask', 'close', 'volume', 'deal'],
    )
    dayticks['ndatetime'] = pd.to_datetime(dayticks['ndatetime'], format='%Y-%m-%d %H:%M:%S.%f')
    # sort_values returns a new frame; the result must be assigned. A stable
    # sort keeps the file order of ticks that share a timestamp.
    dayticks = dayticks.sort_values(by=['ndatetime'], ascending=True, kind='mergesort')
    in_session = (dayticks.ndatetime.dt.hour >= 8) & (dayticks.ndatetime.dt.hour < 15)
    dayticks = dayticks[in_session].set_index('ndatetime', drop=False)

    # NOTE(review): OHLC uses closed='right' while volume/deal use the default
    # bin closure, so a tick exactly on a minute boundary lands in different
    # bins for price and for volume/deal. Kept as-is; confirm this is intended.
    ohlc = dayticks['close'].resample('1min', closed='right').ohlc()
    volume = dayticks['volume'].resample('1min').sum()
    deal = dayticks['deal'].resample('1min').sum()
    Candledf = pd.concat([ohlc, volume, deal], axis=1).sort_index()
    Candledf['deal'] = Candledf['deal'].cumsum()  # running total of deal
    Candledf = Candledf.rename_axis('ndatetime').reset_index()

    # Running average of close: cumulative sum divided by the 1-based row
    # position. Vectorised replacement for the former per-row value lookup,
    # which was quadratic and raised IndexError on NaN closes.
    Candledf['Avg_line'] = Candledf.close.cumsum() / np.arange(1, len(Candledf) + 1)
    Candledf['amplitude'] = Candledf.high - Candledf.low
    Candledf['close_Avg'] = Candledf.close - Candledf.Avg_line
    # Change from this candle's close to the next one; 0 where either is missing.
    Candledf['close_diff'] = (Candledf.close.shift(-1) - Candledf.close).fillna(0)

    # .copy() so later writes to the result never hit a view of Candledf.
    CandleData = Candledf[['ndatetime', 'close_Avg', 'deal', 'amplitude', 'volume', 'close_diff', 'close']].copy()
    print(CandleData.tail())
    return CandleData

In [3]:
class QLearning:
    """Tabular Q-learning agent over per-candle states.

    Attributes
    ----------
    actions : list
        Column labels of ``q_table``. The first four labels are initialised to
        0 for every new state; the remaining labels are copied from the state.
    strategy, strategy_action : list
        Column and row labels of ``q_strategy``.
    lr, gamma, espilon : float
        Learning rate, discount factor and greediness.
    q_strategy : pandas.DataFrame
        Zero-initialised table (rows: ``strategy_action``, columns:
        ``strategy``) that ``choose_action`` compares state features against.
    q_table : pandas.DataFrame
        One row per visited state, indexed by the state's ``name``.
    """

    def __init__(self,actionslist=None,LearnRate=0.01,disscount_factory=0.9,e_greedy=0.1):
        """Create the agent.

        Parameters
        ----------
        actionslist : list
            Column labels for the Q table (see class docstring).
        LearnRate : float
            Step size used by ``learn``.
        disscount_factory : float
            Discount applied to the best next-state value.
        e_greedy : float
            ``choose_action`` picks a random action whenever a uniform draw in
            [0, 1) exceeds this value.
        """
        self.actions = actionslist  # action / column list
        self.strategy = ['Action', 'close_Avg', 'deal', 'amplitude', 'volume', 'close_diff']
        # Action labels returned by choose_action; also the rows of q_strategy.
        self.strategy_action = ['Wait', 'Buy', 'Sell', 'Short', 'BuyCover', 'ShortStopLoss', 'BuyCoverStopLoss']
        self.lr = LearnRate  # learning rate
        self.gamma = disscount_factory  # discount factor
        self.espilon = e_greedy  # greediness
        self.q_strategy = pd.DataFrame(
            np.zeros((len(self.strategy_action), len(self.strategy))),
            index=self.strategy_action,
            columns=self.strategy,
            dtype=np.float32,
        )
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float32)  # empty Q table
        self.openInteres = 0

    def check_state_exist(self,state):
        """Add a row for ``state`` to ``q_table`` if its ``name`` is not indexed yet.

        The first four Q-table columns start at 0; every other column is
        filled from ``state`` **by label**, so the order of the fields in
        ``state`` does not have to match the column order.

        Raises
        ------
        KeyError
            If ``state`` lacks one of the non-action columns.
        """
        if state.name in self.q_table.index:
            return
        zero_columns = set(self.actions[:4])
        row = pd.Series(
            {col: (0 if col in zero_columns else state[col]) for col in self.q_table.columns},
            name=state.name,
        )
        # DataFrame.append was removed in pandas 2.0; .loc enlargement adds the
        # row and aligns it on the column labels.
        self.q_table.loc[state.name] = row

    def choose_action(self,state,OI):
        """Pick an action label for ``state`` given the open-interest value ``OI``.

        ``OI`` selects the candidate triple: ``OI > 0`` ->
        ``['Wait','Short','ShortStopLoss']``, ``OI < 0`` ->
        ``['Wait','BuyCover','BuyCoverStopLoss']``, otherwise
        ``['Wait','Buy','Sell']``.

        With a uniform draw above ``self.espilon`` a random candidate is
        returned. Otherwise a score is built by comparing the state's features
        (positions 1..5) with the matching ``q_strategy`` cells of every
        candidate: +1 when the state value is larger, -1 when smaller. The
        second candidate is returned if the score exceeds its ``'Action'``
        cell, the third if the score is below the third candidate's
        ``'Action'`` cell, and the first candidate otherwise.
        """
        self.check_state_exist(state)
        if OI > 0:
            state_action = ['Wait', 'Short', 'ShortStopLoss']
        elif OI < 0:
            state_action = ['Wait', 'BuyCover', 'BuyCoverStopLoss']
        else:
            state_action = ['Wait', 'Buy', 'Sell']

        if np.random.uniform() > self.espilon:
            # Exploration: pick one candidate at random.
            action = np.random.choice(state_action)
        else:
            score = 0  # accumulated over all candidates
            features = state.iloc[1:6]
            for candidate in state_action:
                for key, value in features.items():
                    reference = self.q_strategy.at[candidate, key]
                    if value > reference:
                        score += 1
                    elif value < reference:
                        score -= 1
            # .at[row, col] is the scalar cell lookup; q_strategy[row, col]
            # would look for a *column* named (row, col) and raise KeyError.
            if score > self.q_strategy.at[state_action[1], 'Action']:
                action = state_action[1]
            elif score < self.q_strategy.at[state_action[2], 'Action']:
                action = state_action[2]
            else:
                action = state_action[0]
        return action

    # Q-table learning
    def learn(self,s,a,r,s_):
        """Apply one Q-learning update for the transition ``(s, a, r, s_)``.

        Q(s, a) <- Q(s, a) + lr * (target - Q(s, a)), where ``target`` is
        ``r + gamma * max(Q(s_, ['Wait','Buy','Sell']))``, or just ``r`` when
        ``s_.name`` is the terminal marker ``-100``.

        Parameters
        ----------
        s, s_ : pandas.Series
            Current and next state; their ``name`` is the Q-table row label.
        a : str
            Column of ``q_table`` to update.
        r : float
            Reward received for the transition.
        """
        self.check_state_exist(s_)  # make sure the next state has a row
        q_predict = self.q_table.loc[s.name, a]  # current estimate
        if s_.name != -100:  # next state is not terminal
            q_target = r + self.gamma * self.q_table.loc[s_.name, ['Wait', 'Buy', 'Sell']].max()
        else:  # next state is terminal
            q_target = r
        # Move the estimate towards the target.
        self.q_table.loc[s.name, a] += self.lr * (q_target - q_predict)

In [4]:
# https://blog.csdn.net/joinquantdata/article/details/87254611?spm=1001.2101.3001.6650.6&utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7Edefault-6.no_search_link&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7Edefault-6.no_search_link
class Market:
    """Trading environment that walks over a frame of 1-minute candles."""

    def __init__(self,data) -> None:
        """Store the candle frame and initialise the cursor and position.

        ``data`` must have a row labelled 10 with an ``'ndatetime'`` value,
        because the start time is read from it here.
        """
        # Column labels handed to the learner: buy, sell, wait, plus state fields.
        self.action_list = [
            'Wait', 'Buy', 'Sell', 'OpenInteres',
            'ndatetime', 'close_Avg', 'deal', 'amplitude', 'volume', 'close', 'close_diff',
        ]
        self.actions = len(self.action_list)
        self.data = data  # 1-minute candle data
        self.idx = 10  # default candle at which the strategy starts
        start_stamp = self.data.at[self.idx, 'ndatetime']
        self.time = start_stamp.time().strftime('%H:%M')
        self.OI = 0

    def get_state(self,idx):
        """Return the candle row labelled ``idx`` (all columns)."""
        return self.data.loc[idx, :]

    def step(self,idx,action):
        """Evaluate ``action`` on candle ``idx``.

        Returns
        -------
        tuple
            ``(next_idx, reward, done)``. On the candle stamped ``13:20:00``
            the episode ends: ``next_idx`` is ``-100`` and the base reward is
            0. Otherwise ``next_idx`` is ``idx + 1`` and the base reward is the
            candle's ``close_diff``. ``'Sell'`` negates the base reward,
            ``'Wait'`` yields 0, and any other action keeps the base reward.
        """
        candle = self.get_state(idx)
        clock = candle['ndatetime'].time().strftime('%H:%M:%S')
        done = clock == '13:20:00'

        if done:
            next_idx, reward = -100, 0
        else:
            next_idx, reward = idx + 1, self.data.at[idx, 'close_diff']

        if action == 'Sell':
            reward = reward * -1
        if action == 'Wait':
            reward = 0

        return next_idx, reward, done

    def reset(self):
        """Rewind to candle 10 with no position and return ``[idx, OI]``."""
        self.idx, self.OI = 10, 0
        return [self.idx, self.OI]
def update(CandleData, episodes=50):
    """Train a ``QLearning`` agent on ``CandleData`` and return its Q table.

    Each episode resets the ``Market`` environment and steps candle by candle
    until the environment reports ``done`` or the next index is no longer
    present in the data.

    Parameters
    ----------
    CandleData : pandas.DataFrame
        Candle frame accepted by ``Market``.
    episodes : int, optional
        Number of training episodes. Defaults to 50.

    Returns
    -------
    pandas.DataFrame
        The agent's ``q_table`` after training.
    """
    evn = Market(CandleData)
    RL = QLearning(evn.action_list)

    for episode in range(episodes):
        # reset() returns [start index, open interest]; unpack both instead of
        # using the whole list as a row label.
        idx, open_interest = evn.reset()
        while True:
            state = evn.get_state(idx)
            # choose_action needs the current open interest as second argument.
            action = RL.choose_action(state, open_interest)
            idx_, reward, done = evn.step(idx, action)
            # Also stop when the data ends before the closing candle, so the
            # next get_state() cannot fail on a missing row.
            if done or idx_ not in evn.data.index:
                break
            state_ = evn.get_state(idx_)
            RL.learn(state, action, reward, state_)
            idx = idx_
    return RL.q_table

In [7]:
# Driver cell: load the candles, then inspect one state and one action choice.
data = get_data()
# Training run is disabled; `q_table` is therefore never bound by this cell.
# q_table = update(data)
# print(q_table.tail())

# Build the environment and look at the feature slice of the candle labelled 10.
evn = Market(data)
state = evn.get_state(10)
print(state[1:6])
# Fresh agent: q_strategy is all zeros and q_table is still empty at this point.
RL = QLearning(evn.action_list)
print(RL.q_strategy)
# Feature row of the 'Wait' strategy (not used further in this cell).
strategy = RL.q_strategy.loc['Wait',['close_Avg','deal','amplitude','volume','close_diff']]
# print(state[1:6].to_list() < RL.q_strategy.loc['Wait',['close_Avg','deal','amplitude','volume','close']].to_list())
print('QTable',RL.q_table.head())

# Ask the agent for an action with open interest 0 (candidates: Wait / Buy / Sell).
# RL.check_state_exist(state)
action = RL.choose_action(state,0)
print(action)
# print(RL.q_table.loc[10,['Wait','Buy','Sell']])

Ticks2022-01-21.txt
              ndatetime  close_Avg  deal  amplitude  volume  close_diff  close
295 2022-01-21 13:40:00 -29.530405 -2349          5     408         9.0  17891
296 2022-01-21 13:41:00 -20.461279 -2072         11     737        -9.0  17900
297 2022-01-21 13:42:00 -29.362416 -2111         12     583         4.0  17891
298 2022-01-21 13:43:00 -25.277592 -2046          8     613         0.0  17895
299 2022-01-21 13:44:00 -25.193333 -2050          8    1316         0.0  17895
close_Avg    -19.0
deal          -908
amplitude       17
volume         476
close_diff     3.0
Name: 10, dtype: object
                  Action  close_Avg  deal  amplitude  volume  close_diff
Wait                 0.0        0.0   0.0        0.0     0.0         0.0
Buy                  0.0        0.0   0.0        0.0     0.0         0.0
Sell                 0.0        0.0   0.0        0.0     0.0         0.0
Short                0.0        0.0   0.0        0.0     0.0         0.0
BuyCover             0

In [6]:
# Column totals of the trained Q table.
# NOTE: `q_table` is only assigned by `q_table = update(data)`, which is commented
# out in the driver cell, so on a fresh kernel this line raises NameError.
print(q_table.Buy.sum(),q_table.Sell.sum(),q_table.close_diff.sum())

NameError: name 'q_table' is not defined