In [22]:
import pandas as pd
import numpy as np
import time
import datetime
import os

N_STATES = 5   # ['action','close-avg','deal','amp_cover','vol_cover'] 
ACTIONS = ['None','Buy','Sell','OpenInteres','close-avg','deal','amp_cover','vol_cover'] #行為 空手、買進、賣出、持有、多單平倉、空單平倉
# ACTIONS = ['action','close-avg','deal','amp_cover','vol_cover']     # 探索者的可用动作
EPSILON = 0.9   # 贪婪度 greedy
ALPHA = 0.1     # 学习率
GAMMA = 0.9    # 奖励递减值
MAX_EPISODES = 13   # 最大回合数
FRESH_TIME = 0.3    # 移动间隔时间
global OpenInteres
OpenInteres = 0 # 是否持倉中
global priceAtPurchase #損益
priceAtPurchase = 0


In [23]:
def get_data():
    direct=os.path.abspath('../data')
    filelist = os.listdir('../data')
    file = filelist[-1]
    print(file)
    dayticks = pd.read_csv(direct+'\\'+file,header=None,names=['ndatetime','nbid','nask','close','volume','deal'])
    dayticks['ndatetime'] = pd.to_datetime(dayticks['ndatetime'], format='%Y-%m-%d %H:%M:%S.%f')
    dayticks.sort_values(by=['ndatetime'],ascending=True)
    dayticks=dayticks[(dayticks.ndatetime.dt.hour>=8) & (dayticks.ndatetime.dt.hour<15)]
    dayticks.index = dayticks.ndatetime
    Candledf=dayticks['close'].resample('1min',closed='right').ohlc()
    tmpdf=dayticks['volume'].resample('1min').sum()
    Candledf=pd.concat([Candledf,tmpdf],axis=1)
    del tmpdf
    tmpdf=dayticks['deal'].resample('1min').sum()
    Candledf=pd.concat([Candledf,tmpdf],axis=1)
    del tmpdf
    Candledf['dealcumsum']=Candledf['deal'].cumsum()
    del Candledf['deal']
    Candledf.rename(columns={'dealcumsum':'deal'},inplace=True)
    Candledf=Candledf.rename_axis('ndatetime').reset_index()
    Candledf['ndatetime'] = pd.to_datetime(Candledf['ndatetime'], format='%Y-%m-%d %H:%M:%S.%f')
    Candledf.sort_values(by=['ndatetime'],ascending=True)
    tmpline=Candledf.close.cumsum()
    Candledf['Avg_line'] = tmpline.apply(lambda x: x/(tmpline[tmpline==x].index[0]+1))
    del tmpline
    Candledf['amplitude'] = Candledf.high - Candledf.low
    Candledf['close_Avg'] = Candledf.close - Candledf.Avg_line
    Candledf['close_diff'] = Candledf.close.shift(1).fillna(Candledf.close)-Candledf.close
    global CandleData
    CandleData = Candledf[['ndatetime','close_Avg','deal','amplitude','volume','close','close_diff']]
    print(CandleData.tail())
    return CandleData

In [None]:
# data = get_data()
# QT = pd.DataFrame(columns=ACTIONS,dtype=np.float32)
# print(QT)
# QT=QT.append(pd.Series([0]*len(ACTIONS),index=ACTIONS,name='state'))
# print(QT.loc['state',QT.columns])

In [41]:
class QLearning:
    def __init__(self,actionslist=None,LearnRate=0.01,disscount_factory=0.9,e_greedy=0.1):
        self.actions = actionslist # action 列表
        self.lr = LearnRate #學習效率
        self.gamma = disscount_factory # 折扣因子
        self.espilon = e_greedy #貪婪度
        self.q_table = pd.DataFrame(columns=self.actions,dtype=np.float32) #建立空Q Table
        self.openInteres = 0
        
    def check_state_exist(self,state):
        if state.name not in self.q_table.index:
            self.q_table = self.q_table.append(pd.Series([0]*len(self.actions),index=self.q_table.columns,name=state))
    
    def choose_action(self,state):
        self.check_state_exist(state)
        #隨機選行為 Epsilon Greedy貪婪方法
        if np.random.uniform()>self.espilon:
            #隨機選 1個action
            action = np.random.choice(self.actions[:3])
        else: #取Q值最高的action
            state_action = self.q_table.loc[state,:3]
            state_action = state_action.reindex(np.random.permutation(state_action.index))
            # 取當下最大Q值
            action = state_action.idxmax()
        return action
    # Q Table 學習
    def learn(self,s,a,r,s_): #state,action,result,s_(下個state)
        #檢查下個 state
        self.check_state_exist(s_) #檢查s_ 是否在q_table
        # Q(S,A) <- Q(S,A)+a*[R+v*max(Q(S',a)-Q(S,A))]
        q_predict = self.q_table[s,a] #根據Q表 得到 預估值 predict
        #q_target 現實值
        if s_ != 'terminal': #下一步 不是最終
            q_target = r + self.gamma * self.q_table[s_,:].max() 
        else: # 下一步終結
            q_target = r 
        # 更新Q表
        self.q_table.loc[s,a] += self.lr * (q_target - q_predict)


In [42]:
# QT = QLearning(actionslist=ACTIONS)
# QT.choose_action(1)


In [43]:
# https://blog.csdn.net/joinquantdata/article/details/87254611?spm=1001.2101.3001.6650.6&utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7Edefault-6.no_search_link&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7Edefault-6.no_search_link
class Market:
    def __init__(self,data) -> None:
        self.action_list = ['Wait','Buy','Sell','OpenInteres','close-avg','deal','amp_cover','vol_cover'] # 買進、賣出、觀望
        self.actions = len(self.action_list)
        self.data = data # 1分鐘 K線資料
        self.idx = 10 #預設開始跑策略的K線
        self.time = self.data.at[self.idx,'ndatetime'].time().strftime('%H:%M')
        self.OpenInterest = 0

    def get_state(self,idx):
        
        return self.data.loc[idx,:] #ndatetime  close_Avg  deal  amplitude  volume  close  close_diff

    def step(self,idx,action):
        state = self.get_state(idx)
        ntime = state['ndatetime'].time().strftime('%H:%M:%S')
        if ntime == '13:20:00':
            done = True
            reward = 0
            s_ = -100
        else:
            done = False
            reward = self.data.at[idx,'close_diff']
            s_ = idx + 1

        if action == 'Buy':
            pass
        if action == 'Sell':
            reward = reward * -1
        if action == 'Wait':
            reward = 0
        if s_ != -100:
            s_ = idx + 1
    
        return s_,reward,done

    def reset(self):
        self.idx=10
        return self.idx


In [44]:
def update(CandleData):
    evn = Market(CandleData)
    RL = QLearning(evn.action_list)
    
    for episode in range(10):
        idx = evn.reset()
        step_count = 0
        while True:
            state = evn.get_state(idx)
            action = RL.choose_action(state)
            idx_,reward,done = evn.step(action)
            state_ = get_data(idx_)
            step_count+=1
            RL.learn(state,action,reward,state_[:3])
            idx = idx_
            if done:
                break
            print(step_count)
    return RL.q_table



In [45]:
data = get_data()
evn = Market(data)
state = evn.get_state(10)
print(state)
RL = QLearning(evn.action_list)
RL.check_state_exist(state)
print(RL.q_table)
# qtable = update(data)
# print(qtable)

Ticks2021-12-23.txt
              ndatetime  close_Avg  deal  amplitude  volume  close  close_diff
295 2021-12-23 13:40:00  12.817568  1219          4     199  17947         0.0
296 2021-12-23 13:41:00  11.777778  1110          3     191  17946         1.0
297 2021-12-23 13:42:00  14.728188  1111          5     325  17949        -3.0
298 2021-12-23 13:43:00  15.675585  1146          3     237  17950        -1.0
299 2021-12-23 13:44:00  19.610000  1614          9    1156  17954        -4.0
ndatetime     2021-12-23 08:55:00
close_Avg                5.454545
deal                          446
amplitude                       4
volume                        224
close                       17944
close_diff                   -2.0
Name: 10, dtype: object


TypeError: Series.name must be a hashable type