In [1]:
import pandas as pd
import numpy as np
import random
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import yfinance as yf

2024-05-31 11:31:19.330865: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-31 11:31:19.331026: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-31 11:31:19.332843: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-31 11:31:19.355870: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [17]:
# 定義股票列表
stocks = ['2330.TW', '2317.TW', '6505.TW', '2412.TW', '2882.TW']  # 添加更多股票代碼 # , '1301.TW', '2308.TW', '3008.TW', '2002.TW', '2454.TW'
num_stocks = len(stocks)

# 抓取多支股票資料
all_stock_data = []

for stock in stocks:
    stock_data = yf.download(stock, start='2023-10-01', end='2023-12-31')
    stock_data = stock_data.reset_index()
    stock_data = stock_data[['Date', 'High', 'Low', 'Open', 'Close']]
    stock_data.columns = ['date', 'high', 'low', 'open', 'close']
    stock_data['ticker'] = stock  # 添加一列來標識股票
    all_stock_data.append(stock_data)

# 合併所有股票數據
combined_data = pd.concat(all_stock_data)
combined_data['date'] = pd.to_datetime(combined_data['date'])
combined_data = combined_data.sort_values(by=['ticker', 'date']).reset_index(drop=True)

# 將資料輸出至CSV檔案
combined_data.to_csv('./RL_combined_10_stock_prices_datetime_2023.csv', index=False)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


### Dataset Preprocessing

In [18]:
# 讀取CSV檔案並處理數據
data = pd.read_csv('./RL_combined_10_stock_prices_datetime_2023.csv', parse_dates=['date'])
state_size = 4  # 'high', 'low', 'open', 'close'

processed_data = []

for stock in stocks:
    stock_data = data[data['ticker'] == stock].sort_values(by='date').reset_index(drop=True)
    stock_data = stock_data[['high', 'low', 'open', 'close']].values
    processed_data.append(stock_data)

min_length = min(len(stock) for stock in processed_data)
processed_data = [stock[:min_length] for stock in processed_data]
processed_data = np.hstack(processed_data)

assert processed_data.shape == (min_length, state_size * num_stocks)

np.save('./processed_stock_data.npy', processed_data)

### MultiStockRLAgent Definition

In [2]:
class MultiStockRLAgent:
    def __init__(self, state_size, action_size, num_stocks, initial_cash, initial_stocks):
        self.state_size = state_size * num_stocks  # 多支股票的狀態大小
        self.action_size = action_size  # 每支股票的行動大小
        self.num_stocks = num_stocks  # 股票數量
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # 折扣率
        self.epsilon = 0.5   # 初始探索率，減少為 0.5
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()
        
        self.cash = initial_cash  # 初始資金
        self.stocks = initial_stocks  # 初始持有股票及其數量，格式為 {'ticker': shares}

    def _build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size * self.num_stocks, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size * self.num_stocks)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = (reward + self.gamma * np.amax(self.model.predict(next_state)[0]))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

    def interpret_action(self, action):
        stock_index = action // self.action_size
        stock_action = action % self.action_size
        return stock_index, stock_action

### Agent Training

In [3]:
def compute_reward(action, state, next_state, num_stocks):
    reward = 0
    
    # 將狀態和下一個狀態轉換為對應的股票價格
    state_prices = np.reshape(state, (num_stocks, -1))
    next_state_prices = np.reshape(next_state, (num_stocks, -1))
    
    # 獎勵計算
    for i in range(num_stocks):
        current_price = state_prices[i, -1]  # 當前收盤價
        next_price = next_state_prices[i, -1]  # 下一天的收盤價
        
        if action == 0:  # 保持
            reward += -1  # 小的負獎勵以減少不動作的傾向
        elif action == 1:  # 買入
            reward += next_price - current_price  # 價格上升則獎勵為正，反之為負
        elif action == 2:  # 賣出
            reward += current_price - next_price  # 價格下降則獎勵為正，反之為負
    
    return reward

# def compute_reward(action, state, next_state, num_stocks):
#     reward = 0
    
#     # 將狀態和下一個狀態轉換為對應的股票價格
#     state_prices = np.reshape(state, (num_stocks, -1))
#     next_state_prices = np.reshape(next_state, (num_stocks, -1))
    
#     # 獎勵計算
#     for i in range(num_stocks):
#         current_price = state_prices[i, -1]  # 當前收盤價
#         next_price = next_state_prices[i, -1]  # 下一天的收盤價
        
#         if action == 0:  # 保持
#             reward += -1  # 小的負獎勵以減少不動作的傾向
#         elif action == 1:  # 買入
#             reward += next_price - current_price  # 價格上升則獎勵為正，反之為負
#         elif action == 2:  # 賣出
#             reward += current_price - next_price  # 價格下降則獎勵為正，反之為負
    
#     return reward




# def train_agent(agent, stock_data, episodes, batch_size, num_stocks):
#     for e in range(episodes):
#         state = np.reshape(stock_data[0], [1, agent.state_size])
#         total_reward = 0  # 記錄每個 episode 的總 reward
#         for time in range(1, len(stock_data)):
#             print(f"Episode {e}/{episodes}, Time {time}/{len(stock_data)}")
#             action = agent.act(state)
#             stock_index, stock_action = agent.interpret_action(action)
#             next_state = np.reshape(stock_data[time], [1, agent.state_size])
#             reward = compute_reward(stock_action, state, next_state, num_stocks)
#             total_reward += reward  # 累加每個時間步的 reward
#             done = time == len(stock_data) - 1
#             agent.remember(state, action, reward, next_state, done)
#             state = next_state
#             print(f"Action: {action} (Stock: {stock_index}, Action: {stock_action}), Reward: {reward}")  # 打印每次動作和對應的 reward
#             if done:
#                 print(f"Episode: {e}/{episodes}, Total Reward: {total_reward}, Epsilon: {agent.epsilon:.2f}")
#                 break
#             if len(agent.memory) > batch_size:
#                 agent.replay(batch_size)



def train_agent(agent, stock_data, episodes, batch_size, num_stocks):
    for e in range(episodes):
        state = np.reshape(stock_data[0], [1, agent.state_size])
        total_reward = 0  # 記錄每個 episode 的總 reward
        for time in range(1, len(stock_data)):
            print(f"Episode {e}/{episodes}, Time {time}/{len(stock_data)}")
            action = agent.act(state)
            stock_index, stock_action = agent.interpret_action(action)
            next_state = np.reshape(stock_data[time], [1, agent.state_size])
            reward = compute_reward(stock_action, state, next_state, num_stocks)
            total_reward += reward  # 累加每個時間步的 reward
            done = time == len(stock_data) - 1
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            print(f"Action: {action} (Stock: {stock_index}, Action: {stock_action}), Reward: {reward}")  # 打印每次動作和對應的 reward
            if done:
                print(f"Episode: {e}/{episodes}, Total Reward: {total_reward}, Epsilon: {agent.epsilon:.2f}")
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

In [4]:
# def trade(agent, transformer_model, stock_data, num_stocks):
#     state = np.reshape(stock_data[0], [1, agent.state_size])
#     for time in range(1, len(stock_data)):
#         action = agent.act(state)
#         predicted_prices = [transformer_model.predict(stock_data[time - 30:time, stock_index]) for stock_index in range(num_stocks)]
#         for stock_index in range(num_stocks):
#             if action == 1:  # 買入
#                 Buy_Stock("your_account", "your_password", stock_index, 1, predicted_prices[stock_index])
#             elif action == 2:  # 賣出
#                 Sell_Stock("your_account", "your_password", stock_index, 1, predicted_prices[stock_index])
#         state = np.reshape(stock_data[time], [1, agent.state_size])

def trade(agent, transformer_model, stock_data, num_stocks):
    state = np.reshape(stock_data[0], [1, agent.state_size])
    for time in range(1, len(stock_data)):
        action = agent.act(state)
        predicted_prices = [transformer_model.predict(stock_data[time - 30:time, stock_index]) for stock_index in range(num_stocks)]
        for stock_index in range(num_stocks):
            ticker = stocks[stock_index]
            if action == 1:  # 買入
                # 假設我們買入1股
                if agent.cash >= predicted_prices[stock_index]:  # 確保有足夠資金
                    agent.cash -= predicted_prices[stock_index]
                    if ticker in agent.stocks:
                        agent.stocks[ticker] += 1
                    else:
                        agent.stocks[ticker] = 1
                    print(f"Bought 1 share of {ticker} at {predicted_prices[stock_index]}")
            elif action == 2:  # 賣出
                # 假設我們賣出1股
                if ticker in agent.stocks and agent.stocks[ticker] > 0:
                    agent.cash += predicted_prices[stock_index]
                    agent.stocks[ticker] -= 1
                    print(f"Sold 1 share of {ticker} at {predicted_prices[stock_index]}")
        state = np.reshape(stock_data[time], [1, agent.state_size])
    print(f"Final cash: {agent.cash}")
    print(f"Final stocks: {agent.stocks}")


In [5]:
if __name__ == "__main__":
    num_stocks = 5  # 假設有10支股票
    state_size = 4  # 每支股票的狀態維度(features)
    action_size = 3  # 每支股票的動作數量（買、賣、保持）# consider to expand the action space to include multiple scale of buy/sell
    initial_cash = 1000000  # 初始資金
    initial_stocks = {'2330.TW': 10, '2317.TW': 5}  # 初始持有股票及其數量
    agent = MultiStockRLAgent(state_size, action_size, num_stocks, initial_cash, initial_stocks)

    # 載入處理後的數據
    processed_data = np.load('./processed_stock_data.npy')

    # 確認 state_size 和 num_stocks 正確性
    print(f"State size: {state_size}")
    print(f"Number of stocks: {num_stocks}")
    print(f"Agent state size: {agent.state_size}")
    print(f"Processed data shape: {processed_data.shape}")

    # 訓練 Agent
    train_agent(agent, processed_data, episodes=5, batch_size=32, num_stocks=num_stocks)

    # 保存模型
    # agent.save("multi_stock_rl_agent_weights.h5")

    # # 載入模型並進行交易
    # agent.load("multi_stock_rl_agent_weights.h5")
    # transformer_model = ...  # 載入預訓練的 Transformer 模型
    # trade(agent, transformer_model, processed_data, num_stocks)


State size: 4
Number of stocks: 5
Agent state size: 20
Processed data shape: (63, 20)
Episode 0/5, Time 1/63
Action: 3 (Stock: 1, Action: 0), Reward: -5
Episode 0/5, Time 2/63
Action: 2 (Stock: 0, Action: 2), Reward: 12.450000762939453
Episode 0/5, Time 3/63
Action: 10 (Stock: 3, Action: 1), Reward: 9.099998474121094
Episode 0/5, Time 4/63
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Action: 0 (Stock: 0, Action: 0), Reward: -5
Episode 0/5, Time 5/63
Action: 13 (Stock: 4, Action: 1), Reward: 17.900001525878906
Episode 0/5, Time 6/63
Action: 1 (Stock: 0, Action: 1), Reward: 7.200000762939453
Episode 0/5, Time 7/63
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Action: 0 (Stock: 0, Action: 0), Reward: -5
Episode 0/5, Time 8/63
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Action: 9 (Stock: 3, Action: 0), Reward: -5
Episode 0/5, Time 9/63
Action: 11 (Stock: 3, Action: 2), Reward: -5.650001525878906
Episode 0/5, Tim

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Action: 0 (Stock: 0, Action: 0), Reward: -5
Episode 0/5, Time 13/63
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Action: 9 (Stock: 3, Action: 0), Reward: -5
Episode 0/5, Time 14/63
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Action: 0 (Stock: 0, Action: 0), Reward: -5
Episode 0/5, Time 15/63
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Action: 9 (Stock: 3, Action: 0), Reward: -5
Episode 0/5, Time 16/63
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Action: 0 (Stock: 0, Action: 0), Reward: -5
Episode 0/5, Time 17/63
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Action: 0 (Stock: 0, Action: 0), Reward: -5
Episode 0/5, Time 18/63
Action: 6 (Stock: 2, Action: 0), Reward: -5
Episode 0/5, Time 19/63
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Action: 0 (Stock: 0, Action: 0), Reward: -5
Episode 0/5, Time 20/63
[1m1/1[0m [32m━━━━━━━━━━━━

KeyboardInterrupt: 

In [11]:
agent.save("multi_stock_rl_agent.weights.h5")

In [15]:
import yfinance as yf
import pandas as pd
import numpy as np

# 定義股票列表
stocks = ['2330.TW', '2317.TW', '6505.TW', '2412.TW', '2882.TW', '1301.TW', '2308.TW', '3008.TW', '2002.TW', '2454.TW']  # 添加更多股票代碼
num_stocks = len(stocks)

# 抓取多支股票資料
all_stock_data = []

for stock in stocks:
    stock_data = yf.download(stock, start='2010-01-01', end='2023-12-31')
    stock_data = stock_data.reset_index()
    stock_data = stock_data[['Date', 'High', 'Low', 'Open', 'Close']]
    stock_data.columns = ['date', 'high', 'low', 'open', 'close']
    stock_data['ticker'] = stock  # 添加一列來標識股票
    all_stock_data.append(stock_data)

# 合併所有股票數據
combined_data = pd.concat(all_stock_data)
combined_data['date'] = pd.to_datetime(combined_data['date'])
combined_data = combined_data.sort_values(by=['ticker', 'date']).reset_index(drop=True)

# 將資料輸出至CSV檔案
combined_data.to_csv('./test.csv', index=False)

# 讀取CSV檔案並處理數據
data = pd.read_csv('./test.csv', parse_dates=['date'])
state_size = 4  # 'high', 'low', 'open', 'close'

processed_data = []

for stock in stocks:
    stock_data = data[data['ticker'] == stock].sort_values(by='date').reset_index(drop=True)
    stock_data = stock_data[['high', 'low', 'open', 'close']].values
    processed_data.append(stock_data)

min_length = min(len(stock) for stock in processed_data)
processed_data = [stock[:min_length] for stock in processed_data]
processed_data = np.hstack(processed_data)

assert processed_data.shape == (min_length, state_size * num_stocks)

np.save('./processed_stock_data.npy', processed_data)

# 初始化Agent並進行訓練
initial_cash = 1000000  # 初始資金
initial_stocks = {'2330.TW': 10, '2317.TW': 5}  # 初始持有股票及其數量
agent = MultiStockRLAgent(state_size, 3, num_stocks, initial_cash, initial_stocks)

# 確認 state_size 和 num_stocks 正確性
print(f"State size: {state_size}")
print(f"Number of stocks: {num_stocks}")
print(f"Agent state size: {agent.state_size}")
print(f"Processed data shape: {processed_data.shape}")

# # 讀取處理後的數據
# processed_data = np.load('./processed_stock_data.npy')

# # 訓練 Agent
# train_agent(agent, processed_data, episodes=100, batch_size=32, num_stocks=num_stocks)

# # 保存模型
# agent.save("multi_stock_rl_agent_weights.h5")


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

State size: 4
Number of stocks: 10
Agent state size: 40
Processed data shape: (3429, 40)



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
