In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import random

# --- Configuration: everything a reader might want to tune lives here ---

# Yahoo Finance symbols to download.
tickers = ["AB.PA", "A2A.MI", "ABCA.PA", "ABNX.PA", "ABVX.PA", "AC.PA", "ADP.PA", "AIR.PA"]
start_date = "2007-07-01"
end_date = "2023-10-01"

# Size of the model's softmax output layer.
# NOTE(review): the environment's `step` only acts on codes 0, 1 and 3, yet the
# policy head has 5 outputs — confirm the intended action set.
num_actions = 5

# Starting cash for the trading environment.
initial_wallet = 10000

window_size = 10  # how many previous days to look at to take a decision

num_epochs = 3
batch_size = 64

# Seed every random number generator the notebook has imported so that weight
# initialisation and batch shuffling are repeatable from one run to the next.
seed = 42
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

Download data

In [None]:
# Fetch historical data for the stocks over the date range chosen in the
# configuration cell (start_date / end_date) instead of a separate hard-coded range.
data = yf.download(tickers, start=start_date, end=end_date)

def remove_top_column_name(df):
    """Return a copy of ``df`` whose column index has lost its outermost level.

    The input frame is left untouched; only the returned copy carries the
    flattened column labels.
    """
    flattened = df.copy()
    flattened.columns = flattened.columns.droplevel(0)
    return flattened

# Keep only the closing prices and flatten the two-level column index so that
# each column is named after its ticker.
data = remove_top_column_name(data[['Close']])

# All tickers share one date index, so a ticker that did not trade on a given
# day (presumably possible here, since the list mixes .PA and .MI symbols)
# shows up as NaN. Carry the last known price forward, then drop any leading
# rows where some ticker has no price yet, so that no NaN reaches the scaler,
# the model input or the trading environment.
data = data.ffill().dropna()

data.head(5)

Preprocess data

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Normalize the data column by column into the range [-1, 1]
scaler = MinMaxScaler(feature_range=(-1, 1))

# Build the frame in one step so the scaled values keep the original row index
# (the dates) and column labels instead of falling back to a plain 0..n-1 index.
normalized_data = pd.DataFrame(
    scaler.fit_transform(data[tickers]),
    index=data.index,
    columns=tickers,
)

normalized_data.head(5)

In [None]:
def create_sequences(data, window_size):
    """Cut ``data`` into overlapping windows of ``window_size`` consecutive items.

    Windows start at positions 0, 1, ..., ``len(data) - window_size - 1``, so
    ``len(data) - window_size`` windows are produced (none when ``data`` is not
    longer than ``window_size``). The windows are stacked into one NumPy array.
    """
    n_windows = len(data) - window_size
    windows = [data[start : start + window_size] for start in range(n_windows)]
    return np.array(windows)

# Create sliding window sequences
sequences = create_sequences(normalized_data, window_size)

# Show the array's dimensions rather than dumping its raw contents.
sequences.shape

In [None]:
# Input shape for the model: the shape of one sequence, i.e. every axis of
# `sequences` after the leading one.
input_shape = np.shape(sequences)[1:]

print(input_shape)

Define Reinforcement Learning Environment

In [None]:
class StockTradingEnvironment:
    """Minimal multi-stock trading environment driven by a table of prices.

    ``data`` is used as a pandas DataFrame with one column per stock and one
    row per time step. Every call to :meth:`step` moves to the next row and
    applies one portfolio-wide action.

    Action codes handled by :meth:`step`:
        0 - Buy one share of every stock the wallet can afford.
        1 - Sell the whole position of every stock currently held.
        3 - Hold (do nothing).
    Any other code is accepted and treated as a no-op.
    """

    def __init__(self, data, initial_wallet=10000):
        """Store the price table and start a fresh episode.

        Args:
            data: Price table (rows = time steps, columns = stocks).
            initial_wallet: Cash available at the start of an episode.
        """
        self.data = data
        self.initial_wallet = initial_wallet
        self.reset(initial_wallet)

    def reset(self, initial_wallet=None):
        """Restart the episode at the first row of ``data``.

        Args:
            initial_wallet: Starting cash. When omitted, the amount used for
                the previous episode is reused.
        """
        if initial_wallet is None:
            initial_wallet = self.initial_wallet
        else:
            self.initial_wallet = initial_wallet

        self.current_step = 0
        self.wallet = initial_wallet
        self.positions = {stock: 0 for stock in self.data.columns}  # Shares held per stock
        self.initial_prices = self.data.iloc[0]  # Prices at the first time step
        self.current_prices = self.initial_prices  # Current prices start at initial prices
        # With a single row there is no next row to step to, so the episode is
        # already over; otherwise the first step() would index out of bounds.
        self.done = len(self.data) <= 1
        self.history = []  # Actions taken so far, in order

    def step(self, action):
        """Advance one time step and apply ``action``.

        Args:
            action: Action code (see the class docstring).

        Returns:
            Tuple ``(next_state, reward, done, info)`` where ``next_state`` is a
            dict with the wallet, a snapshot of the positions and the current
            prices, and ``info`` is an empty dict.

        Raises:
            ValueError: If the episode has already finished.
        """
        if self.done:
            raise ValueError("Episode is done, please reset the environment.")

        # Move to the next row; trades below execute at these prices.
        self.current_step += 1
        self.current_prices = self.data.iloc[self.current_step]

        if action == 0:  # Buy
            self._buy_one_of_each()
        elif action == 1:  # Sell
            self._sell_all()
        # Action 3 (hold) and any unrecognised code leave the portfolio unchanged.

        reward = self.calculate_reward(action)

        # The episode ends once the last row has been reached.
        if self.current_step >= len(self.data) - 1:
            self.done = True

        next_state = {
            'wallet': self.wallet,
            # Copy so that later trades do not silently rewrite a state that
            # the caller has already stored.
            'positions': dict(self.positions),
            'prices': self.current_prices
        }

        # Append action to history for monitoring or analysis
        self.history.append(action)

        return next_state, reward, self.done, {}

    def _buy_one_of_each(self):
        """Buy one share of each stock that is priced and affordable right now."""
        for stock in self.positions:
            price = self.current_prices[stock]
            # Skip (rather than abort the whole loop) so that a missing or
            # too-expensive stock does not block cheaper stocks later on.
            if pd.isna(price) or self.wallet < price:
                continue
            self.wallet -= price
            self.positions[stock] += 1

    def _sell_all(self):
        """Sell every held share of each stock that has a price right now."""
        for stock, quantity in self.positions.items():
            # Skip empty positions instead of stopping, so holdings in later
            # stocks are still sold.
            if quantity <= 0:
                continue
            price = self.current_prices[stock]
            # A missing price would turn the wallet into NaN; keep the position.
            if pd.isna(price):
                continue
            self.wallet += price * quantity
            self.positions[stock] = 0

    def calculate_reward(self, action):
        """Return the reward for ``action``.

        Placeholder: always returns 0. Replace with an actual calculation
        (for example one based on the change in wallet value after the action).
        """
        return 0  # Placeholder reward value, replace with actual calculation

    # Implement additional methods as needed for action handling, state representation, etc.


Model Architecture

In [None]:

# Requires 'input_shape' and 'num_actions' to have been defined by earlier cells.
# The network is assembled layer by layer with the Keras Sequential API.
model = tf.keras.Sequential()

# First recurrent layer returns its whole output sequence so that the second
# LSTM receives one vector per time step.
model.add(tf.keras.layers.LSTM(64, input_shape=input_shape, return_sequences=True))
model.add(tf.keras.layers.LSTM(64))
model.add(tf.keras.layers.Dense(32, activation='relu'))
# Output layer for action predictions: one softmax unit per action.
model.add(tf.keras.layers.Dense(num_actions, activation='softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Show the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Assuming 'sequences' and 'actions' are prepared appropriately
# NOTE(review): `actions` is never assigned in the visible notebook cells, so this
# call raises NameError on a fresh kernel unless it is defined elsewhere.
# Presumably it must be a one-hot array of shape (len(sequences), num_actions) to
# match the softmax output and the categorical cross-entropy loss — TODO confirm.
model.fit(sequences, actions, epochs=num_epochs, batch_size=batch_size)