In [10]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
# from statsmodels.tsa.seasonal import seasonal_decompose
import gymnasium as gym
import numpy as np
from gymnasium import spaces
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque

In [3]:
# Define the ticker symbol for DJIA
ticker = "^DJI"

# Download historical data from Yahoo Finance
df = yf.download(ticker, start="2000-01-01", end="2024-01-01")

# Display the first few rows of the dataframe
print(df.info())

[*********************100%***********************]  1 of 1 completed
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6037 entries, 2000-01-03 to 2023-12-29
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       6037 non-null   float64
 1   High       6037 non-null   float64
 2   Low        6037 non-null   float64
 3   Close      6037 non-null   float64
 4   Adj Close  6037 non-null   float64
 5   Volume     6037 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 330.1 KB
None


In [4]:
# Ensure the data is sorted by date
df_time = df.sort_index()

# Perform time series decomposition on the open price
open_price = df_time['Open']
result = seasonal_decompose(open_price, model='additive', period=252)  # Assuming 252 trading days in a year

# Plot the decomposition
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(15, 12))

# Observed
result.observed.plot(ax=ax1)
ax1.set_ylabel('Observed')
ax1.set_title('Observed Open Prices')

# Trend
result.trend.plot(ax=ax2)
ax2.set_ylabel('Trend')
ax2.set_title('Trend Component')

# Seasonal
result.seasonal.plot(ax=ax3)
ax3.set_ylabel('Seasonal')
ax3.set_title('Seasonal Component')

# Residual
result.resid.plot(ax=ax4)
ax4.set_ylabel('Residual')
ax4.set_title('Residual Component')

plt.tight_layout()
plt.show()

NameError: name 'seasonal_decompose' is not defined

### Adding more features

In [11]:
# Calculate Moving Averages
df['SMA_20'] = df['Close'].rolling(window=20).mean()  # 20-day Simple Moving Average
df['SMA_50'] = df['Close'].rolling(window=50).mean()  # 50-day Simple Moving Average
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()  # 12-day Exponential Moving Average
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()  # 26-day Exponential Moving Average

# Calculate RSI
delta = df['Close'].diff(1)
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)

avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()

rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Calculate MACD
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Handle missing values by filling forward
df.fillna(method='ffill', inplace=True)
df.dropna(inplace=True)

# Display the updated DataFrame with new features
print(df.head())

                    Open          High           Low         Close  \
Date                                                                 
2000-05-23  10539.120117  10557.450195  10416.780273  10422.269531   
2000-05-24  10420.900391  10554.879883  10364.009766  10535.349609   
2000-05-25  10529.870117  10558.650391  10266.000000  10323.919922   
2000-05-26  10322.889648  10387.660156  10258.809570  10299.240234   
2000-05-30  10302.309570  10528.150391  10302.309570  10527.129883   

               Adj Close     Volume        SMA_20        SMA_50        EMA_12  \
Date                                                                            
2000-05-23  10422.269531  157080000  10656.271533  10784.760176  10634.866825   
2000-05-24  10535.349609  225350000  10635.764014  10799.242363  10619.556484   
2000-05-25  10323.919922  184380000  10607.555029  10803.092559  10574.073936   
2000-05-26  10299.240234  129200000  10585.821533  10796.465371  10531.791828   
2000-05-30  10527.12988

In [12]:
# Initialize the StandardScaler
scaler = StandardScaler()

# Fit and transform the data
scaled_data = scaler.fit_transform(df)

# Convert the scaled data back to a DataFrame with the same column names
scaled_df = pd.DataFrame(scaled_data, index=df.index, columns=df.columns)

# Display the first few rows of the scaled data
print(scaled_df.head())

                Open      High       Low     Close  Adj Close    Volume  \
Date                                                                      
2000-05-23 -0.789406 -0.795516 -0.795405 -0.803653  -0.803653 -0.753865   
2000-05-24 -0.803564 -0.795822 -0.801755 -0.790112  -0.790112 -0.155611   
2000-05-25 -0.790514 -0.795373 -0.813551 -0.815431  -0.815431 -0.514633   
2000-05-26 -0.815303 -0.815757 -0.814416 -0.818386  -0.818386 -0.998179   
2000-05-30 -0.817768 -0.799009 -0.809181 -0.791097  -0.791097 -0.680518   

              SMA_20    SMA_50    EMA_12    EMA_26       RSI      MACD  \
Date                                                                     
2000-05-23 -0.774469 -0.756633 -0.777748 -0.772309 -0.372503 -0.333055   
2000-05-24 -0.776937 -0.754879 -0.779588 -0.773540 -0.036292 -0.357914   
2000-05-25 -0.780332 -0.754412 -0.785052 -0.776568 -0.718098 -0.457218   
2000-05-26 -0.782948 -0.755215 -0.790131 -0.779593 -0.808907 -0.541075   
2000-05-30 -0.784661 -0.755380

### Create the Environment

In [13]:
class TradingEnv(gym.Env):
    def __init__(self, df):
        super(TradingEnv, self).__init__()
        self.df = df.reset_index(drop=True)
        self.current_step = 0
        self.initial_balance = 10000
        self.balance = self.initial_balance
        self.asset = 0
        self.net_worth = self.initial_balance
        self.transaction_cost = 0.001  # Example transaction cost (0.1%)
        self.max_position_size = 100000  # Limit on position size

        # Define the action space (Hold, Buy, Sell)
        self.action_space = spaces.Discrete(3)

        # Observation space: balance, asset, net worth, SMA, RSI, MACD
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(6,), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        self.balance = self.initial_balance
        self.asset = 0
        self.net_worth = self.initial_balance
        return self._next_observation()

    def _next_observation(self):
        obs = np.array([
            self.balance, 
            self.asset, 
            self.net_worth,
            self.df.iloc[self.current_step]['SMA_20'],
            self.df.iloc[self.current_step]['RSI'],
            self.df.iloc[self.current_step]['MACD']
        ])
        return obs.astype(np.float32)

    def step(self, action):
        current_price = self.df.iloc[self.current_step]['Close']
        self.current_step += 1

        prev_net_worth = self.net_worth

        if action == 1:  # Buy
            amount_to_buy = min(self.balance, self.max_position_size)
            self.asset += (amount_to_buy * (1 - self.transaction_cost)) / current_price
            self.balance -= amount_to_buy
        elif action == 2:  # Sell
            self.balance += self.asset * current_price * (1 - self.transaction_cost)
            self.asset = 0

        self.net_worth = self.balance + self.asset * current_price

        # Ensure balance doesn't go negative
        if self.balance < 0:
            self.balance = 0
            self.asset = prev_net_worth / current_price
            self.net_worth = prev_net_worth

        # Calculate reward
        reward = self.net_worth - prev_net_worth

        done = self.current_step >= len(self.df) - 1

        obs = self._next_observation()
        return obs, reward, done, {}

    def render(self, mode='human'):
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Asset: {self.asset}')
        print(f'Net Worth: {self.net_worth}')

### Trading Agent(s)

#### Q-Learning Agent

In [14]:
# Define the Q-learning Agent
class QLearningAgent:
    def __init__(self, state_bins, action_size):
        self.state_bins = state_bins
        self.action_size = action_size
        self.q_table = np.zeros(tuple(len(bins) + 1 for bins in state_bins) + (action_size,))
        self.learning_rate = 0.1
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01

    def discretize_state(self, state):
        state_indices = []
        for i, value in enumerate(state):
            state_index = np.digitize(value, self.state_bins[i]) - 1
            state_indices.append(state_index)
        return tuple(state_indices)

    def choose_action(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.action_size)
        return np.argmax(self.q_table[state])

    def learn(self, state, action, reward, next_state, done):
        best_next_action = np.argmax(self.q_table[next_state])
        td_target = reward + self.gamma * self.q_table[next_state][best_next_action] * (1 - done)
        td_error = td_target - self.q_table[state][action]
        self.q_table[state][action] += self.learning_rate * td_error

        if done:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

In [15]:

# Performance Metrics Calculation
def calculate_metrics(balance_history, initial_balance=10000):
    balance_series = pd.Series(balance_history)
    returns = balance_series.pct_change().dropna()

    # Sharpe Ratio
    risk_free_rate = 0.0
    average_return = returns.mean()
    std_dev = returns.std()
    sharpe_ratio = (average_return - risk_free_rate) / std_dev if std_dev != 0 else 0

    # Cumulative Returns
    cumulative_returns = (balance_series.iloc[-1] / initial_balance) - 1

    # Maximum Drawdown
    rolling_max = balance_series.cummax()
    drawdown = (balance_series - rolling_max) / rolling_max
    max_drawdown = drawdown.min()

    return {
        'Sharpe Ratio': sharpe_ratio,
        'Cumulative Returns': cumulative_returns,
        'Maximum Drawdown': max_drawdown
    }

# Plotting Performance
def plot_performance(balance_history, asset_history, net_worth_history):
    plt.figure(figsize=(14, 7))
    plt.plot(balance_history, label='Balance')
    plt.plot(asset_history, label='Assets')
    plt.plot(net_worth_history, label='Net Worth')
    plt.xlabel('Time Steps')
    plt.ylabel('Value')
    plt.title('Trading Bot Performance')
    plt.legend()
    plt.show()

#### DQN Agent

In [27]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.99    # discount rate
        self.epsilon = 1.0   # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)

    def _build_model(self):
        model = nn.Sequential(
            nn.Linear(self.state_size, 24),  # Input layer should match state_size (6)
            nn.ReLU(),
            nn.Linear(24, 24),               # Hidden layer
            nn.ReLU(),
            nn.Linear(24, self.action_size)  # Output layer should match action_size (3)
        )
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state = torch.FloatTensor(state)
        act_values = self.model(state)
        return np.argmax(act_values.detach().numpy())

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                next_state = torch.FloatTensor(next_state)
                target = reward + self.gamma * torch.max(self.model(next_state)).item()

            state = torch.FloatTensor(state)
            target_f = self.model(state)

            # Ensure target_f is a clone of the original prediction to avoid in-place operations on the computational graph
            target_f = target_f.clone().detach()
            target_f[action] = target

            # Perform the backward pass and update the weights
            self.optimizer.zero_grad()
            loss = nn.MSELoss()(self.model(state), target_f.unsqueeze(0))
            loss.backward()
            self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

### Train the Trading Bot

In [17]:
# Create the environment
env = TradingEnv(scaled_df)

# Reset the environment
state = env.reset()

# Take a sample step
action = env.action_space.sample()  # Random action (for illustration)
next_state, reward, done, _ = env.step(action)

# Render the environment
env.render()

Step: 1
Balance: 10000.0
Asset: 0
Net Worth: 10000.0


#### Train the Q-learning Bot

In [18]:
# Define bins for state discretization
state_bins = [
    np.linspace(0, env.initial_balance * 2, 20),  # Balance bins
    np.linspace(0, env.initial_balance * 2, 20),  # Asset bins
    np.linspace(0, env.initial_balance * 2, 20),  # Net worth bins
    np.linspace(-5, 5, 20),                      # SMA_20 bins
    np.linspace(0, 100, 20),                     # RSI bins
    np.linspace(-5, 5, 20)                       # MACD bins
]

agent = QLearningAgent(state_bins, env.action_space.n)

#  Train the Q-learning Agent
num_episodes = 100

for episode in range(num_episodes):
    state = agent.discretize_state(env.reset())
    done = False
    total_reward = 0

    while not done:
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        next_state = agent.discretize_state(next_state)
        agent.learn(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

    print(f"Episode {episode + 1}: Total Reward: {total_reward}")

env.render()

Episode 1: Total Reward: -9994.438022686312
Episode 2: Total Reward: -9995.901320072517
Episode 3: Total Reward: 12903861.78288681
Episode 4: Total Reward: 21814968.113301948
Episode 5: Total Reward: 1559801.528322975
Episode 6: Total Reward: -9999.89829420422
Episode 7: Total Reward: 2855418.538616876
Episode 8: Total Reward: -10000.014542896055
Episode 9: Total Reward: 1249.5534081670721
Episode 10: Total Reward: 135092.31610664912
Episode 11: Total Reward: -9999.995845177662
Episode 12: Total Reward: -74437.05670996814
Episode 13: Total Reward: -8577.934827293862
Episode 14: Total Reward: 8181454.293919381
Episode 15: Total Reward: -9999.999677221562
Episode 16: Total Reward: -17081.701134995677
Episode 17: Total Reward: -9998.698398250881
Episode 18: Total Reward: 262034663.29734993
Episode 19: Total Reward: -184577.6458305115
Episode 20: Total Reward: -9960.96205707423
Episode 21: Total Reward: -12030.556053762062
Episode 22: Total Reward: -8560.66479648588
Episode 23: Total Rewar

#### Train the DQN Bot

In [28]:
# Initialize Environment and Agent
env_dqn = TradingEnv(scaled_df)
state_size = env_dqn.observation_space.shape[0]
action_size = env_dqn.action_space.n
agent_dqn = DQNAgent(state_size, action_size)
batch_size = 32
num_episodes_dqn = 100

# Train the DQN Agent
for e in range(num_episodes_dqn):
    state = env_dqn.reset()
    total_reward = 0

    for time in range(5000):
        action = agent_dqn.act(state)
        next_state, reward, done, _ = env_dqn.step(action)
        total_reward += reward
        agent_dqn.remember(state, action, reward, next_state, done)
        state = next_state

        if done:
            print(f"DQN Episode {e + 1}/{num_episodes_dqn}, Reward: {total_reward}, Epsilon: {agent_dqn.epsilon}")
            break

        if len(agent_dqn.memory) > batch_size:
            agent_dqn.replay(batch_size)

  return F.mse_loss(input, target, reduction=self.reduction)


### Run the RL Trading Bot

#### Run Qlearning Bot

In [103]:
state = agent.discretize_state(env.reset())
done = False
while not done:
    action = agent.choose_action(state)
    next_state, reward, done, _ = env.step(action)
    next_state = agent.discretize_state(next_state)
    state = next_state
    env.render()

Step: 1
Balance: 0
Asset: -11454.673659117103
Net Worth: 9990.0
Step: 2
Balance: 0
Asset: -11454.673659117103
Net Worth: 9550.065407964763
Step: 3
Balance: 0
Asset: -11454.673659117103
Net Worth: 8864.146171320854
Step: 4
Balance: 0
Asset: -11454.673659117103
Net Worth: 8912.74570359538
Step: 5
Balance: 0
Asset: -11454.673659117103
Net Worth: 8795.936678124688
Step: 6
Balance: 8475.402947829842
Asset: 0
Net Worth: 8475.402947829842
Step: 7
Balance: 8475.402947829842
Asset: 0
Net Worth: 8475.402947829842
Step: 8
Balance: 8475.402947829842
Asset: 0
Net Worth: 8475.402947829842
Step: 9
Balance: 8475.402947829842
Asset: 0
Net Worth: 8475.402947829842
Step: 10
Balance: 0.0
Asset: -11655.491982057969
Net Worth: 8466.927544882012
Step: 11
Balance: 0.0
Asset: -11655.491982057969
Net Worth: 8592.396980487327
Step: 12
Balance: 0.0
Asset: -11655.491982057969
Net Worth: 8476.89622964967
Step: 13
Balance: 0.0
Asset: -11655.491982057969
Net Worth: 8530.682888881605
Step: 14
Balance: 0.0
Asset: -1165

#### Run DQN Bot

In [None]:
# Run the DQN Agent in the Environment
state = env_dqn.reset()
done = False

while not done:
    action = agent_dqn.act(state)
    next_state, reward, done, _ = env_dqn.step(action)
    state = next_state
    env_dqn.render()