# Trading Recommendation Model (DQN)


#Environment

In [24]:
import numpy as np
# from data_manager import download_stock_data, preprocess_data, save_data

class StockTradingEnv:
    def __init__(self, data):
        self.data = data
        self.n_step = len(self.data)
        self.current_step = None
        self.cash_in_hand = None
        self.shares_held = None
        self.current_stock_price = None
        self.total_shares_owned = 0
        self.initial_investment = 10000
        self.action_space = 3
        self.state_size = 4
        self.reset()

    def reset(self):
        self.current_step = 0
        self.cash_in_hand = self.initial_investment
        self.shares_held = 0
        self.total_shares_owned = 0
        self.current_stock_price = self.data['Close'].iloc[self.current_step]
        return self._get_state()

    def step(self, action):
        self.current_stock_price = self.data['Close'].iloc[self.current_step]
        prev_val = self._get_val()

        if action == 0:
            self._buy_stock()
        elif action == 1:
            self._sell_stock()

        self.current_step += 1
        if self.current_step >= len(self.data):
            done = True
            self.current_step = 0
        else:
            done = False

        cur_val = self._get_val()
        reward = cur_val - prev_val
        info = {'current_portfolio_value': cur_val}

        print(f"Step: {self.current_step}, Action: {action}, Done: {done}, Portfolio Value: {cur_val}, Reward: {reward}")

        return self._get_state(), reward, done, info

    def _get_state(self):
        return np.array([self.current_stock_price, self.shares_held, self.cash_in_hand, self.total_shares_owned])

    def _get_val(self):
        return self.shares_held * self.current_stock_price + self.cash_in_hand

    def _buy_stock(self):
        if self.cash_in_hand >= self.current_stock_price:
            self.shares_held += 1
            self.cash_in_hand -= self.current_stock_price
            self.total_shares_owned += 1

    def _sell_stock(self):
        if self.shares_held > 0:
            self.shares_held -= 1
            self.cash_in_hand += self.current_stock_price
            self.total_shares_owned = max(0, self.total_shares_owned - 1)




#Model

In [25]:
import numpy as np
import random
from collections import deque
from tensorflow.keras import models, layers, optimizers

class DQN:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.99  # Higher discount rate for long-term profit
        self.epsilon = 1.0  # Initial exploration rate
        self.epsilon_min = 0.05  # Reduce the minimum exploration rate
        self.epsilon_decay = 0.997  # Slower decay for better exploration-exploitation balance
        self.target_update_interval = 10  # Update the target model every 10 steps
        self.target_model = self._build_model()
        self.model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())
        self.target_update_counter = 0

    def _build_model(self):
        """Neural network for Deep Q-learning model."""
        model = models.Sequential()
        model.add(layers.Dense(48, activation='relu', input_shape=(self.state_size,)))
        model.add(layers.Dense(48, activation='relu'))
        model.add(layers.Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=optimizers.Adam(learning_rate=0.0005))
        return model

    def update_target_model(self):
        """Update the target model to match the main model."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Select an action based on the state (epsilon-greedy policy)."""
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        if len(state.shape) == 1:
            state = np.expand_dims(state, axis=0)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Replay function for full batch processing and loss reporting."""
        if len(self.memory) < batch_size:
            print("Not enough samples for replay.")
            return None

        minibatch = random.sample(self.memory, batch_size)
        losses = []

        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                q_next = self.target_model.predict(next_state, verbose=0)[0]
                target += self.gamma * np.amax(q_next)

            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target

            loss = self.model.fit(state, target_f, epochs=1, verbose=0).history['loss'][0]
            losses.append(loss)

        avg_loss = np.mean(losses) if losses else None
        print(f"Average batch loss: {avg_loss}")

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        self.target_update_counter += 1
        if self.target_update_counter % self.target_update_interval == 0:
            self.update_target_model()

        return avg_loss

    def load(self, name):
        """Load the entire model from a file."""
        self.model = models.load_model(name)
        self.target_model.set_weights(self.model.get_weights())

    def save(self, name):
        """Save the entire model to a file."""
        self.model.save(name)

#Train

In [39]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**UTLILTY FUNCTIONS**

In [27]:
def validate_dqn(val_env, dqn, batch_size):
    """Evaluate the DQN model on validation data and calculate validation accuracy."""
    print("Starting validation...")
    state = val_env.reset()
    state = np.reshape(state, [1, val_env.state_size])
    total_loss = []
    correct_predictions = 0
    total_predictions = 0

    while True:
        action = dqn.act(state)
        next_state, reward, done, _ = val_env.step(action)
        next_state = np.reshape(next_state, [1, val_env.state_size])

        # Check if the action taken was profitable
        correct_predictions += reward > 0
        total_predictions += 1

        # Calculate loss without fitting
        target = reward
        if not done:
            q_next = dqn.model.predict(next_state, verbose=0)[0]
            target += dqn.gamma * np.amax(q_next)

        target_f = dqn.model.predict(state, verbose=0)
        target_f[0][action] = target

        # Calculate loss manually without fitting the model
        loss = np.mean((target_f - state) ** 2)
        total_loss.append(loss)

        if done:
            break
        state = next_state

    # Calculate accuracy as the ratio of profitable actions to total actions
    validation_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
    avg_loss = np.mean(total_loss) if total_loss else None

    print(f"Validation completed. Accuracy: {validation_accuracy}, Loss: {avg_loss}")
    return avg_loss, validation_accuracy





def evaluate_on_test_set(test_data, dqn):
    """Evaluate the DQN model's final performance on the test set."""
    test_env = StockTradingEnv(data=test_data)
    state = test_env.reset()
    state = np.reshape(state, [1, test_env.state_size])

    correct_predictions = 0
    total_predictions = 0
    total_loss = []

    while True:
        action = dqn.act(state)
        next_state, reward, done, _ = test_env.step(action)
        next_state = np.reshape(next_state, [1, test_env.state_size])

        # Check if the action was profitable
        correct_predictions += reward > 0
        total_predictions += 1

        # Compute the loss
        target = reward
        if not done:
            q_next = dqn.model.predict(next_state, verbose=0)[0]
            target += dqn.gamma * np.amax(q_next)

        target_f = dqn.model.predict(state, verbose=0)
        target_f[0][action] = target

        loss = dqn.model.fit(state, target_f, epochs=1, verbose=0).history['loss'][0]
        total_loss.append(loss)

        if done:
            break
        state = next_state

    test_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
    return np.mean(total_loss), test_accuracy


In [33]:
from sklearn.model_selection import train_test_split

def aggregate_and_split_half(directory, test_size=0.2, validation_size=0.2):
    files = [os.path.join(directory, file) for file in os.listdir(directory) if file.endswith('.csv')]
    all_data = pd.DataFrame()
    for file in files:
        df = pd.read_csv(file)
        all_data = pd.concat([all_data, df], ignore_index=True)

    # Randomly sample half of the dataset
    sampled_data = all_data.sample(frac=0.5, random_state=42)

    train_val_data, test_data = train_test_split(sampled_data, test_size=test_size, shuffle=True)
    train_data, val_data = train_test_split(train_val_data, test_size=validation_size, shuffle=True)

    print(f"Training data shape: {train_data.shape}")
    print(f"Validation data shape: {val_data.shape}")
    print(f"Test data shape: {test_data.shape}")

    return train_data, val_data, test_data

def compute_reward(portfolio_value, previous_value):
    if portfolio_value > previous_value:
        return (portfolio_value - previous_value) * 2  # Double the reward for profitable actions
    else:
        return (portfolio_value - previous_value) - 1  # Penalize poor trades more heavily

# Modify the training function to include the new DQN structure and reward function
def train_dqn(episodes, batch_size, directory):
    train_data, val_data, test_data = aggregate_and_split_data(directory)
    train_env = StockTradingEnv(data=train_data)
    val_env = StockTradingEnv(data=val_data)
    state_size = train_env.state_size
    action_size = 3

    dqn = DQN(state_size, action_size)

    total_rewards = []
    episode_losses = []
    best_reward = -float('inf')
    save_path = '/content/drive/My Drive/best_dqn_model.h5'

    for episode in range(episodes):
        state = train_env.reset()
        state = np.reshape(state, [1, state_size])
        total_reward = 0
        episode_loss = []

        while True:
            action = dqn.act(state)
            next_state, reward, done, info = train_env.step(action)
            reward = compute_reward(info['current_portfolio_value'], train_env._get_val())
            next_state = np.reshape(next_state, [1, state_size])

            dqn.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

            if done:
                average_loss = np.mean(episode_loss) if episode_loss else None
                total_rewards.append(total_reward)
                episode_losses.append(average_loss)

                print(f"Episode {episode + 1}/{episodes} finished with Reward: {total_reward}, "
                      f"Avg Loss: {average_loss}, Portfolio Value: {info['current_portfolio_value']}")
                break

            if len(dqn.memory) > batch_size:
                loss = dqn.replay(batch_size)
                if loss is not None:
                    episode_loss.append(loss)

        val_loss, val_accuracy = validate_dqn(val_env, dqn, batch_size)
        print(f"Validation Loss after Episode {episode + 1}: {val_loss}, "
              f"Validation Accuracy: {val_accuracy}")

        if total_reward > best_reward:
            best_reward = total_reward
            dqn.save(save_path)
            print(f"New best model saved with Reward: {best_reward}")

    print("Evaluating on Test Set")
    test_loss, test_accuracy = evaluate_on_test_set(test_data, dqn)
    print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

    print("Training completed.")
    print(f"Final Avg Reward: {np.mean(total_rewards)}")
    print(f"Final Avg Loss: {np.mean([l for l in episode_losses if l is not None])}")

# Set the directory where 'historical_data' is stored
historical_data_directory = '/content/drive/MyDrive/historical_data'

# Train the DQN model with the specified episodes and batch size
train_dqn_half_data(1000, 16, historical_data_directory)

Training data shape: (17116, 8)
Validation data shape: (4279, 8)
Test data shape: (5349, 8)
Step: 1, Action: 0, Done: False, Portfolio Value: 10000.0, Reward: 0.0
Step: 2, Action: 1, Done: False, Portfolio Value: 9999.883790447473, Reward: 0.0
Step: 3, Action: 0, Done: False, Portfolio Value: 9999.883790447473, Reward: 0.0
Step: 4, Action: 1, Done: False, Portfolio Value: 9999.846514869992, Reward: 0.0
Step: 5, Action: 0, Done: False, Portfolio Value: 9999.846514869992, Reward: 0.0
Step: 6, Action: 1, Done: False, Portfolio Value: 9999.613369138857, Reward: 0.0
Step: 7, Action: 2, Done: False, Portfolio Value: 9999.613369138857, Reward: 0.0
Step: 8, Action: 2, Done: False, Portfolio Value: 9999.613369138857, Reward: 0.0
Step: 9, Action: 2, Done: False, Portfolio Value: 9999.613369138857, Reward: 0.0
Step: 10, Action: 2, Done: False, Portfolio Value: 9999.613369138857, Reward: 0.0
Step: 11, Action: 0, Done: False, Portfolio Value: 9999.613369138857, Reward: 0.0
Step: 12, Action: 2, Done

KeyboardInterrupt: 

#Training model on only AAPL data for testing (Testing Model)


In [29]:
# import pandas as pd
# import os

# def load_and_reduce_aapl_data(directory):
#     """Load and reduce the AAPL stock data by half for testing purposes."""
#     aapl_file = [file for file in os.listdir(directory) if 'AAPL' in file and file.endswith('.csv')]

#     if len(aapl_file) != 1:
#         raise FileNotFoundError("AAPL data not found or multiple files match the pattern.")

#     # Load the data
#     aapl_data = pd.read_csv(os.path.join(directory, aapl_file[0]))
#     print(f"Original AAPL data shape: {aapl_data.shape}")

#     # Reduce data size by half
#     reduced_data = aapl_data.sample(frac=0.5, random_state=42)
#     print(f"Reduced AAPL data shape: {reduced_data.shape}")

#     # Split the reduced data into training and testing datasets
#     return train_test_split(reduced_data, test_size=0.2, shuffle=True)


In [30]:
# def train_dqn_on__aapl(episodes, batch_size, directory):
#     print("Loading reduced AAPL data...")
#     train_data, test_data = load_and_reduce_aapl_data(directory)

#     # Initialize environments using only the reduced AAPL data
#     train_env = StockTradingEnv(data=train_data)
#     val_env = StockTradingEnv(data=test_data)
#     state_size = train_env.state_size
#     action_size = 3

#     dqn = DQN(state_size, action_size)

#     total_rewards = []
#     episode_losses = []
#     best_reward = -float('inf')
#     # Use the .h5 extension to save the entire model
#     save_path = 'best_dqn_reduced_aapl_model.h5'
#     validation_interval = 5  # Validate every 5 episodes

#     for episode in range(episodes):
#         print(f"Starting Episode {episode + 1}/{episodes}...")
#         state = train_env.reset()
#         state = np.reshape(state, [1, state_size])
#         total_reward = 0
#         episode_loss = []

#         for step in range(100):  # Limit the steps per episode
#             action = dqn.act(state)
#             next_state, reward, done, info = train_env.step(action)
#             next_state = np.reshape(next_state, [1, state_size])

#             dqn.remember(state, action, reward, next_state, done)
#             state = next_state
#             total_reward += reward

#             if done:
#                 break

#             # Perform experience replay if sufficient samples are available
#             if len(dqn.memory) > batch_size:
#                 loss = dqn.replay(batch_size)
#                 if loss is not None:
#                     episode_loss.append(loss)

#         average_loss = np.mean(episode_loss) if episode_loss else None
#         total_rewards.append(total_reward)
#         episode_losses.append(average_loss)

#         print(f"Episode {episode + 1}/{episodes} finished with Reward: {total_reward}, "
#               f"Avg Loss: {average_loss}, Portfolio Value: {info['current_portfolio_value']}")

#         # Perform validation evaluation every `validation_interval` episodes
#         if (episode + 1) % validation_interval == 0:
#             print(f"Starting validation for Episode {episode + 1}")
#             val_loss, val_accuracy = validate_dqn(val_env, dqn, batch_size)
#             print(f"Validation Loss after Episode {episode + 1}: {val_loss}, "
#                   f"Validation Accuracy: {val_accuracy}")

#         # Save the entire model if the current episode reward is greater
#         if total_reward > best_reward:
#             best_reward = total_reward
#             dqn.save(save_path)
#             print(f"New best reduced AAPL model saved with Reward: {best_reward}")

#     # Test set evaluation
#     print("Evaluating on Test Set")
#     test_loss, test_accuracy = evaluate_on_test_set(test_data, dqn)
#     print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

#     print("Training completed.")
#     print(f"Final Avg Reward: {np.mean(total_rewards)}")
#     print(f"Final Avg Loss: {np.mean([l for l in episode_losses if l is not None])}")



# # def train_dqn_on_aapl(episodes, batch_size, directory):
# #     print("Loading reduced AAPL data...")
# #     train_data, test_data = load_and_reduce_aapl_data(directory)
# #     train_env = StockTradingEnv(data=train_data)
# #     val_env = StockTradingEnv(data=test_data)
# #     state_size = train_env.state_size
# #     action_size = 3
# #     dqn = DQN(state_size, action_size)

# #     for episode in range(episodes):
# #         state = train_env.reset()
# #         state = np.reshape(state, [1, state_size])
# #         total_reward = 0
# #         for step in range(100):  # Limit the number of steps per episode for debugging
# #             action = dqn.act(state)
# #             next_state, reward, done, info = train_env.step(action)
# #             print(f"Step {step}: Reward={reward}, Done={done}")
# #             if done:
# #                 break
# #             state = np.reshape(next_state, [1, state_size])
# #             total_reward += reward

# #         print(f"Episode {episode + 1} finished. Total Reward: {total_reward}")


In [31]:
# from sklearn.model_selection import train_test_split
# # Set your directory path for the historical data
# historical_data_directory = '/content/drive/My Drive/historical_data'

# # Number of episodes and batch size for training
# episodes = 50  # For initial testing, adjust as needed
# batch_size = 32

# # Call the function to train the model on AAPL stock data
# train_dqn_on__aapl(episodes, batch_size, historical_data_directory)


#Testing

In [37]:
from tensorflow.keras import models

dqn_model = models.load_model('/content/best_dqn_reduced_aapl_model.h5', compile=False)

# Manually compile the model
dqn_model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='mse'
)

In [38]:
test_data = np.array([[50.0, 5, 10000.0, 15]])  # Example: [current_price, shares_held, cash_in_hand, total_shares_owned]
test_data = test_data.reshape(1, -1)  # Ensure it's in a 2D shape, with (batch_size, input_dim)

# Make predictions using the loaded model
predictions = dqn_model.predict(test_data, verbose=0)

# Interpret predictions
# Assuming an action space with three actions (0: Buy, 1: Sell, 2: Hold)
actions = ['Buy', 'Sell', 'Hold']
predicted_action = np.argmax(predictions[0])

# Output the prediction
print(f"Predicted Action: {actions[predicted_action]}")
print(f"Raw Model Output: {predictions}")

Predicted Action: Buy
Raw Model Output: [[10.745877   0.867387  -4.8331656]]


In [45]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model

# Load your trained model
dqn_model = load_model('/content/best_dqn_reduced_aapl_model.h5', compile=False)
dqn_model.compile(optimizer='adam', loss='mse')

# Read the CSV file
csv_path = '/content/drive/MyDrive/AAPL_daily_data.csv'
data = pd.read_csv(csv_path)

# Specify the feature columns expected by the model
required_features = ['Open', 'High', 'Low', 'Close']  # Replace with your actual features
data = data[required_features]

# Normalize the features (adjust if needed)
def normalize_features(df):
    return (df - df.min()) / (df.max() - df.min())

# Apply normalization to the last row
last_row = data.iloc[-1]
last_row_normalized = normalize_features(data).iloc[-1].values.reshape(1, -1)

# Make a prediction for the latest row
prediction = dqn_model.predict(last_row_normalized, verbose=0)

# Interpret the prediction
actions = ['Buy', 'Sell', 'Hold']
predicted_action = np.argmax(prediction)
print(f"Predicted Action: {actions[predicted_action]}")
print(f"Raw Model Output: {prediction}")


Predicted Action: Sell
Raw Model Output: [[-0.1452975   0.12315124 -0.09859727]]
