<a href="https://colab.research.google.com/github/abhinavarorags/CoolStuff/blob/test/DQN_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
#Deep Q Network
import numpy as np
import pandas as pd
import tensorflow as tf
from collections import deque
import random

# Hyperparameters
# -- training schedule --
episodes = 100          # number of training episodes
batch_size = 64         # transitions sampled from the replay memory per replay call
memory_size = 10_000    # maximum number of transitions kept in the replay memory
# -- optimisation --
learning_rate = 0.001   # learning rate handed to the optimizer
gamma = 0.99            # discount factor applied to the bootstrapped next-state value
# -- epsilon-greedy exploration --
epsilon = 1.0           # exploration probability at the start of training
epsilon_min = 0.01      # epsilon is no longer decayed once it is not above this value
epsilon_decay = 0.995   # multiplicative decay factor for epsilon

def load_data(file_path, url='https://raw.githubusercontent.com/abhinavarorags/CoolStuff/refs/heads/test/sample_data.csv', train_size=20000):
    """Load the dataset and split it into training and testing frames.

    The CSV is read from ``file_path``. If that file does not exist, the
    dataset is read from ``url`` instead.

    Args:
        file_path: Local path of the CSV file.
        url: Fallback location handed to ``pd.read_csv`` when ``file_path``
            is missing.
        train_size: Number of leading rows that form the training set; all
            remaining rows form the test set. Defaults to 20000, the split
            point that used to be hard-coded.

    Returns:
        A ``(train_data, test_data)`` tuple of DataFrames. ``test_data`` is
        empty when the dataset has at most ``train_size`` rows.

    Raises:
        ValueError: If ``train_size`` is negative.
    """
    if train_size < 0:
        # A negative split point would silently slice from the end of the frame.
        raise ValueError(f"train_size must be non-negative, got {train_size}")
    try:
        data = pd.read_csv(file_path)
    except FileNotFoundError:
        # Local copy missing: fall back to the remote copy.
        data = pd.read_csv(url)
    train_data = data.iloc[:train_size]
    test_data = data.iloc[train_size:]
    return train_data, test_data

# Dataset location; load_data reads its fallback URL when this file is absent
file_path = '/mnt/data/sample_data.csv'
train_data, test_data = load_data(file_path)

# Placeholder environment dimensions
action_space = 3  # Number of possible actions (Buy Long, Sell Short, Hold)
state_space = len(train_data.columns) - 1  # Every column except the last one is treated as a state feature

# Neural Network for Deep Q-Learning
class DQN(tf.keras.Model):
    """Feed-forward Q-network: two 24-unit ReLU layers and a linear output layer.

    The output layer has ``action_space`` units, so a forward pass produces
    one value per action for every input row.
    """

    def __init__(self, action_space):
        """Create the three dense layers; ``action_space`` sizes the output layer."""
        super().__init__()
        hidden_units = 24
        self.fc1 = tf.keras.layers.Dense(hidden_units, activation='relu')
        self.fc2 = tf.keras.layers.Dense(hidden_units, activation='relu')
        self.out = tf.keras.layers.Dense(action_space, activation='linear')

    def call(self, x):
        """Pass ``x`` through both hidden layers and return the output layer's result."""
        return self.out(self.fc2(self.fc1(x)))

# Experience Replay Memory
# Bounded buffer: once memory_size transitions are stored, appending a new
# one discards the oldest.
memory = deque(maxlen=memory_size)

def remember(state, action, reward, next_state, done):
    """Append one (state, action, reward, next_state, done) transition to ``memory``."""
    transition = (state, action, reward, next_state, done)
    memory.append(transition)

def act(model, state, epsilon):
    """Pick an action epsilon-greedily.

    A uniform draw not greater than ``epsilon`` selects a random action index
    below ``action_space``; otherwise the index of the largest value that
    ``model`` returns for ``state`` is chosen.
    """
    explore = np.random.rand() <= epsilon
    if not explore:
        # The model expects a batch, so wrap the single state in a batch of one.
        batch = np.array([state])
        return np.argmax(model(batch)[0])
    return random.randrange(action_space)

def replay(model, target_model, optimizer):
    """Train ``model`` on one random minibatch drawn from ``memory``.

    Does nothing until ``memory`` holds at least ``batch_size`` transitions.
    Otherwise ``batch_size`` transitions are sampled and a single gradient
    step is applied for the whole minibatch. The previous implementation
    performed one optimizer step and three forward passes per sampled
    transition; this version needs two forward passes in total.

    For each transition the target for the action that was taken is
    ``reward`` when ``done`` is set, and
    ``reward + gamma * max_a target_model(next_state)[a]`` otherwise. The
    targets for the other actions equal the current predictions, so they
    contribute zero error.

    Args:
        model: Online network being trained.
        target_model: Network used to evaluate next states.
        optimizer: Optimizer whose ``apply_gradients`` updates ``model``.
    """
    if len(memory) < batch_size:
        return

    minibatch = random.sample(memory, batch_size)
    states, actions, rewards, next_states, dones = zip(*minibatch)
    states = np.asarray(states, dtype=np.float32)
    next_states = np.asarray(next_states, dtype=np.float32)
    actions = np.asarray(actions, dtype=np.int64)
    rewards = np.asarray(rewards, dtype=np.float32)
    dones = np.asarray(dones, dtype=bool)

    # Bootstrapped value of every next state, evaluated in one forward pass.
    next_q = target_model(next_states).numpy()
    targets = np.where(dones, rewards, rewards + gamma * next_q.max(axis=1))
    rows = np.arange(batch_size)

    with tf.GradientTape() as tape:
        q_values = model(states)
        # Constant copy of the predictions with only the taken action's entry
        # replaced, so the error is non-zero for that action alone.
        target_f = q_values.numpy().copy()
        target_f[rows, actions] = targets
        loss = tf.reduce_mean(tf.keras.losses.MSE(target_f, q_values))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

def train_model(model, target_model, optimizer, target_update_every=1):
    """Run the training loop for ``episodes`` episodes.

    Each episode starts from a randomly sampled row of ``train_data`` and
    steps through further randomly sampled rows. Rewards and episode
    termination are random placeholders. Every transition is stored with
    ``remember``. After each episode ``replay`` is called once and the
    exploration rate is decayed.

    Args:
        model: Online network used to select actions and trained by ``replay``.
        target_model: Target network passed to ``replay``.
        optimizer: Optimizer passed to ``replay``.
        target_update_every: Copy the online weights into ``target_model``
            every this many episodes. The default of 1 keeps the previous
            behaviour of syncing after every episode; a larger value lets
            the target network lag behind the online one.

    Raises:
        ValueError: If ``target_update_every`` is smaller than 1.
    """
    if target_update_every < 1:
        raise ValueError(
            f"target_update_every must be at least 1, got {target_update_every}"
        )

    # Start from the module-level hyperparameter instead of a hard-coded 1.0.
    eps = epsilon
    for e in range(episodes):
        # Features (all columns but the last) of a random training row.
        state = train_data.sample().iloc[:, :-1].values.flatten()
        total_reward = 0
        done = False
        while not done:
            action = act(model, state, eps)
            next_state = train_data.sample().iloc[:, :-1].values.flatten()
            reward = np.random.rand()  # Placeholder for reward
            done = np.random.rand() > 0.95  # Randomly ending the episode
            remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

        replay(model, target_model, optimizer)

        if eps > epsilon_min:
            # Clamp so the decay can never push epsilon below its floor.
            eps = max(epsilon_min, eps * epsilon_decay)
        if (e + 1) % target_update_every == 0:
            target_model.set_weights(model.get_weights())
        print(f"Episode {e+1}/{episodes}, Total Reward: {total_reward:.2f}, Epsilon: {eps:.2f}")
    print("Training completed.")

def test_model(model, test_episodes=100):
    """Run the greedy policy on ``test_data`` and print a per-action tally.

    Each episode starts from a randomly sampled test row and steps through
    further randomly sampled rows. The action with the largest model output
    is chosen at every step. Rewards and episode termination are random
    placeholders; a step counts as "correct" when its reward exceeds 0.7.

    Args:
        model: Network whose outputs are used to pick actions.
        test_episodes: Number of episodes to run.

    Raises:
        ValueError: If episodes were requested but ``test_data`` is empty.
    """
    if test_episodes > 0 and test_data.empty:
        # Fail with a clear message instead of an opaque sampling error.
        raise ValueError("test_data is empty; there is nothing to evaluate on")

    action_names = ("Buy Long", "Sell Short", "Hold")
    correct_predictions = dict.fromkeys(action_names, 0)
    wrong_predictions = dict.fromkeys(action_names, 0)

    for _ in range(test_episodes):
        # Features (all columns but the last) of a random test row.
        state = test_data.sample().iloc[:, :-1].values.flatten()
        done = False
        while not done:
            # Greedy action: no exploration during testing.
            action = np.argmax(model(np.array([state]))[0])
            next_state = test_data.sample().iloc[:, :-1].values.flatten()
            reward = np.random.rand()  # Placeholder for reward
            done = np.random.rand() > 0.95  # Randomly ending the episode
            action_name = action_names[action]
            if reward > 0.7:  # Assuming a reward above 0.7 is a correct prediction
                correct_predictions[action_name] += 1
            else:
                wrong_predictions[action_name] += 1
            state = next_state

    # Nothing is written to disk here, so the message no longer claims that
    # test data was saved.
    print("Testing completed.")
    print("\t\tCorrect Predictions  Wrong Predictions")
    for action in action_names:
        print(f"{action}\t\t{correct_predictions[action]}\t\t\t{wrong_predictions[action]}")

# Load, Train and Test Model
model = DQN(action_space)
target_model = DQN(action_space)
# The Dense layers create their weights on the first call. Without building
# both networks first, get_weights() returns an empty list, the initial sync
# copies nothing, and a later sync can fail if only one network has been built.
model(np.zeros((1, state_space), dtype=np.float32))
target_model(np.zeros((1, state_space), dtype=np.float32))
target_model.set_weights(model.get_weights())
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

train_model(model, target_model, optimizer)
test_model(model)


Episode 1/100, Total Reward: 12.26, Epsilon: 0.99
Episode 2/100, Total Reward: 23.26, Epsilon: 0.99
Episode 3/100, Total Reward: 3.17, Epsilon: 0.99
Episode 4/100, Total Reward: 21.16, Epsilon: 0.98
Episode 5/100, Total Reward: 3.33, Epsilon: 0.98
Episode 6/100, Total Reward: 15.78, Epsilon: 0.97
Episode 7/100, Total Reward: 1.74, Epsilon: 0.97
Episode 8/100, Total Reward: 18.46, Epsilon: 0.96
Episode 9/100, Total Reward: 0.46, Epsilon: 0.96
Episode 10/100, Total Reward: 1.81, Epsilon: 0.95
Episode 11/100, Total Reward: 37.68, Epsilon: 0.95
Episode 12/100, Total Reward: 3.02, Epsilon: 0.94
Episode 13/100, Total Reward: 30.16, Epsilon: 0.94
Episode 14/100, Total Reward: 0.80, Epsilon: 0.93
Episode 15/100, Total Reward: 24.74, Epsilon: 0.93
Episode 16/100, Total Reward: 5.31, Epsilon: 0.92
Episode 17/100, Total Reward: 7.46, Epsilon: 0.92
Episode 18/100, Total Reward: 5.42, Epsilon: 0.91
Episode 19/100, Total Reward: 20.20, Epsilon: 0.91
Episode 20/100, Total Reward: 2.31, Epsilon: 0.90


In [8]:
#Double - Deep Q Network
import numpy as np
import pandas as pd
import tensorflow as tf
from collections import deque
import random

# Hyperparameters
# Exploration (epsilon-greedy)
epsilon = 1.0           # exploration probability at the start of training
epsilon_min = 0.01      # epsilon is no longer decayed once it is not above this value
epsilon_decay = 0.995   # multiplicative decay factor for epsilon
# Optimisation
learning_rate = 0.001   # learning rate handed to the optimizer
gamma = 0.99            # discount factor applied to the bootstrapped next-state value
# Schedule and replay memory
episodes = 100          # number of training episodes
batch_size = 64         # transitions sampled from the replay memory per replay call
memory_size = 10_000    # maximum number of transitions kept in the replay memory

def load_data(file_path, url='https://raw.githubusercontent.com/abhinavarorags/CoolStuff/refs/heads/test/sample_data.csv', train_size=20000):
    """Load the dataset and split it into training and testing frames.

    The CSV is read from ``file_path``. If that file does not exist, the
    dataset is read from ``url`` instead.

    Args:
        file_path: Local path of the CSV file.
        url: Fallback location handed to ``pd.read_csv`` when ``file_path``
            is missing.
        train_size: Number of leading rows that form the training set; all
            remaining rows form the test set. Defaults to 20000, the split
            point that used to be hard-coded.

    Returns:
        A ``(train_data, test_data)`` tuple of DataFrames. ``test_data`` is
        empty when the dataset has at most ``train_size`` rows.

    Raises:
        ValueError: If ``train_size`` is negative.
    """
    if train_size < 0:
        # A negative split point would silently slice from the end of the frame.
        raise ValueError(f"train_size must be non-negative, got {train_size}")
    try:
        data = pd.read_csv(file_path)
    except FileNotFoundError:
        # Local copy missing: fall back to the remote copy.
        data = pd.read_csv(url)
    train_data = data.iloc[:train_size]
    test_data = data.iloc[train_size:]
    return train_data, test_data

# Dataset location; load_data reads its fallback URL when this file is absent
file_path = '/mnt/data/sample_data.csv'
train_data, test_data = load_data(file_path)

# Placeholder environment dimensions
action_space = 3  # Number of possible actions (Buy Long, Sell Short, Hold)
state_space = len(train_data.columns) - 1  # Every column except the last one is treated as a state feature

# Neural Network for Deep Q-Learning
class DQN(tf.keras.Model):
    """Q-network with two 24-unit ReLU hidden layers and a linear output layer.

    The output layer has ``action_space`` units, giving one value per action
    for every input row.
    """

    def __init__(self, action_space):
        """Build the layers; the output layer gets ``action_space`` units."""
        super().__init__()
        dense = tf.keras.layers.Dense
        self.fc1 = dense(24, activation='relu')
        self.fc2 = dense(24, activation='relu')
        self.out = dense(action_space, activation='linear')

    def call(self, x):
        """Forward pass: fc1, then fc2, then the output layer."""
        hidden = self.fc2(self.fc1(x))
        return self.out(hidden)

# Experience Replay Memory
# Holds at most memory_size transitions; older entries are dropped first.
memory = deque(maxlen=memory_size)

def remember(state, action, reward, next_state, done):
    """Record a single transition tuple at the end of ``memory``."""
    experience = (state, action, reward, next_state, done)
    memory.append(experience)

def act(model, state, epsilon):
    """Choose an action with an epsilon-greedy rule.

    When a uniform draw is not greater than ``epsilon`` a random action index
    below ``action_space`` is returned; otherwise the index of the largest
    value ``model`` produces for ``state``.
    """
    if np.random.rand() <= epsilon:
        chosen = random.randrange(action_space)
    else:
        # Single state wrapped as a batch of one before the forward pass.
        predictions = model(np.array([state]))
        chosen = np.argmax(predictions[0])
    return chosen

def replay(model, target_model, optimizer):
    """Train ``model`` on one random minibatch using the Double-DQN target.

    Does nothing until ``memory`` holds at least ``batch_size`` transitions.
    Otherwise ``batch_size`` transitions are sampled and a single gradient
    step is applied for the whole minibatch. The previous implementation
    performed one optimizer step and four forward passes per sampled
    transition; this version needs three forward passes in total.

    For each transition the target for the action that was taken is
    ``reward`` when ``done`` is set. Otherwise the online ``model`` picks
    the best next action and ``target_model`` supplies that action's value:
    ``reward + gamma * target_model(next_state)[argmax_a model(next_state)[a]]``.
    The targets for the other actions equal the current predictions, so they
    contribute zero error.

    Args:
        model: Online network; selects the next action and is trained.
        target_model: Network that evaluates the selected next action.
        optimizer: Optimizer whose ``apply_gradients`` updates ``model``.
    """
    if len(memory) < batch_size:
        return

    minibatch = random.sample(memory, batch_size)
    states, actions, rewards, next_states, dones = zip(*minibatch)
    states = np.asarray(states, dtype=np.float32)
    next_states = np.asarray(next_states, dtype=np.float32)
    actions = np.asarray(actions, dtype=np.int64)
    rewards = np.asarray(rewards, dtype=np.float32)
    dones = np.asarray(dones, dtype=bool)
    rows = np.arange(batch_size)

    # Action selection by the online network, evaluation by the target network.
    best_next_actions = model(next_states).numpy().argmax(axis=1)
    next_values = target_model(next_states).numpy()[rows, best_next_actions]
    targets = np.where(dones, rewards, rewards + gamma * next_values)

    with tf.GradientTape() as tape:
        q_values = model(states)
        # Constant copy of the predictions with only the taken action's entry
        # replaced, so the error is non-zero for that action alone.
        target_f = q_values.numpy().copy()
        target_f[rows, actions] = targets
        loss = tf.reduce_mean(tf.keras.losses.MSE(target_f, q_values))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

def train_model(model, target_model, optimizer, target_update_every=1):
    """Run the training loop for ``episodes`` episodes.

    Each episode starts from a randomly sampled row of ``train_data`` and
    steps through further randomly sampled rows. Rewards and episode
    termination are random placeholders. Every transition is stored with
    ``remember``. After each episode ``replay`` is called once and the
    exploration rate is decayed.

    Args:
        model: Online network used to select actions and trained by ``replay``.
        target_model: Target network passed to ``replay``.
        optimizer: Optimizer passed to ``replay``.
        target_update_every: Copy the online weights into ``target_model``
            every this many episodes. The default of 1 keeps the previous
            behaviour of syncing after every episode; a larger value lets
            the target network lag behind the online one.

    Raises:
        ValueError: If ``target_update_every`` is smaller than 1.
    """
    if target_update_every < 1:
        raise ValueError(
            f"target_update_every must be at least 1, got {target_update_every}"
        )

    # Start from the module-level hyperparameter instead of a hard-coded 1.0.
    eps = epsilon
    for e in range(episodes):
        # Features (all columns but the last) of a random training row.
        state = train_data.sample().iloc[:, :-1].values.flatten()
        total_reward = 0
        done = False
        while not done:
            action = act(model, state, eps)
            next_state = train_data.sample().iloc[:, :-1].values.flatten()
            reward = np.random.rand()  # Placeholder for reward
            done = np.random.rand() > 0.95  # Randomly ending the episode
            remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

        replay(model, target_model, optimizer)

        if eps > epsilon_min:
            # Clamp so the decay can never push epsilon below its floor.
            eps = max(epsilon_min, eps * epsilon_decay)
        if (e + 1) % target_update_every == 0:
            target_model.set_weights(model.get_weights())
        print(f"Episode {e+1}/{episodes}, Total Reward: {total_reward:.2f}, Epsilon: {eps:.2f}")
    print("Training completed.")

def test_model(model, test_episodes=100):
    """Run the greedy policy on ``test_data`` and print a per-action tally.

    Each episode starts from a randomly sampled test row and steps through
    further randomly sampled rows. The action with the largest model output
    is chosen at every step. Rewards and episode termination are random
    placeholders; a step counts as "correct" when its reward exceeds 0.7.

    Args:
        model: Network whose outputs are used to pick actions.
        test_episodes: Number of episodes to run.

    Raises:
        ValueError: If episodes were requested but ``test_data`` is empty.
    """
    if test_episodes > 0 and test_data.empty:
        # Fail with a clear message instead of an opaque sampling error.
        raise ValueError("test_data is empty; there is nothing to evaluate on")

    action_names = ("Buy Long", "Sell Short", "Hold")
    correct_predictions = dict.fromkeys(action_names, 0)
    wrong_predictions = dict.fromkeys(action_names, 0)

    for _ in range(test_episodes):
        # Features (all columns but the last) of a random test row.
        state = test_data.sample().iloc[:, :-1].values.flatten()
        done = False
        while not done:
            # Greedy action: no exploration during testing.
            action = np.argmax(model(np.array([state]))[0])
            next_state = test_data.sample().iloc[:, :-1].values.flatten()
            reward = np.random.rand()  # Placeholder for reward
            done = np.random.rand() > 0.95  # Randomly ending the episode
            action_name = action_names[action]
            if reward > 0.7:  # Assuming a reward above 0.7 is a correct prediction
                correct_predictions[action_name] += 1
            else:
                wrong_predictions[action_name] += 1
            state = next_state

    # Nothing is written to disk here, so the message no longer claims that
    # test data was saved.
    print("Testing completed.")
    print("\t\tCorrect Predictions  Wrong Predictions")
    for action in action_names:
        print(f"{action}\t\t{correct_predictions[action]}\t\t\t{wrong_predictions[action]}")

# Load, Train and Test Model
model = DQN(action_space)
target_model = DQN(action_space)
# The Dense layers create their weights on the first call. Without building
# both networks first, get_weights() returns an empty list, the initial sync
# copies nothing, and a later sync can fail if only one network has been built.
model(np.zeros((1, state_space), dtype=np.float32))
target_model(np.zeros((1, state_space), dtype=np.float32))
target_model.set_weights(model.get_weights())
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

train_model(model, target_model, optimizer)
test_model(model)


Episode 1/100, Total Reward: 7.34, Epsilon: 0.99
Episode 2/100, Total Reward: 5.21, Epsilon: 0.99
Episode 3/100, Total Reward: 14.48, Epsilon: 0.99
Episode 4/100, Total Reward: 13.90, Epsilon: 0.98
Episode 5/100, Total Reward: 5.73, Epsilon: 0.98
Episode 6/100, Total Reward: 0.82, Epsilon: 0.97
Episode 7/100, Total Reward: 10.92, Epsilon: 0.97
Episode 8/100, Total Reward: 6.65, Epsilon: 0.96
Episode 9/100, Total Reward: 3.74, Epsilon: 0.96
Episode 10/100, Total Reward: 11.04, Epsilon: 0.95
Episode 11/100, Total Reward: 4.41, Epsilon: 0.95
Episode 12/100, Total Reward: 7.78, Epsilon: 0.94
Episode 13/100, Total Reward: 12.44, Epsilon: 0.94
Episode 14/100, Total Reward: 17.83, Epsilon: 0.93
Episode 15/100, Total Reward: 4.35, Epsilon: 0.93
Episode 16/100, Total Reward: 4.57, Epsilon: 0.92
Episode 17/100, Total Reward: 7.32, Epsilon: 0.92
Episode 18/100, Total Reward: 1.00, Epsilon: 0.91
Episode 19/100, Total Reward: 3.36, Epsilon: 0.91
Episode 20/100, Total Reward: 16.65, Epsilon: 0.90
Ep