In [3]:
from environments.single_stock import SingleStockEnv
from model_architecture.DQN_all import DQNAgents
from agent_training.train import train_agent
from visualisation.visualisation import visualize_trades_with_benchmark
from data_preperation.data_perp import prepare_data


In [4]:
# Load the prepared CSCO frame, then wrap it in the single-stock trading
# environment. Both names are reused by the cells further down.
df = prepare_data("CSCO")
env = SingleStockEnv(df)


                                Open       High        Low      Close  \
Date                                                                    
2020-02-27 00:00:00-05:00  35.583489  36.290331  34.488744  34.514606   
2020-02-28 00:00:00-05:00  33.359521  34.954226  33.057818  34.419785   
2020-03-02 00:00:00-05:00  34.609430  35.505911  33.549163  35.488670   
2020-03-03 00:00:00-05:00  35.566248  36.117933  34.230144  34.514606   
2020-03-04 00:00:00-05:00  35.100766  35.730028  34.307722  35.678307   

                             Volume  Dividends  Stock Splits        RSI  \
Date                                                                      
2020-02-27 00:00:00-05:00  51442900        0.0           0.0  18.407578   
2020-02-28 00:00:00-05:00  80188000        0.0           0.0  19.266035   
2020-03-02 00:00:00-05:00  48864300        0.0           0.0  21.311419   
2020-03-03 00:00:00-05:00  49294700        0.0           0.0  18.194535   
2020-03-04 00:00:00-05:00  30022100   

In [7]:
# Size the agent from the environment's action and observation spaces.
action_size = env.action_space.n
state_size = env.observation_space.shape[0]
agent = DQNAgents(state_size, action_size, model_type="standard")

# Train for 500 episodes. train_agent hands back the reward history, the
# portfolio history and a state dict (presumably the weights of the
# best-reward episode, judging by the "New best reward!" log -- confirm).
rewards_history, portfolio_history, best_state_dict = train_agent(
    env, agent, episodes=500
)

# Put the returned weights back into the policy network before plotting.
agent.policy_net.load_state_dict(best_state_dict)
visualize_trades_with_benchmark(env, agent, df)

New best reward! Portfolio Value: 152162.5146810295 Total Reward: 306.2807478818631
Episode: 10, Reward: 320.92, Portfolio Value: $145428.10, Epsilon: 0.01
New best reward! Portfolio Value: 145428.10429516927 Total Reward: 320.92265748823615
New best reward! Portfolio Value: 158382.47610544672 Total Reward: 355.6241852237578
Episode: 20, Reward: 283.86, Portfolio Value: $153207.07, Epsilon: 0.01
New best reward! Portfolio Value: 160814.37853646785 Total Reward: 361.6627328095262
New best reward! Portfolio Value: 164583.05825444034 Total Reward: 383.59350442986323
New best reward! Portfolio Value: 169389.4945191704 Total Reward: 390.4075568958165
Episode: 30, Reward: 322.28, Portfolio Value: $153595.92, Epsilon: 0.01
Episode: 40, Reward: 344.91, Portfolio Value: $158031.22, Epsilon: 0.01
New best reward! Portfolio Value: 168372.16368392343 Total Reward: 396.58699021325197
Episode: 50, Reward: 211.33, Portfolio Value: $143581.18, Epsilon: 0.01
New best reward! Portfolio Value: 171759.790

In [None]:
# Same two calls that end the training cell: load `best_state_dict` into the
# policy network, then draw the trades-versus-benchmark figure.
# Relies on `agent`, `best_state_dict`, `env` and `df` already existing in the
# kernel, so the training cell must have been run first.
agent.policy_net.load_state_dict(best_state_dict)

visualize_trades_with_benchmark(env, agent, df)

In [None]:
# Function to compare different models
import matplotlib.pyplot as plt
import numpy as np

def compare_models(env, episodes=100):
    """Train the three DQN variants on ``env`` and compare their histories.

    One ``DQNAgents`` instance per ``model_type`` ('standard', 'dueling',
    'lstm') is created up front. Each is then trained in turn with
    ``train_agent`` on the same ``env`` object. The reward and portfolio
    histories are plotted on two stacked axes and summarised on stdout.

    NOTE(review): every model is trained on the same ``env`` instance; this
    assumes ``train_agent`` resets the environment itself -- confirm.

    Args:
        env: Environment exposing ``observation_space.shape`` and
            ``action_space.n``. It is passed unchanged to ``train_agent``.
        episodes: Episode count forwarded to ``train_agent`` for every model.

    Returns:
        dict: Display name -> ``{'rewards': ..., 'portfolio': ...}``, holding
        the first two values returned by ``train_agent`` for that model.
    """
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n

    models = {
        'Standard DQN': DQNAgents(state_size, action_size, model_type='standard'),
        'Dueling DQN': DQNAgents(state_size, action_size, model_type='dueling'),
        'LSTM DQN': DQNAgents(state_size, action_size, model_type='lstm')
    }

    results = {}
    for model_name, agent in models.items():
        print(f"\nTraining {model_name}...")
        # The third return value (a state dict) is not needed for the comparison.
        rewards_history, portfolio_history, _ = train_agent(env, agent, episodes=episodes)
        results[model_name] = {
            'rewards': rewards_history,
            'portfolio': portfolio_history
        }

    _plot_training_histories(results)
    _print_final_statistics(results)

    return results


def _plot_training_histories(results):
    """Plot every model's reward and portfolio history on two stacked axes."""
    fig, (reward_ax, portfolio_ax) = plt.subplots(2, 1, figsize=(15, 10))

    # (axis, key in the per-model dict, title, y-axis label)
    panels = (
        (reward_ax, 'rewards', 'Training Rewards Comparison', 'Total Reward'),
        (portfolio_ax, 'portfolio', 'Portfolio Value Comparison', 'Portfolio Value ($)'),
    )
    for ax, key, title, ylabel in panels:
        for model_name, data in results.items():
            ax.plot(data[key], label=model_name)
        ax.set_title(title)
        ax.set_xlabel('Episode')
        ax.set_ylabel(ylabel)
        ax.legend()

    fig.tight_layout()
    plt.show()


def _print_final_statistics(results):
    """Print last-entry and mean reward / portfolio value for every model."""
    print("\nFinal Statistics:")
    for model_name, data in results.items():
        rewards = data['rewards']
        portfolio = data['portfolio']
        print(f"\n{model_name}:")

        # An empty history (e.g. episodes=0) has no last entry; indexing [-1]
        # would raise IndexError and lose the already-computed results.
        # len() is used rather than truthiness so array-like histories work.
        if len(rewards) == 0 or len(portfolio) == 0:
            print("No completed episodes to report.")
            continue

        print(f"Final Reward: {rewards[-1]:.2f}")
        print(f"Final Portfolio Value: ${portfolio[-1]:.2f}")
        print(f"Average Reward: {np.mean(rewards):.2f}")
        print(f"Average Portfolio Value: ${np.mean(portfolio):.2f}")

# Rebuild the data frame and environment, then train and compare all three
# DQN variants for 300 episodes each. Rebinds the notebook-level `df` and `env`.
df = prepare_data("CSCO")  # or any other stock
env = SingleStockEnv(df)
results = compare_models(env, episodes=300)