#### PPO

In [None]:
import time
from stable_baselines3 import PPO

import torch.nn as nn
import torch
from cryptoportfolio.rlagent.rlagent_ppo import RLAgent as ppo_agent
from cryptoportfolio.rlagent.network import CustomCNN_PPO
from cryptoportfolio.rlagent.network import CustomActorCriticPolicy 
import pprint
import torch.nn as nn

from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from cryptoportfolio.tools.eval_mod import custom_eval_callback_scenario4
from cryptoportfolio.tools.eval_mod import custom_eval_callback_testset

def _compose_feature_sets():
    """Return the mapping from indicator-set label to the feature names it selects.

    Combined sets are assembled from the basic groups so that every name is
    spelled exactly once; each value is its own independent list.
    """
    rsi = ["RSI"]
    mcd = ["mcd", "mcd_signal"]
    sma = ["SMA_50", "SMA_200"]
    osz = ["stoch_oszillator", "stoch_oszillator_signal"]
    bollinger = ["Bollinger_middle", "Bollinger_low", "Bollinger_high"]
    return {
        "None": [],
        "RSI": [*rsi],
        "mcd": [*mcd],
        "SMA": [*sma],
        "RSI_mcd": rsi + mcd,
        "RSI_SMA": rsi + sma,
        "OSZ": [*osz],
        "RSI_OSZ": rsi + osz,
        "Bollinger": [*bollinger],
        "Bollinger_mcd": mcd + bollinger,
        "RSI_SMA_Bollinger": rsi + sma + bollinger,
        "mcd_OSZ": mcd + osz,
        "Everything": rsi + sma + bollinger + mcd + osz,
    }


# Indicator sets that can be appended to the base price features.
features_list = _compose_feature_sets()

# Activation-layer classes selectable by name.
activation_fn = dict(tanh=nn.Tanh, relu=nn.ReLU)

def get_batches(n_steps):
    """Return, in ascending order, every integer in [10, 300) that divides ``n_steps``.

    The value of ``n_steps`` is echoed to stdout before the search.
    """
    print("n_steps:", n_steps)
    return [candidate for candidate in range(10, 300) if not n_steps % candidate]

def backtest_ppo(hyperparams, total_iterations, eval_freq, load_model=None, scenario=None):
    """Train (or resume) a PPO model on the scenario-4 train split and backtest it on 2022.

    Args:
        hyperparams: Dict of PPO settings. It is modified in place: ``"n_steps"``
            and ``"env"`` are added and ``"policy_kwargs"`` is extended with the
            ``CustomCNN_PPO`` feature-extractor configuration.
        total_iterations: Multiplier for the rollout length; training runs for
            ``total_iterations * n_steps`` timesteps.
        eval_freq: Forwarded to ``custom_eval_callback_testset``.
        load_model: File name under ``models/ppo/`` to resume from. ``None``
            builds a new model from ``hyperparams``.
        scenario: Label used for the tensorboard directory ``logs/ppo/<scenario>``
            when a new model is built.

    Returns:
        Tuple ``(test_agent, model)``: the 2022 backtest environment after the
        episode has been played and rendered, and the trained model.
    """
    # Used as the tensorboard run name so that repeated runs do not collide.
    timer = int(time.time())

    pp = pprint.PrettyPrinter(indent=0)
    pp.pprint(hyperparams)

    # Training environment.
    agent = ppo_agent(lookback_window_size=hyperparams["window_size"],
                      features=hyperparams["features"],
                      batch_size=hyperparams["batch_size"])
    agent.generate_portfolio(year="scenario4", synthetic=False, split="train")

    # One rollout spans the data that remains after the lookback window.
    n_steps = int(agent._crash_length - agent._lookback_window_size)
    hyperparams.update({"n_steps": n_steps})
    hyperparams.update({"env": agent})
    hyperparams["policy_kwargs"].update(
        {"features_extractor_class": CustomCNN_PPO,
         "features_extractor_kwargs": dict(features_dim=13, agent_env=agent)}
    )

    if load_model is not None:
        # Wrap the env. A separate name is used for the monitored env so the
        # lambda does not capture a variable that is rebound afterwards.
        monitored_env = Monitor(agent)
        env = DummyVecEnv([lambda: monitored_env])
        _ = env.reset()

        # Attach the env through load() instead of assigning model.env, so the
        # library validates the spaces and forces a reset before training.
        model = PPO.load(f"models/ppo/{load_model}", env=env)
        print("Model policy:", model.policy)

    else:
        logdir = f"logs/ppo/{scenario}"
        model = PPO(
            policy=hyperparams["policy"],
            env=hyperparams["env"],
            learning_rate=hyperparams["learning_rate"],
            n_steps=hyperparams["n_steps"],
            batch_size=hyperparams["batch_size"],
            n_epochs=hyperparams["n_epochs"],
            gamma=hyperparams["gamma"],
            gae_lambda=hyperparams["gae_lambda"],
            clip_range=hyperparams["clip_range"],
            ent_coef=hyperparams["ent_coef"],
            vf_coef=hyperparams["vf_coef"],
            max_grad_norm=hyperparams["max_grad_norm"],
            tensorboard_log=logdir,
            policy_kwargs=hyperparams["policy_kwargs"],
            verbose=1,
            seed=hyperparams["seed"],
        )

    total_timesteps = total_iterations * n_steps

    # Custom evaluation callback (test-set variant).
    eval_callback = custom_eval_callback_testset(hyperparams, total_timesteps, "PPO", eval_freq)
    model.learn(total_timesteps, tb_log_name=f"{timer}", callback=eval_callback)

    # Backtest on the whole of 2022 with a fresh environment.
    test_agent = ppo_agent(lookback_window_size=hyperparams["window_size"],
                           features=hyperparams["features"],
                           batch_size=hyperparams["batch_size"])
    test_agent.generate_portfolio(year="2022", synthetic=False, split="whole")

    done = False
    obs = test_agent.reset()
    # Truthiness test instead of `is False`: an identity check would end the
    # loop after one step if the env returned a non-builtin boolean.
    while not done:
        w, _ = model.predict(torch.from_numpy(obs).float(), deterministic=True)
        obs, _reward, done, _info = test_agent.step(w)
    test_agent.render()

    return test_agent, model

# PPO settings passed to backtest_ppo for the scenario-4 run.
hyperparams = {
    "policy": CustomActorCriticPolicy,
    "clip_range": 0.4,
    "ent_coef": 1.3396973226004333e-07,
    "features": ["close", "low", "high", *features_list["Bollinger"]],
    "gae_lambda": 0.98,
    "gamma": 0.999,
    "learning_rate": 0.3470439486864241,
    "max_grad_norm": 0.9,
    "n_epochs": 20,
    "seed": 683,
    "vf_coef": 0.7187328940807216,
    "window_size": 141,
    "batch_size": 106,
    "policy_kwargs": {
        "layer_size": 32,
        "n_layers": 1,
        "activation_fn": activation_fn["relu"],
        "ortho_init": False,
    },
}

total_iterations = 1
eval_freq = 1
# The scenario label doubles as the tensorboard directory and checkpoint name.
scenario = f"scenario4_nosoft-cnn-mlp_it{total_iterations}"
# Set to a checkpoint name such as "scenario4_nosoft-cnn-mlp_it50" to resume.
load_model = None
test_agent, model = backtest_ppo(
    hyperparams,
    total_iterations=total_iterations,
    eval_freq=eval_freq,
    load_model=load_model,
    scenario=scenario,
)

##### Saving the model

In [None]:
# Store the PPO model under models/ppo/, named after the scenario label.
checkpoint_path = f"models/ppo/{scenario}"
model.save(checkpoint_path)

##### Saving the results

In [None]:
from cryptoportfolio.tools.performance_measures import max_drawdown, sharpe_ratio

# Append one comma-separated summary row for the 2022 backtest. The row is
# built before the file is opened, so a failing metric cannot leave a partial
# line behind, and the context manager closes the handle even on error.
metrics_row = [
    test_agent._name + "_" + scenario,
    "2022",
    str(test_agent._lookback_window_size),
    str(max_drawdown(test_agent._portfolio_values)),
    str(test_agent._portfolio_values[-1]),
    str(sharpe_ratio(test_agent._rates_of_return)),
    str(test_agent._sum_of_transaction_costs),
]
with open("logs/baselines_performance_metrics.csv", "a") as data:
    data.write("\n" + ",".join(metrics_row))

# Append the full portfolio-value series. This file is semicolon-separated
# because the stringified list itself contains commas.
values_row = [
    test_agent._name + "_" + scenario,
    "2022",
    str(test_agent._lookback_window_size),
    str(list(test_agent._portfolio_values)),
]
with open("logs/baselines_portfolio_values.csv", "a") as data:
    data.write("\n" + ";".join(values_row))

#### DDPG

In [None]:
import datetime, time
import pandas as pd
import gym
import numpy as np
from stable_baselines3 import PPO, DDPG
import torch.nn as nn
import torch
from typing import Any, Dict

from stable_baselines3 import DDPG
from cryptoportfolio.rlagent.rlagent_ddpg import RLAgent as ddpg_agent
from cryptoportfolio.rlagent.network import CustomCNN_DDPG
import pprint
import torch.nn.functional as F

from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from cryptoportfolio.tools.eval_mod import custom_eval_callback_scenario4
from cryptoportfolio.tools.eval_mod import custom_eval_callback_testset

def _compose_feature_sets():
    """Return the mapping from indicator-set label to the feature names it selects.

    Combined sets are assembled from the basic groups so that every name is
    spelled exactly once; each value is its own independent list.
    """
    rsi = ["RSI"]
    mcd = ["mcd", "mcd_signal"]
    sma = ["SMA_50", "SMA_200"]
    osz = ["stoch_oszillator", "stoch_oszillator_signal"]
    bollinger = ["Bollinger_middle", "Bollinger_low", "Bollinger_high"]
    return {
        "None": [],
        "RSI": [*rsi],
        "mcd": [*mcd],
        "SMA": [*sma],
        "RSI_mcd": rsi + mcd,
        "RSI_SMA": rsi + sma,
        "OSZ": [*osz],
        "RSI_OSZ": rsi + osz,
        "Bollinger": [*bollinger],
        "Bollinger_mcd": mcd + bollinger,
        "RSI_SMA_Bollinger": rsi + sma + bollinger,
        "mcd_OSZ": mcd + osz,
        "Everything": rsi + sma + bollinger + mcd + osz,
    }


# Indicator sets that can be appended to the base price features.
features_list = _compose_feature_sets()

# Named layer-width presets for the policy networks.
net_arch = {
    "default": [400, 300],
    "tiny": [10, 10],
    "tiny+": [10, 10, 10],
    "small": [100, 100],
    "medium": [300, 300],
    "large": [500, 500],
}

# Activation-layer classes selectable by name.
activation_fn = {
    "tanh": nn.Tanh,
    "relu": nn.ReLU,
    "elu": nn.ELU,
    "leaky_relu": nn.LeakyReLU,
}


def backtest_ddpg(hyperparams, total_iterations, eval_freq,
                  load_model=False, scenario=None):
    """Train (or resume) a DDPG model on the scenario-4 train split and backtest it on 2022.

    Args:
        hyperparams: Dict of DDPG settings. It is modified in place: ``"env"``
            is added and ``"policy_kwargs"`` is extended with the
            ``CustomCNN_DDPG`` feature-extractor configuration.
        total_iterations: Multiplier for the training length; training runs for
            ``total_iterations * agent._crash_length`` timesteps.
        eval_freq: Forwarded to ``custom_eval_callback_testset``.
        load_model: File name under ``models/ddpg/`` to resume from. Any falsy
            value (the default ``False``, or ``None``) builds a new model.
        scenario: Label used for the tensorboard directory ``logs/ddpg/<scenario>``
            when a new model is built.

    Returns:
        Tuple ``(test_agent, model)``: the 2022 backtest environment after the
        episode has been played and rendered, and the trained model.
    """
    # Used as the tensorboard run name so that repeated runs do not collide.
    timer = int(time.time())

    pp = pprint.PrettyPrinter()
    pp.pprint(hyperparams)

    # Training environment.
    agent = ddpg_agent(lookback_window_size=hyperparams["window_size"],
                       features=hyperparams["features"])
    agent.generate_portfolio(year="scenario4", synthetic=False, split="train")
    agent.set_weight_storage(hyperparams["buffer_size"])
    agent.set_batch_size(hyperparams["batch_size"])

    hyperparams.update({"env": agent})
    hyperparams["policy_kwargs"].update(
        {"features_extractor_class": CustomCNN_DDPG,
         "features_extractor_kwargs": dict(features_dim=13, agent_env=agent)}
    )

    # Truthiness check: the default is False, which an `is not None` test
    # would treat as a checkpoint name and try to load "models/ddpg/False".
    if load_model:
        # Wrap the env. A separate name is used for the monitored env so the
        # lambda does not capture a variable that is rebound afterwards.
        monitored_env = Monitor(agent)
        env = DummyVecEnv([lambda: monitored_env])
        _ = env.reset()

        # Attach the env through load() instead of assigning model.env, so the
        # library validates the spaces and forces a reset before training.
        model = DDPG.load(f"models/ddpg/{load_model}", env=env)

    else:
        logdir = f"logs/ddpg/{scenario}"
        model = DDPG(
            policy="CnnPolicy",
            env=hyperparams["env"],
            learning_rate=hyperparams["learning_rate"],
            buffer_size=hyperparams["buffer_size"],
            batch_size=hyperparams["batch_size"],
            learning_starts=hyperparams["learning_starts"],
            gamma=hyperparams["gamma"],
            tau=hyperparams["tau"],
            gradient_steps=hyperparams["gradient_steps"],
            tensorboard_log=logdir,
            policy_kwargs=hyperparams["policy_kwargs"],
            seed=hyperparams["seed"],
            verbose=1,
        )

    total_timesteps = total_iterations * agent._crash_length

    # Custom evaluation callback (test-set variant).
    eval_callback = custom_eval_callback_testset(hyperparams, total_timesteps, "DDPG", eval_freq)
    model.learn(total_timesteps, tb_log_name=f"{timer}", callback=eval_callback)

    # Backtest on the whole of 2022 with a fresh environment.
    test_agent = ddpg_agent(lookback_window_size=hyperparams["window_size"],
                            features=hyperparams["features"])
    test_agent.generate_portfolio(year="2022", synthetic=False, split="whole")

    # The recorded history starts with a uniform 13-element vector.
    # NOTE(review): 13 presumably matches the number of portfolio entries
    # (it equals features_dim above) - confirm against the agent.
    actions = [np.ones(13)/13]
    done = False
    obs = test_agent.reset()
    # Truthiness test instead of `is False`: an identity check would end the
    # loop after one step if the env returned a non-builtin boolean.
    while not done:
        w, _ = model.predict(torch.from_numpy(obs).float(), deterministic=True)
        obs, _reward, done, _info = test_agent.step(w)
        # The softmax of the raw action is what gets stored for rendering.
        actions.append(F.softmax(torch.from_numpy(w), dim=0).numpy())
    test_agent._weight_storage = actions
    test_agent.render()

    return test_agent, model

# DDPG settings passed to backtest_ddpg for the scenario-4 run.
hyperparams = {
    "batch_size": 131,
    "buffer_size": 15000,
    "features": ["close", "low", "high", *features_list["RSI_SMA"]],
    "gamma": 0.98,
    "gradient_steps": 2500,
    "learning_rate": 0.0741856008814114,
    "learning_starts": 110,
    "seed": 278,
    "tau": 0.005,
    "window_size": 155,
    "policy_kwargs": {
        "net_arch": net_arch["tiny"],
        "activation_fn": activation_fn["elu"],
    },
}

# Alternative setting kept for reference: total_iterations=50, eval_freq=10.
total_iterations = 1
eval_freq = 1
# The scenario label doubles as the tensorboard directory and checkpoint name.
scenario = f"scenario4_nosoft-cnn_it{total_iterations}"
load_model = None
test_agent, model = backtest_ddpg(
    hyperparams,
    total_iterations=total_iterations,
    eval_freq=eval_freq,
    scenario=scenario,
    load_model=load_model,
)

##### Saving the results

In [None]:
from cryptoportfolio.tools.performance_measures import max_drawdown, sharpe_ratio

# Append one comma-separated summary row for the 2022 backtest. The row is
# built before the file is opened, so a failing metric cannot leave a partial
# line behind, and the context manager closes the handle even on error.
metrics_row = [
    test_agent._name + "_" + scenario,
    "2022",
    str(test_agent._lookback_window_size),
    str(max_drawdown(test_agent._portfolio_values)),
    str(test_agent._portfolio_values[-1]),
    str(sharpe_ratio(test_agent._rates_of_return)),
    str(test_agent._sum_of_transaction_costs),
]
with open("logs/baselines_performance_metrics.csv", "a") as data:
    data.write("\n" + ",".join(metrics_row))

# Append the full portfolio-value series. This file is semicolon-separated
# because the stringified list itself contains commas.
values_row = [
    test_agent._name + "_" + scenario,
    "2022",
    str(test_agent._lookback_window_size),
    str(list(test_agent._portfolio_values)),
]
with open("logs/baselines_portfolio_values.csv", "a") as data:
    data.write("\n" + ";".join(values_row))

##### Saving Model

In [None]:
# Store the DDPG model under models/ddpg/, named after the scenario label.
checkpoint_path = f"models/ddpg/{scenario}"
model.save(checkpoint_path)

##### Loading model and continuing learning

In [None]:
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

# Checkpoint (under models/ddpg/) to resume from.
scenario = "_scenario2_nosoft-cnn_it50"

# Training environment on the 2021 train split.
agent = ddpg_agent(lookback_window_size=hyperparams["window_size"],
                   features=hyperparams["features"])
agent.generate_portfolio(year="2021", synthetic=False, split="train")
agent.set_weight_storage(hyperparams["buffer_size"])
agent.set_batch_size(hyperparams["batch_size"])

# A separate name is used for the monitored env so the lambda does not
# capture a variable that is rebound on the next line.
monitored_env = Monitor(agent)
env = DummyVecEnv([lambda: monitored_env])
obs = env.reset()

# Attach the env through load() instead of assigning new_model.env, so the
# library validates the spaces and forces a reset before training.
new_model = DDPG.load(f"models/ddpg/{scenario}", env=env)

total_iterations = 50
total_timesteps = agent._crash_length * total_iterations
new_model.learn(total_timesteps, tb_log_name="scenario2_training", reset_num_timesteps=True)

In [None]:
# Bare expression: evaluates the model's tensorboard_log attribute so that it
# is shown as this cell's output.
new_model.tensorboard_log

In [None]:
# Backtest new_model on the whole of 2022 with a fresh environment.
test_agent = ddpg_agent(lookback_window_size=hyperparams["window_size"],
                        features=hyperparams["features"])
test_agent.generate_portfolio(year="2022", synthetic=False, split="whole")

# The recorded history starts with a uniform 13-element vector.
# NOTE(review): 13 presumably matches the number of portfolio entries - confirm.
actions = [np.ones(13)/13]
done = False
obs = test_agent.reset()
# Truthiness test instead of `is False`: an identity check would end the loop
# after one step if the env returned a non-builtin boolean.
while not done:
    w, _ = new_model.predict(torch.from_numpy(obs).float(), deterministic=True)
    obs, reward, done, info = test_agent.step(w)
    # The softmax of the raw action is what gets stored for rendering.
    action = F.softmax(torch.from_numpy(w), dim=0).numpy()
    actions.append(action)
test_agent._weight_storage = actions
test_agent.render()

In [None]:
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

# Training environment on the 2021 train split.
agent = ddpg_agent(lookback_window_size=hyperparams["window_size"],
                   features=hyperparams["features"])
agent.generate_portfolio(year="2021", synthetic=False, split="train")
agent.set_weight_storage(hyperparams["buffer_size"])
agent.set_batch_size(hyperparams["batch_size"])

# Helper model: only its `env` attribute is reused below.
# NOTE(review): presumably built just to obtain a library-wrapped env - confirm.
model = DDPG(
    policy="CnnPolicy",
    env=agent,
    learning_rate=hyperparams["learning_rate"],
    buffer_size=hyperparams["buffer_size"],
    batch_size=hyperparams["batch_size"],
    learning_starts=hyperparams["learning_starts"],
    gamma=hyperparams["gamma"],
    tau=hyperparams["tau"],
    gradient_steps=hyperparams["gradient_steps"],
    policy_kwargs=hyperparams["policy_kwargs"],
    seed=hyperparams["seed"],
)
model.learn(1)

# Attach the env through load() instead of assigning new_model.env. Because
# learning continues with reset_num_timesteps=False, a plain attribute
# assignment would skip the env reset and reuse the checkpoint's stale
# last observation; load(env=...) forces the reset.
new_model = DDPG.load(f"models/ddpg/{scenario}", env=model.env)
new_model.learn(100, tb_log_name="scenario2_training", reset_num_timesteps=False)

In [None]:
import datetime, time
from stable_baselines3 import PPO
from stable_baselines3 import DDPG
from cryptoportfolio.rlagent.rlagent_ddpg import RLAgent as ddpg_agent
from cryptoportfolio.rlagent.network import CustomCNN_DDPG
import torch.nn.functional as F

import numpy as np
import torch.nn as nn
import torch
import pprint
import torch.nn as nn
from stable_baselines3.common.buffers import RolloutBuffer

from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

def _compose_feature_sets():
    """Return the mapping from indicator-set label to the feature names it selects.

    Combined sets are assembled from the basic groups so that every name is
    spelled exactly once; each value is its own independent list.
    """
    rsi = ["RSI"]
    mcd = ["mcd", "mcd_signal"]
    sma = ["SMA_50", "SMA_200"]
    osz = ["stoch_oszillator", "stoch_oszillator_signal"]
    bollinger = ["Bollinger_middle", "Bollinger_low", "Bollinger_high"]
    return {
        "None": [],
        "RSI": [*rsi],
        "mcd": [*mcd],
        "SMA": [*sma],
        "RSI_mcd": rsi + mcd,
        "RSI_SMA": rsi + sma,
        "OSZ": [*osz],
        "RSI_OSZ": rsi + osz,
        "Bollinger": [*bollinger],
        "Bollinger_mcd": mcd + bollinger,
        "RSI_SMA_Bollinger": rsi + sma + bollinger,
        "mcd_OSZ": mcd + osz,
        "Everything": rsi + sma + bollinger + mcd + osz,
    }


# Indicator sets that can be appended to the base price features.
features_list = _compose_feature_sets()

# Named layer-width presets for the policy networks.
net_arch = {
    "default": [400, 300],
    "tiny": [10, 10],
    "tiny+": [10, 10, 10],
    "small": [100, 100],
    "medium": [300, 300],
    "large": [500, 500],
}

# Activation-layer classes selectable by name.
activation_fn = {
    "tanh": nn.Tanh,
    "relu": nn.ReLU,
    "elu": nn.ELU,
    "leaky_relu": nn.LeakyReLU,
}

# PPO checkpoint names (relative to models/); not referenced in this cell.
scenario1 = "ppo/scenario1_nosoft-cnn-mlp_it50"
scenario2 = "ppo/scenario2_nosoft-cnn-mlp_it50"
scenario3 = "ppo/scenario3_nosoft-cnn-mlp_it30"
scenario4 = "ppo/scenario4_nosoft-cnn-mlp_it50"

# DDPG settings; only "window_size" and "features" are read below.
hyperparams = {
        "batch_size": 15,
        "buffer_size": 3000,
        "features" : ["close", "low", "high"] + features_list["RSI_mcd"],
        "gamma": 0.98,
        "gradient_steps": 500,
        "learning_rate": 0.03377375285211111,
        "learning_starts" : 30,
        "seed" : 501,
        "tau": 0.005,
        "window_size" : 156,
        "policy_kwargs": dict(
            net_arch=net_arch["tiny"],
            activation_fn=activation_fn["elu"],
)}

# Backtest environment covering the whole of 2022.
test_agent = ddpg_agent(lookback_window_size=hyperparams["window_size"],
                        features=hyperparams["features"])
test_agent.generate_portfolio(year="2022", synthetic=False, split="whole")

# Wrap the env. The wrapped env is not attached to the model in this cell.
# A separate name is used for the monitored env so the lambda does not
# capture a variable that is rebound on the next line.
monitored_env = Monitor(test_agent)
env = DummyVecEnv([lambda: monitored_env])
_ = env.reset()

# Load the trained checkpoint; prediction needs no env on the model.
scenario = "scenario4_nosoft-cnn_it50"
model = DDPG.load(f"models/ddpg/{scenario}")

# The recorded history starts with a uniform 13-element vector.
# NOTE(review): 13 presumably matches the number of portfolio entries - confirm.
actions = [np.ones(13)/13]
done = False
obs = test_agent.reset()
# Truthiness test instead of `is False`: an identity check would end the loop
# after one step if the env returned a non-builtin boolean.
while not done:
    w, _ = model.predict(torch.from_numpy(obs).float(), deterministic=True)
    obs, reward, done, info = test_agent.step(w)
    # The softmax of the raw action is what gets stored for rendering.
    action = F.softmax(torch.from_numpy(w), dim=0).numpy()
    actions.append(action)
test_agent._weight_storage = actions
test_agent.render()

In [None]:
import datetime, time
from stable_baselines3 import PPO
from cryptoportfolio.rlagent.rlagent_ppo import RLAgent as ppo_agent
from cryptoportfolio.rlagent.network import CustomCNN_PPO
import torch.nn.functional as F
from cryptoportfolio.rlagent.network import CustomActorCriticPolicy 

import numpy as np
import torch.nn as nn
import torch
import pprint
import torch.nn as nn

from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

def _compose_feature_sets():
    """Return the mapping from indicator-set label to the feature names it selects.

    Combined sets are assembled from the basic groups so that every name is
    spelled exactly once; each value is its own independent list.
    """
    rsi = ["RSI"]
    mcd = ["mcd", "mcd_signal"]
    sma = ["SMA_50", "SMA_200"]
    osz = ["stoch_oszillator", "stoch_oszillator_signal"]
    bollinger = ["Bollinger_middle", "Bollinger_low", "Bollinger_high"]
    return {
        "None": [],
        "RSI": [*rsi],
        "mcd": [*mcd],
        "SMA": [*sma],
        "RSI_mcd": rsi + mcd,
        "RSI_SMA": rsi + sma,
        "OSZ": [*osz],
        "RSI_OSZ": rsi + osz,
        "Bollinger": [*bollinger],
        "Bollinger_mcd": mcd + bollinger,
        "RSI_SMA_Bollinger": rsi + sma + bollinger,
        "mcd_OSZ": mcd + osz,
        "Everything": rsi + sma + bollinger + mcd + osz,
    }


# Indicator sets that can be appended to the base price features.
features_list = _compose_feature_sets()

# Activation-layer classes selectable by name.
activation_fn = {
    "tanh": nn.Tanh,
    "relu": nn.ReLU,
    "elu": nn.ELU,
    "leaky_relu": nn.LeakyReLU,
}

# PPO checkpoint names (relative to models/); not referenced in this cell.
scenario1 = "ppo/scenario1_nosoft-cnn-mlp_it50"
scenario2 = "ppo/scenario2_nosoft-cnn-mlp_it50"
scenario3 = "ppo/scenario3_nosoft-cnn-mlp_it30"
scenario4 = "ppo/scenario4_nosoft-cnn-mlp_it50"

# PPO settings; only "window_size", "features" and "batch_size" are read below.
hyperparams = {
        "policy": CustomActorCriticPolicy, 
        "clip_range": 0.4,
        "ent_coef" : 1.3396973226004333e-07,
        "features" : ["close", "low", "high"] + features_list["Bollinger"],
        "gae_lambda": 0.98,
        "gamma": 0.999,
        "learning_rate" : 0.3470439486864241,
        "max_grad_norm" : 0.9,
        "n_epochs" : 20,
        "seed" : 683,
        "vf_coef" : 0.7187328940807216,
        "window_size" : 141,
        "batch_size" : 106,
        "policy_kwargs": dict(
            layer_size=32,
            n_layers=1,
            activation_fn=activation_fn["relu"],
            ortho_init=False,
        )} 

# Backtest environment covering the whole of 2022.
test_agent = ppo_agent(lookback_window_size=hyperparams["window_size"],
                       features=hyperparams["features"],
                       batch_size=hyperparams["batch_size"])
test_agent.generate_portfolio(year="2022", synthetic=False, split="whole")

# Load the trained checkpoint; prediction needs no env on the model.
scenario = "scenario4_nosoft-cnn-mlp_it50"
model = PPO.load(f"models/ppo/{scenario}")

done = False
obs = test_agent.reset()
# Truthiness test instead of `is False`: an identity check would end the loop
# after one step if the env returned a non-builtin boolean.
while not done:
    w, _ = model.predict(torch.from_numpy(obs).float(), deterministic=True)
    obs, reward, done, info = test_agent.step(w)
test_agent.render()