### Imports
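The *gym_anytrading* and *crypto_env* imports are required to create the gymnasium environment: importing them is what makes the trading environment ids (such as `crypto-v0`) available to `gym.make`.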

In [None]:
import utils
import crypto_env

import gym_anytrading
import gymnasium as gym
import matplotlib.pyplot as plt
import pandas as pd
import quantstats as qs

from stable_baselines3 import A2C, PPO, DQN

### Defining the DataFrame

In [None]:
# Environment id handed to `gym.make` later in the notebook.
# Options noted by the author: "stocks-v0", "forex-v0", "crypto-v0".
dataset_type = "crypto-v0"
dataset_path = "data/crypto/ada-usd.csv"

# First row is the header; the "Date" column is parsed and used as the index.
csv_options = {
    "header": 0,
    "parse_dates": ["Date"],
    "index_col": "Date",
}
df = pd.read_csv(dataset_path, **csv_options)
df.head()

### Creating the environment

In [None]:
# Seed passed to `utils.train_test_model` for every run.
seed = 69

# Experiment budget, also passed to `utils.train_test_model`.
total_learning_timesteps = 100_000
total_num_episodes = 10

# The frame bound starts at `window_size` and runs to the end of the data.
window_size = 15
end_index = len(df)

env_options = {
    "df": df,
    "window_size": window_size,
    "frame_bound": (window_size, end_index),
}
env = gym.make(dataset_type, **env_options)

# Matplotlib: shared containers filled by `train_and_get_rewards` and read by
# the plotting cell. "x" holds the episode numbers 1..total_num_episodes.
plot_settings = {}
plot_data = {"x": list(range(1, total_num_episodes + 1))}

### Training and testing the model

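1. With the Advantage Actor-Critic (A2C) algorithm
2. With the Proximal Policy Optimization (PPO) algorithm
3. With the Deep Q-Network (DQN) algorithm
4. With random actions (not run by the calls in the cell below)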

In [None]:
def train_and_get_rewards(model_name):
    """Train one agent, evaluate it, and record its rewards for plotting.

    Builds the Stable-Baselines3 model selected by ``model_name`` on the
    module-level ``env`` and hands it to ``utils.train_test_model`` together
    with the module-level ``seed``, ``total_learning_timesteps`` and
    ``total_num_episodes``. The resulting rewards are reported through
    ``utils.get_results`` and stored in the module-level ``plot_data`` and
    ``plot_settings`` dicts under the key ``f"{model_name}_rewards"``.
    Finally the total profit and the ROI derived from it are printed.

    Args:
        model_name: ``"PPO"``, ``"DQN"`` or ``"A2C"``. Any other value
            results in ``model=None`` being passed to
            ``utils.train_test_model`` (presumably the random-action run;
            confirm against ``utils``).

    Returns:
        None. Results are printed and written to the plotting dicts.
    """
    print(f"Training {model_name} model…")

    # Unknown names deliberately fall through to `None` instead of raising.
    algorithms = {"PPO": PPO, "DQN": DQN, "A2C": A2C}
    algorithm = algorithms.get(model_name)
    model = algorithm("MlpPolicy", env) if algorithm is not None else None

    rewards, info = utils.train_test_model(
        model, env, seed, total_learning_timesteps, total_num_episodes
    )
    # Called with print_results=True for its report; its return value is not
    # used here.
    utils.get_results(rewards, model_name, print_results=True)

    series_key = f"{model_name}_rewards"
    plot_data[series_key] = rewards
    plot_settings[series_key] = {"label": model_name}

    # The formula below assumes `total_profit` is a multiplicative factor
    # where 1.0 means break-even.
    # NOTE: a wallet-based ROI (env.unwrapped.get_money_spent() /
    # get_wallet_value()) was sketched here previously but is disabled.
    profit = info[0]["total_profit"]
    roi = (profit - 1) * 100

    print(f"Total Profit = {profit:.8f}")
    print(f"ROI = {roi:.2f}%")


# Train and evaluate each algorithm in turn; the order fixes the order in
# which the reward series are added to the plotting dicts.
for algorithm_name in ("DQN", "PPO", "A2C"):
    train_and_get_rewards(algorithm_name)

### Plotting the results

In [None]:
# Plot every recorded reward series against the episode number.
data = pd.DataFrame(plot_data)
plt.figure(figsize=(12, 6))
for key in plot_data:
    # "x" is the shared episode axis; every other key is a reward series
    # whose legend label lives in `plot_settings`.
    if key != "x":
        plt.plot("x", key, data=data, linewidth=1, label=plot_settings[key]["label"])


plt.xlabel("episode")
plt.ylabel("reward")
plt.title("Random vs Agents")
plt.legend()
plt.show()