#A2C 1 - instalacje:

In [None]:
!pip install stable-baselines3

#A2C 2 - kod:

In [None]:
import torch
import os
from typing import List
import numpy as np
import pandas as pd
from torch import nn
from gym import Env, spaces
import matplotlib.pyplot as plt
from datetime import datetime
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from google.colab import drive
drive.mount('/content/gdrive')

#A2C 3 - kod:

In [None]:
class LSTM_Model(BaseFeaturesExtractor):
    """LSTM-based features extractor for Stable-Baselines3 policies.

    The observation is passed through a stacked LSTM, the per-timestep
    outputs are flattened, and two linear layers reduce them to
    ``features_dim`` values squashed into (0, 1) by a sigmoid.

    Args:
        observation_space: Observation space of the environment;
            ``observation_space.shape[1]`` is used as the sequence length
            when sizing the first linear layer.
        input_size: Number of features per timestep fed to the LSTM.
        features_dim: Size of the feature vector returned by ``forward``.
        hidden_size: Hidden size of the LSTM and of the first linear layer.
        layers_count: Number of stacked LSTM layers.
        dropout: Dropout probability used between LSTM layers and before
            the final sigmoid.
        device: Device the trainable layers are created on. If a CUDA
            device is requested but CUDA is unavailable, CPU is used
            instead of raising.
    """

    def __init__(self, observation_space, input_size=36, features_dim=36, hidden_size=128, layers_count=3, dropout=0.2, device="cuda"):
        super().__init__(observation_space, features_dim)

        # The default is "cuda"; without this guard every `.to(device)` below
        # raises on a CPU-only runtime.
        if str(device).startswith("cuda") and not torch.cuda.is_available():
            device = "cpu"

        self.layers_count = layers_count
        self.hidden_size = hidden_size
        self.device = device

        self.lstm = nn.LSTM(input_size, hidden_size, layers_count, dropout=dropout, batch_first=True).to(device)
        # The LSTM output for every timestep is flattened, hence
        # hidden_size * sequence_length input features.
        self.linear_1 = nn.Linear(hidden_size * observation_space.shape[1], hidden_size).to(device)
        self.linear_2 = nn.Linear(hidden_size, features_dim).to(device)
        self.dropout = nn.Dropout(dropout)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of observations to ``features_dim`` features.

        Args:
            x: Observation tensor with a size-1 axis at position -3
                (presumably the axis the environment adds with
                ``unsqueeze(0)`` - confirm against the env).

        Returns:
            Tensor of sigmoid-activated features, one row per batch item.
        """
        # Drop the singleton axis so the LSTM receives (batch, seq, features).
        x = torch.squeeze(x, dim=-3)
        # Replace NaN/inf values before they reach the network.
        x = torch.nan_to_num(x)
        x, _ = self.lstm(x)
        # Keep the batch axis, merge (seq, hidden) into one feature axis.
        x = torch.flatten(x, start_dim=1)
        x = self.linear_1(x)
        x = nn.functional.relu(x)
        x = self.linear_2(x)
        x = self.dropout(x)
        x = self.sigmoid(x)
        return x

#A2C 4 - kod:

In [None]:
def znorm(data):
    """Z-score normalise ``data`` along its second-to-last dimension.

    Each column (last axis) is shifted to zero mean and scaled to unit
    standard deviation, computed over the rows of axis ``-2``.

    Args:
        data: Tensor with at least two dimensions; any number of leading
            batch dimensions is supported.

    Returns:
        Tensor of the same shape as ``data``.
    """
    # keepdim=True keeps the reduced axis as size 1 so the statistics
    # broadcast against `data` for any batch size; without it only 2-D input
    # or a leading batch of exactly 1 lines up correctly.
    std, mean = torch.std_mean(data, dim=-2, keepdim=True)
    # The epsilon avoids division by zero for constant columns.
    return (data - mean) / (std + 1e-8)

def read_file(fname, v, rolling_len, offset):
    """Load one window of rows from an Excel sheet and pick a split.

    Rows ``offset`` to ``offset + rolling_len`` are taken from the sheet
    with the first column dropped. The first 80% of that window is the
    "train" split and the remainder is the "test" split; any other value
    of ``v`` returns the whole window.

    Args:
        fname: Path of the Excel file.
        v: Split selector, "train" or "test".
        rolling_len: Number of rows in the window.
        offset: Index of the first row of the window.

    Returns:
        Tuple ``(number_of_columns, tensor_of_values)``.
    """
    sheet = pd.read_excel(fname)
    window = sheet.iloc[offset:offset + rolling_len, 1:].values

    # Boundary between the training part and the held-out part of the window.
    split_at = int(len(window) * 8 / 10)
    if v == "train":
        window = window[:split_at]
    elif v == "test":
        window = window[split_at:]

    return window.shape[1], torch.Tensor(window)

class FinanceEnv(Env):
    """Gym environment that rewards portfolio weights with next-row returns.

    An observation is a z-normalised window of ``observation_length``
    consecutive rows with a leading singleton axis. An action holds one
    weight per column; the weights are scaled to unit L1 norm and the reward
    is the weighted sum of the relative change between the last row of the
    current window and the row that follows it.

    Args:
        fname: Path of the Excel file passed to ``read_file``.
        v: Split selector passed to ``read_file`` ("train" or "test").
        rolling_len: Number of rows in the data window.
        offset: Index of the first row of the data window.

    Raises:
        ValueError: If the selected data is too short to build one window.
    """

    def __init__(
        self,
        fname,
        v,
        rolling_len,
        offset
    ):
        super().__init__()

        self.observation_length = 20
        self.sequence_count, self.raw_sequences = read_file(fname, v, rolling_len, offset)

        # Profit of the running episode and final profit of finished episodes.
        self.total_profit = 0
        self.total_profits = []

        self.curr_step = 0

        first_state = self.read_frame(0)
        if first_state is None:
            # Fail with a clear message rather than an AttributeError on None.
            raise ValueError(
                "not enough rows to build an observation window of length {}".format(
                    self.observation_length
                )
            )
        self.observation_space = spaces.Box(
            low=-100, high=100, shape=tuple(first_state.unsqueeze(0).shape)
        )
        self.action_space = spaces.Box(low=-1, high=1, shape=(self.sequence_count,))

        self.profit_history = []

    def read_frame(self, index):
        """Return a copy of the window starting at ``index``.

        Returns ``None`` when the window would reach the end of the data.
        """
        if index + self.observation_length < self.raw_sequences.shape[-2] - 1:
            frame = self.raw_sequences[index: index + self.observation_length, :]
            # clone() copies without the warning torch.tensor(tensor) emits.
            return frame.clone().detach()
        return None

    def step(self, actions):
        """Apply portfolio weights and advance one row.

        Args:
            actions: Array-like or tensor with one weight per column.

        Returns:
            Tuple ``(next_state, reward, done, info)``. ``info`` carries
            "curr_step", "rewards" and "wallet".
        """
        weights = torch.as_tensor(actions, dtype=torch.float32).flatten()
        # Scale to unit L1 norm. An all-zero action stays all-zero instead of
        # producing NaN through a division by zero.
        exposure = weights.abs().sum()
        if exposure > 0:
            weights = weights / exposure

        frame = self.read_frame(self.curr_step + 1)

        if frame is None:
            # Data exhausted: close the episode and record its final profit.
            done = True
            self.total_profits.append(self.total_profit)
            # Numeric placeholder; the terminal observation is not meaningful.
            next_state = np.zeros(self.observation_space.shape, dtype=np.float32)
            reward = 0
        else:
            done = False
            # Last row of the current window and the row right after it.
            previous, latest = frame[-2, :], frame[-1, :]
            price_change = (latest - previous) / previous
            profit = (weights * price_change).sum().item()

            self.total_profit += profit
            self.profit_history.append(profit)
            reward = profit

            self.curr_step += 1
            # `frame` already is the window starting at the new curr_step.
            next_state = znorm(frame.unsqueeze(0))

        return (
            next_state,
            reward,
            done,
            {
                "curr_step": self.curr_step,
                "rewards": [reward],
                "wallet": self.total_profit,
            },
        )

    def reset(self):
        """Start a new episode and return the first observation."""
        self.total_profit = 0
        self.curr_step = 0

        self.profit_history = []

        # Normalise like step() does, so every observation in an episode is
        # on the same scale.
        return znorm(self.read_frame(0).unsqueeze(0))

    def render(self, mode="human"):
        """Rendering is not implemented."""
        pass

class SaveOnBestTrainingRewardCallback(BaseCallback):
    """Record the model's final wallet on test and train data per episode.

    Each time the training environment finishes one or more episodes, the
    current model is run deterministically over the "test" and "train"
    splits, and the final wallet of each run is appended to ``wallets`` and
    ``train_wallets`` once per finished episode.

    Args:
        check_freq: Stored on the instance; not used by ``_on_step``.
        rolling_len: Number of rows in the data window used for evaluation.
        offset: Index of the first row of the data window.
        verbose: Verbosity level forwarded to ``BaseCallback``.
        data_path: Excel file the evaluation environments read from.
    """

    def __init__(self, check_freq, rolling_len, offset, verbose=1,
                 data_path='gdrive/MyDrive/PracaMag/Dane.xlsx'):
        super().__init__(verbose)
        self.check_freq = check_freq
        # Keep the window parameters on the instance so evaluation does not
        # depend on notebook-level globals of the same name.
        self.rolling_len = rolling_len
        self.offset = offset
        self.data_path = data_path
        self.best_mean_reward = -np.inf
        self.wallets = []
        self.train_wallets = []

    def _final_wallet(self, split):
        """Run one deterministic episode on ``split``; return its final wallet."""
        env = FinanceEnv(self.data_path, split, self.rolling_len, self.offset)
        obs = env.reset()
        done = False
        info = None
        while not done:
            # self.model is the algorithm this callback is attached to.
            action, _ = self.model.predict(obs, deterministic=True)
            obs, _, done, info = env.step(action)
        return info["wallet"]

    def _on_step(self) -> bool:
        """Evaluate after newly finished training episodes; never stops training."""
        # Episodes finished by the training env that are not yet evaluated.
        num = len(self.training_env.envs[0].total_profits) - len(self.wallets)
        if num > 0:
            test_wallet = self._final_wallet("test")
            train_wallet = self._final_wallet("train")
            # One entry per finished episode keeps both lists aligned with
            # the training env's total_profits.
            self.wallets.extend([test_wallet] * num)
            self.train_wallets.extend([train_wallet] * num)
        return True

#A2C 5 - kod:

In [None]:
# Training driver: trains a fresh A2C model on six data windows, each
# `rolling_len` rows long and shifted by 200 rows from the previous one, then
# compares its test-split result with an equal-weight baseline.
epochs = int(40000)
rolling_len = 500

# Single place for the data location instead of repeating the literal.
data_path = 'gdrive/MyDrive/PracaMag/Dane.xlsx'

now = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")
res_dir = "gdrive/MyDrive/PracaMag/results/{}_A2C".format(now)

# makedirs also creates a missing parent "results" folder; os.mkdir would fail.
os.makedirs(res_dir, exist_ok=True)

# Final test wallet per window for the trained policy and for the baseline.
wallets = []
naive_wallets = []

offset = 0

for i in range(6):
    env = FinanceEnv(data_path, "train", rolling_len, offset)
    model = A2C(
        "MlpPolicy",
        env,
        policy_kwargs={
            "features_extractor_class": LSTM_Model,
            "features_extractor_kwargs": {
                "input_size": env.sequence_count,
                "features_dim": env.sequence_count,
            }
        },
        learning_rate=2e-5,
        gamma=0.0,
        verbose=0
    )
    callback = SaveOnBestTrainingRewardCallback(500, rolling_len, offset)
    model.learn(total_timesteps=epochs, callback=callback)

    env = None
    test_env = FinanceEnv(data_path, "test", rolling_len, offset)

    # Deterministic run of the trained policy over the test split.
    obs = test_env.reset()
    wallet = []
    actions = []

    done = False
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, info = test_env.step(action)
        wallet.append(info["wallet"])
        actions.append(action)

    # Baseline: the same positive weight on every column at every step.
    # The action is constant, so it is built once; it is not added to
    # `actions`, which logs the policy's decisions only.
    naive_action = torch.full((test_env.sequence_count,), 1 / test_env.sequence_count)
    naive_score = 0
    done = False
    obs = test_env.reset()
    while not done:
        obs, reward, done, info = test_env.step(naive_action)
        naive_score = info["wallet"]

    naive_wallets.append(naive_score)

    fig = plt.figure()
    plt.title(label="Final wallet through training")
    plt.xlabel('No of runs through data set')
    plt.ylabel('Final wallet')
    plt.plot(callback.wallets, label="test")
    plt.plot(callback.train_wallets, label="train")
    plt.legend(loc='upper center')
    fig.savefig("{}/{}_a".format(res_dir, i))
    plt.close()
    fig = plt.figure()
    plt.title(label="Test wallet through days")
    plt.ylabel("Final wallet")
    plt.xlabel('day of trading')
    plt.plot(wallet)
    fig.savefig("{}/{}_b".format(res_dir, i))
    plt.close()

    wallets.append(wallet[-1])
    # Move the data window forward for the next run.
    offset += 200
    test_env = None

# Summary across all windows: trained policy versus the baseline.
run_numbers = list(range(1, len(naive_wallets) + 1))
fig = plt.figure()
plt.title(label="Final wallet")
plt.plot(run_numbers, wallets, label="rl")
plt.plot(run_numbers, naive_wallets, label="naive")
plt.legend()
fig.savefig("{}/wallet".format(res_dir))
plt.close()
np.savetxt("{}/walletarr".format(res_dir), wallets)
np.savetxt("{}/naive_walletarr".format(res_dir), naive_wallets)