In [None]:
import pandas as pd

# Load one price-history CSV per ticker from the notebook's working directory.
# In the cells visible here, INTC is used for training and AMD / META /
# alphabet / CSCO for evaluation; IBM and NVDA are loaded but not referenced.
df_shares_INTC = pd.read_csv("INTC.csv")
# `.name` is assigned as a plain attribute on the DataFrame object; it does not
# create a column. NOTE(review): nothing in the visible cells reads it.
df_shares_INTC.name = "intel"
df_shares_IBM = pd.read_csv("IBM.csv")
df_shares_IBM.name = "ibm"
df_shares_NVDA = pd.read_csv("NVDA.csv")
df_shares_NVDA.name = "nvidia"
# The remaining frames are loaded without a `.name` label.
df_shares_AMD = pd.read_csv("AMD.csv")
df_shares_META = pd.read_csv("META.csv")
df_shares_GOOGLE = pd.read_csv("alphabet.csv")
df_shares_CISCO = pd.read_csv("CSCO.csv")

In [None]:
# Install the trading environment package and stable-baselines3 into the
# kernel's environment. Versions are not pinned.
# NOTE(review): stable-baselines3 is not referenced in the visible cells.
%pip install gym-anytrading
%pip install stable-baselines3

# Download the conda-forge build of the TA-Lib C library (0.4.0, linux-64) and
# unpack the archive's `lib` directory into the system library directory.
url = 'https://anaconda.org/conda-forge/libta-lib/0.4.0/download/linux-64/libta-lib-0.4.0-h166bdaf_1.tar.bz2'
!curl -L $url | tar xj -C /usr/lib/x86_64-linux-gnu/ lib --strip-components=1
# Download the matching Python wrapper (ta-lib 0.4.19, a py310 build) and unpack
# its `talib` package into the hard-coded Python 3.10 dist-packages directory.
# Both target paths assume a Linux x86_64 host with Python 3.10.
url = 'https://anaconda.org/conda-forge/ta-lib/0.4.19/download/linux-64/ta-lib-0.4.19-py310hde88566_4.tar.bz2'
!curl -L $url | tar xj -C /usr/local/lib/python3.10/dist-packages/ lib/python3.10/site-packages/talib --strip-components=3
import talib

import gymnasium as gym

In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt

from gym_anytrading.envs import StocksEnv, Actions, Positions

def reshape_obs(state, action, actual_ws):
    """Turn a raw environment window into the flat feature vector fed to the agent.

    The result holds, in order: the last ``actual_ws`` prices standardised to
    zero mean / unit variance, the action, the 9-period RSI divided by 100,
    and the relative distance of the latest price from its 200-period SMA.

    Args:
        state: 2-D array; column 0 is used as the price series (presumably the
            closing price - verify against the environment that produces it).
        action: value appended after the price window.
        actual_ws: number of most recent prices kept in the observation.

    Returns:
        1-D array with ``actual_ws + 3`` entries.

    NOTE(review): the 200-period SMA needs at least 200 rows in ``state``;
    presumably TA-Lib yields NaN for shorter windows - confirm.
    """
    prices = state[:, 0]
    ws = len(prices)

    # RSI over the last `rsi_timeperiod + 1` prices, rescaled from [0, 100].
    rsi_timeperiod = 9
    rsi_9 = talib.RSI(np.float64(prices[ws - rsi_timeperiod - 1:ws]), timeperiod=rsi_timeperiod)[-1]
    rsi_9 = rsi_9 / 100

    # Relative gap between the latest price and its 200-period moving average.
    sma_200d = np.float32(talib.SMA(np.array(prices, dtype=np.float64), timeperiod=200))[-1]
    sma_200d = (prices[-1] / sma_200d) - 1

    window = prices[ws - actual_ws:ws]

    # A perfectly flat window has zero standard deviation; dividing by it would
    # fill the observation with NaN. Fall back to a divisor of 1 so the
    # standardised window is simply all zeros.
    spread = np.std(window)
    if spread == 0:
        spread = 1
    window = (window - np.mean(window)) / spread

    return np.append(window, [action, rsi_9, sma_200d])

class MyStocksEnv(StocksEnv):
    """``StocksEnv`` variant with a flat, feature-augmented observation.

    What this subclass changes, as visible in its code:

    * observations are built by ``reshape_obs`` and have ``actual_ws + 3`` entries;
    * the reward is non-zero only when a long position is closed (a Sell while
      Long, or the episode ending while Long) and equals the relative price
      change since the last trade tick;
    * both trade-fee percentages are set to zero;
    * the number of closing trades and the lengths of runs of identical
      consecutive actions are recorded for ``get_stats``.
    """

    def __init__(self, actual_ws, **kwargs):
        """Create the environment.

        Args:
            actual_ws: number of most recent prices kept in each observation.
            **kwargs: forwarded unchanged to ``StocksEnv.__init__``.
        """
        super().__init__(**kwargs)

        self.actual_ws = actual_ws
        # Price window plus the three extra features appended by reshape_obs.
        self.shape = (self.actual_ws + 3, )
        INF = 1e10
        self.observation_space = gym.spaces.Box(
            low=-INF, high=INF, shape=self.shape, dtype=np.float32,
        )

        self.last_action = 0

        self.number_of_trades = 0
        self.days_holding_position = 1
        self.history_days_holding_position = []

        self._total_reward = 0.
        self.trade_fee_bid_percent = 0
        self.trade_fee_ask_percent = 0

    def is_trade(self, action):
        """Return True when ``action`` closes a long position (Sell while Long)."""
        return bool(action == Actions.Sell.value and self._position == Positions.Long)

    def _calculate_reward(self, action):
        """Return the relative price change realised at the current tick, else 0.

        A reward is paid when a long position is sold, or when the episode is
        truncated while still long. Each such event also increments
        ``number_of_trades``.
        """
        step_reward = 0

        if self.is_trade(action) or (self._truncated and self._position == Positions.Long):
            self.number_of_trades += 1
            current_price = self.prices[self._current_tick]
            last_trade_price = self.prices[self._last_trade_tick]
            step_reward = (current_price / last_trade_price) - 1

        return step_reward

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        All per-episode bookkeeping is re-initialised, including
        ``last_action``, so holding-run statistics never depend on how the
        previous episode ended.
        """
        super().reset(seed=seed, options=options)
        self.action_space.seed(int((self.np_random.uniform(0, seed if seed is not None else 1))))

        # Per-episode statistics. `last_action` is reset to the same value the
        # constructor uses (and that the initial observation below reports);
        # previously it leaked across episodes.
        self.last_action = 0
        self.number_of_trades = 0
        self.days_holding_position = 1
        self.history_days_holding_position = []

        self._truncated = False
        self._current_tick = self._start_tick
        self._last_trade_tick = self._current_tick - 1
        self._position = Positions.Short
        self._position_history = (self.window_size * [None]) + [self._position]
        self._total_reward = 0.
        self._total_profit = 1.  # unit
        self._first_rendering = True
        self.history = {}

        observation = self._get_observation()
        observation = reshape_obs(observation, 0, self.actual_ws)
        info = self._get_info()

        if self.render_mode == 'human':
            self._render_frame()

        return observation, info

    def step(self, action):
        """Advance one tick.

        Returns:
            ``(observation, reward, flag, flag, info)`` where both flags carry
            the same value: True once the current tick reaches the end tick.
        """
        self._truncated = False
        self._current_tick += 1

        if self._current_tick == self._end_tick:
            # Close the holding run that was open when the episode ended.
            self.history_days_holding_position.append(self.days_holding_position)
            self._truncated = True

        # Reward and profit are computed before the position is flipped below.
        step_reward = self._calculate_reward(action)
        self._total_reward += step_reward

        self._update_profit(action)

        trade = False
        if (
            (action == Actions.Buy.value and self._position == Positions.Short) or
            (action == Actions.Sell.value and self._position == Positions.Long)
        ):
            trade = True

        if trade:
            self._position = self._position.opposite()
            self._last_trade_tick = self._current_tick

        self._position_history.append(self._position)
        observation = self._get_observation()
        observation = reshape_obs(observation, action, self.actual_ws)
        info = self._get_info()
        self._update_history(info)

        # Track the lengths of runs of identical consecutive actions.
        if self.last_action == action:
            self.days_holding_position += 1
        else:
            self.history_days_holding_position.append(self.days_holding_position)
            self.days_holding_position = 1
            self.last_action = action

        if self.render_mode == 'human':
            self._render_frame()

        return observation, step_reward, self._truncated, self._truncated, info

    def get_stats(self):
        """Return ``(number_of_trades, mean_run_length, run_lengths)`` for the episode."""
        return self.number_of_trades, np.mean(self.history_days_holding_position), self.history_days_holding_position

    def get_trend(self):
        """Return "decreasing" if the price at index ``window_size`` exceeds the last price, else "rising"."""
        if self.prices[self.window_size] - self.prices[-1] > 0:
            return "decreasing"
        return "rising"

    def render_all(self, max_days, title=None):
        """Plot the price series and mark every tick where the position changed.

        Args:
            max_days: controls the figure width (3 inches per 100 days, rounded up).
            title: treated as a flag only; when truthy a fixed title showing the
                first frame bound is drawn. The passed text itself is not shown.
        """
        figsize_x = math.ceil((max_days / 100) * 3)
        plt.figure(figsize=(figsize_x, 6))
        window_ticks = np.arange(len(self._position_history))
        plt.plot(self.prices)

        short_ticks = []
        long_ticks = []
        last_pos = None
        for i, tick in enumerate(window_ticks):
            current_pos = self._position_history[i]

            # Only the first tick of each new position is marked.
            if current_pos == last_pos:
                continue

            if current_pos == Positions.Short:
                short_ticks.append(tick)
            elif current_pos == Positions.Long:
                long_ticks.append(tick)

            last_pos = current_pos

        plt.plot(short_ticks, self.prices[short_ticks], 'ro', label="Sprzedaj")
        plt.plot(long_ticks, self.prices[long_ticks], 'go', label="Kup")

        plt.legend()

        if title:
            plt.title(f"Stocks from day {self.frame_bound[0]}")

        plt.suptitle(
            "Total Reward: %.6f" % self._total_reward + ' ~ ' +
            "Total Profit: %.6f" % self._total_profit
        )
        plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random
import tensorflow as tf
tf.compat.v1.disable_eager_execution()

from keras.layers import Dense, Activation
from keras.models import Sequential, load_model
from keras.optimizers import Adam

def create_model(learning_rate, input_dims, output_dims, l1_dims, l2_dims):
    """Build and compile a two-hidden-layer fully connected network.

    Layers, in order: Dense(l1_dims) -> ReLU -> Dense(l2_dims) -> ReLU ->
    Dense(output_dims) with no final activation. The model is compiled with
    the legacy Adam optimizer and mean-squared-error loss.

    Args:
        learning_rate: Adam learning rate.
        input_dims: length of the input feature vector.
        output_dims: number of output units.
        l1_dims: width of the first hidden layer.
        l2_dims: width of the second hidden layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    network = Sequential()
    network.add(Dense(l1_dims, input_shape=(input_dims,)))
    network.add(Activation('relu'))
    network.add(Dense(l2_dims))
    network.add(Activation('relu'))
    network.add(Dense(output_dims))

    network.compile(
        optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate),
        loss='mse',
    )
    return network


class SumTree:
    """Array-backed binary tree whose internal nodes store the sum of their children.

    The last ``max_size`` entries of ``tree`` are the leaves (one priority per
    stored item); ``data`` holds the integer payload of each leaf. Items are
    written in a ring: once ``max_size`` items exist, the oldest is overwritten.
    """

    def __init__(self, max_size):
        """Allocate an empty tree with room for ``max_size`` items."""
        self.max_size = max_size
        self.data_idx = 0  # next ring-buffer slot to write
        self.data = np.zeros(max_size, dtype=np.int32)
        self.tree = np.zeros(2 * max_size - 1)

    def add(self, priority, data):
        """Store ``data`` with ``priority`` in the next slot, then advance the slot."""
        slot = self.data_idx
        self.data[slot] = data
        self.update(slot + self.max_size - 1, priority)
        self.data_idx = (slot + 1) % self.max_size

    def update(self, tree_index, priority):
        """Set the node at ``tree_index`` to ``priority`` and fix up every ancestor sum."""
        delta = priority - self.tree[tree_index]
        self.tree[tree_index] = priority

        node = tree_index
        while node != 0:
            node = (node - 1) // 2
            self.tree[node] += delta

    def sample(self, value):
        """Walk from the root to the leaf whose cumulative-priority interval contains ``value``.

        Returns:
            ``(leaf_index, leaf_priority, stored_data)``.
        """
        node_count = len(self.tree)
        node = 0
        left = 1
        while left < node_count:
            left_sum = self.tree[left]
            if value <= left_sum:
                node = left
            else:
                # Skip the left subtree and search the remainder on the right.
                value -= left_sum
                node = left + 1
            left = 2 * node + 1

        return node, self.tree[node], self.data[node - self.max_size + 1]

    def get_total_priority(self):
        """Return the sum of all leaf priorities (the root value)."""
        return self.tree[0]

class PrioritizedExpirenceReplay:
    """Fixed-size transition buffer sampled in proportion to priorities kept in a ``SumTree``.

    Transitions live in parallel numpy arrays used as a ring buffer. Actions
    are stored one-hot, and the ``dones`` array stores ``1 - done`` so it can
    be used directly as a multiplicative mask.
    """

    def __init__(self, mem_size, state_space_n, action_space_n, alpha=0.6, beta=0.4, beta_incr=0.001):
        """Allocate storage.

        Args:
            mem_size: capacity of the buffer.
            state_space_n: length of a state vector.
            action_space_n: number of discrete actions (width of the one-hot rows).
            alpha: exponent applied to priorities in ``update_priorities``.
            beta: exponent of the importance-sampling weights.
            beta_incr: stored only; annealing of ``beta`` is currently disabled.
        """
        self.mem_size = mem_size
        self.mem_cntr = 0  # total number of transitions ever stored

        self.states = np.zeros((self.mem_size, state_space_n))
        self.next_states = np.zeros((self.mem_size, state_space_n))
        self.actions = np.zeros((self.mem_size, action_space_n), dtype=np.int8)
        self.rewards = np.zeros(self.mem_size)
        self.dones = np.zeros(self.mem_size, dtype=np.float32)

        self.sum_tree = SumTree(self.mem_size)

        self.alpha = alpha
        self.beta = beta
        self.beta_incr = beta_incr

    def store_transition(self, state, action, reward, next_state, done):
        """Write one transition into the next ring slot with the current maximum priority."""
        slot = self.mem_cntr % self.mem_size

        self.states[slot] = state
        self.next_states[slot] = next_state
        one_hot = np.zeros(self.actions.shape[1])
        one_hot[action] = 1.0
        self.actions[slot] = one_hot
        self.rewards[slot] = reward
        self.dones[slot] = 1 - done

        # New transitions get the largest leaf priority seen so far (1 when
        # every leaf is still zero).
        leaf_priorities = self.sum_tree.tree[-self.sum_tree.max_size:]
        priority = np.max(leaf_priorities)
        if priority == 0:
            priority = 1
        self.sum_tree.add(priority, slot)

        self.mem_cntr += 1

    def get_sample_indicies(self, batch_size):
        """Draw one sample from each of ``batch_size`` equal slices of the total priority.

        Returns:
            Three arrays: tree leaf indices, buffer indices, and leaf priorities.
        """
        segment = self.sum_tree.get_total_priority() / batch_size
        draws = [
            self.sum_tree.sample(random.uniform(segment * k, segment * (k + 1)))
            for k in range(batch_size)
        ]
        tree_indicies = [draw[0] for draw in draws]
        priorities = [draw[1] for draw in draws]
        mem_indices = [draw[2] for draw in draws]
        return np.array(tree_indicies), np.array(mem_indices), np.array(priorities)

    def is_memory_prepared(self, batch_size):
        """Return True once at least ``batch_size`` transitions have been stored."""
        return self.mem_cntr >= batch_size

    def sample_memory(self, batch_size):
        """Sample a batch and its importance-sampling weights.

        Returns:
            ``(states, actions, rewards, next_states, dones, tree_indicies, weights)``;
            weights are divided by the weight of the smallest non-zero leaf priority.
        """
        filled = min(self.mem_cntr, self.mem_size)

        tree_indicies, mem_indices, mem_priorities = self.get_sample_indicies(batch_size)
        total_priority = self.sum_tree.get_total_priority()

        # Smallest priority among the leaves that have been written.
        leaf_priorities = self.sum_tree.tree[-self.sum_tree.max_size:]
        min_priority = np.min(leaf_priorities[leaf_priorities != 0])

        sample_probs = mem_priorities / total_priority
        min_prob = min_priority / total_priority

        weights = np.power(filled * sample_probs, -self.beta)
        weights = weights / np.power(filled * min_prob, -self.beta)

        # Annealing of beta (beta + beta_incr, capped at 1.0) is currently disabled.

        return (
            self.states[mem_indices],
            self.actions[mem_indices],
            self.rewards[mem_indices],
            self.next_states[mem_indices],
            self.dones[mem_indices],
            tree_indicies,
            weights,
        )

    def update_priorities(self, mem_indices, priorities, offset=0.001):
        """Write ``(priority + offset) ** alpha`` into the tree for each given index.

        The indices are passed straight to ``SumTree.update``, i.e. they are
        interpreted as tree indices.
        """
        for indice, priority in zip(mem_indices, priorities):
            self.sum_tree.update(indice, np.power(priority + offset, self.alpha))

class Agent:
    """Epsilon-greedy Q-learning agent with an online network, a target network and prioritized replay.

    In ``learn`` the online network picks the best next action and the target
    network supplies its value (the Double-DQN target).
    """

    def __init__(
            self,
            state_space_n,
            action_space_n,
            learning_rate=0.00001,
            l1_dims=256,
            l2_dims=256,
            gamma=0.99,
            epsilon=1.0,
            epsilon_decay_rate=0.999,
            batch_size=64,
            epsilon_min=0.03,
            mem_size=10000,
            model_sync_freq=250,
            alpha=1,
            beta=0,
            beta_incr=0,
        ):
        """Create both networks and the replay buffer.

        Args:
            state_space_n: length of a state vector.
            action_space_n: number of discrete actions.
            learning_rate, l1_dims, l2_dims: passed to ``create_model``.
            gamma: discount factor.
            epsilon, epsilon_decay_rate, epsilon_min: exploration schedule.
            batch_size: number of transitions per learning step.
            mem_size, alpha, beta, beta_incr: passed to the replay buffer.
            model_sync_freq: the target network is synced whenever the buffer's
                store counter is a multiple of this value.
        """
        self.action_space_values = list(range(action_space_n))
        self.action_space = np.array(self.action_space_values, dtype=np.int8)

        self.gamma = gamma
        self.batch_size = batch_size
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay_rate = epsilon_decay_rate

        self.replay_buffer = PrioritizedExpirenceReplay(mem_size, state_space_n, action_space_n, alpha, beta, beta_incr)

        self.q_model = create_model(learning_rate, state_space_n, action_space_n, l1_dims, l2_dims)
        self.q_target_model = create_model(learning_rate, state_space_n, action_space_n, l1_dims, l2_dims)
        self.model_sync_freq = model_sync_freq

        self.q_model_loss = []  # one entry per fit() call

    def store_transition(self, state, action, reward, new_state, done):
        """Forward one transition to the replay buffer."""
        self.replay_buffer.store_transition(state, action, reward, new_state, done)

    def choose_action(self, state):
        """Return a random action with probability ``epsilon``, else the greedy one."""
        # The random action is always drawn first, so the RNG is consumed the
        # same way whichever branch is taken.
        random_action = np.random.choice(self.action_space_values)
        if np.random.random() >= self.epsilon:
            return self.choose_best_action(state)
        return random_action

    def choose_best_action(self, state):
        """Return the index of the highest Q-value predicted by the online network."""
        q_values = self.q_model.predict(state[np.newaxis, :])
        return np.argmax(q_values)

    def learn(self):
        """Run one training step on a sampled batch; a no-op until the buffer holds a full batch."""
        memory = self.replay_buffer
        if not memory.is_memory_prepared(self.batch_size):
            return

        batch = memory.sample_memory(self.batch_size)
        states, actions, rewards, new_states, dones, tree_indicies, weights = batch

        # Recover integer action ids from the one-hot rows.
        taken = np.dot(actions, self.action_space)

        targets = self.q_model.predict(states)
        previous_q = np.array(targets)  # copy taken before the targets are overwritten
        online_next = self.q_model.predict(new_states)
        target_next = self.q_target_model.predict(new_states)

        greedy_next = np.argmax(online_next, axis=1)
        rows = np.arange(self.batch_size, dtype=np.int32)

        # `dones` holds 1 - done, so the bootstrap term vanishes on final transitions.
        targets[rows, taken] = rewards + self.gamma * target_next[rows, greedy_next.astype(int)] * dones

        fit_result = self.q_model.fit(states, targets, sample_weight=weights, verbose=0)
        self.save_model_loss(fit_result)

        td_errors = abs(targets[rows, taken] - previous_q[rows, taken])
        memory.update_priorities(tree_indicies, td_errors)

        if memory.mem_cntr % self.model_sync_freq == 0:
            self.update_network_parameters()

    def save_model_loss(self, loss):
        """Append the ``"loss"`` history of a fit result to ``q_model_loss``."""
        self.q_model_loss.append(loss.history["loss"])

    def update_network_parameters(self):
        """Copy the online network's weights into the target network."""
        online_weights = self.q_model.get_weights()
        self.q_target_model.set_weights(online_weights)

    def load_model(self, file_name):
        """Replace the online network with the model stored at ``file_name`` and sync the target."""
        self.q_model = load_model(file_name)
        self.update_network_parameters()

    def update_epsilon(self):
        """Multiply epsilon by the decay rate, never going below ``epsilon_min``."""
        decayed = self.epsilon * self.epsilon_decay_rate
        self.epsilon = max(decayed, self.epsilon_min)

    def save_model(self, timestep, file_name="DDQN_PER_model"):
        """Save the online network to ``{file_name}_{timestep}.h5``."""
        self.q_model.save(f"{file_name}_{timestep}.h5")

In [None]:
def create_plot(xlabel, ylabel, title, values):
    """Draw ``values`` against their index as a red line, show it, then print a blank line.

    Args:
        xlabel: label for the x axis.
        ylabel: label for the y axis.
        title: plot title.
        values: sequence of y values; the x values are ``0 .. len(values) - 1``.
    """
    positions = range(len(values))
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.plot(positions, values, "r")
    plt.show()
    print()

class AnytradingStatsCollector(object):
    """Accumulates per-episode profit, score and epsilon and reports summaries."""

    def __init__(self):
        """Start with empty histories."""
        self.stats_cntr = 0  # number of episodes recorded
        self.profits = []
        self.scores = []
        self.epsilons = []

    def store_episode_results(self, profit, score, epsilon):
        """Record the outcome of one finished episode."""
        for history, value in (
            (self.profits, profit),
            (self.scores, score),
            (self.epsilons, epsilon),
        ):
            history.append(value)
        self.stats_cntr += 1

    def print_stats(self, backward_episodes=100):
        """Print the latest episode plus mean/median over the last ``backward_episodes`` episodes."""
        first = max(0, self.stats_cntr - backward_episodes)
        recent_profits = self.profits[first:self.stats_cntr]
        recent_scores = self.scores[first:self.stats_cntr]

        avg_profit = np.mean(recent_profits)
        avg_score = np.mean(recent_scores)
        median_profit = np.median(recent_profits)
        median_score = np.median(recent_scores)

        print(
            f"EP: {self.stats_cntr - 1}, epsilon: {self.epsilons[-1]}, score: {self.scores[-1]}, "
            f"profit: {self.profits[-1]}, avg_score: {avg_score}, avg_profit: {avg_profit},  "
            f"median_score: {median_score}, median_profit: {median_profit}"
        )

    def print_stats_plots(self):
        """Plot the score and profit histories, one figure each."""
        create_plot("Episode", "Score", "Score per epiosde", self.scores)
        create_plot("Episode", "Profit", "Profit per epiosde", self.profits)

    def get_avg_profit_form_last_10_eps(self):
        """Return the mean profit of the last 10 episodes, or 0 until more than 10 are recorded."""
        if self.stats_cntr > 10:
            return np.mean(self.profits[self.stats_cntr - 10:self.stats_cntr])
        return 0

In [None]:
def make_env(actual_ws, dataframe, ws, start, period):
    """Create a ``MyStocksEnv`` over ``dataframe``.

    Args:
        actual_ws: number of recent prices kept in each observation.
        dataframe: price data passed to the environment as ``df``.
        ws: environment ``window_size``.
        start: first index of the frame bound.
        period: the frame bound ends at ``start + period + 1``.

    Returns:
        The constructed environment.
    """
    frame_bound = (start, start + period + 1)
    return MyStocksEnv(
        actual_ws,
        df=dataframe,
        window_size=ws,
        frame_bound=frame_bound,
    )

In [None]:
# Training run on the INTC history.
window_size = 200   # rows handed to reshape_obs (enough for the 200-period SMA)
start = 200         # first tick of the training frame
period = len(df_shares_INTC) - start
actual_ws = 100     # prices kept in each observation

# The observation is the price window plus three extra features.
agent = Agent(state_space_n=actual_ws + 3, action_space_n=2)

env = make_env(actual_ws, df_shares_INTC, window_size, start, period)
anytrading_stats = AnytradingStatsCollector()

episodes = 35
for ep in range(episodes):
    done = False
    score = 0
    state = env.reset()[0]
    while not done:
        action = agent.choose_action(state)

        # step() returns (obs, reward, terminated, truncated, info); the
        # episode is over as soon as either flag is set.
        next_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        score += reward

        agent.store_transition(state, action, reward, next_state, int(done))
        state = next_state

        agent.learn()

        # Epsilon decays once per environment step, not once per episode.
        agent.update_epsilon()

    anytrading_stats.store_episode_results(info['total_profit'], score, agent.epsilon)
    anytrading_stats.print_stats()

agent.save_model(ep)
env.render_all(period, f"Starting day: {start}")

anytrading_stats.print_stats_plots()
# x axis is the learning-step index, y axis is the loss (the labels were swapped).
create_plot("Step", "Loss", "Loss per step", agent.q_model_loss)

In [None]:
# Greedy evaluation of the trained agent on tickers it was not trained on.
shares_storage = {
    "amd": df_shares_AMD,
    "meta": df_shares_META,
    "google": df_shares_GOOGLE,
    "cisco": df_shares_CISCO,
}

stats = {
    "profits": [],
    "num_of_trades": [],
    "avg_days_holding_positions": [],
    "trends": {
        "decreasing": {
            "counter": 0,
            "num_of_profits": 0,
            "profit_values": [],
        },
        "rising": {
            "counter": 0,
            "num_of_profits": 0,
            "profit_values": [],
        },
    }
}

eval_period = 365  # length of every evaluation window, in ticks

for key, t_shares in shares_storage.items():
    print(f"--------------------------{key}--------------------------")
    # Slide a window over the series, starting a new one every 100 ticks.
    for i in range(200, len(t_shares) - eval_period, 100):
        env = make_env(actual_ws, t_shares, window_size, i, eval_period)
        obs = env.reset()[0]
        done = False

        while not done:
            action = agent.choose_best_action(obs)
            obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
        # To inspect a single window, call: env.render_all(eval_period, True)

        trend = env.get_trend()
        num_of_trades, avg_days_holding_positions, _ = env.get_stats()
        profit = info["total_profit"]

        stats['profits'].append(profit)
        stats["num_of_trades"].append(num_of_trades)
        stats["avg_days_holding_positions"].append(avg_days_holding_positions)
        stats["trends"][trend]["counter"] += 1
        # total_profit starts at 1.0, so >= 1.0 means the window did not lose money.
        if profit >= 1.0:
            stats["trends"][trend]["num_of_profits"] += 1
        stats["trends"][trend]["profit_values"].append(profit)

print("--------------------------STATS--------------------------")
print(f"Median profit: {np.median(stats['profits'])}")
print(f"Lowest profit achived: {(np.min(stats['profits']) - 1) * 100}")
print(f"Highest profit achived: {(np.max(stats['profits']) - 1) * 100}")
print(f"Average num_of_trades: {np.mean(stats['num_of_trades'])}")
print(f"Average avg_days_holding_positions: {np.mean(stats['avg_days_holding_positions'])}")

for trend_name in ("decreasing", "rising"):
    trend_stats = stats["trends"][trend_name]
    profitable = trend_stats["num_of_profits"]
    windows = trend_stats["counter"]
    # A trend bucket can be empty (e.g. every window was rising); report NaN
    # instead of raising ZeroDivisionError.
    if windows:
        profitable_share = profitable / windows * 100
        median_trend_profit = np.median(trend_stats["profit_values"])
    else:
        profitable_share = float("nan")
        median_trend_profit = float("nan")
    print(f"Profits achvied on {trend_name} trend: {profitable} / {windows}, {profitable_share}%")
    print(f"Median profit achvied on {trend_name} trend: {median_trend_profit}, {(median_trend_profit - 1) * 100}%")


# **Dodanie do stanu SMA 100 dni oraz SMA 50 dni**

In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt

from gym_anytrading.envs import StocksEnv, Actions, Positions

def reshape_obs(state, action, actual_ws):
    """Turn a raw environment window into the flat feature vector fed to the agent.

    The result holds, in order: the last ``actual_ws`` prices standardised to
    zero mean / unit variance, the action, the 9-period RSI divided by 100,
    and the relative distance of the latest price from its 200-, 100- and
    50-period SMA.

    Args:
        state: 2-D array; column 0 is used as the price series (presumably the
            closing price - verify against the environment that produces it).
        action: value appended after the price window.
        actual_ws: number of most recent prices kept in the observation.

    Returns:
        1-D array with ``actual_ws + 5`` entries.

    NOTE(review): the 200-period SMA needs at least 200 rows in ``state``;
    presumably TA-Lib yields NaN for shorter windows - confirm.
    """
    prices = state[:, 0]
    ws = len(prices)

    # RSI over the last `rsi_timeperiod + 1` prices, rescaled from [0, 100].
    rsi_timeperiod = 9
    rsi_9 = talib.RSI(np.float64(prices[ws - rsi_timeperiod - 1:ws]), timeperiod=rsi_timeperiod)[-1]
    rsi_9 = rsi_9 / 100

    # Relative gap between the latest price and each moving average, in the
    # order 200, 100, 50.
    prices_f64 = np.array(prices, dtype=np.float64)
    sma_gaps = []
    for sma_period in (200, 100, 50):
        sma = np.float32(talib.SMA(prices_f64, timeperiod=sma_period))[-1]
        sma_gaps.append((prices[-1] / sma) - 1)

    window = prices[ws - actual_ws:ws]

    # A perfectly flat window has zero standard deviation; dividing by it would
    # fill the observation with NaN. Fall back to a divisor of 1 so the
    # standardised window is simply all zeros.
    spread = np.std(window)
    if spread == 0:
        spread = 1
    window = (window - np.mean(window)) / spread

    return np.append(window, [action, rsi_9, *sma_gaps])

class MyStocksEnv(StocksEnv):
    """``StocksEnv`` variant with a flat, feature-augmented observation.

    What this subclass changes, as visible in its code:

    * observations are built by ``reshape_obs`` and have ``actual_ws + 5`` entries;
    * the reward is non-zero only when a long position is closed (a Sell while
      Long, or the episode ending while Long) and equals the relative price
      change since the last trade tick;
    * both trade-fee percentages are set to zero;
    * the number of closing trades and the lengths of runs of identical
      consecutive actions are recorded for ``get_stats``.
    """

    def __init__(self, actual_ws, **kwargs):
        """Create the environment.

        Args:
            actual_ws: number of most recent prices kept in each observation.
            **kwargs: forwarded unchanged to ``StocksEnv.__init__``.
        """
        super().__init__(**kwargs)

        self.actual_ws = actual_ws
        # Price window plus the five extra features appended by reshape_obs.
        self.shape = (self.actual_ws + 5, )
        INF = 1e10
        self.observation_space = gym.spaces.Box(
            low=-INF, high=INF, shape=self.shape, dtype=np.float32,
        )

        self.last_action = 0

        self.number_of_trades = 0
        self.days_holding_position = 1
        self.history_days_holding_position = []

        self._total_reward = 0.
        self.trade_fee_bid_percent = 0
        self.trade_fee_ask_percent = 0

    def is_trade(self, action):
        """Return True when ``action`` closes a long position (Sell while Long)."""
        return bool(action == Actions.Sell.value and self._position == Positions.Long)

    def _calculate_reward(self, action):
        """Return the relative price change realised at the current tick, else 0.

        A reward is paid when a long position is sold, or when the episode is
        truncated while still long. Each such event also increments
        ``number_of_trades``.
        """
        step_reward = 0

        if self.is_trade(action) or (self._truncated and self._position == Positions.Long):
            self.number_of_trades += 1
            current_price = self.prices[self._current_tick]
            last_trade_price = self.prices[self._last_trade_tick]
            step_reward = (current_price / last_trade_price) - 1

        return step_reward

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        All per-episode bookkeeping is re-initialised, including
        ``last_action``, so holding-run statistics never depend on how the
        previous episode ended.
        """
        super().reset(seed=seed, options=options)
        self.action_space.seed(int((self.np_random.uniform(0, seed if seed is not None else 1))))

        # Per-episode statistics. `last_action` is reset to the same value the
        # constructor uses (and that the initial observation below reports);
        # previously it leaked across episodes.
        self.last_action = 0
        self.number_of_trades = 0
        self.days_holding_position = 1
        self.history_days_holding_position = []

        self._truncated = False
        self._current_tick = self._start_tick
        self._last_trade_tick = self._current_tick - 1
        self._position = Positions.Short
        self._position_history = (self.window_size * [None]) + [self._position]
        self._total_reward = 0.
        self._total_profit = 1.  # unit
        self._first_rendering = True
        self.history = {}

        observation = self._get_observation()
        observation = reshape_obs(observation, 0, self.actual_ws)
        info = self._get_info()

        if self.render_mode == 'human':
            self._render_frame()

        return observation, info

    def step(self, action):
        """Advance one tick.

        Returns:
            ``(observation, reward, flag, flag, info)`` where both flags carry
            the same value: True once the current tick reaches the end tick.
        """
        self._truncated = False
        self._current_tick += 1

        if self._current_tick == self._end_tick:
            # Close the holding run that was open when the episode ended.
            self.history_days_holding_position.append(self.days_holding_position)
            self._truncated = True

        # Reward and profit are computed before the position is flipped below.
        step_reward = self._calculate_reward(action)
        self._total_reward += step_reward

        self._update_profit(action)

        trade = False
        if (
            (action == Actions.Buy.value and self._position == Positions.Short) or
            (action == Actions.Sell.value and self._position == Positions.Long)
        ):
            trade = True

        if trade:
            self._position = self._position.opposite()
            self._last_trade_tick = self._current_tick

        self._position_history.append(self._position)
        observation = self._get_observation()
        observation = reshape_obs(observation, action, self.actual_ws)
        info = self._get_info()
        self._update_history(info)

        # Track the lengths of runs of identical consecutive actions.
        if self.last_action == action:
            self.days_holding_position += 1
        else:
            self.history_days_holding_position.append(self.days_holding_position)
            self.days_holding_position = 1
            self.last_action = action

        if self.render_mode == 'human':
            self._render_frame()

        return observation, step_reward, self._truncated, self._truncated, info

    def get_stats(self):
        """Return ``(number_of_trades, mean_run_length, run_lengths)`` for the episode."""
        return self.number_of_trades, np.mean(self.history_days_holding_position), self.history_days_holding_position

    def get_trend(self):
        """Return "decreasing" if the price at index ``window_size`` exceeds the last price, else "rising"."""
        if self.prices[self.window_size] - self.prices[-1] > 0:
            return "decreasing"
        return "rising"

    def render_all(self, max_days, title=None):
        """Plot the price series and mark every tick where the position changed.

        Args:
            max_days: controls the figure width (3 inches per 100 days, rounded up).
            title: treated as a flag only; when truthy a fixed title showing the
                first frame bound is drawn. The passed text itself is not shown.
        """
        figsize_x = math.ceil((max_days / 100) * 3)
        plt.figure(figsize=(figsize_x, 6))
        window_ticks = np.arange(len(self._position_history))
        plt.plot(self.prices)

        short_ticks = []
        long_ticks = []
        last_pos = None
        for i, tick in enumerate(window_ticks):
            current_pos = self._position_history[i]

            # Only the first tick of each new position is marked.
            if current_pos == last_pos:
                continue

            if current_pos == Positions.Short:
                short_ticks.append(tick)
            elif current_pos == Positions.Long:
                long_ticks.append(tick)

            last_pos = current_pos

        plt.plot(short_ticks, self.prices[short_ticks], 'ro', label="Sprzedaj")
        plt.plot(long_ticks, self.prices[long_ticks], 'go', label="Kup")

        plt.legend()

        if title:
            plt.title(f"Stocks from day {self.frame_bound[0]}")

        plt.suptitle(
            "Total Reward: %.6f" % self._total_reward + ' ~ ' +
            "Total Profit: %.6f" % self._total_profit
        )
        plt.show()

In [None]:
# Training run on the INTC history with the five-feature observation.
window_size = 200   # rows handed to reshape_obs (enough for the 200-period SMA)
start = 200         # first tick of the training frame
period = len(df_shares_INTC) - start
actual_ws = 100     # prices kept in each observation

# The observation is the price window plus five extra features.
agent = Agent(state_space_n=actual_ws + 5, action_space_n=2)

env = make_env(actual_ws, df_shares_INTC, window_size, start, period)
anytrading_stats = AnytradingStatsCollector()

episodes = 35
for ep in range(episodes):
    done = False
    score = 0
    state = env.reset()[0]
    while not done:
        action = agent.choose_action(state)

        # step() returns (obs, reward, terminated, truncated, info); the
        # episode is over as soon as either flag is set.
        next_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        score += reward

        agent.store_transition(state, action, reward, next_state, int(done))
        state = next_state

        agent.learn()

        # Epsilon decays once per environment step, not once per episode.
        agent.update_epsilon()

    anytrading_stats.store_episode_results(info['total_profit'], score, agent.epsilon)
    anytrading_stats.print_stats()

agent.save_model(ep)
env.render_all(period, f"Starting day: {start}")

anytrading_stats.print_stats_plots()
# x axis is the learning-step index, y axis is the loss (the labels were swapped).
create_plot("Step", "Loss", "Loss per step", agent.q_model_loss)

In [None]:
# Greedy evaluation of the trained agent on tickers it was not trained on.
shares_storage = {
    "amd": df_shares_AMD,
    "meta": df_shares_META,
    "google": df_shares_GOOGLE,
    "cisco": df_shares_CISCO,
}

stats = {
    "profits": [],
    "num_of_trades": [],
    "avg_days_holding_positions": [],
    "trends": {
        "decreasing": {
            "counter": 0,
            "num_of_profits": 0,
            "profit_values": [],
        },
        "rising": {
            "counter": 0,
            "num_of_profits": 0,
            "profit_values": [],
        },
    }
}

eval_period = 365  # length of every evaluation window, in ticks

for key, t_shares in shares_storage.items():
    print(f"--------------------------{key}--------------------------")
    # Slide a window over the series, starting a new one every 100 ticks.
    for i in range(200, len(t_shares) - eval_period, 100):
        env = make_env(actual_ws, t_shares, window_size, i, eval_period)
        obs = env.reset()[0]
        done = False

        while not done:
            action = agent.choose_best_action(obs)
            obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
        # To inspect a single window, call: env.render_all(eval_period, True)

        trend = env.get_trend()
        num_of_trades, avg_days_holding_positions, _ = env.get_stats()
        profit = info["total_profit"]

        stats['profits'].append(profit)
        stats["num_of_trades"].append(num_of_trades)
        stats["avg_days_holding_positions"].append(avg_days_holding_positions)
        stats["trends"][trend]["counter"] += 1
        # total_profit starts at 1.0, so >= 1.0 means the window did not lose money.
        if profit >= 1.0:
            stats["trends"][trend]["num_of_profits"] += 1
        stats["trends"][trend]["profit_values"].append(profit)

print("--------------------------STATS--------------------------")
print(f"Median profit: {np.median(stats['profits'])}")
print(f"Lowest profit achived: {(np.min(stats['profits']) - 1) * 100}")
print(f"Highest profit achived: {(np.max(stats['profits']) - 1) * 100}")
print(f"Average num_of_trades: {np.mean(stats['num_of_trades'])}")
print(f"Average avg_days_holding_positions: {np.mean(stats['avg_days_holding_positions'])}")

for trend_name in ("decreasing", "rising"):
    trend_stats = stats["trends"][trend_name]
    profitable = trend_stats["num_of_profits"]
    windows = trend_stats["counter"]
    # A trend bucket can be empty (e.g. every window was rising); report NaN
    # instead of raising ZeroDivisionError.
    if windows:
        profitable_share = profitable / windows * 100
        median_trend_profit = np.median(trend_stats["profit_values"])
    else:
        profitable_share = float("nan")
        median_trend_profit = float("nan")
    print(f"Profits achvied on {trend_name} trend: {profitable} / {windows}, {profitable_share}%")
    print(f"Median profit achvied on {trend_name} trend: {median_trend_profit}, {(median_trend_profit - 1) * 100}%")
