# RL Trend Following

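In questo notebook ci limitiamo, in primo luogo, a usare un'interfaccia in stile RL (`reset`/`step` per l'ambiente, `act` per l'agente) per simulare l'interazione ambiente–agente; l'agente segue una semplice regola trend following, senza alcun apprendimento.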

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the price history; the "Date" column becomes the index and is parsed as dates.
df = pd.read_csv(
    "SPY.csv",
    parse_dates=True,
    index_col="Date",
)

In [3]:
# Window lengths (in rows) for the two simple moving averages; these are the
# values that were found in earlier work.
FAST_WINDOW = 16
SLOW_WINDOW = 33

# Rolling means of the close price. The first (window - 1) rows of each column
# are NaN because the window is not yet full.
df["FastSMA"] = df["Close"].rolling(FAST_WINDOW).mean()
df["SlowSMA"] = df["Close"].rolling(SLOW_WINDOW).mean()

# Columns that make up the state vector seen by the agent.
feats = ["FastSMA", "SlowSMA"]

In [4]:
# Log return per row: difference between consecutive values of log(Close).
# The first row has no predecessor, so its LogReturn is NaN.
df["LogReturn"] = np.log(df["Close"]).diff()

In [5]:
# Positional hold-out: the last N_test rows form the test set and the rows
# before them the training set. Each part is copied so it is independent of df.
N_test = 1000
train_data, test_data = (
    chunk.copy() for chunk in (df.iloc[:-N_test], df.iloc[-N_test:])
)

In [6]:
class Environment:
    """Minimal single-asset trading environment with a ``reset``/``step`` API.

    The episode walks through the rows of ``df`` in order. The observation is
    the row of feature columns at the current index. The reward of a step is
    the value of the reward column at the row just stepped into when a
    position is held, and 0 otherwise.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the feature columns and the reward column.
    feature_cols : sequence of str, optional
        Columns that form the state vector. When omitted, the notebook-level
        ``feats`` list is used (the original behaviour).
    reward_col : str, default "LogReturn"
        Column holding the per-step reward.
    """

    def __init__(self, df, feature_cols=None, reward_col="LogReturn"):
        # Resolve the global lazily so that callers passing explicit columns
        # do not depend on notebook state.
        if feature_cols is None:
            feature_cols = feats

        self.df = df
        self.n = len(df)
        self.current_idx = 0
        self.action_space = [0, 1, 2]  # Buy, Sell, Hold
        self.invested = 0

        # NumPy arrays are more convenient than the DataFrame for positional indexing.
        self.states = df[list(feature_cols)].to_numpy()
        self.rewards = df[reward_col].to_numpy()

        # Running sum of every step's reward regardless of the position held,
        # i.e. the buy-and-hold baseline for the episode.
        self.total_buy_and_hold = 0

    def reset(self):
        """Start a new episode and return the first state.

        Raises IndexError if the environment was built from an empty frame.
        """
        self.invested = 0
        self.current_idx = 0
        self.total_buy_and_hold = 0
        return self.states[self.current_idx]

    def step(self, action):
        """Apply ``action``, advance one row and return ``(next_state, reward, done)``.

        Parameters
        ----------
        action : int
            0 = Buy, 1 = Sell, 2 = Hold (must be a member of ``action_space``).

        Returns
        -------
        next_state : numpy.ndarray
            Feature row at the new index.
        reward : float or int
            Reward-column value at the new index if invested, else 0.
        done : bool
            True when the new index is the last row.

        Raises
        ------
        RuntimeError
            If the episode has already reached the last row.
        ValueError
            If ``action`` is not in ``action_space``.
        """
        # Validate before touching any state so a failed call leaves the
        # environment exactly as it was.
        if self.current_idx + 1 >= self.n:
            raise RuntimeError("Episode Already Finished")
        if action not in self.action_space:
            raise ValueError(
                f"Unknown action {action!r}; expected one of {self.action_space}"
            )

        self.current_idx += 1

        if action == 0:  # Buy
            self.invested = 1
        elif action == 1:  # Sell
            self.invested = 0
        # action == 2 (Hold) keeps the current position.

        # The position chosen on the previous state earns the reward of the
        # row we have just moved into.
        if self.invested:
            reward = self.rewards[self.current_idx]
        else:
            reward = 0

        next_state = self.states[self.current_idx]

        # Baseline: accumulate the reward as if always invested.
        self.total_buy_and_hold += self.rewards[self.current_idx]

        done = (self.current_idx == self.n - 1)

        return next_state, reward, done
            

In [7]:
class Agent:
    """Rule-based agent that trades on the ordering of two state values.

    It keeps its own ``is_invested`` flag and only emits Buy or Sell when the
    ordering of the two values disagrees with that flag; otherwise it holds.
    """

    def __init__(self):
        self.is_invested = False

    def act(self, state):
        """Return 0 (Buy), 1 (Sell) or 2 (Hold) for a two-element ``state``.

        ``state[0]`` is compared against ``state[1]``. Equal values, or
        comparisons that evaluate to False (e.g. with NaN), result in Hold.
        """
        assert len(state) == 2
        first, second = state[0], state[1]

        if self.is_invested:
            # Holding a position: leave only when the second value is on top.
            if second > first:
                self.is_invested = False
                return 1  # Sell
        elif first > second:
            # Flat: enter when the first value is on top.
            self.is_invested = True
            return 0  # Buy

        return 2  # Hold

In [8]:
def play_one_episode(env: Environment, agent: Agent):
    """Run one full episode of ``agent`` in ``env`` and print the totals.

    The environment is reset and the agent's ``is_invested`` flag is cleared
    first. The function then steps until the environment reports ``done``,
    summing the rewards, and prints that sum next to the environment's
    buy-and-hold total. Nothing is returned.
    """
    state = env.reset()
    agent.is_invested = False
    total_reward = 0

    # At least one step is always taken, as in a `while not done` loop that
    # starts with done falsy.
    while True:
        state, reward, done = env.step(agent.act(state))
        total_reward += reward
        if done:
            break

    print(f"Reward obtained by Trend Following: {total_reward}. Reward obtained by buy and hold: {env.total_buy_and_hold}")

In [9]:
# Run the rule-based agent over the training split.
env = Environment(train_data)
agent = Agent()
play_one_episode(env, agent)

Reward obtained by Trend Following: 0.43459304796456966. Reward obtained by buy and hold: 0.5970866514889401


In [10]:
# Run the test split. `agent` is the instance created for the training run;
# play_one_episode clears its is_invested flag before the episode starts.
env = Environment(test_data)
play_one_episode(env, agent)

Reward obtained by Trend Following: 0.08889132894199303. Reward obtained by buy and hold: 0.19307543946998518


In [11]:
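# NB: it seems that a trend-following entry followed by holding the position
# earns more than buying at random, although random buying is not measured in
# this notebook. Note also that in both runs above the rule earned less than
# plain buy-and-hold (train: 0.435 vs 0.597; test: 0.089 vs 0.193).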