In [3]:
# TradingEnvGymLike: environment in stile Gymnasium minimale
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# optional: yfinance for online download
try:
    import yfinance as yf
    _YFINANCE_AVAILABLE = True
except Exception:
    _YFINANCE_AVAILABLE = False

class DiscreteActionSpace:
    def __init__(self, n, seed=None):
        self.n = int(n)
        self._rng = np.random.RandomState(seed)

    def sample(self):
        return int(self._rng.randint(0, self.n))

class ObservationSpace:
    def __init__(self, shape, dtype=np.float32):
        self.shape = tuple(shape)
        self.dtype = dtype

class TradingEnvGymLike:
    """
    Minimal Gym-like environment for short-window trading.
    - States: window of percentage returns (length = window_size)
    - Actions: 0=HOLD, 1=BUY, 2=SELL
    - Reward: if BUY -> r_{t+1}; if SELL -> -r_{t+1}; if HOLD -> 0.0
    - step returns: next_state, reward, terminated, truncated, info
    - terminated True when no next return is available (end of data)
    """
    def __init__(self, ticker_or_csv, window_size: int = 7, interval='1d',
                 period='2y', seed: int = 42, transaction_cost_pct: float = 0.0):
        """
        ticker_or_csv: if string endswith .csv -> path to CSV with a 'Close' column.
                       else interpreted as ticker string fetched with yfinance (if available).
        window_size: number of returns in the state (state_size)
        interval, period: used only for yfinance download
        seed: RNG seed for action_space sampling
        transaction_cost_pct: optional commission fraction (applied only if you later extend)
        """
        self.window_size = int(window_size)
        self.interval = interval
        self.period = period
        self.seed = int(seed)
        self.transaction_cost_pct = float(transaction_cost_pct)

        # action & obs spaces
        self.action_space = DiscreteActionSpace(3, seed=self.seed)  # 0 hold,1 buy,2 sell
        self.action_size = 3
        self.state_size = self.window_size
        self.observation_space = ObservationSpace(shape=(self.state_size,), dtype=np.float32)

        # load data
        self._load_data(ticker_or_csv)

        # prepare returns (pct changes)
        # prices: pandas Series of Close prices aligned to index 0..N-1
        self.returns = self.prices.pct_change().dropna().values.astype(np.float32)
        if len(self.returns) < self.window_size + 1:
            raise ValueError(f"Not enough data for window_size={self.window_size}. Need at least window_size+1 returns.")

        # internal indices
        self._rng = np.random.RandomState(self.seed)
        self.reset()

    def _load_data(self, ticker_or_csv):
    if isinstance(ticker_or_csv, str) and ticker_or_csv.lower().endswith('.csv'):
        df = pd.read_csv(ticker_or_csv)
        if 'Close' not in df.columns:
            raise ValueError("CSV must contain 'Close' column")
        self.df = df.reset_index(drop=True)

        close = self.df['Close']
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]

        self.prices = pd.Series(close.astype(float).values)

    else:
        if not _YFINANCE_AVAILABLE:
            raise RuntimeError("yfinance not available; provide a CSV path instead.")

        ticker = str(ticker_or_csv)
        df = yf.download(
            ticker,
            period=self.period,
            interval=self.interval,
            progress=False
        )

        if df is None or 'Close' not in df.columns or len(df) == 0:
            raise ValueError(f"Unable to download Close prices for ticker '{ticker}'")

        df = df.reset_index(drop=True)
        self.df = df

        close = df['Close']
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]

        self.prices = pd.Series(close.astype(float).values)


    def reset(self, start_index: int = None):
        """
        Reset environment.
        If start_index is None -> random valid start is chosen (deterministic w.r.t seed),
        otherwise start_index is an integer index within returns such that there are
        enough following returns to run at least one step.
        Returns initial state (array of length window_size).
        """
        min_start = 0 + (self.window_size - 1)   # minimal index in returns that can be the last element of initial state
        max_start = len(self.returns) - 2       # we need at least one future return for the first action's reward
        if max_start < min_start:
            raise ValueError("Not enough data to reset environment with chosen window_size")

        if start_index is None:
            # choose random start between min_start and max_start inclusive
            chosen = self._rng.randint(min_start, max_start + 1)
        else:
            chosen = int(start_index)
            if chosen < min_start or chosen > max_start:
                raise ValueError(f"start_index must be in [{min_start}, {max_start}]")

        # t is the index in self.returns of the *last element* in the current state window
        self.t = chosen
        self.terminated = False
        self.truncated = False
        self.total_steps = 0
        # current state: returns[t-window_size+1 : t+1]
        self.state = self.returns[self.t - self.window_size + 1 : self.t + 1].copy()
        return self.state.copy()

    def step(self, action: int):
        """
        action: 0=hold, 1=buy, 2=sell
        returns: next_state (or None if terminated), reward (float), terminated (bool), truncated (bool), info (dict)
        Reward uses the *next* return (r_{t+1}) according to the chosen action.
        After computing reward, we advance t <- t+1 and build next_state (window shifted by 1).
        """
        if self.terminated:
            raise RuntimeError("Calling step() on terminated environment. Call reset() to restart.")

        if action not in (0,1,2):
            raise ValueError("Action must be 0 (hold), 1 (buy) or 2 (sell)")

        # r_next exists because reset chose t <= len(returns)-2
        r_next = float(self.returns[self.t + 1])

        # simple reward mapping
        if action == 1:   # BUY -> profit proportional to next return
            reward = r_next
        elif action == 2: # SELL -> profit if price goes down -> negative of r_next
            reward = -r_next
        else:
            reward = 0.0

        # (optional) apply small transaction cost penalty if desired:
        # reward -= self.transaction_cost_pct * abs(action-0)  # (example placeholder)

        # advance time
        self.t += 1
        self.total_steps += 1

        # termination condition: no further r_{t+1} available
        if self.t + 1 >= len(self.returns):
            self.terminated = True
            next_state = None
        else:
            # new state: window ending at new t
            next_state = self.returns[self.t - self.window_size + 1 : self.t + 1].copy()
            self.state = next_state.copy()

        info = {
            't': int(self.t),
            'price': float(self.prices[self.t + 1]),  # corresponding price at index t+1 in original prices
            'r_next': r_next,
            'action': int(action)
        }
        return (None if next_state is None else next_state.copy()), float(reward), bool(self.terminated), bool(self.truncated), info

    def render(self, mode='imshow', figsize=(8,3)):
        """
        mode='imshow' (default): draws the close prices for the region covering the current window + a bit more,
        and marks the current time with a vertical line.
        """
        # determine price index corresponding to end of current state window: remember returns index t corresponds to price index t+1
        price_idx = self.t + 1  # index in self.prices corresponding to last return in state
        # choose a window of prices to plot
        start_plot = max(0, price_idx - (self.window_size + 5))
        end_plot = min(len(self.prices)-1, price_idx + 5)
        xs = np.arange(start_plot, end_plot+1)
        ys = self.prices.iloc[start_plot:end_plot+1].values

        plt.figure(figsize=figsize)
        plt.plot(xs, ys, marker='o')
        plt.axvline(x=price_idx, color='red', linestyle='--', label='current')
        # highlight the state window region
        win_start_price_index = price_idx - self.window_size
        plt.axvspan(win_start_price_index, price_idx, color='yellow', alpha=0.2, label='state window')
        plt.xlabel('price index')
        plt.ylabel('Close price')
        plt.title(f'Env render: price idx {price_idx}, window_size {self.window_size}')
        plt.legend()
        plt.show()

    # helper small API to exactly mimic gym-like naming
    @property
    def observation_space_shape(self):
        return self.observation_space.shape

    def seed(self, seed=None):
        if seed is None:
            seed = np.random.randint(0, 2**31-1)
        self._rng = np.random.RandomState(int(seed))
        self.action_space = DiscreteActionSpace(3, seed=int(seed))
        return int(seed)


IndentationError: expected an indented block after function definition on line 70 (813072535.py, line 71)

In [2]:
env = TradingEnvGymLike('AAPL', window_size=7)   # usa yfinance per scaricare
s0 = env.reset()
print("state shape:", s0.shape)       # (7,)
print("action sample:", env.action_space.sample())
next_state, reward, terminated, truncated, info = env.step(env.action_space.sample())
print("reward:", reward, "terminated:", terminated, "info:", info)
env.render()

  df = yf.download(ticker, period=self.period, interval=self.interval, progress=False)


ValueError: Data must be 1-dimensional, got ndarray of shape (503, 1) instead

# TODO

## the project is divided into three distinct macro-area:
1. The creation of an external script containing the environment class like in Gymnasium, that must behave exactly as for the Cart Pole Environment! generating its exact type of data.
2. The exact copy and paste of Prof. Berta DNQ algorithm for my purpose.
3. a utility function/class to keep track of the various improvements, such as total reward, looos, profit...

    3.1. Possibly related to point 3. somethig to show also the behaviour, under a plot format.

# important things

1. put the % variation of prices
2. keep the environment class as simple as possible, nothing superfluo
3. before doing anything understand properly the cart pole environment
4. understand how to calculate the loss
5. a  wallet might be even not neccessary since we can even go broke insetad of terminating truncate the episode
6. store indeed also the negative transaction
7. implement a simple implementation of what to buy and sell, such as just ONE stock, a portion of it?
8. evebn the action space must be super simple.
9. fai una comparazione importantissima ta quello implmementato online dal tizio, le notes di Berta e quello che voglio fare io!!!

In [1]:
import yfinance

In [5]:
df = yfinance.download("TSLA", interval="1h")

  df = yfinance.download("TSLA", interval="1h")
[*********************100%***********************]  1 of 1 completed


In [6]:
df

Price,Close,High,Low,Open,Volume
Ticker,TSLA,TSLA,TSLA,TSLA,TSLA
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2026-01-07 14:30:00+00:00,434.035004,438.130005,431.600006,435.885010,12591805
2026-01-07 15:30:00+00:00,435.989990,437.329987,433.850098,434.040009,6795114
2026-01-07 16:30:00+00:00,436.220001,438.369995,435.709991,436.000000,5209734
2026-01-07 17:30:00+00:00,436.347107,437.750000,435.989990,436.234985,3577085
2026-01-07 18:30:00+00:00,435.859985,437.510010,434.269989,436.359985,4386733
...,...,...,...,...,...
2026-02-06 16:30:00+00:00,413.869995,414.040009,410.035004,410.262512,5024365
2026-02-06 17:30:00+00:00,412.174988,414.549988,411.730011,413.869995,4336097
2026-02-06 18:30:00+00:00,413.385010,414.040009,411.079987,412.165009,3907698
2026-02-06 19:30:00+00:00,413.406189,414.549988,412.769989,413.399994,4270327


In [7]:
df.head()

Price,Close,High,Low,Open,Volume
Ticker,TSLA,TSLA,TSLA,TSLA,TSLA
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2026-01-07 14:30:00+00:00,434.035004,438.130005,431.600006,435.88501,12591805
2026-01-07 15:30:00+00:00,435.98999,437.329987,433.850098,434.040009,6795114
2026-01-07 16:30:00+00:00,436.220001,438.369995,435.709991,436.0,5209734
2026-01-07 17:30:00+00:00,436.347107,437.75,435.98999,436.234985,3577085
2026-01-07 18:30:00+00:00,435.859985,437.51001,434.269989,436.359985,4386733


In [9]:
pip install gymnasium

Collecting gymnasiumNote: you may need to restart the kernel to use updated packages.

  Downloading gymnasium-1.2.3-py3-none-any.whl.metadata (10 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.2.3-py3-none-any.whl (952 kB)
   ---------------------------------------- 0.0/952.1 kB ? eta -:--:--
   --------------------------------- ------ 786.4/952.1 kB 3.8 MB/s eta 0:00:01
   ---------------------------------------- 952.1/952.1 kB 3.2 MB/s eta 0:00:00
Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium

   -------------------- ------------------- 1/2 [gymnasium]
   -------------------- ------------------- 1/2 [gymnasium]
   -------------------- ------------------- 1/2 [gymnasium]
   -------------------- ------------------- 1/2 [gymnasium]
   -------------------- ------------------- 1/2 [gymnasium]


In [10]:
import numpy as np
import yfinance as yf
import gymnasium as gym
from gymnasium import spaces

class TradingEnv(gym.Env):
    """
    Ambiente Gymnasium per trading finanziario basato sulle variazioni percentuali di prezzo di chiusura.
    
    Parametri di init:
      - ticker: stringa, simbolo del titolo (es. 'AAPL').
      - granularita: stringa, intervallo di tempo ('1d', '1h', '1m', ...).
      - sliding_window: intero, dimensione della finestra scorrevole (numero di variazioni % nello stato).
      - start_date: stringa o datetime, data di inizio (es. '2020-01-01').
      - end_date: stringa o datetime (opzionale), data di fine. Se None, usa data corrente.
      
    Stato (osservazione):
      - Vettore di lunghezza sliding_window con variazioni percentuali del prezzo di chiusura rispetto al passo precedente.
    Azioni:
      - Discrete(3): 0=Buy, 1=Hold, 2=Sell.
    Reward:
      - Differenza assoluta (valore monetario) tra prezzo corrente e prezzo dell’ultima azione opposta.
    Terminazione:
      - terminated=True alla fine dei dati, truncated sempre False (nessun time-limit).
    """
    metadata = {"render_modes": ["human"], "render_fps": 30}
    
    def __init__(self, ticker, granularita, sliding_window, start_date, end_date=None):
        super().reset()  # inizializza random seed (gym.Env)
        self.ticker = ticker
        self.granularita = granularita
        self.sliding_window = sliding_window
        
        # Scarico dati storici di chiusura con yfinance:contentReference[oaicite:8]{index=8}
        data = yf.Ticker(self.ticker).history(start=start_date, end=end_date, interval=self.granularita)
        if data is None or data.shape[0] <= sliding_window:
            raise ValueError("Dati insufficienti per il numero di passi richiesto.")
        
        # Estrai la serie dei prezzi di chiusura
        self.prices = data['Close'].to_numpy(dtype=np.float32)
        
        # Calcola variazioni percentuali tra chiusure consecutive:contentReference[oaicite:9]{index=9}
        # pct_change produce frazioni; moltiplichiamo per 100 per percentuale
        pct = (self.prices[1:] - self.prices[:-1]) / self.prices[:-1] * 100.0
        self.returns = pct.astype(np.float32)  # array di lunghezza len(prices)-1
        
        # Definizione degli spazi Gym:contentReference[oaicite:10]{index=10}
        # Azioni discrete: 3 possibili (Buy, Hold, Sell)
        self.action_space = spaces.Discrete(3)
        # Osservazioni: finestra di sliding_window valori reali (float), senza limiti
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, 
                                            shape=(self.sliding_window,), dtype=np.float32)
        
        # Altri stati interni
        self.current_step = None
        self.last_buy_price = None
        self.last_sell_price = None

    def reset(self, seed=None, options=None):
        """
        Inizia un nuovo episodio. 
        Imposta il puntatore dei dati all'indice sliding_window e azzera ultime operazioni.
        Restituisce (stato_iniziale, info) dove stato è il vettore delle prime sliding_window variazioni %.
        """
        super().reset(seed=seed)  # gestisce seed rng interno
        # Posiziona il puntatore al termine della prima finestra
        self.current_step = self.sliding_window
        # Prezzo iniziale (alla fine della finestra iniziale) per buy/sell precedente
        init_price = float(self.prices[self.current_step])
        self.last_buy_price = init_price
        self.last_sell_price = init_price
        
        # Stato iniziale: primi sliding_window valori di self.returns
        state = self.returns[0:self.sliding_window].copy()
        return state, {}  # Gymnasium richiede (obs, info)

    def step(self, action):
        """
        Esegue l'azione data (0=Buy, 1=Hold, 2=Sell).
        Calcola la reward basata sull'ultima azione opposta e aggiorna lo stato interno.
        Ritorna (nuovo_stato, reward, terminated, truncated, info).
        """
        # Prezzo corrente alla posizione current_step
        current_price = float(self.prices[self.current_step])
        reward = 0.0
        
        # Calcola reward e aggiorna ultimo buy/sell price
        if action == 0:  # Buy
            reward = abs(current_price - self.last_sell_price)
            self.last_buy_price = current_price
        elif action == 2:  # Sell
            reward = abs(current_price - self.last_buy_price)
            self.last_sell_price = current_price
        # azione Hold (1) => reward = 0.0
        
        # Passo successivo
        self.current_step += 1
        
        # Verifica di fine dati
        terminated = False
        if self.current_step >= len(self.prices):
            terminated = True
            # Stato finale (non importa se lo userà, rilasciamo ultima finestra valida)
            start = len(self.returns) - self.sliding_window
            state = self.returns[start:start + self.sliding_window].copy()
        else:
            # Stato successivo: ultime sliding_window variazioni % fino a current_step
            start = self.current_step - self.sliding_window
            state = self.returns[start:start + self.sliding_window].copy()
        
        # truncate non usato in questo semplice ambiente
        truncated = False
        
        return state, reward, terminated, truncated, {}

    def render(self):
        # Funzione render vuota (non implementata)
        pass

# Esempio d'uso:
# env = TradingEnv('AAPL', '1d', sliding_window=10, start_date='2020-01-01')
# obs, info = env.reset()
# action = env.action_space.sample()  # azione casuale
# obs, reward, terminated, truncated, info = env.step(action)


In [11]:
# Esempio d'uso:
env = TradingEnv('AAPL', '1d', sliding_window=10, start_date='2020-01-01')
obs, info = env.reset()
action = env.action_space.sample()  # azione casuale
obs, reward, terminated, truncated, info = env.step(action)

In [12]:
obs

array([ 0.7968242 , -0.47030082,  1.6086131 ,  2.1241038 ,  0.22606114,
        2.13645   , -1.3503492 , -0.42853838,  1.2526255 ,  1.1071318 ],
      dtype=float32)

In [13]:
info

{}

In [14]:
action

np.int64(0)

In [36]:
env = TradingEnv('AAPL', '1d', sliding_window=10, start_date='2020-01-01')

In [39]:
obs, info = env.reset()

In [17]:
obs

array([-0.9721687 ,  0.79684544, -0.4703007 ,  1.608581  ,  2.1241143 ,
        0.22606114,  2.13644   , -1.3503294 , -0.42853838,  1.252666  ],
      dtype=float32)

In [37]:
action = env.action_space.sample()

In [40]:
obs, reward, terminated, truncated, info = env.step(action)

In [31]:
obs

array([ 0.79684544, -0.4703007 ,  1.608581  ,  2.1241143 ,  0.22606114,
        2.13644   , -1.3503294 , -0.42853838,  1.252666  ,  1.107051  ],
      dtype=float32)

In [41]:
reward

0.0

In [35]:
truncated

False

In [None]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import yfinance as yf

class TradingEnv(gym.Env):
    """
    Ambiente di trading per RL. L'agente può Buy, Hold o Sell una singola azione.
    Stato: finestra mobile di variazioni percentuali dei prezzi di chiusura.
    Reward: differenza monetaria nel valore del portafoglio tra step consecutivi.
    """
    metadata = {'render_modes': ['human']}

    def __init__(self, ticker, granularita, sliding_window, start_date, end_date=None):
        super().__init__()
        self.ticker = ticker
        self.granularita = granularita
        self.sliding_window = sliding_window
        self.start_date = start_date
        self.end_date = end_date
        self.initial_capital = 10000.0  # Capitale iniziale

        # Carica i dati storici di chiusura usando yfinance:contentReference[oaicite:6]{index=6}
        # Se end_date è None, yfinance scarica fino ad oggi.
        data = yf.download(
            tickers=self.ticker,
            start=self.start_date,
            end=self.end_date,
            interval=self.granularita,
            progress=False
        )
        if data is None or data.empty or 'Close' not in data:
            raise ValueError("Impossibile scaricare dati per il ticker o intervallo specificato.")
        # Serie dei prezzi di chiusura
        self.prices = data['Close'].values
        # Numero di step disponibile
        self.max_step = len(self.prices)
        if self.max_step < self.sliding_window + 1:
            raise ValueError("Dati insufficienti per la finestra sliding_window richiesta.")

        # Calcola le variazioni percentuali giornaliere (np array di lunghezza = len(prices))
        # pct_change[0] = 0, poi (P[i]-P[i-1])/P[i-1] per i>=1
        pct = np.zeros(self.max_step, dtype=np.float32)
        for i in range(1, self.max_step):
            pct[i] = (self.prices[i] - self.prices[i-1]) / self.prices[i-1]
        self.pct_changes = pct

        # Spazio di azione e osservazione (Gym)
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.sliding_window,), dtype=np.float32
        )

        # Stato del portafoglio
        self.cash = None
        self.shares = None
        self.current_step = None  # indice corrente dell'ambiente

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        # Inizializza portafoglio
        self.cash = self.initial_capital
        self.shares = 0
        # Inizia dallo step = sliding_window
        # (il primo stato osservato utilizza i primi 'sliding_window' valori di pct_change)
        self.current_step = self.sliding_window
        # Valore portafoglio iniziale (da usare come riferimento)
        self.prev_portfolio_value = self.cash
        # Osservazione iniziale
        obs = self._get_obs()
        return obs, {}

    def _get_obs(self):
        """
        Restituisce l'osservazione corrente: array di variazioni percentuali.
        """
        start = self.current_step - self.sliding_window
        end = self.current_step
        obs = self.pct_changes[start:end]
        return obs.astype(np.float32)

    def step(self, action):
        # Controlla validità dell'azione
        assert self.action_space.contains(action), "Azione non valida."

        done = False
        terminated = False
        truncated = False

        # Prezzo corrente di chiusura
        price = self.prices[self.current_step]

        # Valore del portafoglio prima dell'azione
        old_portfolio = self.cash + self.shares * price

        # Esegui l'azione
        if action == 0:  # Buy
            # Compra 1 azione se possibile
            if self.cash >= price:
                self.shares += 1
                self.cash -= price
            # altrimenti si ignora l'azione (equivale a Hold)
        elif action == 2:  # Sell
            # Vendi 1 azione se possibile
            if self.shares > 0:
                self.shares -= 1
                self.cash += price
            # altrimenti Hold

        # Passa al prossimo passo temporale
        self.current_step += 1

        # Calcola reward: differenza monetaria tra portafoglio nuovo e vecchio
        if self.current_step < self.max_step:
            new_price = self.prices[self.current_step]
            new_portfolio = self.cash + self.shares * new_price
            reward = new_portfolio - old_portfolio
        else:
            # Non ci sono più prezzi successivi
            new_portfolio = self.cash + self.shares * price
            reward = new_portfolio - old_portfolio
            terminated = True

        # Check condizioni di terminazione
        # Episodio finisce se esauriti i dati o portafoglio = 0
        if new_portfolio <= 0:
            terminated = True

        # Ottieni la prossima osservazione (se non terminato)
        if not terminated:
            obs = self._get_obs()
        else:
            # Se terminato, restituisce comunque l'ultima osservazione valida
            obs = self._get_obs()

        info = {}  # informazioni addizionali (non usate)
        return obs, reward, terminated, truncated, info

    def render(self, mode='human'):
        """
        Opzionale: mostra stato attuale del portafoglio.
        """
        price = self.prices[self.current_step] if self.current_step < self.max_step else self.prices[-1]
        total_value = self.cash + self.shares * price
        print(f"Step {self.current_step}: Prezzo={price:.2f}, Cash={self.cash:.2f}, "
              f"Azioni={self.shares}, ValoreTot={total_value:.2f}")

# Esempio di utilizzo:
# env = TradingEnv("AAPL", "1d", sliding_window=10, start_date="2020-01-01", end_date="2021-01-01")
# obs, info = env.reset()
# action = env.action_space.sample()  # es. 0=buy,1=hold,2=sell
# obs, reward, done, truncated, info = env.step(action)


In [47]:
env = TradingEnv("AAPL", "1d", sliding_window=10, start_date="2020-01-01", end_date="2021-01-01")

  data = yf.download(
  pct[i] = (self.prices[i] - self.prices[i-1]) / self.prices[i-1]


In [48]:
obs, info = env.reset()


In [49]:
action = env.action_space.sample()  # es. 0=buy,1=hold,2=sell

In [50]:
action

np.int64(2)

In [51]:
obs, reward, done, truncated, info = env.step(action)

In [52]:
reward

array([0.])

In [53]:
obs

array([-0.00972221,  0.00796803, -0.00470238,  0.01608591,  0.02124072,
        0.00226071,  0.02136471, -0.01350359, -0.00428518,  0.01252615],
      dtype=float32)

In [54]:
done

False

In [55]:
truncated

False

In [56]:
info

{}