In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

from eodhd import APIClient

import gymnasium as gym
import gym_anytrading
from gym_anytrading.envs import Actions
from stable_baselines3 import PPO

Download the stocks data

In [2]:
api_key = os.getenv('EODHD_API_KEY')
api = APIClient(api_key)

In [3]:
n_years = 20
up_to_year = 2024
train_split = 0.7
valid_split = 0.2

n_years_train = n_years // (1 / train_split)
n_years_valid = n_years // (1 / valid_split)

init_year_train = int(up_to_year - n_years)
init_year_valid = int(init_year_train + n_years_train)
init_year_test = int(init_year_valid + n_years_valid)

init_date_train = datetime(init_year_train, 1, 1)
init_date_valid = datetime(init_year_valid, 1, 1) 
init_date_test = datetime(init_year_test, 1, 1) 

end_date_train = datetime(init_year_valid-1, 12, 31)
end_date_valid = datetime(init_year_test-1, 12, 31)
current_year = datetime.now().year
end_date_test = datetime.now() if up_to_year == current_year else datetime(up_to_year, 12, 31)

init_date_train, end_date_train, init_date_valid, end_date_valid, init_date_test, end_date_test


(datetime.datetime(2004, 1, 1, 0, 0),
 datetime.datetime(2016, 12, 31, 0, 0),
 datetime.datetime(2017, 1, 1, 0, 0),
 datetime.datetime(2020, 12, 31, 0, 0),
 datetime.datetime(2021, 1, 1, 0, 0),
 datetime.datetime(2024, 12, 31, 0, 0))

In [4]:
def get_ticker_data(ticker):
    data = api.get_eod_historical_stock_market_data(symbol = ticker, period='d', from_date = init_date_train, to_date = end_date_test, order='a')
    return pd.DataFrame(data)

In [5]:
# List of tickers
tickers = ['NVDA', 'BABA', 'KO']

def create_tickers():
    for ticker in tickers:
        try:
            # Fetch data for the ticker
            df = get_ticker_data(ticker)
            
            # Save data to a CSV file
            file_name = os.path.join('Tickers CSV', f"{ticker}.csv")
            df.to_csv(file_name, index=False)
            
            print(f"Data for {ticker} saved to {file_name}")
        except Exception as e:
            print(f"Error fetching data for {ticker}: {e}")

create_tickers()

Data for NVDA saved to Tickers CSV\NVDA.csv
Data for BABA saved to Tickers CSV\BABA.csv
Data for KO saved to Tickers CSV\KO.csv


In [6]:
ticker_example = 'KO'

In [7]:
# Get the data from the CSV files
stock_data = {}
for ticker in tickers:
    df = (
    pd.read_csv(os.path.join('Tickers CSV', f"{ticker}.csv"), parse_dates=True)
    .rename(columns=str.title)  # Capitalize the first letter of all column names
    )

    # Drop all columns named 'Close'
    df = df.drop(columns=['Close'], errors='ignore')  # Errors='ignore' ensures no issues if 'Close' doesn't exist

    # Rename 'Adjusted_Close' to 'Close'
    if 'Adjusted_Close' in df.columns:
        df.rename(columns={'Adjusted_Close': 'Close'}, inplace=True)

    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

    # Add 'return' column based on the newly renamed 'Close'
    df['Return'] = df['Close'].pct_change()

    # Store the modified DataFrame in the dictionary
    stock_data[ticker] = df


In [8]:
stock_data[ticker_example]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2004-01-02,50.80000000,50.99000000,50.01000000,13.34890000,7210200,
2004-01-05,50.35000000,50.59000000,50.06000000,13.36480000,9740200,0.00119111
2004-01-06,50.22000000,50.37000000,50.12000000,13.30910000,8223200,-0.00416766
2004-01-07,50.20000000,50.24000000,49.72000000,13.22430000,10242800,-0.00637158
2004-01-08,49.81000000,50.16000000,49.66000000,13.28790000,9070800,0.00480933
...,...,...,...,...,...,...
2024-12-24,62.20000000,62.84000000,62.01000000,62.37970000,5019100,0.00737529
2024-12-26,62.62000000,62.74000000,62.40000000,62.11160000,7943800,-0.00429787
2024-12-27,62.45000000,62.95000000,62.21000000,61.99250000,8542800,-0.00191752
2024-12-30,62.34000000,62.34000000,61.68000000,61.57560000,8972200,-0.00672501


In [9]:
# split the data into training, validation and test sets
training_data = {}
validation_data = {}
test_data = {}

for ticker, df in stock_data.items():
    training_data[ticker] = df.loc[init_date_train : end_date_train]
    validation_data[ticker] = df.loc[init_date_valid : end_date_valid]
    test_data[ticker] = df.loc[init_date_test:end_date_test]

In [10]:
print(f'Training data shape for {ticker_example}: {training_data[ticker_example].shape}')
print(f'Validation data shape for {ticker_example}: {validation_data[ticker_example].shape}')
print(f'Test data shape for {ticker_example}: {test_data[ticker_example].shape}')

Training data shape for KO: (3273, 6)
Validation data shape for KO: (1007, 6)
Test data shape for KO: (1005, 6)


In [11]:
def add_technical_indicators(df):
    """
    Add common technical indicators to the stock data DataFrame.
    
    Parameters:
    df (DataFrame): A DataFrame containing stock price data with columns 'Open', 'High', 'Low', 'Close', and 'Volume'.
    
    Returns:
    DataFrame: The original DataFrame with additional columns for RSI, MACD, Signal, CCI, and ADX.
    """

    # Calculate Relative Strength Index (RSI) over a 14-period window
    # RSI measures the strength of price changes (upward or downward) and provides values between 0 and 100.
    # It indicates whether an asset is overbought or oversold.
    delta = df['Close'].diff()  # Calculate the difference between consecutive closing prices
    up = delta.where(delta > 0, 0)  # Keep positive changes (upward movements), set others to 0
    down = -delta.where(delta < 0, 0)  # Keep negative changes (downward movements), set others to 0 and make them positive
    rs = up.rolling(window=14).mean() / down.rolling(window=14).mean()  # Compute the relative strength
    df['RSI'] = 100 - (100 / (1 + rs))  # Calculate the RSI based on relative strength

    # Calculate Exponential Moving Averages (EMA) for MACD
    # MACD (Moving Average Convergence Divergence) is used to identify trend direction and momentum.
    df['EMA12'] = df['Close'].ewm(span=12, adjust=False).mean()  # 12-period EMA
    df['EMA26'] = df['Close'].ewm(span=26, adjust=False).mean()  # 26-period EMA
    df['MACD'] = df['EMA12'] - df['EMA26']  # The MACD line is the difference between EMA12 and EMA26
    df['Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()  # Signal line (9-period EMA of MACD)

    # Calculate Commodity Channel Index (CCI) over a 20-period window
    # CCI measures the deviation of the asset's price from its average price over a certain period.
    tp = (df['High'] + df['Low'] + df['Close']) / 3  # Typical Price (TP): average of high, low, and close prices
    sma_tp = tp.rolling(window=20).mean()  # Simple Moving Average (SMA) of Typical Price over 20 periods
    mean_dev = tp.rolling(window=20).apply(lambda x: np.mean(np.abs(x - x.mean())))  # Mean absolute deviation
    df['CCI'] = (tp - sma_tp) / (0.015 * mean_dev)  # CCI formula: (TP - SMA_TP) / (0.015 * Mean Dev)

    # Calculate Average Directional Index (ADX) over a 14-period window
    # ADX measures the strength of a trend (ranging from 0 to 100).
    high_diff = df['High'].diff()  # Difference between current and previous high prices
    low_diff = df['Low'].diff()  # Difference between current and previous low prices
    df['+DM'] = np.where((high_diff > low_diff) & (high_diff > 0), high_diff, 0)  # Positive directional movement (+DM)
    df['-DM'] = np.where((low_diff > high_diff) & (low_diff > 0), low_diff, 0)  # Negative directional movement (-DM)
    tr = pd.concat([
        df['High'] - df['Low'],  # High minus Low
        np.abs(df['High'] - df['Close'].shift(1)),  # Absolute difference between current high and previous close
        np.abs(df['Low'] - df['Close'].shift(1))  # Absolute difference between current low and previous close
    ], axis=1).max(axis=1)  # True Range (TR): max of the above values
    atr = tr.ewm(span=14, adjust=False).mean()  # Average True Range (ATR) over 14 periods
    df['+DI'] = 100 * (df['+DM'].ewm(span=14, adjust=False).mean() / atr)  # Positive Directional Indicator (+DI)
    df['-DI'] = 100 * (df['-DM'].ewm(span=14, adjust=False).mean() / atr)  # Negative Directional Indicator (-DI)
    dx = 100 * np.abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI'])  # Directional Movement Index (DX)
    df['ADX'] = dx.ewm(span=14, adjust=False).mean()  # Average DX over 14 periods (ADX)

    # Drop rows with NaN values caused by rolling calculations
    df.dropna(inplace=True)

    # Keep only relevant columns for analysis
    df = df[['Open', 'High', 'Low', 'Close', 'Volume', 'MACD', 'Signal', 'RSI', 'CCI', 'ADX']]

    return df
