In [1]:
# Basic Python

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, date
from tqdm import tqdm

# Quant Libraries

import yfinance as yf # yahoo finance PyPi for getting stock data
import quandl # provides additional stock data but it might be deprecated
import nsepy # this gets derivatives data
import pyfolio as pf # provides performance and risk analysis of financial portfolios
import mplfinance as mpf # works in combination with matplotlib and pandas to plot finance data
import pandas_ta as ta # pandas but it also has TA indicators
import numpy_financial as npf # numpy with additional financial functions
import pandas_datareader as pdr # helps you get specific data very quickly
import statsmodels # provides a lot of different statistics models like linear regression, time series analysis, and bayesian analysis
import pynance as pn # grabs info from stock market derivatives
# import zipline # useful for backtesting and live trading algorithmic trading systems, it isn't working with all of the other dependencies yet

# ML Models

import sklearn # big SciPy machine learning package
import torch # big neural network Python package developed by Facebook
import tensorflow as tf # big neural network Python package developed by Google
plt.style.use('seaborn-v0_8') # this changes the matplotlib style to something nicer that we can use for financial purposes
from sklearn.model_selection import GridSearchCV # this is a useful tool for finding the best parameters for a model

# Stats

from scipy.stats import t



## Data Gathering

In [11]:
# 0 returns the most recent friday, 1 returns the friday before that, 2 returns the friday before that, etc.
def x_fridays_ago(x):
    today = datetime.today()

    days_to_last_friday = (today.weekday() - 4) % 7

    most_recent_friday = today - timedelta(days=days_to_last_friday)
    
    date_x_fridays_ago = most_recent_friday - timedelta(weeks=x)
    
    return date_x_fridays_ago.date()

In [12]:
x_fridays_ago(0)

datetime.date(2024, 2, 2)

In [9]:
stocks = ['AAPL', 'MSFT', 'L', 'MMM', 'GOOG']

yf.download(stocks, start=datetime.now() - timedelta(2), end=datetime.now() - timedelta(1), interval='1m')

[*********************100%%**********************]  5 of 5 completed


Price,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,Close,Close,...,Open,Open,Open,Open,Open,Volume,Volume,Volume,Volume,Volume
Ticker,AAPL,GOOG,L,MMM,MSFT,AAPL,GOOG,L,MMM,MSFT,...,AAPL,GOOG,L,MMM,MSFT,AAPL,GOOG,L,MMM,MSFT
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-02-05 09:30:00-05:00,187.149994,144.690002,74.870003,94.050003,409.614014,187.149994,144.690002,74.870003,94.050003,409.614014,...,188.149994,144.039993,74.589996,94.250000,409.899994,5514469.0,1913073.0,25934.0,208839.0,1132313.0
2024-02-05 09:31:00-05:00,187.399994,144.960007,74.925003,93.779999,410.640015,187.399994,144.960007,74.925003,93.779999,410.640015,...,187.205002,144.705002,74.949997,94.035004,409.630005,493472.0,379733.0,21223.0,15033.0,128228.0
2024-02-05 09:32:00-05:00,187.058899,144.710007,74.779999,93.760002,410.329987,187.058899,144.710007,74.779999,93.760002,410.329987,...,187.399994,144.955002,74.925003,93.830002,410.760010,359831.0,189165.0,9597.0,17662.0,112106.0
2024-02-05 09:33:00-05:00,187.570007,145.009995,73.820000,93.650002,410.540009,187.570007,145.009995,73.820000,93.650002,410.540009,...,187.104996,144.729996,74.705002,93.699997,410.320007,370343.0,214926.0,5263.0,14916.0,82493.0
2024-02-05 09:34:00-05:00,187.259995,145.580002,73.769997,93.790001,409.809998,187.259995,145.580002,73.769997,93.790001,409.809998,...,187.570007,144.990005,73.699997,93.680099,410.540009,370442.0,221975.0,2577.0,24747.0,101806.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-05 13:32:00-05:00,188.839996,145.185806,72.394997,93.052002,404.904999,188.839996,145.185806,72.394997,93.052002,404.904999,...,188.720001,145.279999,72.407997,92.989998,405.000000,78188.0,31100.0,1929.0,5772.0,34130.0
2024-02-05 13:33:00-05:00,188.889999,145.244995,72.419998,93.040001,404.899994,188.889999,145.244995,72.419998,93.040001,404.899994,...,188.850006,145.190002,72.410004,93.050003,404.920013,97576.0,26181.0,1029.0,8073.0,29481.0
2024-02-05 13:34:00-05:00,188.949997,145.244995,72.404999,93.099998,404.920013,188.949997,145.244995,72.404999,93.099998,404.920013,...,188.889999,145.255005,72.419998,93.040001,404.910004,100157.0,23872.0,2779.0,7018.0,20876.0
2024-02-05 13:35:00-05:00,188.964996,145.289993,72.510002,93.110001,404.880005,188.964996,145.289993,72.510002,93.110001,404.880005,...,188.940002,145.244995,72.419998,93.098999,404.899994,78313.0,34852.0,3247.0,8305.0,22082.0


## Training

In [22]:
class QLearningTrader:

    def __init__(self, num_actions, num_features, learning_rate, discount_factor, exploration_prob):
        self.num_actions = num_actions
        self.num_features = num_features
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob

        # Initialize Q-table with zeros
        self.q_table = np.zeros((num_actions, num_features))

        # Initialize state and action
        self.current_state = None
        self.current_action = None

    def choose_action(self, state):
        # Exploration-exploitation trade-off
        if np.random.uniform(0, 1) < self.exploration_prob:
            return np.random.choice(self.num_actions)  # Explore
        else:
            feature_index = np.argmax(state)
            return np.argmax(self.q_table[:, feature_index])  # Exploit

    def take_action(self, action, reward):
        # Update Q-table based on the observed reward
        if self.current_action is not None:
            feature_index = np.argmax(self.current_state)
            current_q_value = self.q_table[self.current_action, feature_index]
            new_q_value = (1 - self.learning_rate) * current_q_value + \
                           self.learning_rate * (reward + self.discount_factor * np.max(self.q_table[:, feature_index]))
            self.q_table[self.current_action, feature_index] = new_q_value

        # Update current state and action
        self.current_state = None
        self.current_action = action

    def observe_real_time_data(self, identifier, i, n):
        # Fetch real-time data
        real_time_data = fetch_real_time_data(identifier, i, n)

        # Extract features from real-time data
        self.current_state = np.array([real_time_data['open'], real_time_data['high'],
                                       real_time_data['low'], real_time_data['close'],
                                       real_time_data['volume']])

    def observe_next_state(self, identifier, i, n):
        # Update the current state with the observed next state
        self.current_state = fetch_real_time_data(identifier, i, n)

In [26]:
def calculate_reward(action, current_close, next_close):

    if action == 0:  # Buy
        return 1.0 if next_close > current_close else -1.0
    elif action == 1:  # Sell
        return 1.0 if next_close < current_close else -1.0
    else:  # Hold
        return 1.0 if next_close > current_close else -1.0 if next_close < current_close else 0.0
    
def fetch_real_time_data(identifier, i, n):

        # source = 'nasdaq_basic'
        # response = intrinio.SecurityApi().get_security_realtime_price(identifier, source=source)

        date = x_fridays_ago(n - i)
        
        try:
            price_data = yf.download(identifier, start=date, end=date + timedelta(1), interval='1d', progress=False)

            return {
                'open': price_data['Open'].values[0],
                'high': price_data['High'].values[0],
                'low': price_data['Low'].values[0],
                'close': price_data['Close'].values[0],
                'volume': price_data['Volume'].values[0]
            }
        except:
            price_data = yf.download(identifier, start=date - timedelta(1), end=date, interval='1d', progress=False)

            return { # need to change this to return close or something
                'open': price_data['Open'].values[0],
                'high': price_data['High'].values[0],
                'low': price_data['Low'].values[0],
                'close': price_data['Close'].values[0],
                'volume': price_data['Volume'].values[0]
            }

In [19]:
def take_action(self, action, reward):

  # Update Q-table based on the observed reward
  if self.current_action is not None:
    feature_index = np.argmax(self.current_state)
    current_q_value = self.q_table[self.current_action, feature_index]
    new_q_value = (1 - self.learning_rate) * current_q_value + self.learning_rate * (reward + self.discount_factor *np.max(self.q_table[:, feature_index]))
    self.q_table[self.current_action, feature_index] = new_q_value

  # Update current state and action
  self.current_state = None
  self.current_action = action

In [21]:
def calculate_profit_loss(initial_balance, suggested_action, current_close, next_close, quantity):

    if suggested_action == "Buy":
        return (next_close - current_close) * quantity
    elif suggested_action == "Sell":
        return (current_close - next_close) * quantity
    else:  # Hold
        return 0.0

In [29]:
def calculate_final_profit(identifier, initial_balance, quantity, num_iterations, learning_rate, discount_factor, exploration_prob):
    num_actions = 3
    num_features = 5
    q_trader = QLearningTrader(num_actions, num_features, learning_rate, discount_factor, exploration_prob)

    for i in range(num_iterations):
        q_trader.observe_real_time_data(identifier, i, num_iterations)

        action = q_trader.choose_action(q_trader.current_state)
        current_close = q_trader.current_state[3]

        # time.sleep(1)  # Introduce a delay before fetching the next real-time data

        q_trader.observe_next_state(identifier, i, num_iterations)
        next_close = q_trader.current_state['close']

        reward = calculate_reward(action, current_close, next_close)
        q_trader.take_action(action, reward)

    # Fetch real-time data just after the last iteration
    final_real_time_data = fetch_real_time_data(identifier, num_iterations, num_iterations)

    # Get the final suggested action based on the last state in the Q-table
    final_suggested_action = ["Buy", "Sell", "Hold"][np.argmax(q_trader.q_table[:, np.argmax(q_trader.current_state)])]

    # Calculate profit based on the final suggested action
    final_profit = calculate_profit_loss(initial_balance, final_suggested_action, current_close, final_real_time_data['close'], quantity)

    print(f"Final Suggested Action: {final_suggested_action}, Final Profit: {final_profit}")

## Testing

In [32]:
security_identifier = 'AAPL'
results = calculate_final_profit(security_identifier, initial_balance = 100, 
                       quantity = 10, num_iterations = 100, learning_rate = 0.1, 
                       discount_factor = 0.9, exploration_prob = 0.2)


1 Failed download:
['AAPL']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2022-04-15 -> 2022-04-16)')

1 Failed download:
['AAPL']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2022-04-15 -> 2022-04-16)')

1 Failed download:
['AAPL']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2023-04-07 -> 2023-04-08)')

1 Failed download:
['AAPL']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2023-04-07 -> 2023-04-08)')


Final Suggested Action: Sell, Final Profit: 65.69992065429688
