In [13]:
# Import needed libraries 

import pandas as pd 
import os
import datetime
from datetime import timedelta
import numpy as np 
from scipy.signal import argrelextrema
import alpaca_trade_api as tradeapi 
import matplotlib.pyplot as plt 
import matplotlib.dates as mpdates
from mplfinance.original_flavor import candlestick_ohlc
from dotenv import load_dotenv
from itertools import islice
import hvplot.pandas

In [14]:
# Load the .env file so the os.getenv calls below can read the Alpaca credentials

load_dotenv()

True

In [15]:
# Read the Alpaca API key and secret key from environment variables
# (os.getenv returns None when a variable is not set)

alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

In [16]:
# Initiate the Alpaca REST client (API version "v2") with the key pair read above

api = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version = "v2"
)

In [17]:
# Parameters for the stock data requested from Alpaca

# Bar size: 5 minute bars
time_frame = "5min"

# Ticker symbol being analyzed
stock_symbol = "QQQ"

# First and last day of the analysis window, as ISO-8601 strings
# built from midnight America/New_York on each date
start_date, end_date = (
    pd.Timestamp(day, tz="America/New_York").isoformat()
    for day in ("2021-02-01", "2021-06-01")
)

In [18]:
# Function to call daily stock data

def get_stock_data(api, stock_symbol, time_frame, current_date_iso, next_day_date_iso):
    """Request bars for one symbol over one time window and return them as a frame.

    Parameters
    ----------
    api : object exposing ``get_bars(symbol, timeframe, start=..., end=...)``
        whose result has a ``df`` attribute (here the Alpaca REST client).
    stock_symbol : ticker passed through to ``get_bars``.
    time_frame : bar size passed through to ``get_bars``.
    current_date_iso : start of the window, passed as ``start``.
    next_day_date_iso : end of the window, passed as ``end``.

    Returns
    -------
    The ``df`` attribute of the ``get_bars`` result.
    """
    bars = api.get_bars(
        stock_symbol,
        time_frame,
        start=current_date_iso,
        end=next_day_date_iso,
    )
    return bars.df

# Displays the information pulled for working through code

# stock_data = get_stock_data(api, stock_symbol, time_frame, start_date, end_date)
# stock_data.info()
# display(stock_data.head())
# display(stock_data.tail())


In [19]:
# Function to prepare daily stock data to identify double top/bottom patterns and prepare for targets
# Includes establishing a polynomial fit and assigning new columns for localized min/max

# Polynomial Degree

polynomial_degree = 25

def polynomial_min_max_fit(stock_data, polynomial_degree):
    """Smooth the OHLC series with polynomial fits and flag local extrema.

    Each of the ``open``, ``high``, ``low`` and ``close`` columns is fitted
    with ``np.polyfit`` against the integer bar position and evaluated at
    every bar. Local minima/maxima are then located on the fitted close
    curve with ``argrelextrema`` (``order=5``).

    Parameters
    ----------
    stock_data : DataFrame with ``open``, ``high``, ``low`` and ``close``
        columns. Its index is moved into a regular column by ``reset_index``.
    polynomial_degree : degree handed to ``np.polyfit``.

    Returns
    -------
    DataFrame indexed 0..n-1 holding the original columns, the four
    ``poly_fit_*`` columns, ``minima`` (-1 at a local minimum, else 0),
    ``maxima`` (1 at a local maximum, else 0) and the zero-initialised
    target columns ``dbl_top_target``, ``dbl_bot_target`` and
    ``long_short_target``.
    """
    bar_count = len(stock_data.index)
    bar_positions = np.arange(bar_count)

    # One fit per price field, evaluated back on the same bar positions
    # NOTE(review): a high-degree fit on raw integer positions may be poorly
    # conditioned; numpy emits a warning in that case - confirm acceptable
    fitted = {}
    for price_field in ("open", "high", "low", "close"):
        coefficients = np.polyfit(bar_positions, stock_data[price_field], polynomial_degree)
        fitted["poly_fit_" + price_field] = np.polyval(coefficients, bar_positions)

    # Extrema are taken from the fitted close curve only
    smoothed_close = fitted["poly_fit_close"]
    trough_positions = argrelextrema(smoothed_close, np.less, order=5)[0]
    peak_positions = argrelextrema(smoothed_close, np.greater, order=5)[0]

    # Move the original index into a column so the integer positions of the
    # polynomial fit line up with the row each data point corresponds to
    poly_df = pd.DataFrame(fitted)
    updated_stock_data = pd.concat(
        [stock_data.reset_index(), poly_df], axis='columns', join='inner'
    )

    # Local minima are marked -1 in 'minima', local maxima 1 in 'maxima'
    minima_flags = np.zeros(bar_count, dtype="int64")
    minima_flags[trough_positions] = -1
    maxima_flags = np.zeros(bar_count, dtype="int64")
    maxima_flags[peak_positions] = 1
    updated_stock_data["minima"] = minima_flags
    updated_stock_data["maxima"] = maxima_flags

    # Target columns start at 0 and are filled in by later steps:
    # double top / double bottom spans and the long (1) / short (-1) signal
    for target_column in ("dbl_top_target", "dbl_bot_target", "long_short_target"):
        updated_stock_data[target_column] = 0

    return updated_stock_data

# For data checking/confirmation

# updated_stock_data = polynomial_min_max_fit(stock_data,polynomial_degree)

In [20]:
# # Plot data
# # Plot the stock data and identified minima

# plt.figure(figsize=(15, 5))
# plt.plot(stock_data.index, stock_data["close"], label='Close Prices', alpha=0.7)

# # # Plot polynomial fit

# plt.plot(stock_data.index[:min_length], y_polynomial_close, '-', markersize=1.0, color='black', alpha=0.9, label='Polynomial Fit')

# # # Plot red dots at local minima and blue dots at local maxima

# plt.scatter(stock_data.index[local_poly_minima], y_polynomial_close[local_poly_minima], color='red', label='Local Minima')
# plt.scatter(stock_data.index[local_poly_maxima],y_polynomial_close[local_poly_maxima], color="blue", label = "Local Maxima")

In [21]:
# Define the time independent DF for double top/bottom identification
# Timestamp remains a column, but can identify pattern from peak to peak immediately

def time_independent_data(updated_stock_data):
    """Return only the rows flagged as a local minimum or maximum.

    Rows are kept when ``minima == -1`` or ``maxima == 1``. The selection is
    done with a single boolean mask instead of appending one row at a time,
    so column dtypes are preserved and no per-row frame copies are made.

    Parameters
    ----------
    updated_stock_data : DataFrame with at least ``minima`` and ``maxima``
        columns (as produced by ``polynomial_min_max_fit``).

    Returns
    -------
    DataFrame with a fresh 0..k-1 index. Its first column,
    ``time_dependent_index``, holds each kept row's index label in
    ``updated_stock_data``; the remaining columns are the fixed list below
    (any that the input lacks are filled with NaN). ``long_short_target``
    is not carried over.
    """
    extrema_columns = [
        "timestamp", "close", "high", "low", "trade_count", "open", "volume",
        "vwap", "poly_fit_open", "poly_fit_high", "poly_fit_low",
        "poly_fit_close", "minima", "maxima", "dbl_top_target", "dbl_bot_target",
    ]

    is_extremum = (
        updated_stock_data["minima"].eq(-1) | updated_stock_data["maxima"].eq(1)
    )

    # Name the index before resetting it so the original row label lands in
    # 'time_dependent_index' regardless of what the input index was called
    time_independent_df = (
        updated_stock_data.loc[is_extremum]
        .reindex(columns=extrema_columns)
        .rename_axis("time_dependent_index")
        .reset_index()
    )

    return time_independent_df

# Data confirmation/check

# time_independent_df = time_independent_data(updated_stock_data)
# time_independent_df.head(10)

In [22]:
# Function to identify daily double top/bottom patterns
# Inherently there should only be 1 identification of the pattern as it is considered a trend reversal pattern
# The loop will break once the pattern is identified

def identify_double_patterns(time_independent_df, updated_stock_data):
    """Find the first double top / double bottom and write it to the targets.

    The extrema in ``time_independent_df`` are scanned in order, five
    consecutive rows at a time:

    * row 0 - start of the move (a minimum for a double top, a maximum for
      a double bottom)
    * row 1 - first peak / valley
    * row 2 - trough / peak between the two
    * row 3 - second peak / valley
    * row 4 - trigger point

    A pattern is accepted when the fitted close of row 3 lies between 95 %
    of row 1's ``poly_fit_low`` and 105 % of row 1's ``poly_fit_high``, and
    the trigger has moved past row 2 (``<=`` for a double top, ``>=`` for a
    double bottom). The scan stops at the first accepted pattern.

    Parameters
    ----------
    time_independent_df : DataFrame of extrema with ``minima``, ``maxima``,
        ``poly_fit_close``, ``poly_fit_low``, ``poly_fit_high`` and
        ``time_dependent_index`` columns, indexed 0..k-1.
    updated_stock_data : DataFrame with ``dbl_top_target``,
        ``dbl_bot_target`` and ``long_short_target`` columns whose index
        labels are the values found in ``time_dependent_index``.
        It is modified in place.

    Returns
    -------
    (updated_stock_data, x_f) where ``x_f`` is the row label of the trigger
    point, or 0 when no pattern was found. On a match, ``long_short_target``
    is set to -1 (double top) or 1 (double bottom) at ``x_f`` and the
    matching ``dbl_*_target`` column is set to 1 from the start row through
    ``x_f`` inclusive.
    """
    # Tolerance band applied to the first peak/valley's fitted low and high
    lower_band = .95
    upper_band = 1.05

    # Row label of the trigger point; stays 0 when nothing is found
    x_f = 0

    # Every candidate needs four rows after it; clamping at 0 keeps days with
    # fewer than four extrema from raising
    last_start = max(0, len(time_independent_df) - 4)

    for position in range(last_start):
        start_row = time_independent_df.iloc[position]
        starts_at_minimum = start_row["minima"] == -1
        starts_at_maximum = start_row["maxima"] == 1
        if not (starts_at_minimum or starts_at_maximum):
            continue

        first_extreme = time_independent_df.iloc[position + 1]
        c = time_independent_df.iloc[position + 2]["poly_fit_close"]
        d = time_independent_df.iloc[position + 3]["poly_fit_close"]
        trigger_row = time_independent_df.iloc[position + 4]
        e = trigger_row["poly_fit_close"]

        within_band = (
            first_extreme["poly_fit_low"] * lower_band
            < d
            < first_extreme["poly_fit_high"] * upper_band
        )
        if not within_band:
            continue

        if starts_at_minimum:
            # Double top: trigger falls to or below the trough -> short
            confirmed, target_column, signal = (e <= c), "dbl_top_target", -1
        else:
            # Double bottom: trigger rises to or above the peak -> long
            confirmed, target_column, signal = (e >= c), "dbl_bot_target", 1

        if not confirmed:
            continue

        # Cast to int: a row taken from an all-numeric frame comes back as floats
        x_0 = int(start_row["time_dependent_index"])
        x_f = int(trigger_row["time_dependent_index"])

        updated_stock_data.at[x_f, "long_short_target"] = signal
        # .loc label slices include both endpoints
        updated_stock_data.loc[x_0:x_f, target_column] = 1
        break

    return updated_stock_data, x_f

# updated_stock_data, x_f = identify_double_patterns(time_independent_df,updated_stock_data)

# print(x_f)



In [23]:
# # Data Checking

# display(updated_stock_data.head(20))
# print(updated_stock_data.loc[x_f])

In [24]:
# Loop to collect and assess daily data one day at a time

# Column layout of the combined frame. It is only used as a placeholder while
# no day has returned data; otherwise master_df is built from the daily frames.
master_columns = [
    'timestamp',          # bar timestamp
    'close',              # close price
    'high',               # high price
    'low',                # low price
    'trade_count',        # trade count
    'open',               # open price
    'volume',             # volume
    'vwap',               # volume-weighted average price
    'poly_fit_open',      # polynomial fit of open
    'poly_fit_high',      # polynomial fit of high
    'poly_fit_low',       # polynomial fit of low
    'poly_fit_close',     # polynomial fit of close
    'minima',             # -1 at a local minimum
    'maxima',             # 1 at a local maximum
    'dbl_top_target',     # 1 across a double top
    'dbl_bot_target',     # 1 across a double bottom
    'long_short_target',  # ML model target: long (1) / short (-1)
]

daily_dataframes = []

master_df = pd.DataFrame(columns=master_columns)

# NOTE(review): start_date carries a fixed -05:00 offset, so the cursor keeps
# that offset for the whole range (the value printed after the loop still
# shows -05:00 in June) - confirm the resulting daily windows are intended.
current_date = pd.to_datetime(start_date)
next_day_date = current_date + pd.offsets.BDay(1)
final_date = pd.to_datetime(end_date)

print(current_date)
print(next_day_date)

while current_date <= final_date:

    # Fetch stock data for the window [current_date, next_day_date)
    stock_data = get_stock_data(
        api,
        stock_symbol,
        time_frame,
        current_date.isoformat(),
        next_day_date.isoformat(),
    )

    # Days with no bars (e.g. market holidays) are skipped
    if len(stock_data.index) != 0:

        # Polynomial fit plus local min/max flags for the day
        updated_stock_data = polynomial_min_max_fit(stock_data, polynomial_degree)

        # Isolate mins/maxes
        time_independent_df = time_independent_data(updated_stock_data)

        # Mark the day's double top/bottom; x_f is the trigger row for that day
        updated_stock_data, x_f = identify_double_patterns(time_independent_df, updated_stock_data)

        daily_dataframes.append(updated_stock_data)

    # Advance both ends of the window by one business day
    next_day_date = next_day_date + pd.offsets.BDay(1)
    current_date = current_date + pd.offsets.BDay(1)

# Combine once after the loop instead of re-concatenating on every iteration
if daily_dataframes:
    master_df = pd.concat(daily_dataframes, ignore_index=True)
    


2021-02-01 00:00:00-05:00
2021-02-02 00:00:00-05:00


  polynomial_coefficients_open = np.polyfit(x_data, stock_data['open'][:min_length], polynomial_degree)
  polynomial_coefficients_high = np.polyfit(x_data, stock_data['high'][:min_length], polynomial_degree)
  polynomial_coefficients_low = np.polyfit(x_data, stock_data['low'][:min_length], polynomial_degree)
  polynomial_coefficients_close = np.polyfit(x_data, stock_data['close'][:min_length], polynomial_degree)
  polynomial_coefficients_open = np.polyfit(x_data, stock_data['open'][:min_length], polynomial_degree)
  polynomial_coefficients_high = np.polyfit(x_data, stock_data['high'][:min_length], polynomial_degree)
  polynomial_coefficients_low = np.polyfit(x_data, stock_data['low'][:min_length], polynomial_degree)
  polynomial_coefficients_close = np.polyfit(x_data, stock_data['close'][:min_length], polynomial_degree)
  polynomial_coefficients_open = np.polyfit(x_data, stock_data['open'][:min_length], polynomial_degree)
  polynomial_coefficients_high = np.polyfit(x_data, stock_data['

In [28]:
# Structure and size of the combined frame
master_df.info()
print(len(master_df))
print(type(master_df))

# Column totals for the three target columns, one per line
print(
    *(
        sum(master_df[target_name])
        for target_name in ("dbl_top_target", "dbl_bot_target", "long_short_target")
    ),
    sep="\n",
)

# First and last 50 rows
display(master_df.head(50))
display(master_df.tail(50))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15458 entries, 0 to 15457
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   timestamp          15458 non-null  datetime64[ns, UTC]
 1   close              15458 non-null  float64            
 2   high               15458 non-null  float64            
 3   low                15458 non-null  float64            
 4   trade_count        15458 non-null  int64              
 5   open               15458 non-null  float64            
 6   volume             15458 non-null  int64              
 7   vwap               15458 non-null  float64            
 8   poly_fit_open      15458 non-null  float64            
 9   poly_fit_high      15458 non-null  float64            
 10  poly_fit_low       15458 non-null  float64            
 11  poly_fit_close     15458 non-null  float64            
 12  minima             15458 non-null  int64      

Unnamed: 0,timestamp,close,high,low,trade_count,open,volume,vwap,poly_fit_open,poly_fit_high,poly_fit_low,poly_fit_close,minima,maxima,dbl_top_target,dbl_bot_target,long_short_target
0,2021-02-01 09:00:00+00:00,317.78,317.79,317.26,457,317.62,152785,317.615546,317.589182,317.713881,317.194753,317.668424,0,0,0,0,0
1,2021-02-01 09:05:00+00:00,317.63,317.79,317.54,42,317.75,5968,317.621602,317.741116,317.920431,317.621392,317.859689,0,0,0,0,0
2,2021-02-01 09:10:00+00:00,317.85,317.9,317.65,41,317.65,5138,317.836724,317.855104,318.045421,317.816207,317.940576,0,0,0,0,0
3,2021-02-01 09:15:00+00:00,318.15,318.25,317.97,77,318.0,18913,318.110307,317.933154,318.110568,317.885225,317.961846,0,1,0,0,0
4,2021-02-01 09:20:00+00:00,318.01,318.33,318.0,88,318.11,20816,318.259888,317.98029,318.134465,317.893536,317.954718,0,0,0,0,0
5,2021-02-01 09:25:00+00:00,318.11,318.16,318.0,42,318.08,7704,318.125444,318.002784,318.132132,317.878397,317.937047,0,0,0,0,0
6,2021-02-01 09:30:00+00:00,318.06,318.17,318.06,157,318.09,32477,318.148594,318.007016,318.11504,317.858889,317.917926,0,0,0,0,0
7,2021-02-01 09:35:00+00:00,317.84,318.08,317.81,32,318.05,5522,317.879103,317.998796,318.09142,317.842885,317.901036,0,0,0,0,0
8,2021-02-01 09:40:00+00:00,317.71,317.91,317.7,61,317.86,14239,317.7916,317.983015,318.06674,317.831912,317.887013,0,0,0,0,0
9,2021-02-01 09:45:00+00:00,317.49,317.81,317.49,25,317.81,3617,317.720274,317.963521,318.044244,317.824431,317.875049,0,0,0,0,0


Unnamed: 0,timestamp,close,high,low,trade_count,open,volume,vwap,poly_fit_open,poly_fit_high,poly_fit_low,poly_fit_close,minima,maxima,dbl_top_target,dbl_bot_target,long_short_target
15408,2021-05-28 17:35:00+00:00,334.53,334.87,334.48,1967,334.805,221634,334.701173,334.660178,334.726939,334.49368,334.591794,0,0,0,0,0
15409,2021-05-28 17:40:00+00:00,334.48,334.565,334.38,1885,334.52,267678,334.483766,334.584459,334.659499,334.424441,334.529084,0,0,0,0,0
15410,2021-05-28 17:45:00+00:00,334.3,334.5,334.18,2514,334.48,347212,334.296986,334.521328,334.604432,334.368759,334.48026,0,0,0,0,0
15411,2021-05-28 17:50:00+00:00,334.46,334.46,334.25,1445,334.305,155678,334.376539,334.47254,334.562956,334.328047,334.446471,0,0,0,0,0
15412,2021-05-28 17:55:00+00:00,334.43,334.49,334.32,1317,334.45,128026,334.40944,334.439208,334.535688,334.303074,334.428198,0,0,0,0,0
15413,2021-05-28 18:00:00+00:00,334.4799,334.4806,334.34,1920,334.43,214047,334.435129,334.421748,334.52262,334.293921,334.425223,-1,0,0,0,0
15414,2021-05-28 18:05:00+00:00,334.39,334.54,334.33,1981,334.47,250928,334.444026,334.419846,334.523134,334.299963,334.436628,0,0,0,0,0
15415,2021-05-28 18:10:00+00:00,334.3399,334.43,334.21,2164,334.39,299300,334.342129,334.43247,334.536028,334.319888,334.460823,0,0,0,0,0
15416,2021-05-28 18:15:00+00:00,334.413,334.47,334.27,1787,334.32,201644,334.396204,334.457903,334.559584,334.35174,334.495608,0,0,0,0,0
15417,2021-05-28 18:20:00+00:00,334.4799,334.49,334.32,1839,334.4,224579,334.404196,334.493825,334.591646,334.392995,334.538268,0,0,0,0,0


In [26]:
# Show the value the date cursor holds after the collection loop
print(current_date)

2021-06-01 00:00:00-05:00


In [27]:
# Interactive hvplot of master_df with the timestamp column on the x-axis
master_df.hvplot(x = "timestamp")



  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for dt in data.dt.to_pydatetime()]
  dts = [dt.replace(tzinfo=None) for d