In [1]:
# Import needed libraries 

import pandas as pd 
import os
import datetime
from datetime import timedelta
import numpy as np 
from scipy.signal import argrelextrema
import alpaca_trade_api as tradeapi 
import matplotlib.pyplot as plt 
import matplotlib.dates as mpdates
from mplfinance.original_flavor import candlestick_ohlc
from dotenv import load_dotenv
from itertools import islice
from pandas .tseries.offsets import DateOffset

In [2]:
# Load .env file

load_dotenv()

True

In [3]:
# Set Alpaca API key and secret
ALPACA_API_KEY = os.getenv("ALPACA_API_KEY")
ALPACA_SECRET_KEY = os.getenv("ALPACA_SECRET_KEY")
ALPACA_ENDPOINT_KEY = os.getenv("ALPACA_END_POINT")


# Create the Alpaca API object
api = tradeapi.REST(
    ALPACA_API_KEY,
    ALPACA_SECRET_KEY,
    api_version="v2",
    base_url= ALPACA_ENDPOINT_KEY
)

In [4]:
# Parameters for Stock Data from Alpacas
# Establish time frame (5 minute)

time_frame = "5min"

# Identify what stock symbol is trading

stock_symbol = "QQQ"

# Identify what start date to begin data analysis

start_date = pd.Timestamp("2021-02-01", tz="America/New_York").isoformat()

# Identify what end date to finalize data analysis

end_date = pd.Timestamp("2021-06-01", tz="America/New_York").isoformat()

In [5]:
print(start_date)
print(end_date)

2021-02-01T00:00:00-05:00
2021-06-01T00:00:00-04:00


In [6]:
# Function to call daily stock data

def get_stock_data(api, stock_symbol, time_frame, start_date, end_date):
    
    # Assuming api.get_bars returns a DataFrame with a 'df' attribute
    
    stock_data = api.get_bars(
        stock_symbol, 
        time_frame, 
        start=start_date, 
        end=end_date
        ).df
    return stock_data

stock_data = get_stock_data(api, stock_symbol, time_frame, start_date, end_date)

# Displays the information pulled for working through code

stock_data.info()
display(stock_data.head())
display(stock_data.tail())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 15458 entries, 2021-02-01 09:00:00+00:00 to 2021-05-28 21:55:00+00:00
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   close        15458 non-null  float64
 1   high         15458 non-null  float64
 2   low          15458 non-null  float64
 3   trade_count  15458 non-null  int64  
 4   open         15458 non-null  float64
 5   volume       15458 non-null  int64  
 6   vwap         15458 non-null  float64
dtypes: float64(5), int64(2)
memory usage: 966.1 KB


Unnamed: 0_level_0,close,high,low,trade_count,open,volume,vwap
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-02-01 09:00:00+00:00,317.78,317.79,317.26,457,317.62,152785,317.615546
2021-02-01 09:05:00+00:00,317.63,317.79,317.54,42,317.75,5968,317.621602
2021-02-01 09:10:00+00:00,317.85,317.9,317.65,41,317.65,5138,317.836724
2021-02-01 09:15:00+00:00,318.15,318.25,317.97,77,318.0,18913,318.110307
2021-02-01 09:20:00+00:00,318.01,318.33,318.0,88,318.11,20816,318.259888


Unnamed: 0_level_0,close,high,low,trade_count,open,volume,vwap
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-05-28 21:25:00+00:00,333.84,333.84,333.8,12,333.8,788,333.814201
2021-05-28 21:30:00+00:00,333.8,333.82,333.8,15,333.82,2008,333.814337
2021-05-28 21:45:00+00:00,333.84,333.84,333.82,11,333.82,1115,333.827005
2021-05-28 21:50:00+00:00,333.83,333.83,333.82,6,333.82,702,333.828604
2021-05-28 21:55:00+00:00,333.82,333.83,333.82,11,333.83,1023,333.82303


In [7]:
# Function to prepare daily stock data to identify double top/bottom patterns and prepare for targets
# Includes establishing a polynomial fit and assigning new columns for localized min/max

# Polynomial Degree

polynomial_degree = 25

def polynomial_min_max_fit(stock_data, polynomial_degree):
    
    min_length = len(stock_data.index)
    x_data = np.arange(min_length)

    # Polynomial fitting
    polynomial_coefficients_open = np.polyfit(x_data, stock_data['open'][:min_length], polynomial_degree)
    polynomial_coefficients_high = np.polyfit(x_data, stock_data['high'][:min_length], polynomial_degree)
    polynomial_coefficients_low = np.polyfit(x_data, stock_data['low'][:min_length], polynomial_degree)
    polynomial_coefficients_close = np.polyfit(x_data, stock_data['close'][:min_length], polynomial_degree)

    # Evaluate the polynomial fit for plotting
    y_polynomial_open = np.polyval(polynomial_coefficients_open, x_data)
    y_polynomial_high = np.polyval(polynomial_coefficients_high, x_data)
    y_polynomial_low = np.polyval(polynomial_coefficients_low, x_data)
    y_polynomial_close = np.polyval(polynomial_coefficients_close, x_data)

    # Identify local extrema for polynomial fit data (minima and maxima)
    local_poly_minima = argrelextrema(y_polynomial_close, np.less, order=5)[0]
    local_poly_maxima = argrelextrema(y_polynomial_close, np.greater, order=5)[0]

    # Convert the close price polynomial fit data into a dataframe
    # This is done for OHLC poly fit data

    poly_df = pd.DataFrame(y_polynomial_open)
    columns = ['poly_fit_open']
    poly_df.columns = columns
    poly_df = poly_df.assign(poly_fit_high = y_polynomial_high)
    poly_df = poly_df.assign(poly_fit_low = y_polynomial_low)
    poly_df = poly_df.assign(poly_fit_close = y_polynomial_close)

    # Reset the index of the original updated ticker dataframe to concat with the polynomial dataframe that does not include a timeseries
    # This will ensure that the indexed intergers of the ploynomial fit align with the time each data point corresponds to

    updated_stock_data = stock_data.reset_index()
    updated_stock_data = pd.concat([updated_stock_data, poly_df], axis='columns', join='inner')
    updated_stock_data.head()

    # Add minima and maxima column to the DataFrame

    updated_stock_data["minima"] = 0
    updated_stock_data["maxima"] = 0
    updated_stock_data.head()

    # Mark rows with local minima as 1 in the 'minima' column

    for index in local_poly_minima:
        updated_stock_data.at[index, 'minima'] = -1

    for index in local_poly_maxima:
        updated_stock_data.at[index, "maxima"] = 1

    # Create Target Columns - Double Top Target & Double Bottom Target
        
    updated_stock_data["dbl_top_target"] = 0
    updated_stock_data["dbl_bot_target"] = 0

    # # Plot data: COMMENTED OUT TO NOT REPEAT PLOTS AT END OF PROGRAM
    # # Plot the stock data and identified minima

    # plt.figure(figsize=(15, 5))
    # plt.plot(stock_data.index, stock_data["close"], label='Close Prices', alpha=0.7)

    # # Plot polynomial fit

    # plt.plot(stock_data.index[:min_length], y_polynomial_close, '-', markersize=1.0, color='black', alpha=0.9, label='Polynomial Fit')

    # # Plot red dots at local minima and blue dots at local maxima

    # plt.scatter(stock_data.index[local_poly_minima], y_polynomial_close[local_poly_minima], color='red', label='Local Minima')
    # plt.scatter(stock_data.index[local_poly_maxima],y_polynomial_close[local_poly_maxima], color="blue", label = "Local Maxima")
    
    return updated_stock_data

# For data checking/confirmation
updated_stock_data = polynomial_min_max_fit(stock_data,polynomial_degree)
updated_stock_data.head()

  polynomial_coefficients_open = np.polyfit(x_data, stock_data['open'][:min_length], polynomial_degree)
  polynomial_coefficients_high = np.polyfit(x_data, stock_data['high'][:min_length], polynomial_degree)
  polynomial_coefficients_low = np.polyfit(x_data, stock_data['low'][:min_length], polynomial_degree)
  polynomial_coefficients_close = np.polyfit(x_data, stock_data['close'][:min_length], polynomial_degree)


Unnamed: 0,timestamp,close,high,low,trade_count,open,volume,vwap,poly_fit_open,poly_fit_high,poly_fit_low,poly_fit_close,minima,maxima,dbl_top_target,dbl_bot_target
0,2021-02-01 09:00:00+00:00,317.78,317.79,317.26,457,317.62,152785,317.615546,312.930406,313.15773,312.744286,312.916258,0,0,0,0
1,2021-02-01 09:05:00+00:00,317.63,317.79,317.54,42,317.75,5968,317.621602,313.056023,313.282869,312.869895,313.04218,0,0,0,0
2,2021-02-01 09:10:00+00:00,317.85,317.9,317.65,41,317.65,5138,317.836724,313.180801,313.407171,312.994663,313.16726,0,0,0,0
3,2021-02-01 09:15:00+00:00,318.15,318.25,317.97,77,318.0,18913,318.110307,313.304743,313.53064,313.118597,313.291503,0,0,0,0
4,2021-02-01 09:20:00+00:00,318.01,318.33,318.0,88,318.11,20816,318.259888,313.427853,313.653279,313.2417,313.414912,0,0,0,0


In [None]:
# # Shift dataframe by one datapoint to set target columns to train  ML model 

# updated_stock_data['close_lagged'] = updated_stock_data['close'].shift(1)
# updated_stock_data['high_lagged'] = updated_stock_data['high'].shift(1)
# updated_stock_data['low_lagged'] = updated_stock_data['low'].shift(1)
# updated_stock_data['open_lagged'] = updated_stock_data['open'].shift(1)
# updated_stock_data['trade_count_lagged'] = updated_stock_data['trade_count'].shift(1)
# updated_stock_data['volume_lagged'] = updated_stock_data['volume'].shift(1)
# updated_stock_data['vwap_lagged'] = updated_stock_data['vwap'].shift(1)
# updated_stock_data['poly_open_lagged'] = updated_stock_data['poly_fit_open'].shift(1)
# updated_stock_data['poly_high_lagged'] = updated_stock_data['poly_fit_high'].shift(1)
# updated_stock_data['poly_low_lagged'] = updated_stock_data['poly_fit_low'].shift(1)
# updated_stock_data['poly_close_lagged'] = updated_stock_data['poly_fit_close'].shift(1)
# updated_stock_data['minima_lagged'] = updated_stock_data['minima'].shift(1)
# updated_stock_data['maxima_lagged'] = updated_stock_data['maxima'].shift(1)

# display(updated_stock_data.head())
# display(updated_stock_data.tail())

In [None]:
# # Plot data
# # Plot the stock data and identified minima

# plt.figure(figsize=(15, 5))
# plt.plot(stock_data.index, stock_data["close"], label='Close Prices', alpha=0.7)

# # # Plot polynomial fit

# plt.plot(stock_data.index[:min_length], y_polynomial_close, '-', markersize=1.0, color='black', alpha=0.9, label='Polynomial Fit')

# # # Plot red dots at local minima and blue dots at local maxima

# plt.scatter(stock_data.index[local_poly_minima], y_polynomial_close[local_poly_minima], color='red', label='Local Minima')
# plt.scatter(stock_data.index[local_poly_maxima],y_polynomial_close[local_poly_maxima], color="blue", label = "Local Maxima")

In [8]:
# Define the time independent DF for double top/bottom identification
# Timestamp remains a column, but can identify pattern from peak to peak immediately

def time_independent_data(updated_stock_data):

    # Define and establish time independent DF

    time_independent_df = []
    time_independent_df = pd.DataFrame(time_independent_df, columns = ["timestamp","close","high","low","trade_count","open","volume","vwap","poly_fit_open","poly_fit_high","poly_fit_low","poly_fit_close","minima","maxima", "dbl_top_target","dbl_bot_target"])

    # Loop to iterate through all rows of stock data and write min/max to new DF

    for index, row in islice(updated_stock_data.iterrows(), 0, None):

        # Assign to rows only those that contain local min or max

        if (updated_stock_data.at[index,"minima"] == -1):
            time_independent_df.loc[index] = row
            a = updated_stock_data.iloc[index]["poly_fit_close"]
        elif (updated_stock_data.at[index,"maxima"] == 1):
            time_independent_df.loc[index] = row
            a = updated_stock_data.iloc[index]["poly_fit_close"]

    # Reset time dependent index
    # Set new time independent index

    time_independent_df.reset_index(inplace = True)
    time_independent_df.rename(columns={"index":"time_dependent_index"}, inplace = True)

    return time_independent_df

# Data confirmation/check

time_independent_df = time_independent_data(updated_stock_data)
time_independent_df.head(10)

Unnamed: 0,time_dependent_index,timestamp,close,high,low,trade_count,open,volume,vwap,poly_fit_open,poly_fit_high,poly_fit_low,poly_fit_close,minima,maxima,dbl_top_target,dbl_bot_target
0,1536,2021-02-11 11:50:00+00:00,334.1,334.22,334.06,51,334.16,5938,334.109267,335.489916,335.593558,335.376124,335.490674,0,1,0,0
1,4424,2021-03-05 19:40:00+00:00,307.535,308.24,307.14,9474,307.99,897074,307.727024,309.331801,309.647273,309.01703,309.327414,-1,0,0,0
2,5954,2021-03-17 22:20:00+00:00,322.15,322.22,322.15,40,322.22,8299,322.186672,318.284073,318.479015,318.075503,318.280644,0,1,0,0
3,7088,2021-03-25 23:00:00+00:00,311.59,311.64,311.54,18,311.58,3860,311.613834,313.906709,314.095617,313.700954,313.903994,-1,0,0,0
4,9364,2021-04-14 10:05:00+00:00,341.19,341.26,341.18,30,341.23,2466,341.214217,339.617838,339.733633,339.493057,339.617857,0,1,0,0
5,10262,2021-04-20 23:20:00+00:00,335.1,335.13,335.09,16,335.11,2150,335.111153,337.793903,337.933293,337.649807,337.791455,-1,0,0,0
6,11234,2021-04-28 11:05:00+00:00,340.03,340.06,339.86,93,339.96,20839,339.952775,339.996713,340.109649,339.872701,339.999674,0,1,0,0
7,13705,2021-05-17 16:35:00+00:00,323.24,323.27,322.7099,1683,322.73,169100,323.045953,321.629896,321.825519,321.441034,321.629637,-1,0,0,0
8,14951,2021-05-26 11:05:00+00:00,334.12,334.24,334.04,62,334.24,9224,334.163679,333.620895,333.724459,333.513186,333.61944,0,1,0,0
9,15103,2021-05-26 23:45:00+00:00,333.98,333.99,333.93,41,333.95,10923,333.962246,333.520122,333.622286,333.41438,333.521461,-1,0,0,0


In [9]:
# Function to identify daily double top/bottom patterns
# Inherently there should only be 1 identification of the pattern as it is considered a trend reversal pattern
# The loop will break once the pattern is identified

def identify_double_patterns(time_independent_df, updated_stock_data):

    # Initialize variables to identify double top/bottom patterns
    # Time dependent variable x_0 will always begin at a local min/max which also coincides
    # With the start of a trend into a potential double top/bottom
    
    x_0 = 0

    # Initiation of double top/bottom variable
    
    a = 0

    # First peak/valley of double top/bottom pattern
    
    b = 0

    # Trough/peak of double top/bottom pattern

    c = 0

    # Second peak/vallye of double top/bottom pattern
    
    d = 0

    # Trigger of double top/bottom pattern
    
    e = 0

    # Final time increment to finalize and trigger double top signal
    
    x_f = 0

    # Read through code to identify double top/bottom and assign to target columns.

    for index, row in islice(time_independent_df.iterrows(), 0, len(time_independent_df) - 4):

        # Check for double top
        # If found then assigns x_f final time for writing to targets in time dependent dataframe

        if (time_independent_df.at[index,"minima"] == -1):
            a = time_independent_df.iloc[index]["poly_fit_close"]
            b = time_independent_df.iloc[index + 1]["poly_fit_close"]
            c = time_independent_df.iloc[index + 2]["poly_fit_close"]
            d = time_independent_df.iloc[index + 3]["poly_fit_close"]
            e = time_independent_df.iloc[index + 4]["poly_fit_close"]
            x_0 = time_independent_df.iloc[index]["time_dependent_index"]

            if (time_independent_df.iloc[index + 1]["poly_fit_low"])*.95 < d < (time_independent_df.iloc[index + 1]["poly_fit_high"])*1.05 and (e <= c):
                x_f = time_independent_df.iloc[index + 4]["time_dependent_index"]
                updated_stock_data.at[x_f, "long_short_target"] = -1
                while (x_0 < x_f + 1):
                    updated_stock_data.at[x_0, "dbl_top_target"] = 1
                    x_0 = x_0 + 1
                break
            
            

        # Check for double bottom
        # If found then assigns x_f final time for writing to targets in time dependent dataframe
            
        elif (time_independent_df.at[index,"maxima"] == 1):
            a = time_independent_df.iloc[index]["poly_fit_close"]
            b = time_independent_df.iloc[index + 1]["poly_fit_close"]
            c = time_independent_df.iloc[index + 2]["poly_fit_close"]
            d = time_independent_df.iloc[index + 3]["poly_fit_close"]
            e = time_independent_df.iloc[index + 4]["poly_fit_close"]
            x_0 = time_independent_df.iloc[index]["time_dependent_index"] 

            if (time_independent_df.iloc[index+1]["poly_fit_low"])*.95 < d < (time_independent_df.iloc[index+1]["poly_fit_high"])*1.05 and (e >= c):
                x_f_min = time_independent_df.iloc[index + 4]["time_dependent_index"]
                updated_stock_data.at[x_f, "long_short_target"] = 1
                while (x_0 < x_f + 1):
                    updated_stock_data.at[x_0, "dbl_bot_target"] = 1
                    x_0 = x_0 + 1
                break
            
    return updated_stock_data, x_f

updated_stock_data, x_f = identify_double_patterns(time_independent_df,updated_stock_data)

print(x_f)

0


In [10]:
print(updated_stock_data.columns)

Index(['timestamp', 'close', 'high', 'low', 'trade_count', 'open', 'volume',
       'vwap', 'poly_fit_open', 'poly_fit_high', 'poly_fit_low',
       'poly_fit_close', 'minima', 'maxima', 'dbl_top_target',
       'dbl_bot_target', 'long_short_target'],
      dtype='object')


In [12]:
# Loop to collect and assess daily data one day at a time

master_df = {
    'timestamp': [],  # List of timestamps
    'close': [],      # List of close prices
    'high': [],       # List of high prices
    'low': [],        # List of low prices
    'trade_count': [], # List of trade counts
    'open': [],       # List of open prices
    'volume': [],     # List of volumes
    'vwap': [],       # List of volume-weighted average prices
    'poly_fit_open': [],    # List of polynomial fit open prices
    'poly_fit_high': [],    # List of polynomial fit high prices
    'poly_fit_low': [],     # List of polynomial fit low prices
    'poly_fit_close': [],   # List of polynomial fit close prices
    'minima': [],           # List of minima
    'maxima': [],           # List of maxima
    'dbl_top_target': [],   # List of double top targets
    'dbl_bot_target': [],    # List of double bottom targets
    'long_short_target': []       # List of ML Model target
}

daily_dataframes = []

master_df = pd.DataFrame(master_df)

current_date = pd.to_datetime(start_date)
next_day_date = current_date + pd.offsets.BDay(1)

print(current_date)
print(next_day_date)

while current_date <= (pd.to_datetime(end_date)):

    # next_day_date = next_day_date + pd.offsets.BDay(1)
    # current_date = current_date + pd.offsets.BDay(1)
    # Convert current_date and next_day_date to ISO format for API call

    current_date_iso = current_date.isoformat()
    # next_day_date = current_date + pd.offsets.BDay(1)
    next_day_date_iso = next_day_date.isoformat()

    #current_date = datetime.datetime.fromisoformat(current_date_iso) + pd.offsets.BDay(1)
    
    # Fetch stock data for the current day

    stock_data = get_stock_data(api, stock_symbol, time_frame, current_date_iso, next_day_date_iso)

    if len(stock_data.index) == 0:
        next_day_date = next_day_date + pd.offsets.BDay(1)
        current_date = current_date + pd.offsets.BDay(1)
        continue
    
    # Run polynomial fit function for updated stock data

    updated_stock_data = polynomial_min_max_fit(stock_data, polynomial_degree)
    
    # Isolate Mins/maxes

    time_independent_df = time_independent_data(updated_stock_data)

    # Updated stock data with double tops/bots identified by function
    # x_f is the double top/bot trigger row(index) for current day's double top/bot

    updated_stock_data, x_f = identify_double_patterns(time_independent_df, updated_stock_data)
    
    # Perform your analysis or call your functions here
    # e.g., identify_double_patterns(time_independent_df, updated_stock_data)

    daily_dataframes.append(updated_stock_data)

    master_df = pd.concat(daily_dataframes, ignore_index=True)
    # display(master_df.tail())
    # Increment to the next day

    next_day_date = next_day_date + pd.offsets.BDay(1)
    current_date = current_date + pd.offsets.BDay(1)
    # next_day_date = pd.Timestamp(next_day_date_iso) + pd.offsets.BDay(1)
    
    # print(current_date)
    # print(next_day_date)
    

2021-02-01 00:00:00-05:00
2021-02-02 00:00:00-05:00


  polynomial_coefficients_open = np.polyfit(x_data, stock_data['open'][:min_length], polynomial_degree)
  polynomial_coefficients_high = np.polyfit(x_data, stock_data['high'][:min_length], polynomial_degree)
  polynomial_coefficients_low = np.polyfit(x_data, stock_data['low'][:min_length], polynomial_degree)
  polynomial_coefficients_close = np.polyfit(x_data, stock_data['close'][:min_length], polynomial_degree)
  polynomial_coefficients_open = np.polyfit(x_data, stock_data['open'][:min_length], polynomial_degree)
  polynomial_coefficients_high = np.polyfit(x_data, stock_data['high'][:min_length], polynomial_degree)
  polynomial_coefficients_low = np.polyfit(x_data, stock_data['low'][:min_length], polynomial_degree)
  polynomial_coefficients_close = np.polyfit(x_data, stock_data['close'][:min_length], polynomial_degree)
  polynomial_coefficients_open = np.polyfit(x_data, stock_data['open'][:min_length], polynomial_degree)
  polynomial_coefficients_high = np.polyfit(x_data, stock_data['

In [13]:
# Shift master dataframe by one datapoint to set target columns to train  ML model 

master_df['close_lagged'] = master_df['close'].shift(1)
master_df['high_lagged'] = master_df['high'].shift(1)
master_df['low_lagged'] = master_df['low'].shift(1)
master_df['open_lagged'] = master_df['open'].shift(1)
master_df['trade_count_lagged'] = master_df['trade_count'].shift(1)
master_df['volume_lagged'] = master_df['volume'].shift(1)
master_df['vwap_lagged'] = master_df['vwap'].shift(1)
master_df['poly_open_lagged'] = master_df['poly_fit_open'].shift(1)
master_df['poly_high_lagged'] = master_df['poly_fit_high'].shift(1)
master_df['poly_low_lagged'] = master_df['poly_fit_low'].shift(1)
master_df['poly_close_lagged'] = master_df['poly_fit_close'].shift(1)
master_df['minima_lagged'] = master_df['minima'].shift(1)
master_df['maxima_lagged'] = master_df['maxima'].shift(1)
master_df['dbl_top_target_lagged'] = master_df['dbl_top_target'].shift(1)
master_df['dbl_bot_target_lagged'] = master_df['dbl_bot_target'].shift(1)
master_df['long_short_target_lagged'] = master_df['long_short_target'].shift(1)

In [17]:
# Convert the NaN values within both tagret columns into 0's to feed into ML Models
master_df['long_short_target'] = master_df['long_short_target'].fillna(0)
master_df['long_short_target_lagged'] = master_df['long_short_target_lagged'].fillna(0)

master_df.info()
print(len(master_df))
print(type(master_df))
print(sum(master_df["dbl_top_target"]))
print(sum(master_df["dbl_bot_target"]))
print(sum(master_df["long_short_target"]))
display(master_df.head(100))
display(master_df.tail(100))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15458 entries, 0 to 15457
Data columns (total 33 columns):
 #   Column                    Non-Null Count  Dtype              
---  ------                    --------------  -----              
 0   timestamp                 15458 non-null  datetime64[ns, UTC]
 1   close                     15458 non-null  float64            
 2   high                      15458 non-null  float64            
 3   low                       15458 non-null  float64            
 4   trade_count               15458 non-null  int64              
 5   open                      15458 non-null  float64            
 6   volume                    15458 non-null  int64              
 7   vwap                      15458 non-null  float64            
 8   poly_fit_open             15458 non-null  float64            
 9   poly_fit_high             15458 non-null  float64            
 10  poly_fit_low              15458 non-null  float64            
 11  poly_fit_close 

Unnamed: 0,timestamp,close,high,low,trade_count,open,volume,vwap,poly_fit_open,poly_fit_high,...,vwap_lagged,poly_open_lagged,poly_high_lagged,poly_low_lagged,poly_close_lagged,minima_lagged,maxima_lagged,dbl_top_target_lagged,dbl_bot_target_lagged,long_short_target_lagged
0,2021-02-01 09:00:00+00:00,317.7800,317.79,317.26,457,317.6200,152785,317.615546,317.589178,317.713873,...,,,,,,,,,,0.0
1,2021-02-01 09:05:00+00:00,317.6300,317.79,317.54,42,317.7500,5968,317.621602,317.741114,317.920427,...,317.615546,317.589178,317.713873,317.194772,317.668435,0.0,0.0,0.0,0.0,0.0
2,2021-02-01 09:10:00+00:00,317.8500,317.90,317.65,41,317.6500,5138,317.836724,317.855104,318.045420,...,317.621602,317.741114,317.920427,317.621386,317.859682,0.0,0.0,0.0,0.0,0.0
3,2021-02-01 09:15:00+00:00,318.1500,318.25,317.97,77,318.0000,18913,318.110307,317.933155,318.110569,...,317.836724,317.855104,318.045420,317.816195,317.940566,0.0,0.0,0.0,0.0,0.0
4,2021-02-01 09:20:00+00:00,318.0100,318.33,318.00,88,318.1100,20816,318.259888,317.980292,318.134466,...,318.110307,317.933155,318.110569,317.885216,317.961841,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2021-02-01 16:55:00+00:00,321.1200,321.53,321.04,2587,321.5200,986484,321.481060,321.472747,321.777601,...,321.474570,321.272915,321.588797,321.224019,321.499567,0.0,0.0,0.0,0.0,0.0
96,2021-02-01 17:00:00+00:00,321.7872,321.91,321.13,3042,321.1300,370943,321.660885,321.663247,321.957903,...,321.481060,321.472747,321.777601,321.428802,321.691487,0.0,0.0,0.0,0.0,0.0
97,2021-02-01 17:05:00+00:00,322.0100,322.06,321.71,2401,321.7843,302873,321.929986,321.842810,322.128004,...,321.660885,321.663247,321.957903,321.618845,321.870988,0.0,0.0,0.0,0.0,0.0
98,2021-02-01 17:10:00+00:00,322.3600,322.39,321.94,2442,321.9600,314549,322.070601,322.010039,322.286403,...,321.929986,321.842810,322.128004,321.792881,322.036707,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,timestamp,close,high,low,trade_count,open,volume,vwap,poly_fit_open,poly_fit_high,...,vwap_lagged,poly_open_lagged,poly_high_lagged,poly_low_lagged,poly_close_lagged,minima_lagged,maxima_lagged,dbl_top_target_lagged,dbl_bot_target_lagged,long_short_target_lagged
15358,2021-05-28 13:25:00+00:00,334.070,334.140,333.9000,173,333.90,40980,334.054536,334.017899,334.230719,...,333.857373,333.963019,334.158928,333.887776,334.027685,0.0,0.0,0.0,0.0,0.0
15359,2021-05-28 13:30:00+00:00,334.088,334.420,333.9400,7326,334.14,910795,334.192342,334.076430,334.304166,...,334.054536,334.017899,334.230719,333.938440,334.086644,0.0,0.0,0.0,0.0,0.0
15360,2021-05-28 13:35:00+00:00,334.450,334.620,334.0000,4826,334.07,541852,334.364700,334.136704,334.376924,...,334.192342,334.076430,334.304166,333.990454,334.146761,0.0,0.0,0.0,0.0,0.0
15361,2021-05-28 13:40:00+00:00,334.700,334.985,334.4600,5411,334.46,627069,334.795548,334.196825,334.446750,...,334.364700,334.136704,334.376924,334.042174,334.206121,0.0,0.0,0.0,0.0,0.0
15362,2021-05-28 13:45:00+00:00,334.290,334.855,334.2399,4709,334.70,516972,334.592730,334.254988,334.511599,...,334.795548,334.196825,334.446750,334.092060,334.262941,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15453,2021-05-28 21:25:00+00:00,333.840,333.840,333.8000,12,333.80,788,333.814201,333.788918,333.799171,...,333.920281,333.781296,333.802876,333.797115,333.806531,0.0,0.0,0.0,0.0,0.0
15454,2021-05-28 21:30:00+00:00,333.800,333.820,333.8000,15,333.82,2008,333.814337,333.806065,333.813151,...,333.814201,333.788918,333.799171,333.802676,333.818287,0.0,0.0,0.0,0.0,0.0
15455,2021-05-28 21:45:00+00:00,333.840,333.840,333.8200,11,333.82,1115,333.827005,333.829971,333.846285,...,333.814337,333.806065,333.813151,333.805815,333.830350,0.0,0.0,0.0,0.0,0.0
15456,2021-05-28 21:50:00+00:00,333.830,333.830,333.8200,6,333.82,702,333.828604,333.846077,333.873472,...,333.827005,333.829971,333.846285,333.809184,333.838308,0.0,1.0,0.0,0.0,0.0


In [16]:
master_df['long_short_target'].value_counts()

long_short_target
-1.0    42
 1.0    38
Name: count, dtype: int64