In [8]:
import pandas as pd
import numpy as np
from dask import dataframe as dd
from datetime import datetime, timedelta
from collections import defaultdict



In [2]:
# stocks in play are defined as follows:
# 1. daily gap of X% (eg +/- 0.01%)- defined  close at 9:15am vs. high/low of previous 3 days  
# 2. premarket gap of X% (eg +/- 0.01%)- defined close at 9:15am vs. close of previous day
# 3. premarket cumulative vol from 4am to 9:15am 
# 3. RVOL of X (eg > 1.5) - defined as cumulative volume at 9:34am (9:30-9:34 1 min vol) on specific day 
# vs. 14 day 5 min average volume 

# store stocks in play as a list in a dataframe with the date as the index
# could also store the key data so can regenerate the list based upon other parameters????

In [3]:
# define source data paths 
#stock_1m_file_path = "G:/My Drive/Backtesting/FirstRateData/Stocks/new stocks1min-parquet/"
#stock_1d_file_path = "G:/My Drive/Backtesting/FirstRateData/Stocks/stocks1day-csv/"
#ticker_list = pd.read_csv("12Jan2025_50Mcap_3Mavgvol_50cATR_10price_tickers_only.csv")['Ticker'].tolist()



 

In [9]:
# stocks in play inital variables

#first_day_partition = 3500
first_day_partition = 30
end_premarket_time = "09:00" # right now using 2 candles before open because of variable premarket times 
daily_range_gap_threshold = 0.01 # end premarket time vs. high/low of previous 3 days
premarket_gap_threshold = 0.02 # endpremarket close vs. previous day close 
premarket_volume_threshold = 100000
RVOL_threshold = 1.4
premarket_candles_min = 5

#stock_partition_day = pd.DataFrame(columns=['Date', 'vol_5min', 'day_high', 'day_low', \
#                                            'premarket_close', 'premarket_open', \
#                                            'premarket_volume' ]) # dataframe with all the stock data



time_930 = pd.to_datetime("09:30", format="%H:%M").time()
time_934 = pd.to_datetime("09:34", format="%H:%M").time()
time_1559 = pd.to_datetime("15:59", format="%H:%M").time()

In [None]:
stocks_in_play = defaultdict(list) # keys will be datetime and the values will be a list of stocks in play
stock_data = defaultdict(list) # keys will be the stock symbol and the values will be a dataframe with stock in play data on each day 

ticker_list = ["PLTR"]
#ticker = 'PLTR'


for ticker in ticker_list:

    directory_path = f"G:/My Drive/Backtesting/FirstRateData/Stocks/filtered/filtered-parquet/{ticker}_1min_parquet/"


    #parquet_path = stock_1m_file_path + ticker + "_1min_parquet"
    #csv_path = stock_1d_file_path + ticker + "_full_1day_UNADJUSTED.csv"

    df_1min = dd.read_parquet(directory_path)
    

    df_1min_part = df_1min.partitions[first_day_partition:]
    num_partitions = df_1min_part.npartitions

    #df_1day = pd.read_csv(csv_path)
    #df_1day['Datetime'] = pd.to_datetime(df_1day['Datetime'], format='mixed')
    #df_1day.set_index('Datetime', inplace=True)

    
    # obtain stock of interest and first day of interest for first_day_partition
    #df_first_day = df_1min.partitions[first_day_partition].compute()     
    #df_first_day['Datetime'] = pd.to_datetime(df_first_day['Datetime'])
    #df_first_day.set_index('Datetime', inplace=True)
    
    #first_date_of_interest = df_first_day.Date.iloc[0]
    #stock_of_interest = df_first_day.Ticker.iloc[0]



#print(df_current_day.head(15))
    # need moving average for RVOL calculation
    # store vol5min and all data for each day and then cycle through each day to get variables and determine SiP
    for partition in range(first_day_partition, num_partitions):
        #print(f"Processing partition {date_of_interest} of {num_partitions}")
        df_current_date = df_1min.partitions[partition].compute()   
        #print(df_current_date.head(5))  
        df_current_date['Datetime'] = pd.to_datetime(df_current_date['Datetime'])
        df_current_date.set_index('Datetime', inplace=True)
        current_date_of_interest = df_current_date.Date.iloc[0]
        #print(f"Processing date: {current_date_of_interest}")

       
        candles = df_current_date[
            (df_current_date.index.time >= time_930) &
            (df_current_date.index.time <= time_934)
        ]['Volume']
        print('candles 5 min', candles)
        vol_5min = candles.sum()
        


        # obtain the premarket close for the current day define at time above (use 2nd to last candle because variable premarket)
        premarket_candles = df_current_date[df_current_date.index.time < time_930]
        print('premarket_candles', premarket_candles)
        
        
        if len(premarket_candles) < premarket_candles_min:
            print(f"Error: No premarket data for {current_date_of_interest}")
            continue    
        premarket_volume = premarket_candles['Volume'].sum()
        premarket_close = premarket_candles['Close'].iloc[-2]
        premarket_open = premarket_candles['Open'].iloc[0]

        # obtain the high and low of entire day 
        market_open_candles = df_current_date[(df_current_date.index.time >= time_930) & (df_current_date.index.time <= time_1559)]
        day_high = market_open_candles['High'].max()
        day_low = market_open_candles['Low'].min()
           

        #df_previous1day = df_1day[:current_date_of_interest].iloc[-2:-1]

        #previous1day_close = df_previous1day['Close'].iloc[0]


        #premarket_gap = round((premarket_close_current_date - previous1day_close)/previous1day_close,4)


        #df_3day = df_1day[:current_date_of_interest].iloc[-4:-1]
        #previous_days_high = df_3day['High'].max()
        #previous_days_low = df_3day['Low'].min()
    
        if ticker not in stock_data:
            stock_data[ticker] = [{'Date':current_date_of_interest, 'vol_5min': vol_5min, \
                                                'day_high': day_high, 'day_low': day_low, 'premarket_close': premarket_close, \
                                                'premarket_open': premarket_open, 'premarket_volume': premarket_volume}]
        else:
            stock_data[ticker].append({'Date':current_date_of_interest, 'vol_5min': vol_5min, \
                                                'day_high': day_high, 'day_low': day_low, 'premarket_close': premarket_close, \
                                                'premarket_open': premarket_open, 'premarket_volume': premarket_volume})

        stock_data[ticker] = pd.DataFrame(stock_data[ticker])

        # calculate 14 day rolling RVOL
        stock_data[ticker]['RVOL_14days'] = stock_data[ticker]['vol_5min'].rolling(window=14).mean().fillna(0)
        # calculate RVOL ratio for the current day
        stock_data[ticker]['RVOL_ratio'] = round(stock_data[ticker]['vol_5min']/stock_data[ticker]['RVOL_14days'],2)
        
        # calculate daily gap for the current day
        stock_data[ticker]['daily_3day_max'] = stock_data[ticker]['day_high'].rolling(window=3, closed = 'left').max().fillna(0)
        stock_data[ticker]['daily_3day_min'] = stock_data[ticker]['day_low'].rolling(window=3, closed = 'left').min().fillna(0)
        
        stock_data[ticker]['daily_in_outmax'] = stock_data[ticker]['premarket_close'] > stock_data[ticker]['daily_3day_max']
        stock_data[ticker]['daily_in_outmin'] = stock_data[ticker]['premarket_close'] < stock_data[ticker]['daily_3day_min']
        
        stock_data[ticker]['daily_range_gap'] = np.where(stock_data[ticker][daily_in_outround((stock_data[ticker]['premarket_close'] - stock_data[ticker]['daily_3day_max'])/stock_data[ticker]['daily_3day_max'],4) if stock_data[ticker]['daily_in_out_max'] == 'out'
        
        if (stock_data[ticker]['premarket_close'] > stock_data[ticker]['daily_3day_max'])
        stock_data[ticker]['daily_in_out'] = 'outmax' 
        
        
        
        
        elif stock_data[ticker]['daily_in_out_min'] == 'out':
        stock_data[ticker]['daily_range_gap'] = round((stock_data[ticker]['premarket_close'] - stock_data[ticker]['daily_3day_min'])/stock_data[ticker]['daily_3day_min'],4)
        else:
            stock_data[ticker]['daily_range_gap'] = 0

        if stock_data[ticker]['daily_range_gap'] > daily_range_gap_threshold:
            stock_data[ticker]['daily_range_status'] = True
        else:
            stock_data[ticker]['daily_range_status'] = False

        stock_data[ticker]['premarket_gap'] = round((stock_data[ticker]['premarket_close'] - stock_data[ticker]['premarket_open'])/stock_data[ticker]['premarket_open'],4)
        if stock_data[ticker]['premarket_gap'] > premarket_gap_threshold:
            stock_data[ticker]['premarket_gap_status'] = True
        else:
            stock_data[ticker]['premarket_gap_status'] = False

        if stock_data[ticker]['premarket_volume'] > premarket_volume_threshold:
            stock_data[ticker]['premarket_volume_status'] = True
        else:
            stock_data[ticker]['premarket_volume_status'] = False
        
        '''
        if premarket_close_current_date < previous_days_low:
            
            daily_range_gap = round((premarket_close_current_date - previous_days_low)/previous_days_low,4)
        elif premarket_close_current_date > previous_days_high:
            
            daily_range_gap = round((premarket_close_current_date - previous_days_high)/previous_days_high,4)  
        else:
            daily_range_gap = 0  

        '''
        #print(f"Daily range gap: {daily_range_gap}")

        # calculate the RVOL for the current day
        # calculate RVOL for the 14 days prior to the current day  

        
        
        '''
        # work on this part and print dates for stocks in play over the period of days 
        # check if the stock meets the criteria for being in play
        if abs(daily_range_gap) > daily_range_gap_threshold and abs(premarket_gap) > premarket_gap_threshold and RVOL_ratio > RVOL_threshold:
            #print(f"Stock {stock_of_interest} is in play on {current_date_of_interest}")
            in_play = True
        else:
            #print(f"Stock {stock_of_interest} is not in play on {current_date_of_interest}")
            in_play = False

        

        if current_date_of_interest not in stocks_in_play:
            stocks_in_play[current_date_of_interest] = [stock_of_interest] if in_play else []


        elif stocks_in_play[current_date_of_interest] != [] and in_play:
            stocks_in_play[current_date_of_interest].append(stock_of_interest)
        
        '''
        
'''
    #stock_data[ticker] = pd.DataFrame(stock_data[ticker])

stocks_in_play_df = pd.DataFrame(stocks_in_play.items(), columns=['Date', 'Stocks in play'])
#stock_data = pd.DataFrame(stock_data.items(), columns=['Stock', 'Date', 'daily_range_gap', 'premarket_gap', 'RVOL_ratio', \
#                                             'previous_days_high', 'previous_days_low', 'premarket_close_current_date', \
#                                            'previous_1day_close', 'RVOL_14days', 'vol_current_day_partition_volume' ])
stocks_in_play_df.to_csv("stocks_in_play.csv")

#stock_data.to_csv("stock_data.csv")
 '''   

candles 5 min Datetime
2020-12-14 09:30:00    852657
2020-12-14 09:31:00    755869
2020-12-14 09:32:00    471139
2020-12-14 09:33:00    582521
2020-12-14 09:34:00    454692
Name: Volume, dtype: int64
premarket_candles                           Date   Open   High    Low  Close  Volume Ticker
Datetime                                                                 
2020-12-14 04:00:00 2020-12-14  27.48  27.60  27.35  27.60    1030   PLTR
2020-12-14 04:01:00 2020-12-14  27.60  27.61  27.31  27.31    1000   PLTR
2020-12-14 04:02:00 2020-12-14  27.60  27.76  27.50  27.74    1102   PLTR
2020-12-14 04:03:00 2020-12-14  27.74  27.74  27.74  27.74    2226   PLTR
2020-12-14 04:04:00 2020-12-14  27.80  27.80  27.67  27.75    1015   PLTR
...                        ...    ...    ...    ...    ...     ...    ...
2020-12-14 09:25:00 2020-12-14  28.22  28.23  28.21  28.21    9740   PLTR
2020-12-14 09:26:00 2020-12-14  28.21  28.23  28.20  28.22   10596   PLTR
2020-12-14 09:27:00 2020-12-14  28.22  28.

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [7]:
test = pd.DataFrame({'B': [0, 2, 2, 5, 4]})
test.rolling(window=3, closed = 'left').mean()

Unnamed: 0,B
0,
1,
2,
3,1.333333
4,3.0
