In [1]:
# samember_flash_gap\Scripts\activate

#!pip install alpaca
#!pip install alpaca-py
#!pip install alpaca-trade-api

from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

from alpaca.trading.client import TradingClient
from alpaca.trading.requests import GetAssetsRequest
from alpaca.trading.enums import AssetExchange, AssetStatus, AssetClass

from alpaca.data.timeframe import TimeFrame, TimeFrameUnit
from alpaca.data.historical import StockHistoricalDataClient
from alpaca.data.requests import StockBarsRequest
from alpaca.data.enums import Adjustment

import pandas as pd

from datetime import datetime, timedelta, time as dt_time
import pytz
ny = pytz.timezone('America/New_York')

from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

import os

if os.name == 'nt':
    path = 'C:/Users/pcuser/Documents/GitHub/C9-Research/ftg/code/screener_v2/credentials/apca_live.txt'
if os.name == 'posix':
    path = '/home/s/Downloads/apca_key.txt'
    
# # path to live creds 
# 'ftg/code/screener_v2/credentials/apca_live.txt'

key, sec = open(path, 'r').read().split('\n')

trading_client = TradingClient(key, sec)
stock_client = StockHistoricalDataClient(key, sec)

print("Part 1 done!")

#############################################################

trading_client = TradingClient(key, sec, raw_data=True, paper=False)
equity_assets = trading_client.get_all_assets(
    GetAssetsRequest(
        # status=AssetStatus.ACTIVE,
        asset_class=AssetClass.US_EQUITY,
    )
)
equities_df = pd.DataFrame([asset for asset in equity_assets])
print(len(equities_df))

#############################################################

equities_df['len'] = equities_df['symbol'].apply(lambda x: len(x))
# filter warrants
equities_df.loc[
    equities_df['name'].str.contains(' Warrant'),
    'warrant'
] = 1
equities_df['warrant'] = equities_df['warrant'].fillna(0)
# filter acquisition units
equities_df.loc[
    (equities_df['name'].str.contains(' Acquisition'))
    & ((equities_df['name'].str.contains(' Unit'))),
    'acq_unit'
] = 1
equities_df['acq_unit'] = equities_df['acq_unit'].fillna(0)

#############################################################

actives = equities_df.loc[
    (~equities_df['symbol'].str.contains('[0-9]{1,}'))
    & (equities_df['exchange'] != 'OTC')
    & (equities_df['warrant'] == 0)
    & (equities_df['acq_unit'] == 0)
    & ~(equities_df['symbol'].str.contains(r'\.'))
    & (equities_df['len'] <= 4),
    'symbol'
].tolist()

print("Actives done!")


#############################################################

actives = sorted(set(actives))
print(len(actives))

print ("Length of actives is said above!")

#############################################################

def get_intra(symbols, start, end):

    tf = TimeFrame(1, TimeFrameUnit.Minute)
    request_params = StockBarsRequest(
        symbol_or_symbols=symbols,
        timeframe=tf,
        start=start,
        end=end,
        adjustment=Adjustment.SPLIT,
    )
    stock_client = StockHistoricalDataClient(key, sec)
    bars = stock_client.get_stock_bars(request_params)
    bars = bars.df.tz_convert('America/New_York', axis=0, level=1)
    
    return bars

print ("get_intra function understood")

#############################################################

def get_all_intra(tickers, start, end, window=10):
    data = pd.DataFrame()
    for _ in tqdm(range(int(len(tickers) / window) + 1)):
        symbols = list(tickers[_*window:(_+1)*window])
        try:
            df = get_intra(
                symbols=symbols,
                start=start,
                end=end
            )
            data = pd.concat([data, df], axis=0)
            df.to_parquet(
                f'd:/equities_latest/intraday/{symbols[0]}-{symbols[-1]}_{start.date()}-{end.date()}.pq'
            )
        except Exception as e:
            print(f"Error fetching data for symbols {symbols}: {e}")
            # Optionally, log the error to a file for further analysis
            with open("error_log.txt", "a") as log_file:
                log_file.write(f"Error fetching data for symbols {symbols}: {e}\n")
    return data

print("get_all_intra function with error handling understood")



print ("get_all_intra function understood")

#############################################################
def get_all_intra_in_batches(tickers, start, end, window=10, batch_size=100):
    num_batches = len(tickers) // batch_size + (1 if len(tickers) % batch_size > 0 else 0)
    
    for batch_idx in tqdm(range(num_batches)):
        batch_start = batch_idx * batch_size
        batch_end = min((batch_idx + 1) * batch_size, len(tickers))
        batch_tickers = tickers[batch_start:batch_end]
        
        data = pd.DataFrame()
        for _ in tqdm(range(int(len(batch_tickers) / window) + 1)):
            symbols = list(batch_tickers[_*window:(_+1)*window])
            df = get_intra(
                symbols=symbols,
                start=start,
                end=end
            )
            data = pd.concat([data, df], axis=0)
            df.to_parquet(
                f'd:/equities_data_2024/intraday/{symbols[0]}-{symbols[-1]}_{start.date()}-{end.date()}.pq'
            )
        
        yield data

print ("get_all_intra_in batches function understood")
#############################################################
#############################################################
#############################################################

Part 1 done!
31523
Actives done!
11129
Length of actives is said above!
get_intra function understood
get_all_intra function with error handling understood
get_all_intra function understood
get_all_intra_in batches function understood


In [2]:
#############################################################
#############################################################
#############################################################

#get_all_intra function used below: 
#TO USE: 
####MODIFY "start" and change pd.offsets.BDay(2)) the number after BDay
####MODIFY tickers and change tickers = actives[0:1] to the list you want to siphon

start = (datetime.today() - pd.offsets.BDay(100))
start = datetime.combine(
    start,
    dt_time(0, 0, tzinfo=ny),
)
end = datetime.combine(
    datetime.now(tz=ny).date(),
    dt_time(9, 30, tzinfo=ny)
)

tickers = actives[2946:11130]
window = 2

intraday_data = get_all_intra(tickers, start, end, window)

# Check if the function executed successfully
if intraday_data is not None:
    print("Intraday data retrieval and saving completed successfully.")
else:
    print("There was an issue with intraday data retrieval and saving.")

#############################################################
#############################################################
#############################################################



  0%|          | 0/4092 [00:00<?, ?it/s]

Error fetching data for symbols ['EIO', 'EIP']: The level 1 is not valid
Error fetching data for symbols ['EMBH', 'EMBU']: The level 1 is not valid
Error fetching data for symbols ['ENBL', 'ENFC']: The level 1 is not valid
Error fetching data for symbols ['EQD', 'EQGP']: The level 1 is not valid
Error fetching data for symbols ['EQLT', 'EQM']: The level 1 is not valid
Error fetching data for symbols ['ESND', 'ESNG']: The level 1 is not valid
Error fetching data for symbols ['EUMF', 'EUMV']: The level 1 is not valid
Error fetching data for symbols ['FCRZ', 'FCSC']: The level 1 is not valid
Error fetching data for symbols ['FXS', 'FXSG']: The level 1 is not valid
Error fetching data for symbols ['GARD', 'GARS']: The level 1 is not valid
Error fetching data for symbols ['GBLO', 'GBNK']: The level 1 is not valid
Error fetching data for symbols ['GFY', 'GG']: The level 1 is not valid
Error fetching data for symbols ['GHII', 'GHIV']: The level 1 is not valid
Error fetching data for symbols [

In [None]:
import pandas as pd

# Read a Parquet file into a pandas DataFrame
df = pd.read_parquet('D:\equities_data_2024\intraday\AABA-AACG_2022-05-09-2024-04-08.pq')

# Display the DataFrame
print(df)