# In [6]:
import csv
import functools
import pickle
from collections import defaultdict
from datetime import datetime

# %pip install pandas_market_calendars
import pandas_market_calendars as mcal



# given a string date in the form "2023-02-14"
# given a string time in the form "13:05:00"
# returns whether the time falls in the kept window (10:00 up to, but not
# including, 15:30) and the date is a valid NYSE trading day
nyse = mcal.get_calendar('XNYS')
@functools.lru_cache(maxsize=None)
def _is_trading_day(date):
  """Return whether the NYSE calendar reports `date` as a valid day.

  Results are memoized per date, so repeated queries for one date hit the
  calendar only once.
  """
  return nyse.valid_days(start_date=date, end_date=date).size > 0


def is_market_open(date_str, time_str):
  """Return whether an announcement falls inside the kept trading window.

  Args:
    date_str: date in the form "2023-02-14".
    time_str: time in the form "13:05:00" (exactly three ":"-separated parts).

  Returns:
    True only when the time is from 10:00 up to, but not including, 15:30
    and the date is a valid day on the NYSE calendar.  A time or date that
    cannot be parsed yields False rather than raising.
  """
  parts = time_str.split(":")
  if len(parts) != 3:
    return False

  # only the hour and minute matter; the seconds field is not inspected
  try:
    hour = int(parts[0])
    minute = int(parts[1])
  except ValueError:
    return False

  # keep only 10:00 to 15:29 (anything from 15:30 onward is dropped)
  if hour <= 9 or hour >= 16:
    return False
  if hour == 15 and minute >= 30:
    return False

  # a malformed date is treated like a malformed time: not open
  try:
    date = datetime.strptime(date_str, "%Y-%m-%d")
  except ValueError:
    return False

  # check that the market is open on this day
  return _is_trading_day(date)



### uses all_announcements.csv to make:
# market_anns: list of [ticker, anndat, anntim] lists, one per announcement
# that has a non-empty ticker and passes is_market_open
market_anns = []
# newline="" lets the csv module handle line endings itself, as its docs recommend
with open("./raw_data/all_announcements.csv", newline="") as file:
  # columns: TICKER,OFTIC,MEASURE,PDICITY,ANNDATS,ANNTIMS
  csv_reader = csv.reader(file)
  next(csv_reader, None)  # skip the header row (no-op on an empty file)

  for row in csv_reader:
    if not row:
      # csv.reader yields [] for a blank line; there is nothing to unpack
      continue

    # the second column (OFTIC) is the one stored as the ticker
    _, ticker, _, _, anndat, anntim = row
    if ticker and is_market_open(anndat, anntim):
      market_anns.append([ticker, anndat, anntim])



### make a file for each year containing the tickers for that year
# year -> set of tickers announced in that year
# (dates of four characters or fewer are grouped under the year -1)
date_to_tickers = defaultdict(set)
for ticker, anndat, _ in market_anns:
  annyear = int(anndat[:4]) if len(anndat) > 4 else -1
  date_to_tickers[annyear].add(ticker)

for year, tickers in date_to_tickers.items():
  with open(f"ticker_queries/tickers_{year}.txt", 'w') as file:
    # sorted so the file contents are stable from run to run (set order is not)
    file.writelines(f"{ticker}\n" for ticker in sorted(tickers))



### build a nested lookup from market_anns:
###   year -> ticker -> announcement date -> announcement time
### then store it with pickle under ./market_anns_storage
market_anns_map = {}
for ticker, anndat, anntim in market_anns:
  # dates of four characters or fewer are filed under the year -1
  if len(anndat) > 4:
    annyear = int(anndat[:4])
  else:
    annyear = -1

  # create the year and ticker levels on first sight, then record the time;
  # a later row for the same ticker and date replaces the earlier time
  by_ticker = market_anns_map.setdefault(annyear, {})
  by_date = by_ticker.setdefault(ticker, {})
  by_date[anndat] = anntim

# save with pickle
with open("./market_anns_storage/market_anns_map.pkl", "wb") as file:
  pickle.dump(market_anns_map, file)


