In [3]:
import csv
# %pip install pandas_market_calendars
import pandas_market_calendars as mcal
from datetime import datetime
from collections import defaultdict
import pickle



# is_market_open (defined just below):
#   given a string date in the form "2023-02-14"
#   given a string time in the form "13:05:00"
#   returns whether the time falls inside the kept intraday window and the date
#   is reported as a valid trading day by the calendar below.
#   NOTE: the code keeps 10:00 up to (but not including) 15:30, which is
#   narrower than the regular 9:30 to 4:00 session.
# exchange calendar looked up by the name 'XNYS'; built once here and read by
# is_market_open as a module-level global
nyse = mcal.get_calendar('XNYS')
# date string -> trading-day verdict, so the calendar is queried only once per
# distinct date (re-running this cell resets the memo)
_trading_day_cache = {}


def is_market_open(date_str, time_str):
  """Return True when an announcement falls inside the kept intraday window.

  The window is 10:00 (inclusive) up to 15:30 (exclusive), and the date must
  be one for which `nyse.valid_days` returns at least one day.

  Args:
    date_str: date in the form "2023-02-14".
    time_str: time in the form "13:05:00"; only the hour and minute fields
      are inspected.

  Returns:
    True if the time is inside the window and the date is a trading day.
    False otherwise, including when either string is malformed.
  """
  parts = time_str.split(":")
  if len(parts) != 3:
    return False

  # get hr and min
  try:
    hour = int(parts[0])
    minute = int(parts[1])
  except ValueError:
    return False

  # reject impossible minute values such as "10:99:00"
  if not 0 <= minute <= 59:
    return False

  # keep only (10:00 to 15:30)
  if hour <= 9 or hour >= 16:
    return False
  if hour == 15 and minute >= 30:
    return False

  # check that the market is open on this day
  if date_str not in _trading_day_cache:
    try:
      date = datetime.strptime(date_str, "%Y-%m-%d")
    except ValueError:
      # a malformed date is handled like a malformed time: report "not open"
      # instead of aborting the caller's loop
      return False
    _trading_day_cache[date_str] = bool(
      nyse.valid_days(start_date=date, end_date=date).size > 0
    )
  return _trading_day_cache[date_str]



### read the market_anns_map back from its pickle file
def load_market_anns_map(market_anns_file_name):
  """Unpickle and return the object stored at `market_anns_file_name`.

  The file is opened in binary mode and closed again before returning.
  Unpickling can execute arbitrary code, so only use files you trust.
  """
  with open(market_anns_file_name, "rb") as pickled:
    return pickle.load(pickled)
# relative path to the pickled map (resolved against the current working directory)
market_anns_map_file_name = "./market_anns_storage/market_anns_map.pkl"
# loaded once when this cell runs; unpickling can execute arbitrary code, so
# this path should only ever point at a file produced by a trusted source
wrds_market_anns_map = load_market_anns_map(market_anns_map_file_name) 



### uses all_announcements.csv to make:
# all_anns: list of [ticker, anndat, anntim] lists, one per non-blank data row
# market_anns: the rows of all_anns that have a non-empty ticker and for which
#              is_market_open(anndat, anntim) is true
# earliest_date / latest_date: smallest and largest anndat in market_anns
#              (both None when market_anns is empty)
all_anns = []
market_anns = []
# newline="" is what the csv module asks for, so that line breaks inside
# quoted fields are parsed correctly
with open("./raw_data/all_announcements.csv", newline="") as file:
  # TICKER,OFTIC,MEASURE,PDICITY,ANNDATS,ANNTIMS
  csv_reader = csv.reader(file)
  next(csv_reader, None)  # skip the header row (no-op on an empty file)

  for row in csv_reader:
    # csv.reader yields [] for blank lines (e.g. a trailing newline); skip
    # them instead of failing on the unpack below
    if not row:
      continue

    # the second column (OFTIC) is the one used as the ticker; a row with any
    # other column count still raises ValueError here, as before
    _, ticker, _, _, anndat, anntim = row
    all_anns.append([ticker, anndat, anntim])
    # test the ticker first so the calendar lookup is skipped for rows that
    # would be dropped anyway
    if ticker and is_market_open(anndat, anntim):
      market_anns.append([ticker, anndat, anntim])

# output the date range of the announcements
# for zero-padded "YYYY-MM-DD" strings, string comparison orders chronologically
market_dates = [anndat for _, anndat, _ in market_anns]
# default=None avoids an IndexError/ValueError when no row was during market hours
earliest_date = min(market_dates, default=None)
latest_date = max(market_dates, default=None)

In [8]:
# summary of the announcement lists built in the previous cell
print(f"range of dates that have annoucements during market hours: {earliest_date} to {latest_date}")
print(f"total number of annoucements: {len(all_anns)}")
print(f"total number of annoucements during market hours: {len(market_anns)}")
# an empty all_anns would otherwise raise ZeroDivisionError; report 0.0 in that case
market_share_pct = len(market_anns)/len(all_anns)*100 if all_anns else 0.0
print(f"percent of annoucements that are during market hours: {market_share_pct}%")

range of dates that have annoucements during market hours: 2005-01-03 to 2023-09-14
total number of annoucements: 442234
total number of annoucements during market hours: 35394
percent of annoucements that are during market hours: 8.003455184359412%
