# WRDS Code

1. request data from TAQ on wrds for each year
  - select the date from the start to end of the year 
  - choose a time range from 9:30 to 16:00
  - select the tickers_{year}.txt file for the list of relevant tickers
  - select the query variables 'DATE', 'TIME_M', 'SYM_ROOT', 'SYM_SUFFIX', 'SIZE', and 'PRICE'
  - ensure output format is "csv" with zip compression and "YYYY-MM-DD" date format

2. Upload the market_anns_map.pkl file to the WRDS cluster to locate announcement times

In [2]:
from datetime import datetime, timedelta
import pickle
import zipfile
import pandas as pd
from io import StringIO



### check if string times are within 30 minutes of each other
# time1 is a TAQ timestamp with fractional seconds (nanosecond precision), time2 is to the second as in the announcement time
def is_within_30_minutes(time1, time2):
  """Return True if two time-of-day strings are at most 30 minutes apart.

  Args:
    time1: "H:MM:SS" optionally followed by "." and any number of fractional
      digits (e.g. the nanosecond-precision TIME_M values in TAQ data).
    time2: "HH:MM:SS", to the second, as in the announcement times.

  Returns:
    bool: True when |time1 - time2| <= 30 minutes (boundary inclusive).

  Raises:
    ValueError: if either string does not match the expected format.
  """
  # split off the fractional seconds instead of assuming exactly nine digits
  clock, _, fraction = time1.partition(".")

  if fraction:
    # datetime only resolves microseconds: keep the first six digits
    # (%f right-pads shorter fractions with zeros)
    t1 = datetime.strptime(f"{clock}.{fraction[:6]}", "%H:%M:%S.%f")
  else:
    t1 = datetime.strptime(clock, "%H:%M:%S")
  t2 = datetime.strptime(time2, "%H:%M:%S")

  # both parse onto the same default date, so the difference is purely time-of-day
  return abs(t1 - t2) <= timedelta(minutes=30)



### load the market_anns_map using pickle
def load_market_anns_map(market_anns_file_name):
  """Unpickle and return the object stored at ``market_anns_file_name``.

  NOTE: pickle can run arbitrary code while loading, so only point this at a
  file you produced yourself.
  """
  with open(market_anns_file_name, "rb") as pickled_map:
    return pickle.load(pickled_map)



### produce a csv file with trades near announcements using zip_file_path and wrds_market_anns_map and saving to csv_file_path
def create_annoucement_trades_csv(zip_file_path, csv_file_path, wrds_market_anns_map, year, chunk_rows=100_000):
  """Filter a zipped trades csv down to the trades near an announcement.

  The single csv inside ``zip_file_path`` is streamed in chunks of whole rows.
  A row is kept when ``wrds_market_anns_map[year]`` holds an announcement time
  for its (SYM_ROOT, DATE) and its TIME_M is within 30 minutes of that time.
  Kept rows are written to ``csv_file_path`` with a single header line.

  Args:
    zip_file_path: zip archive containing exactly one csv file with at least
      the columns 'DATE', 'TIME_M' and 'SYM_ROOT'.
    csv_file_path: csv file to write (overwritten if it exists).
    wrds_market_anns_map: nested dict {year: {ticker: {date: time}}}.
    year: key selecting the announcements in ``wrds_market_anns_map``.
    chunk_rows: number of csv rows parsed per chunk; bounds memory use.

  Raises:
    KeyError: if ``year`` is not a key of ``wrds_market_anns_map``.
    ValueError: if the archive does not contain exactly one file.
  """
  # look the year up once, and fail before any file is created if it is missing
  anns_for_year = wrds_market_anns_map[year]

  # check whether a single trade is within 30 minutes of its ticker's announcement
  def trade_near_annoucement(date, time, ticker):
    ann_time = anns_for_year.get(ticker, {}).get(date)
    return ann_time is not None and is_within_30_minutes(time, ann_time)

  # pipeline data from zip to csv, reducing the amount of data
  with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    file_list = zip_ref.namelist()
    if len(file_list) != 1:
      raise ValueError(f"expected exactly one file in {zip_file_path}, found {len(file_list)}")

    # newline='' leaves line-ending handling to pandas when writing to a handle
    with zip_ref.open(file_list[0]) as input_file, \
         open(csv_file_path, 'w', newline='') as output_file:

      write_header = True
      # let pandas do the chunking so every chunk holds complete rows and the
      # header is parsed exactly once (raw byte reads would split rows)
      for df_chunk in pd.read_csv(input_file, chunksize=chunk_rows):
        near_announcement = pd.Series(
          [
            trade_near_annoucement(date, time, ticker)
            for date, time, ticker in zip(df_chunk['DATE'], df_chunk['TIME_M'], df_chunk['SYM_ROOT'])
          ],
          index=df_chunk.index,
          dtype=bool,
        )

        df_chunk[near_announcement].to_csv(output_file, header=write_header, index=False)
        write_header = False



### for a given zip file of trades and year, produce a csv file with trades near announcements in that year
def zip_to_annoucement_trades_csv(zip_csv_file_name, year):
  """Produce ./announcement_trades_csv/trades_{year}.csv from a zipped trades csv.

  Loads the announcement map from ./market_anns_map.pkl, then filters the
  trades in ``zip_csv_file_name`` down to those near an announcement in
  ``year``. The output directory is created if it does not exist yet.

  Args:
    zip_csv_file_name: path of the zip archive holding the trades csv.
    year: year key used both for the announcement lookup and the output name.
  """
  import os  # local import: only needed to prepare the output directory

  market_anns_map_file_name = "./market_anns_map.pkl"
  annoucement_trades_csv_file_name = f"./announcement_trades_csv/trades_{year}.csv"

  print(f"loading the market annoucement times from {market_anns_map_file_name}")
  wrds_market_anns_map = load_market_anns_map(market_anns_map_file_name)

  # opening the output file fails with FileNotFoundError if its directory is missing
  os.makedirs(os.path.dirname(annoucement_trades_csv_file_name), exist_ok=True)

  print(f"creating csv for trades near annoucements at {annoucement_trades_csv_file_name}")
  create_annoucement_trades_csv(zip_csv_file_name, annoucement_trades_csv_file_name, wrds_market_anns_map, year)

def main():
  """Build the announcement-trades csv for every (zip file, year) pair listed below."""
  # one entry per zip file to process, paired with the year it is filtered for
  jobs = (
    ("./web_query_output/yjqi3btakxz4z03q_csv.zip", 2022),
    # ("./web_query_output/wgqbo7lgze1c6e0q_csv.zip", 2021),
  )

  for archive_path, trade_year in jobs:
    zip_to_annoucement_trades_csv(archive_path, trade_year)


# run the pipeline only when executed as a script, not on import
if __name__ == "__main__":
  main()

{2014: {'TLMR': {'2014-05-06': '10:45:00'}, 'CLDN': {'2014-05-14': '13:02:00'}, 'MDNT': {'2014-08-18': '10:41:00', '2014-11-19': '15:18:00'}, 'TRIV': {'2014-08-06': '15:11:00'}, 'LPG': {'2014-09-03': '10:41:00'}, 'OEC': {'2014-11-13': '14:38:00'}, 'AIRT': {'2014-06-02': '13:00:00'}, 'APT': {'2014-05-06': '13:00:00'}, 'BMRA': {'2014-01-14': '14:57:00', '2014-04-14': '12:00:00', '2014-08-29': '12:00:00', '2014-10-15': '12:00:00'}, 'FRD': {'2014-06-11': '12:00:00', '2014-08-13': '13:00:00'}, 'DSCI': {'2014-03-13': '12:00:00'}, 'BYBK': {'2014-11-05': '10:10:00'}, 'CECE': {'2014-05-08': '14:07:00'}, 'APGI': {'2014-02-13': '11:20:00'}, 'DBLE': {'2014-03-12': '14:00:00'}, 'EPL': {'2014-02-27': '11:00:00'}, 'DXPE': {'2014-05-12': '12:00:00'}, 'CAC': {'2014-04-29': '13:00:00'}, 'ABCO': {'2014-07-31': '12:00:00'}, 'AAON': {'2014-08-07': '13:00:00'}, 'SSREY': {'2014-08-06': '13:14:00'}, 'ABGOF': {'2014-11-12': '11:00:00'}, 'ABHID': {'2014-03-17': '13:00:00', '2014-06-16': '11:22:00'}, 'AMBT': {'2