In [1]:
%load_ext autoreload
%autoreload 2
import sys, os
from os.path import expanduser
# ACTION REQUIRED: change this to the folder of the MA-prediction project on your machine.
path = expanduser("~/Documents/G3/MA-prediction")
# Put the project folder on the import path for the `MA_prediction` imports that follow.
sys.path.append(path)

import pandas as pd
import numpy as np

# Disable pandas' chained-assignment check (SettingWithCopyWarning) for the whole session.
# NOTE(review): this also hides genuine chained-assignment mistakes in later cells.
pd.options.mode.chained_assignment = None
import warnings
# Suppress pandas PerformanceWarning and every FutureWarning so cell output stays readable.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
from MA_prediction.utils import *
from MA_prediction.mkt_calendar import *
from MA_prediction.preprocessing import *
from MA_prediction.crsp import *

In [3]:
import wrds
# Open the WRDS connection that later cells pass around as `db`.
# When this cell was run it prompted interactively for a WRDS username and password.
db = wrds.Connection()

Enter your WRDS username [yizhan]: olivershu
Enter your password: ········


WRDS recommends setting up a .pgpass file.


Create .pgpass file now [y/n]?:  y


Created .pgpass file successfully.
Loading library list...
Done


# data-3: CRSP, pull delist and market data

In this notebook we pull the delisting information and market data from CRSP. 


- Look for delisting information (delisting code, last trade date, delisting date and delisting returns) in the CRSP database, for all the targets.
- Pull raw market data for all the targets and acquirors, from 40 trading days prior to `dao` to 40 trading days after `dr`.


## I/O

- Input: 
    - `data/intermediate/df_basic_processing.h5`
    - `data/intermediate/CRSP/apermno.h5`, `data/intermediate/CRSP/tpermno.h5`.

- Output:

    - `data/intermediate/CRSP/delist.h5`
    - `data/mkt-data/mkt_data_tgt_raw.pickle`, `data/mkt-data/mkt_data_acq_raw.pickle`

## Load data

In [4]:
# Load the deal table produced by the basic-processing step.
filepath = f"{path}/data/intermediate/df_basic_processing.h5"
df = pd.read_hdf(filepath)
print_shape(df)

# Load the permno lookup results for targets and acquirors.
tpermno, apermno = load_permno_data(path)

# Merge the permno information into df (acquiror first, then target); df itself is not saved.
# presumably `loc_names` lists the existing columns the new ones are placed next to — confirm in insert_cols
for loc_names, permno_data in (
    (['atic', 'atic', 'acu', 'an'], apermno),
    (['ttic', 'ttic', 'tcu', 'tn'], tpermno),
):
    insert_cols(df, loc_names, None, permno_data)

The dataset is of size (10446, 95).


# Get target delisting information

In [5]:
# Query CRSP delisting information once per target permno; the result is returned as a DataFrame.
# Expected columns: ['delist_code', 'last_trade_date', 'delist_date', 'delist_amount', 'delist_return']
delist_CRSP = apply_func_to_ser_df(
    df.tpermno,
    get_delisting_information,
    return_as_df=True,
    db=db,
)

# Report how many completed deals (statc == 'C') came back without a delisting return.
complete = df.statc.eq('C')
n_complete = complete.sum()
n_missing_return = (complete & delist_CRSP.delist_return.isnull()).sum()
print(f"{n_missing_return} out of {n_complete} completed deals cannot find delisting returns.")

100%|███████████████████████████████████████| 9614/9614 [02:14<00:00, 71.34it/s]


652 out of 8097 completed deals cannot find delisting returns.


In [6]:
## Save intermediate results
# Write the delisting table to an HDF5 file under key 'delist';
# mode='w' creates the file anew, replacing any existing file at this path.
pathfile = f"{path}/data/intermediate/CRSP/delist.h5"
delist_CRSP.to_hdf(pathfile, key = 'delist', mode='w')

# Pull raw market data

In [7]:
# Concatenate the delisting columns into the deal table.
# presumably the five entries place each of the five delist columns next to 'att' — confirm in insert_cols
loc_names = ['att'] * 5
insert_cols(df, loc_names, None, delist_CRSP)

# Update `dr` with the CRSP delisting date before pulling market data, but only for deals that are
#   - completed (statc == 'C'),
#   - delisted with a code in [200, 300),
#   - have a non-missing delisting return, and
#   - whose delisting date is within 40 trading days of `de`.
idx = (
    df.statc.eq('C')
    & df.delist_code.between(200, 300, inclusive="left")
    & df.delist_return.notna()
    & abs(get_num_trading_days_between(df.de, df.delist_date)).le(40)
)
# Assign through a single df.loc call. The previous `df.dr.loc[idx] = ...` was chained
# assignment, which writes to a temporary Series and leaves df unchanged when pandas
# Copy-on-Write is active.
df.loc[idx, 'dr'] = df.loc[idx, 'delist_date']

In [8]:
# Deals with a missing `dr` fall back to the latest date seen in either `da` or `dr`.
latest_da = df.da.max()
latest_dr = df.dr.dropna().max()
last_date = max(latest_da, latest_dr)
dr_filled = df.dr.fillna(last_date)

# Market-data window: 40 trading days before `dao` through 40 trading days after `dr`.
df['dao_40days_prior'] = get_trading_day_offset(df.dao, -40)
df['dr_40days_after'] = get_trading_day_offset(dr_filled, 40)

In [9]:
def get_stock_market_data_daily_from_ser_CRSP(ser, db=db):
    """
    Fetch daily CRSP market data for one (permno, start_date, end_date) record.

    Parameters
    ----------
    ser : positional record whose first three entries are, in order,
        the permno, the start date and the end date.
    db : connection handed through to `get_stock_market_data_daily_CRSP`;
        defaults to the notebook-level `db` that exists when this cell is run.

    Returns
    -------
    Whatever `get_stock_market_data_daily_CRSP` returns for that permno and date range.
    """
    permno, start_date, end_date = ser.iloc[0], ser.iloc[1], ser.iloc[2]
    return get_stock_market_data_daily_CRSP(
        permno,
        start_date=start_date,
        end_date=end_date,
        db=db,
    )

In [10]:
# One CRSP request per deal: target permno plus the padded [dao - 40, dr + 40] trading-day window.
tgt_requests = df[['tpermno', 'dao_40days_prior', 'dr_40days_after']]
mkt_data_tgt = apply_func_to_ser_df(
    tgt_requests,
    get_stock_market_data_daily_from_ser_CRSP,
    return_as_df=False,
    db=db,
)

# Count the deals for which no target market data came back.
n_tgt_missing = mkt_data_tgt.isna().sum()
print(f"{n_tgt_missing} out of {len(df)} deals cannot find target market data.")

100%|█████████████████████████████████████| 10446/10446 [03:49<00:00, 45.52it/s]


832 out of 10446 deals cannot find target market data.


In [11]:
# One CRSP request per deal: acquiror permno plus the padded [dao - 40, dr + 40] trading-day window.
acq_requests = df[['apermno', 'dao_40days_prior', 'dr_40days_after']]
mkt_data_acq = apply_func_to_ser_df(
    acq_requests,
    get_stock_market_data_daily_from_ser_CRSP,
    return_as_df=False,
    db=db,
)

# Count the deals for which no acquiror market data came back.
n_acq_missing = mkt_data_acq.isna().sum()
print(f"{n_acq_missing} out of {len(df)} deals cannot find acquiror market data.")

100%|█████████████████████████████████████| 10446/10446 [02:12<00:00, 78.57it/s]


4991 out of 10446 deals cannot find acquiror market data.


## save raw market data

In [12]:
import pickle

# Destination files for the raw target / acquiror market data.
filepath_tgt = f"{path}/data/mkt-data/mkt_data_tgt_raw.pickle"
filepath_acq = f"{path}/data/mkt-data/mkt_data_acq_raw.pickle"

# Dump each result to its own pickle, target first, using the highest available protocol.
for out_path, payload in ((filepath_tgt, mkt_data_tgt), (filepath_acq, mkt_data_acq)):
    with open(out_path, 'wb') as handle:
        pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)