In [1]:
%load_ext autoreload
%autoreload 2
import sys, os
from os.path import expanduser
# ACTION REQUIRED: point this at your local checkout of the MA-prediction project.
path = expanduser("~/Documents/G3/MA-prediction")
# Put the project root on the import path so the project's packages can be imported below.
sys.path.append(path)

import pandas as pd
import numpy as np

import warnings

# Turn off pandas' chained-assignment check for this notebook.
pd.set_option("mode.chained_assignment", None)

# Silence the two warning families, in the same order as before
# (PerformanceWarning first, then FutureWarning).
for _silenced_category in (pd.errors.PerformanceWarning, FutureWarning):
    warnings.simplefilter(action='ignore', category=_silenced_category)

In [2]:
from MA_prediction.utils import *
from MA_prediction.preprocessing import *

# Data cleaning 3: Process market data

In this notebook we process the market data pulled from CRSP. We have already done the following when pulling the raw data:

- Replaced all `None`-type prices and returns with `np.nan`.
- Took the absolute value of negative prices.

We will address the following:

- For completed deals, add the delisting return.
- Fill missing prices / returns. These occur when a stock is not allowed to trade on certain days.
    - Fill each missing price from the previous price (taking the adjustment factors into account), then fill the missing returns from the filled prices.
- Calculate the market capitalization (mktcap).
- Adjust the raw price by the cumulative adjustment factor, relative to the factor on the announcement day.

    
    
## I/O

- Input:
    - `df_permno_delist_CRSP.h5`
    - `market_data_tgt_raw.pickle`
    - `market_data_acq_raw.pickle`
- Output:
    - `mkt_data_tgt_processed.pickle` (written to `data/raw/`)
    - `mkt_data_acq_processed.pickle` (written to `data/raw/`)

## load data

In [3]:
# Read the intermediate HDF5 table (permno / delisting information from CRSP, judging by
# the file name). It is kept as `df` and passed to the delisting-return and
# price-adjustment steps further down.
filepath = f"{path}/data/intermediate/df_permno_delist_CRSP.h5"
df = pd.read_hdf(filepath)

# Raw market data for targets and acquirers, loaded by the project helper `load_mkt_data`
# (brought in by the star-imports above).
# NOTE(review): presumably each object holds one entry of market data per deal — confirm
# against load_mkt_data.
mkt_data_tgt_raw, mkt_data_acq_raw = load_mkt_data(path, "raw")

# Add delisting return for completed deals

In [4]:
# Add the delisting price/return to the raw target market data, using the table `df`.
# Only the target data goes through this step; the acquirer data is taken straight from
# `mkt_data_acq_raw` in the next step.
# NOTE(review): per the notebook notes this applies to completed deals — confirm in
# add_delisting_prc_ret.
mkt_data_tgt_processed = add_delisting_prc_ret(mkt_data_tgt_raw, df)

100%|█████████████████████████████████████| 7255/7255 [00:04<00:00, 1640.72it/s]


# fill missing price/return

In [5]:
# Apply `fill_na_prc_ret` to each entry via `.map`.
# NOTE(review): assuming these are pandas Series, `na_action="ignore"` leaves NaN entries
# as NaN without passing them to the helper — confirm.
# NOTE(review): the target line re-assigns from itself, so re-running this cell applies
# the helper to already-filled data; re-run from the delisting step for a clean state.
mkt_data_tgt_processed = mkt_data_tgt_processed.map(fill_na_prc_ret, na_action="ignore")
# The acquirer series starts from the raw data (no delisting step was applied to it).
mkt_data_acq_processed = mkt_data_acq_raw.map(fill_na_prc_ret, na_action="ignore")

# calculate mktcap

In [6]:
# Apply `calculate_mktcap` to each entry of both objects; with `na_action="ignore"`,
# missing entries are presumably skipped (assuming pandas Series — confirm).
# NOTE(review): both lines re-assign from themselves, so re-running this cell applies
# the helper again to its own previous output.
mkt_data_tgt_processed = mkt_data_tgt_processed.map(calculate_mktcap, na_action="ignore")
mkt_data_acq_processed = mkt_data_acq_processed.map(calculate_mktcap, na_action="ignore")

# adjust price by factors

In [7]:
# Price adjustment for both objects. The helper is given the `da` column of the deal table
# (presumably the announcement date that anchors the cumulative factor — confirm).
# The column is selected explicitly with [] rather than attribute access.
mkt_data_tgt_processed = adjust_price_for_mkt_data_ser(mkt_data_tgt_processed, df["da"])
mkt_data_acq_processed = adjust_price_for_mkt_data_ser(mkt_data_acq_processed, df["da"])

100%|█████████████████████████████████████| 9612/9612 [00:02<00:00, 3537.32it/s]
100%|█████████████████████████████████████| 5454/5454 [00:01<00:00, 3316.42it/s]


In [8]:
import pickle

# Destination files for the processed target / acquirer market data.
filepath_tgt = f"{path}/data/raw/mkt_data_tgt_processed.pickle"
filepath_acq = f"{path}/data/raw/mkt_data_acq_processed.pickle"

# Serialize each processed object to its file, target first, then acquirer.
_outputs = (
    (filepath_tgt, mkt_data_tgt_processed),
    (filepath_acq, mkt_data_acq_processed),
)
for _destination, _processed in _outputs:
    with open(_destination, 'wb') as handle:
        pickle.dump(_processed, handle, protocol=pickle.HIGHEST_PROTOCOL)