In [1]:
%load_ext autoreload
%autoreload 2
import sys, os
from os.path import expanduser
# ACTION REQUIRED: set `path` to the root folder of this project on your machine.
path = "~/Documents/G3/MA-prediction"
# Expand the leading "~" to the current user's home directory.
path = expanduser(path)
# Put the project root on sys.path so the `MA_prediction` imports in the next cell resolve.
sys.path.append(path)

import pandas as pd
import numpy as np

# Turn off pandas' chained-assignment check for the whole session.
pd.options.mode.chained_assignment = None
import warnings
# Suppress pandas PerformanceWarning and every FutureWarning for the whole session.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
from MA_prediction.utils import *
from MA_prediction.crsp import *
from MA_prediction.mkt_calendar import *

# Data Filters
Previously we deleted the following deals:

- database errors.
- deal price is missing (mostly debt restructuring).
- deal consideration is missing.


Now we apply the following filters:


- able to match a `permno` for the tgt.
- for completed deals
    - delisting reason from CRSP should be due to MA.
    - delisting return is not missing (no complex payments that are hard to price).
    - delisting date from CRSP and effective date from SDC are not too far away (<=40 days difference).
- acquiror matched in CRSP for stock deals.


## I/O

- input:
    - `/data/intermediate/df_permno_delist_CRSP.h5`
    - `/data/reference/filters_basic.txt`

- output:
    - `/data/reference/filters.txt`

## old
- deal price, initial deal price >= \$1. 
- target stock price 1 day, 1 week and 4 weeks before >= \$1.
- deal value adjusted by inflation >= \$200 m.
- date announced is not estimated.
- deal cash and stock terms parsing successful.

- duration no fewer than 5 trading days.


Results of filtering:

- delete 3 deals due to data error. 9854 -> 9851
- delete 1503 deals due to deal price smaller than \$1. 9851 -> 8348
- delete 490 deals due to target price smaller than \$1. 8348 -> 7858
- delete 2473 deals due to deal value adjusted by inflation smaller than \$200 m. 7858 -> 5385
- delete 0 deals due to estimated announcement date. 5385 -> 5385
- delete 1117 deals due to failure of extracting cash and stock terms. 5385 -> 4268
- delete 31 deals due to failure of matching target in CRSP. 4268 -> 4237
- delete 33 deals due to failure of matching acquiror in CRSP for stock deals. 4237 -> 4204
- delete 21 deals due to non-M&A-related delisting code for completed deals. 4204 -> 4183
- delete 10 deals due to mismatch between delisting date in CRSP and effective date in SDC. 4183 -> 4173
- delete 39 deals due to duration shorter than 5 trading days. 4173 -> 4134

## load data

In [12]:
# Load the deal dataset produced by the previous step (listed as the input under "I/O" above).
# `path` is the project root set in the first cell.
filepath = f"{path}/data/intermediate/df_permno_delist_CRSP.h5"
df = pd.read_hdf(filepath)

In [13]:
# Read the log of filters that were already applied in the previous step.
textpath_old = f"{path}/data/reference/filters_basic.txt"
# Use a context manager so the read handle is closed right away
# (the original `open(...).read()` never closed it).
with open(textpath_old, "r") as prev_file:
    prev_filters = prev_file.read()
print(prev_filters)

# Start the new filter log with the previous log's content.
# `textfile` deliberately stays open: the filter cells below append to it and
# it is closed once all filters have been applied.
textpath = f"{path}/data/reference/filters.txt"
textfile = open(textpath, "w")
_ = textfile.write(prev_filters)

delete 3 deals due to database errors. 12082 -> 12079
delete 1631 deals due to missing pricing information. 12079 -> 10448
delete 3 deals due to missing consideration. 10448 -> 10445



# Filtering

In [14]:
# Every deal starts out flagged as retained; the filters below switch the flag off.
df["retain"] = True
# Running count of retained deals, used for the "before -> after" log lines.
num = df.shape[0]

## tgt permno is matched successfully

In [15]:
# Filter: drop deals with no matched target permno (tpermno is missing).
num_old = num

# Assign through df.loc rather than `df.retain[mask] = ...`: the latter is chained
# assignment, which is not guaranteed to write back to `df` and is a silent no-op
# under pandas Copy-on-Write.
df.loc[df.tpermno.isna(), 'retain'] = False

num = df.retain.sum()

# Log how many deals this filter removed (printed and appended to the filter log).
string = f"delete {num_old-num} deals due to failure of matching target in CRSP. {num_old} -> {num}"
print_and_save_string(string, textfile)

delete 833 deals due to failure of matching target in CRSP. 10445 -> 9612


## delisting

### delisting reason is MA

In [16]:
# Filter: for completed deals (statc == 'C'), drop those whose delisting code is
# outside [200, 300). A missing delist_code also fails `between`, so such completed
# deals are dropped as well.
num_old = num

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
not_ma_delisting = df.statc.eq('C') & ~df.delist_code.between(200, 300, inclusive='left')
df.loc[not_ma_delisting, 'retain'] = False

num = df.retain.sum()

# Log how many deals this filter removed (printed and appended to the filter log).
string = f"delete {num_old-num} deals due to non-M&A-related delisting code for completed deals. {num_old} -> {num}"
print_and_save_string(string, textfile)

delete 223 deals due to non-M&A-related delisting code for completed deals. 9612 -> 9389


### delisting return is not missing

In [17]:
# Filter: for completed deals (statc == 'C'), drop those with a missing delisting
# return (per the notes above: payments that are difficult to price).
num_old = num

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
missing_delist_return = df.statc.eq('C') & df.delist_return.isna()
df.loc[missing_delist_return, 'retain'] = False

num = df.retain.sum()

# Log how many deals this filter removed (printed and appended to the filter log).
string = f"delete {num_old-num} deals due to missing delist price/return (complex payment) for completed deals. {num_old} -> {num}"
print_and_save_string(string, textfile)

delete 45 deals due to missing delist price/return (complex payment) for completed deals. 9389 -> 9344


### delisting date and effective date don't differ too much

In [10]:
# Filter: for completed deals (statc == 'C'), drop those where the effective date
# (df.de) and the delisting date differ by more than 40 days in either direction.
num_old = num

days_between_de_delist_date = get_num_trading_days_between(df.de, df.delist_date)
# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
date_mismatch = df.statc.eq('C') & abs(days_between_de_delist_date).gt(40)
df.loc[date_mismatch, 'retain'] = False

num = df.retain.sum()

# Log how many deals this filter removed (printed and appended to the filter log).
string = f"delete {num_old-num} deals due to mismatch between delisting date in CRSP and effective date in SDC. {num_old} -> {num}"
print_and_save_string(string, textfile)

delete 133 deals due to mismatch between delisting date in CRSP and effective date in SDC. 9344 -> 9211


In [None]:
# All filters are logged; close the filter log file that was opened for writing above.
textfile.close()

# Data preprocessing final: Data Filters

Apply filters:

- data error detected manually.
- deal price, initial deal price >= \$1. 
- target stock price 1 day, 1 week and 4 weeks before >= \$1.
- deal value adjusted by inflation >= \$200 m.
- date announced is not estimated.
- deal cash and stock terms parsing successful.
- target and acquiror matched in CRSP.
- For completed deals, delisting code in CRSP is M&A related.
- For completed deals, effective date in SDC and target delisting date in CRSP are close. 
- duration no fewer than 5 trading days.


Results of filtering:

- delete 3 deals due to data error. 9854 -> 9851
- delete 1503 deals due to deal price smaller than \$1. 9851 -> 8348
- delete 490 deals due to target price smaller than \$1. 8348 -> 7858
- delete 2473 deals due to deal value adjusted by inflation smaller than \$200 m. 7858 -> 5385
- delete 0 deals due to estimated announcement date. 5385 -> 5385
- delete 1117 deals due to failure of extracting cash and stock terms. 5385 -> 4268
- delete 31 deals due to failure of matching target in CRSP. 4268 -> 4237
- delete 33 deals due to failure of matching acquiror in CRSP for stock deals. 4237 -> 4204
- delete 21 deals due to non-M&A-related delisting code for completed deals. 4204 -> 4183
- delete 10 deals due to mismatch between delisting date in CRSP and effective date in SDC. 4183 -> 4173
- delete 39 deals due to duration shorter than 5 trading days. 4173 -> 4134

# Filters

In [4]:
# Flag every deal as retained; each filter cell below clears the flag for the deals it removes.
df["retain"] = True

In [5]:
# Filter: drop deals flagged as data errors, plus deals where the target ticker
# equals the acquiror ticker.
# NOTE(review): `dsk` and `txt_file` are not defined in any visible cell; confirm
# they are created before this cell runs.
num_orig = df.retain.sum()

index_del = dsk.get_delete_index(df)
# Assign through df.loc rather than `df.retain[...] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
# Assumes `index_del` holds index labels of `df` (or a boolean mask) - TODO confirm.
df.loc[index_del, 'retain'] = False
df.loc[df.ttic.eq(df.atic), 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to data error. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 40 deals due to data error. 12031 -> 11991


In [6]:
# Filter: drop deals whose deal price or initial deal price is below 1.
# Missing prices are compared as 0 (fill_value), so they are dropped too.
num_orig = df.retain.sum()

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
low_deal_price = df.pr.lt(1., fill_value=0.) | df.pr_initial.lt(1., fill_value=0.)
df.loc[low_deal_price, 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to deal price smaller than $1. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 1861 deals due to deal price smaller than $1. 11991 -> 10130


In [7]:
# Filter: drop deals where any of pr1day, pr1wk or pr4wk is below 1.
# Missing prices are compared as 0 (fill_value), so they are dropped too.
num_orig = df.retain.sum()

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
low_target_price = (
    df.pr1day.lt(1., fill_value=0.)
    | df.pr1wk.lt(1., fill_value=0.)
    | df.pr4wk.lt(1., fill_value=0.)
)
df.loc[low_target_price, 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to target price smaller than $1. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 903 deals due to target price smaller than $1. 10130 -> 9227


In [8]:
# Filter: drop deals whose CPI-adjusted deal value (val_adj_by_cpi) is below 200.
# Missing values are compared as 0 (fill_value), so they are dropped too.
num_orig = df.retain.sum()

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
df.loc[df.val_adj_by_cpi.lt(200., fill_value=0.), 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to deal value adjusted by inflation smaller than $200 m. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 2986 deals due to deal value adjusted by inflation smaller than $200 m. 9227 -> 6241


In [9]:
# Filter: drop deals whose `da` field equals 'Yes' (logged below as an estimated
# announcement date).
num_orig = df.retain.sum()

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
df.loc[df.da.eq('Yes'), 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to estimated announcement date. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 0 deals due to estimated announcement date. 6241 -> 6241


In [10]:
# Filter: keep only deals whose payment_type is one of the recognised cash/stock
# structures; anything else (including a missing payment_type) is dropped.
num_orig = df.retain.sum()

payment_types_lst = ['Cash', 'Common Stock', 'Cash and Common Stock', 
                     'Common Stock, fixed dollar', 'Cash and Common Stock, fixed dollar']
# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
df.loc[~df.payment_type.isin(payment_types_lst), 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to failure of extracting cash and stock terms. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 1276 deals due to failure of extracting cash and stock terms. 6241 -> 4965


In [11]:
# Filter: drop deals with no matched target permno (tpermno is missing).
num_orig = df.retain.sum()

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
df.loc[df.tpermno.isna(), 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to failure of matching target in CRSP. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 42 deals due to failure of matching target in CRSP. 4965 -> 4923


In [12]:
# Filter: for deals paid in 'Common Stock' or 'Cash and Common Stock', drop those
# with no matched acquiror permno (apermno is missing).
# NOTE(review): the two "fixed dollar" payment types are not included in this
# check - confirm that is intended.
num_orig = df.retain.sum()

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
stock_deal_no_acquiror = df.payment_type.isin(['Common Stock', 'Cash and Common Stock']) & df.apermno.isna()
df.loc[stock_deal_no_acquiror, 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to failure of matching acquiror in CRSP for stock deals. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 34 deals due to failure of matching acquiror in CRSP for stock deals. 4923 -> 4889


In [13]:
# Filter: for completed deals (statc == 'C'), drop those whose delisting code is
# outside [200, 300). A missing delist_code also fails `between`, so such completed
# deals are dropped as well.
num_orig = df.retain.sum()

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
not_ma_delisting = df.statc.eq('C') & ~df.delist_code.between(200, 300, inclusive='left')
df.loc[not_ma_delisting, 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to non-M&A-related delisting code for completed deals. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 97 deals due to non-M&A-related delisting code for completed deals. 4889 -> 4792


In [14]:
# Filter: for completed deals (statc == 'C'), drop those where the effective date
# (df.de) and the delisting date differ by more than 252 days in either direction.
num_orig = df.retain.sum()

days_between_de_delist_date = dsk.get_num_trading_days_between(df.de, df.delist_date)
# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
date_mismatch = df.statc.eq('C') & abs(days_between_de_delist_date).gt(252)
df.loc[date_mismatch, 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to mismatch between delisting date in CRSP and effective date in SDC. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 11 deals due to mismatch between delisting date in CRSP and effective date in SDC. 4792 -> 4781


In [15]:
# Filter: drop deals whose duration is below 5 (logged below as trading days).
num_orig = df.retain.sum()

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
df.loc[df.duration.lt(5), 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to duration shorter than 5 trading days. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 46 deals due to duration shorter than 5 trading days. 4781 -> 4735


In [16]:
# Filter: drop deals whose duration exceeds 504 (logged below as "2 years").
num_orig = df.retain.sum()

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
df.loc[df.duration.gt(504), 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals due to duration longer than 2 years. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 10 deals due to duration longer than 2 years. 4735 -> 4725


In [17]:
# Filter: drop deals with status 'P' (logged below as pending) that were announced
# before 2020.
num_orig = df.retain.sum()

# Assign through df.loc rather than `df.retain[mask] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
stale_pending = df.statc.eq('P') & df.ann_year.lt(2020)
df.loc[stale_pending, 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} pending deals announced before 2020. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 0 pending deals announced before 2020. 4725 -> 4725


In [18]:
# Filter: drop the deals listed in the pre-computed `index_no_trading` pickle.
num_orig = df.retain.sum()

import pickle
# NOTE(review): unpickling can execute arbitrary code; only load this file if it
# comes from a trusted source.
index_no_trading = pd.read_pickle(f'{path}/data/index_no_trading.pickle')
# Assign through df.loc rather than `df.retain[...] = ...`: chained assignment is
# not guaranteed to write back to `df` and is a silent no-op under pandas Copy-on-Write.
# Assumes `index_no_trading` holds index labels of `df` (or a boolean mask) - TODO confirm.
df.loc[index_no_trading, 'retain'] = False

num_new = df.retain.sum()

# Log how many deals this filter removed.
string = f"delete {num_orig-num_new} deals whose targets or acquirors are not allowed to trade within 5 days after announcement. {num_orig} -> {num_new}"
txt_file.write(string+"\n")
print(string)

delete 2 deals whose targets or acquirors are not allowed to trade within 5 days after announcement. 4725 -> 4723


# Save final dataset

In [19]:
# All filters are logged; close the filter log file.
# NOTE(review): `txt_file` is not opened in any visible cell - confirm where it is created.
txt_file.close()

In [20]:
# Write the dataset to HDF5 under key 'df'; mode='w' overwrites any existing file.
# NOTE(review): `df` is written without dropping rows, so deals with retain == False
# are still in the file - confirm that downstream code filters on `retain`.
filepath = f"{path}/data/df_final.h5"
df.to_hdf(filepath, key='df', mode='w')

In [21]:
# Also write the same frame as CSV (the index is included as the first column).
# NOTE(review): as with the HDF5 export, rows with retain == False are not removed here.
filepath = f"{path}/data/df_final.csv"
df.to_csv(filepath)