# CKO JAR Revision

### Import modules

In [1]:
import pandas as pd
import numpy as np
import rpy2.rinterface #ggplot tool
from pandas_profiling import ProfileReport
import dask.dataframe as dd
import wrds
import pandasql as ps

## Review TNIC-3 data

### Import TNIC3 data from Hoberg and Philips data library 

In [None]:
# !wget -P ../2_pipeline/ http://hobergphillips.tuck.dartmouth.edu/idata/tnic3_data.zip
# !unzip -q ../2_pipeline/tnic3_data.zip -d ../2_pipeline/ && rm ../2_pipeline/tnic3_data.zip

In [None]:
"""
Hoberg and Philips TNIC3 database
"""
tnic = pd.read_csv('/Users/ohn0000/Dropbox/Project/cko/0_data/external/tnic3_data.txt', 
                   delimiter='\t', header=0, index_col=['gvkey1', 'year', 'gvkey2'])
tnic.dropna(inplace=True)

### Subset to 20-closest competitors

In [None]:
# tnic_industry = tnic.groupby(level=['gvkey1', 'year'])["score"].nlargest(20).reset_index(level=[0,1], drop=True)
# tnic_industry = tnic_industry.to_frame(name='score')
# tnic_industry.to_pickle('../2_pipeline/tnic_industry.pkl')
tnic_industry = pd.read_pickle('../2_pipeline/tnic_industry.pkl')

```tnic_industry``` still has firm-years with less than 20 competitors.

In [None]:
# """
# Require at least 20 closest competitors
# """
# tnicind_sub = tnic.groupby(level=['gvkey1', 'year'])["score"].filter(lambda x: x.size == 20)
# tnicind_sub = tnicind_sub.to_frame(name='score')

In [None]:
"""
tnic_industry['gvkey1'] = tnic_industry['gvkey1'].apply(lambda x: str(x).zfill(6))
tnic_industry['gvkey2'] = tnic_industry['gvkey2'].apply(lambda x: str(x).zfill(6))
"""

Remeber that _year_ in __tnic_industry__ is the base year for identifying close competitors. Accordingly, _lead1_ is the M&A year and _lead2_ is the year following M&A.

Readme_tnic3.txt explains that _year_ equals the first four digits of the __compustat__ _datadate_.

### Shift years in __tnic_industry__ to get _lead1_ and _lead2_ similarity scores

In [None]:
tnic_industry.rename(columns={'score':'score_0'}, inplace=True)

for i in range(1,3):
    colname = 'score' + '_' + str(i)
    tnic_industry['score'] = np.NaN
    tnic_industry.index = tnic_industry.index.set_levels(tnic_industry.index.levels[1] + 1, level=1)
    tnic_industry.update(tnic)
    tnic_industry.rename(columns={'score':colname}, inplace=True)

tnic_industry.reset_index(inplace=True)
tnic_industry["year"] -= 2
tnic_industry.set_index(["gvkey1", "year", "gvkey2"], inplace=True)

In [None]:
tnic_industry.to_pickle('../2_pipeline/tnic_industry.pkl')

### Run __*tnic_industry.py*__ on _WRDS_ to update lead scores

Many of the _lead1_ and _lead2_ values are missing. Grab these values from __TNIC_Advanced__ uploaded on _WRDS_. 

In [None]:
# !scp ../2_pipeline/tnic_industry.pkl tnic_industry.py $WRDS:~

### Download updated __*tnic_industry*__ file from WRDS

In [None]:
# !scp $WRDS:/scratch/ou/hohn/tnic_ind_update.pkl ../2_pipeline/ 

In [2]:
tnic_industry = pd.read_pickle('../2_pipeline/tnic_ind_update.pkl')

Average TNIC similarity score across 20-closest competitors.  
Remeber that in __TNIC_ALL__ most of the scores equals to zero. The _z\__ might be the more suitable.
- Close pair in t0 not appearing in t1 or t2 is meaningful.
- __BE CAREFUL__ of year 2016 and 2017. __TNIC is available only up to 2017__.

In [3]:
avg_sim = tnic_industry.groupby(level=['gvkey1','year']).mean()
avg_sim = avg_sim.join(tnic_industry.groupby(level=['gvkey1','year']).count().add_prefix("n_"))
avg_sim = avg_sim.join(tnic_industry.fillna(0).groupby(level=['gvkey1','year']).mean().add_prefix("z_"))

### Grab COMPUSTAT _datadate_

Revise here if _avg\_sim_ needs additional COMPUSTAT variables

In [4]:
db = wrds.Connection(wrds_username='yaera')

Loading library list...
Done


In [5]:
comp_query = """
select gvkey, datadate
from comp.funda
where consol = %(consol)s and indfmt in %(indfmt)s 
    and datafmt = %(datafmt)s and popsrc = %(popsrc)s
    and curcd in %(curcd)s
"""

parm = {'consol':('C'), 'indfmt' : ('INDL', 'FS'), 'datafmt': ('STD'), 'popsrc' : ('D'), 'curcd' : ('USD', 'CAD')}
datadate = db.raw_sql(comp_query, date_cols=['datadate'], params=parm)

In [6]:
datadate.drop_duplicates(inplace=True)
datadate['year'] = datadate['datadate'].dt.year
datadate.drop_duplicates(['gvkey', 'year'], inplace=True, keep='last')
datadate['gvkey'] = pd.to_numeric(datadate['gvkey']).astype('Int64')
datadate.set_index(['gvkey', 'year'], inplace=True)

In [7]:
import sqlite3
#Make the db in memory
conn = sqlite3.connect(':memory:')
#write the tables
avg_sim.to_sql('avg_sim', conn, index=True)
datadate.to_sql('datadate', conn, index=True)

qry = '''
    select  
        avg_sim.*, datadate.datadate
    from
        avg_sim join datadate on
        avg_sim.gvkey1 = datadate.gvkey and avg_sim.year = datadate.year
    '''
df = pd.read_sql_query(qry, conn)
df['datadate'] = df['datadate'].astype('datetime64[ns]')

### Grab _permno_

Revise here if _avg\_sim_ needs additional CRSP variables

In [8]:
sql_query= """
select gvkey, liid as iid, lpermno as permno, linkdt, linkenddt
from crsp.ccmxpf_linktable
where linktype in %(type)s and linkprim in %(prim)s and usedflag = 1
"""

parm = {'type':('LU', 'LC'), 'prim':('P', 'C')}
linktable = db.raw_sql(sql_query, date_cols=['linkdt', 'linkenddt'], params=parm)

linktable['gvkey'] = pd.to_numeric(linktable['gvkey'])
linktable['permno'] = pd.to_numeric(linktable['permno']).astype('int64')
linktable['iid'] = linktable['iid'].astype('category')

In [9]:
enddt = pd.to_datetime('2020-01-07 00:00:00')

In [10]:
linktable['linkenddt'] = linktable['linkenddt'].fillna(value = enddt)
linktable['linkenddt'] = linktable['linkenddt'].dt.date

In [11]:
import sqlite3
# Make the db in memory
conn = sqlite3.connect(':memory:')
# write the tables
df.to_sql('avg_sim', conn, index=False)
linktable.to_sql('linktable', conn, index=False)

qry = '''
    select  
        avg_sim.*, linktable.permno
    from
        avg_sim left join linktable on
        avg_sim.datadate between linkdt and linkenddt and avg_sim.gvkey1 = linktable.gvkey
    '''
df = pd.read_sql_query(qry, conn)
df['permno'] = df['permno'].astype('Int64')
df['datadate'] = df['datadate'].astype('datetime64[ns]')

In [19]:
col = list(df)
col.insert(1, col.pop(col.index('permno')))
col.insert(2, col.pop(col.index('datadate')))
df = df[col]

In [28]:
df['dt_s1'] = np.where(df['year'] == 2017, np.NaN, df['score_1'] - df['score_0'])
df['dt_s2'] = np.where(df['year'] == 2017, np.NaN, df['score_2'] - df['score_1'])
df['dt_z1'] = np.where(df['year'] == 2017, np.NaN, df['z_score_1'] - df['z_score_0'])
df['dt_z2'] = np.where(df['year'] >= 2016, np.NaN, df['z_score_2'] - df['z_score_1'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

## Link SDC to COMPUSTAT

### SDC and Compustat Link File

Historical CUSIP and TIC can link similarity data with SDC.

- Historical CUSIP: CRSP & COMPUSTAT
- TIC: COMPUSTAT

Things to grab here:
- Historical _CUSIP_
- CRSP _permno_

The link file is from [Michael Ewens](https://github.com/michaelewens/SDC-to-Compustat-Mapping.git). Cite papers below.

```
@article{phillips2013r,
  title={R\&D and the Incentives from Merger and Acquisition Activity},
  author={Phillips, Gordon M and Zhdanov, Alexei},
  journal={The Review of Financial Studies},
  volume={26},
  number={1},
  pages={34--78},
  year={2013},
  publisher={Society for Financial Studies}
  }
 ```

```
@article{ewensPetersWang2018,
 title={Acquisition prices and the measurement of intangible capital},
 author={Ewens, Michael and Peters, Ryan and Wang, Sean},
 journal={Working Paper}
 year={2018}
 }
```

In [None]:
sdc_link = pd.read_csv('/Users/ohn0000/Dropbox/Project/cko/0_data/external/dealnum_to_gvkey.csv', 
                       dtype={'DealNumber':'Int64', 'agvkey':'Int64', 'tgvkey':'Int64'}, index_col='DealNumber')

In [None]:
print(sdc_link['agvkey'].count(), sdc_link['tgvkey'].count())

Grab _datadate_ from __Compustat__.

In [None]:
comp_query = """
select gvkey, datadate
from comp.funda
where consol = %(consol)s and indfmt in %(indfmt)s 
    and datafmt = %(datafmt)s and popsrc = %(popsrc)s
    and curcd in %(curcd)s
"""

parm = {'consol':('C'), 'indfmt' : ('INDL', 'FS'), 'datafmt': ('STD'), 'popsrc' : ('D'), 'curcd' : ('USD', 'CAD')}
datadate = dd.from_pandas(db.raw_sql(comp_query, date_cols=['datadate'], params=parm), npartitions=20)

In [None]:
datadate['year'] = datadate['datadate'].dt.year - 1
datadate.drop_duplicates(inplace=True)
datadate = datadate.compute().copy()

In [None]:
datadate['gvkey'] = pd.to_numeric(datadate['gvkey'])
datadate =  datadate[(datadate['year'] >= 1995) & (datadate['year'] <= 2018)] 
datadate.rename(columns={'gvkey':'gvkey1'}, inplace=True)

In [None]:
datadate.set_index(['gvkey1', 'year'], inplace=True)

## Import previously constructed datasets

### Private Target Data (Chen 2019)

The _encoding_ option allows proper string imports.

In [None]:
cw = pd.read_sas('../0_data/manual/CW2019.sas7bdat', format = 'sas7bdat', encoding="utf-8")

Use _dask[dataframe]_ to facilitate import of __SDC__.

In [None]:
sdc = dd.read_csv('../0_data/external/sdc/sdc_*.csv',
                  dtype=object, thousands=',', assume_missing=True)

COLUMNS = ['MASTER_DEAL_NO', 'AMANAMES', 'ACUSIP', 'APUBLIC', 'DATEANN', 'DATEEFF',
           'DATEFIN', 'EBITLTM', 'ENTVAL', 'MV', 'NETASS', 'NILTM',
           'PCT_CASH', 'PCT_STK', 'PCT_OTHER', 'PCT_UNKNOWN', 'ATTITUDE', 'PR',
           'RANKVAL', 'SALESLTM', 'TMANAMES', 'TNATIONCODE', 'TPUBLIC']

comp_us = sdc.loc[(sdc['APUBLIC'] == 'Public') & (sdc['STATUSCODE'] == 'C') & (sdc['ANATIONCODE'].isin(['US', 'CA'])), COLUMNS]

compmna = comp_us.compute()

In [None]:
Cat = ['ACUSIP', 'APUBLIC', 'TNATIONCODE', 'TPUBLIC', 'ATTITUDE']
Dt = ['DATEANN', 'DATEEFF', 'DATEFIN']
Flt = ['EBITLTM', 'ENTVAL', 'MV', 'NETASS', 'NILTM', 'PCT_CASH', 'PCT_STK',
       'PCT_OTHER', 'PCT_UNKNOWN', 'PR', 'RANKVAL', 'SALESLTM']

compmna['MASTER_DEAL_NO'] = abs(compmna['MASTER_DEAL_NO'].astype(np.int32))
for i in Cat: 
    compmna[i] = compmna[i].astype('category')
for i in Dt:
    compmna[i] = compmna[i].astype('datetime64[ns]')
for i in Flt:
    compmna[i] = pd.to_numeric(compmna[i].astype(str).str.replace(',',''),errors='coerce')

compmna.drop_duplicates(inplace=True)

_priv_ dataset is a subset of _compmna_ that will be matched to Ciao-Wei's data.

In [None]:
priv = compmna.copy().loc[(compmna['RANKVAL'].notnull()) & (compmna['TPUBLIC'] == 'Priv.'), :]
priv.drop_duplicates(inplace=True)
priv = priv[(priv['DATEEFF'] >= '1997-01-01') & (priv['DATEEFF'] <= '2013-12-31')]

The code below grabs all but 8 _MASTER_DEAL_NO_ from _priv_.

In [None]:
# merge by dates and names
cw = cw.merge(priv[['AMANAMES', 'DATEANN', 'DATEEFF', 'TMANAMES', 'MASTER_DEAL_NO']],
         left_on=['ACQ_NAME', 'ANN', 'EFF', 'TRG_NAME'],
         right_on=['AMANAMES', 'DATEANN', 'DATEEFF', 'TMANAMES'], how='left')
col = list(cw)
col.insert(0, col.pop())
cw = cw[col]

cw.drop(columns=['AMANAMES', 'DATEANN', 'DATEEFF', 'TMANAMES'], inplace=True)

cw['MASTER_DEAL_NO'] = cw['MASTER_DEAL_NO'].astype('Int64')

# review dates and names of the 20 missing
missing = priv[['MASTER_DEAL_NO', 'AMANAMES', 'DATEANN', 'DATEEFF', 'TMANAMES']].merge(cw[cw['MASTER_DEAL_NO'].isna()][['ANN', 'EFF', 'ACQ_NAME', 'TRG_NAME']],
                                                           how='right',left_on=['DATEANN', 'DATEEFF'], right_on=['ANN', 'EFF'])
# manual match
missing = missing.iloc[[0, 6, 8, 13, 15, 16, 17, 18, 19, 20, 23, 26]][['MASTER_DEAL_NO', 'ANN', 'EFF', 'ACQ_NAME', 'TRG_NAME']]

# update MASTER_DEAL_NO
cw = cw.merge(missing, 
         left_on=['ACQ_NAME', 'ANN', 'EFF', 'TRG_NAME'],
         right_on=['ACQ_NAME', 'ANN', 'EFF', 'TRG_NAME'], how='left', suffixes=('','_y'))

cw['MASTER_DEAL_NO'] = np.where(cw['MASTER_DEAL_NO'].isna(), cw['MASTER_DEAL_NO_y'], cw['MASTER_DEAL_NO'])

cw.drop(columns=['MASTER_DEAL_NO_y'], inplace=True)

In [None]:
sql_query= """
select permno, namedt, nameenddt, substring(ncusip, 1, 6) as cusip
from crsp.stocknames
where shrcd in %(shrcd)s and ncusip is not null
"""

parm = {'shrcd':(10, 11)}
ncusip = db.raw_sql(sql_query, date_cols=['linkdt', 'linkenddt'], params=parm)

ncusip['permno'] = ncusip['permno'].astype('int32')

In [None]:
link_sql = """
select distinct a.permno, namedt, nameenddt, cusip, gvkey, linkdt, linkenddt
from ncusip a join linktable b
on a.permno = b.permno 
"""
link = ps.sqldf(link_sql, locals())

link['namedt'] = link['namedt'].astype('datetime64[ns]')
link['nameenddt'] = link['nameenddt'].astype('datetime64[ns]')
link['linkdt'] = link['linkdt'].astype('datetime64[ns]')
link['linkenddt'] = link['linkenddt'].astype('datetime64[ns]')
link['cusip'] = link['cusip'].astype('category')

In [None]:
link.shape

In [None]:
link[link['permno']==10006]

In [None]:
link[~(link['namedt'] >= link['linkdt']) | ~(link['nameenddt'] <= link['linkenddt'])]

In [None]:
compmna

In [None]:
link_sql = """
select distinct a.*, b.permno, b.gvkey, b.namedt, b.nameenddt
from compmna a left join link b
on a.ACUSIP = b.cusip
"""
sdc_sub = ps.sqldf(link_sql, locals())

In [None]:
compmna.shape

In [None]:
sdc_sub.shape

### Materiality of M&A

In [None]:
material = pd.read_csv('/Users/ohn0000/Project/cko/0_data/external/materiality.csv')
material.set_index(["year", "gvkey1"], inplace=True, verify_integrity=True)

Be careful since the _year_ here refers to the M&A firm-year. The _year_ in __avg_sim__ is the year competitors are identified. 

### M&A Disclosure

In [None]:
disc = pd.read_csv('/Users/ohn0000/Project/cko/0_data/manual/disc.csv', parse_dates=['DATADATE'])
disc['CIK'] = disc['CIK'].apply(lambda x: str(int(x)).zfill(10) if pd.notnull(x) else None)

Disclosure also might need additonal data collection.

In [None]:
disc.rename(columns={"GVKEY":"gvkey1", "FYEAR":"year"}, inplace=True)
disc.set_index(["year", "gvkey1"], inplace=True, verify_integrity=True)

In [None]:
manual = disc.join(material)[['DATADATE', 'CIK', 'TGTAT_ACQAT', 'TGTDVAL_ACQAT', 'MD_A', 'PROFORMA']].sort_index()

In [None]:
import wrds
db = wrds.Connection(wrds_username = "yaera")

In [None]:
ma_details_desc = db.describe_table('sdc', 'ma_details').sort_values('name')
with pd.option_context('display.max_rows', None):
    print(ma_details_desc)

|     Variable | Description                    |
|:------------:|:-------------------------------|
|bookvalue     |Target Book Value (\$mil)       |
|compete       |Competing Bidder (Y/N)          |
|competecode   |Competing Bid Deal Code         |  
|dateann       |Date Announced                  |
|dateannest    |_dateann_ is estimated (Y/N)    | 
|dateeff       |Date Effective                  | 
|ebitltm       |Target EBIT LTM (\$mil)         |
|pct_cash      |Percentage of consideration paid in cash|
|pct_other|Percentage of consideration paid in other then cash or stock|
|pct_stk|Percentage of consideration paid in stock|
|pct_unknown|Percentage of consideration which is unknown|
|ptincltm|Target Pre-Tax Income LTM (\$mil)|
|salesltm|Target Sales LTM (\$mil)|
|rankval|Ranking Value incl Net Debt of Target (\$mil)|

Run sql query below on _WRDS_

In [None]:
# import wrds
# sdc_query = """
# select master_deal_no as DealNumber, 
#         bookvalue, 
#         compete, 
#         competecode, 
#         dateann, 
#         dateannest, 
#         dateeff, 
#         ebitltm, 
#         pct_cash,
#         pct_other,
#         pct_stk,
#         pct_unknown,
#         ptincltm,
#         salesltm,
#         rankval
# from sdc.ma_details
# where dateeff is not null 
# """
# # and master_deal_no in %(deal_no)s
# sdc = db.raw_sql(sdc_query, date_cols=['dateann', 'dateeff'])
# sdc.to_pickle('/home/upenn/yaera/sdc.pkl')

In [None]:
sdc = pd.read_pickle('/Users/ohn0000/Project/cko/0_data/external/sdc.pkl')
sdc.drop_duplicates('dealnumber', inplace = True)
sdc['dealnumber'] = sdc['dealnumber'].astype('int64')

# clear up values and change dtype to 'float'
for column in ['bookvalue', 'ebitltm', 'pct_cash', 'pct_other', 'pct_stk', 'pct_unknown', 'ptincltm', 'salesltm', 'rankval']:
    sdc[column] = sdc[column].apply(lambda x: np.NaN if x == '*********' else (np.NaN if pd.isna(x) else (float(x.replace(',', '')) if isinstance(x, str) else float(x))))
    sdc[column].astype('float16')

In [None]:
sdc_sub = pd.merge(sdc_link, sdc,
                   left_index=True, right_on='dealnumber').drop('dealnumber', axis='columns')
sdc_sub.index.name = 'dealnumber'

In [None]:
sdc_sub.sort_values(['agvkey', 'dateeff'], inplace=True)

In [None]:
sdc_link['agvkey'].count() / sdc['dealnumber'].count()

In [None]:
sdc_sub['agvkey'].count() / sdc_link['agvkey'].count() 

In [None]:
sdc_sub.profile_report(style={'full_width':True})

Use __compustat__ _datadate_ and gvkey to link the sdc data to the similarity scores

In [None]:
import wrds
db = wrds.Connection(wrds_username = 'hohn')

sdc_quary = """
select gvkey, datadate, fyear, cusip,  cik
from comp.funda
where consol = %(consol)s and indfmt in %(indfmt)s and datafmt = %(datafmt)s and popsrc = %(popsrc)s and curcd in %(curcd)s
"""

parm = {'consol':('C'), 'indfmt' : ('INDL', 'FS'), 'datafmt': ('STD'), 'popsrc' : ('D'), 'curcd' : ('USD', 'CAD')}

funda = db.raw_sql(sdc_quary, params = parm, date_cols = ['datadate'])

In [None]:
funda['start'] = funda['datadate'] - pd.DateOffset(months = 12) + pd.DateOffset(days = 1)
funda['gvkey'] = funda['gvkey'].astype('int64')
funda.set_index('gvkey', inplace=True)

In [None]:
funda.fyear = funda.fyear.astype('Int16')

In [None]:
import pandasql as ps

sql_query = '''
select a.*, b.datadate, b.fyear, b.cusip, b.cik
from sdc_sub a left join funda b
on a.agvkey = b.gvkey and a.dateeff between b.start and b.datadate
'''

newdf = ps.sqldf(sql_query, locals())

In [None]:
col = list(newdf)
for i in range(2, 6):
    col.insert(i, col.pop(-1))
newdf = newdf.loc[:,col]

In [None]:
for i in ['datadate', 'dateann', 'dateeff']:
    newdf[i] = newdf[i].astype('datetime64[ns]')
    
newdf['year'] = newdf['datadate'].dt.year.astype('Int16')
for i in ['fyear', 'agvkey', 'tgvkey']:
    newdf[i] = newdf[i].astype('Int64')

In [None]:
col = list(newdf)
col.insert(col.index('datadate'), col.pop(col.index('year')))
newdf = newdf.loc[:,col]

In [None]:
newdf = newdf.drop_duplicates(subset='dealnumber')

In [None]:
newdf[newdf['agvkey'].notnull()]

In [None]:
newdf['rankval'].count()

18994 observations with non-missing _rankval_

In [None]:
newdf['salesltm'].count()

8055 observations with non-missing _salesltm_

In [None]:
np.sum(newdf['rankval'].notnull() & newdf['salesltm'].notnull())

6445 observations with both _rankval_ and _salesltm_ available

## Append similarity score between acquirer and target

In [None]:
upload = newdf[newdf['agvkey'].notnull() & newdf['tgvkey'].notnull() & newdf['year'].notnull()][['agvkey', 'tgvkey', 'year']].rename(columns={'agvkey':'gvkey1', 'tgvkey':'gvkey2'})
upload.to_csv('/Users/ohn0000/Project/cko/2_pipeline/upload.csv', index=False)
!scp /Users/ohn0000/Project/cko/2_pipeline/upload.csv $WRDS:/scratch/ou/hohn

Run this on wrds server. The __TNIC_All__ files should be uploaded in scratch beforehand.

In [None]:
"""
The server killed the previous code that joins after combines all files. The current code instead loop over the files.
"""
# !cd /scratch/ou/hohn/TNIC_AllPairsDistrib
# !cat tnicall1996.txt > tnicall_combined.txt
# !for file in tnicall{1997..2017}.txt; do sed '1d' $file >> tnicall_combined.txt; done
# !cd ~


"""
atsim.py
"""


In [None]:
!scp atsim.py $WRDS:~

In [None]:
!scp $WRDS:/scratch/ou/hohn/atsim.csv /Users/ohn0000/Project/cko/2_pipeline/

In [None]:
col = list(newdf)
col.insert(col.index('bookvalue'), col.pop(col.index('atsim')))
newdf = newdf.loc[:,col]

## IV candidates

The materiality measure based on deal value will be the last resort for the IV.   
Alternatively, 2SLS using multiple IVs is feasible.

Candidates
* Max deal value
* Sum deal value
* Datedif between _dateeff_ and _datadate_
    * _dateeff_ of the first M&A
    * _dateeff_ of the largest M&A
    * weighted average of _dateeff_ 

## Cross-sections
* Similarity between acquirer and target 
    - Relation stronger in diversifying
    - Could be more of a U-shaped relation, i.e., competitors don't follow when you move far enough
* Average value of pre-similarities between acquirer and close competitors 
    - Prediction not clear
* M&A performance during the completed firm-year
    - Relation stronger when M&A was more successful <-> how do we define success of an M&A?
* Number of close competitors of the target
    - Potential targets are candidates of future mergers
* How many competitors were there initially?
    - The size of the TNIC industry