In [120]:
import pandas as pd
import numpy as np
import random
import pickle

In [121]:
data = pd.read_csv('../../MA_PREDICTOR/data/ma_detailed_data_car.csv', parse_dates=['announcement_date'])

In [122]:
data.head()

Unnamed: 0,id,announcement_date,deal_value,acquiror_total_assets,acquiror_name,acquiror_ticker,target_name,target_nation,acquiror_nation,target_status,...,target_financial_advisor,purpose,acquiror_code,target_code,acquisition_count,bidder_count,car_1,car_3,car_5,car_10
0,1629274020,2005-01-04,153.67,6166.05,SuperValu Inc,SVU,Total Logistics Inc,United States,United States,Public,...,William Blair & Co,Strengthen existing operations/expand presence...,54301020,52405030,23,1,-0.009362,0.001308,0.018041,0.056428
1,1631278020,2005-01-11,89.16,11127.95,Fulton Financial Corp,FULT,"SVB Financial Services Inc, Sommerville,New Je...",United States,United States,Public,...,Danielson Associates Inc,Expand presence in new/foreign markets,55101010,55101010,18,1,-0.011249,0.004454,0.012696,0.044807
2,1631266040,2005-01-12,427.54,1116.9,Cleveland-Cliffs Inc,CLF,Portman Ltd,Australia,United States,Public,...,Gresham Partners|Azure Capital,Strengthen existing operations/expand presence...,51201020,51201020,8,1,0.013061,0.056958,0.055917,0.016918
3,1632555020,2005-01-18,232.22,15834.52,Colonial BancGroup Inc,CNB,"FFLC Bancorp Inc,Leesburg, Florida",United States,United States,Public,...,Keefe Bruyette & Woods Inc,"Create synergies, eliminate duplicate services...",55101010,55101010,40,1,0.060476,-0.010042,0.002116,0.040544
4,1634502020,2005-01-26,2211.08,1105.45,Cimarex Energy Co,XEC,Magnum Hunter Resources Inc,United States,United States,Public,...,Deutsche Bank Securities|Merrill Lynch,Strengthen existing operations/expand presence...,50102020,50102020,0,1,-0.09376,-0.082288,-0.082288,-0.06174


In [123]:
data.columns

Index(['id', 'announcement_date', 'deal_value', 'acquiror_total_assets',
       'acquiror_name', 'acquiror_ticker', 'target_name', 'target_nation',
       'acquiror_nation', 'target_status', 'acquiror_industry',
       'target_industry', 'shares_at_announcement', 'shares_acquired',
       'consideration_offered', 'attitude', 'acquisition_technique',
       'acquiror_financial_advisor', 'target_financial_advisor', 'purpose',
       'acquiror_code', 'target_code', 'acquisition_count', 'bidder_count',
       'car_1', 'car_3', 'car_5', 'car_10'],
      dtype='object')

In [124]:
# Creating new dataframe to work on

clean = data[['announcement_date',
              'deal_value', 'acquiror_total_assets', # financials
              'target_nation', 'acquiror_nation', 'target_status', # participants
              'acquiror_code', 'target_code', # participants
              'acquisition_count', # participants
              'shares_at_announcement', 'shares_acquired', # deal
              'consideration_offered', 'acquisition_technique', 'purpose', # deal 
              'acquiror_financial_advisor', 'target_financial_advisor', # deal
              'bidder_count', # deal 
              'car_1', 'car_3', 'car_5', 'car_10'
             ]].copy()

# announcement_date (month)

In [125]:
clean['month']= pd.DatetimeIndex(clean['announcement_date']).month

# deal_value (relative deal_value)

In [126]:
clean['rel_deal_value'] = clean.apply(lambda row:
                                       row.deal_value / row.acquiror_total_assets
                                       if row.acquiror_total_assets # one observation has 0 total assets and
                                       else -1000, # will be filtered out
                                       axis=1)

# Filtering out acquiror_total_assets = 0
clean = clean[clean.rel_deal_value > 0]

# Dropping cols
clean.drop(columns=['deal_value', 'acquiror_total_assets'], inplace=True)

# acquiror_nation & target_nation (cross-border)

In [127]:
clean['cross_border'] = clean.apply(lambda row:
                                    'cross_border'
                                    if row.acquiror_nation == row.target_nation
                                    else 'national',
                                    axis=1)
# Dropping cols
clean.drop(columns=['acquiror_nation', 'target_nation'], inplace=True)

# target_status

In [128]:
clean['target_status'] = clean['target_status'].apply(lambda val:
                                                      'public'
                                                      if val== 'Public'
                                                      else 'others')

# acquiror_code & target_code (relatedness)

In [129]:
# Get relatedness from TRBC code
def relatedness(acquiror, target):
    if acquiror == target:
        return 'industry'
    elif acquiror[:6] == target[:6]:
        return 'industry_group'
    elif acquiror[:4] == target[:4]:
        return 'business_sector'
    elif acquiror[:2] == target[:2]:
        return 'economic_sector'
    else:
        return 'not_related'

In [130]:
clean['relatedness'] = clean.apply(lambda row:
                                   relatedness(str(row.acquiror_code), str(row.target_code)),
                                   axis=1)

# acquiror_code & target_code (industries)

## Declassifier

In [131]:
# transform code into different cols mentioned above
def declassifier(x):
    x = str(x)
    return x[:2], x[:4]

In [132]:
clean['economic_sector_ac'], clean['business_sector_ac'] = \
zip(*clean['acquiror_code'].map(declassifier))
clean['economic_sector_target'], clean['business_sector_target'] = \
zip(*clean['target_code'].map(declassifier))

In [133]:
clean.drop(columns=['acquiror_code', 'target_code'], inplace=True)

## Implementation of the declassifier function

In [134]:
# Implement the declassifier

pickle_in = open("../../MA_PREDICTOR/data//declassification.pkl","rb")
results = pickle.load(pickle_in)

In [135]:
def get_info_trbc(hierarchical_id):
    str_id= str(hierarchical_id)
    try:
        if len(str_id)==2:
            s = results.loc[results['hierarchical_id'].str.startswith(str_id), 'economic_sector'].reset_index(drop=True)
            return s[0]
        elif len(str_id)==4:
            r = results.loc[results['hierarchical_id'].str.startswith(str_id), 'business_sector'].reset_index(drop=True)
            return r[0]
        elif len(str_id)==6:
            q = results.loc[results['hierarchical_id'].str.startswith(str_id), 'industry_group'].reset_index(drop=True)
            return q[0] 
        elif len(str_id)==8:
            p = results.loc[results['hierarchical_id'].str.startswith(str_id), 'industry'].reset_index(drop=True)
            return p[0] 
        else:
            print("Please enter a valid hierarchical ID.")
    except:
        print("Please enter a valid hierarchical ID.")

In [136]:
clean['economic_sector_ac'] = clean.apply(lambda row: get_info_trbc(row.economic_sector_ac), axis=1)
clean['business_sector_ac'] = clean.apply(lambda row: get_info_trbc(row.business_sector_ac), axis=1)
clean['economic_sector_target'] = clean.apply(lambda row: get_info_trbc(row.economic_sector_target), axis=1)
clean['business_sector_target'] = clean.apply(lambda row: get_info_trbc(row.business_sector_target), axis=1)

# shares_acquired

In [137]:
clean['shares_acquired'] = clean['shares_acquired'].astype(str)

In [138]:
# Apply
clean['shares_acquired'] = clean['shares_acquired'].apply(lambda val:
                                                          "full"
                                                          if val=="100.0"
                                                          else "not_full")

# shares_at_announcement

In [139]:
# Transform
clean['shares_at_announcement'] = clean['shares_at_announcement'].astype(str)

# Apply
clean['shares_at_announcement'] = clean['shares_at_announcement'].apply(lambda val: "no" if val=="0.0" else "yes")

# consideration_offered

In [140]:
# transform pipes into list of all considerations
list_gen = lambda x: x.split('|')
clean['consideration_offered'] = clean.consideration_offered.apply(list_gen)

# Cluster considerations
def cash(considerations):
    
    for consideration in considerations:
        if 'Cash' not in consideration:
            return 'Other'
    return 'Cash'

# Apply
clean['consideration_offered'] = clean.apply(lambda row:
                                             cash(row.consideration_offered),
                                             axis=1)

# acquisition_technique (tbc with defined cluster of techniques)

In [141]:
cluster_dict = {
        'private_companies': ['Reverse Takeover',
                              'Acquiror Is An Investor Group',
                              'Privatization',
                              'Private Tender Offer',
                              'Privately Negotiated Purchase',
                              'Secondary Buyout', 
                              'Going Private'],
        'asset_driven_op': ['Mandatory Offering',
                            'Mergerof Equals',
                            'Collar',
                            'Joint Venture', 
                            'Reverse Morris Trust',
                            'Sale and Leaseback',
                            'Property Acquisition Flag',
                            'Concession',
                            'Asset Swap',
                            'Stock Swap'],
        'divestiture': ['Divestiture', 
                        'Institutional Buyout', 
                        'Leveraged Buyout',
                        'Tender Merger',
                        'Tender Offer', 
                        'Open Market Purchase',
                        'Financial Acquiror', 
                        'Debt Restructuring', 
                        'Internal Reorganization',
                        'Three Way Merger',
                        'Rumored Deal',
                        'Schemeof Arrangement',
                        'Unsolicited Deal'],
        'possible_problematic': ['Litigation',
                                 'Acquiror Includes Management',
                                 'Acquiror Is A White Knight',
                                 'Proxy Fight'],
        'bankruptcy': ['Bankruptcy Acquisition',
                       'Restructuring',
                       'Auction']
                                 
    }

In [142]:
clean["acquisition_technique_clean"] = clean["acquisition_technique"].apply(lambda x: x[:x.find("|")] if "|" in x else x)

In [143]:
def get_cluster(technique):
        #cluster_df = pd.DataFrame(cluster_dict)
    for k, v in cluster_dict.items():
        if technique in v:
            return k

In [144]:
clean['cluster_category'] = clean['acquisition_technique_clean'].apply(get_cluster)
    


In [145]:
clean.drop('acquisition_technique_clean', inplace=True, axis=1)

# acquiror_financial_advisor & target_financial_advisor (count)

**Top 10 M&A advisors sourced from dealroom (https://dealroom.net/blog/biggest-m-a-firms)**

In [146]:
# Defining top advisors

parties = ['acquiror_financial_advisor', 'target_financial_advisor']
adv = clean[parties].copy()

# Filling up NAs with 'Unknown'
adv.fillna('Unknown', inplace=True)

# unpack advisors and put them into set
advisors = set()
for index, row in adv.iterrows():
    for advisor in row['acquiror_financial_advisor'].split('|'):
        advisors.add(advisor)
    for advisor in row['acquiror_financial_advisor'].split('|'):
        advisors.add(advisor)
        
# Defining list of top M&A advisors ()
base = ['Barclays', 'Bank of America', 'Citi', 'Credit Suisse','Goldman Sachs',
        'JP Morgan', 'Lazard', 'Morgan Stanley', 'Rothschild', 'UBS']

# Adjust advisor's name to dataset (preliminary operation)
top_advisors = []
for advisor in advisors: # iterating through set of advisors
    for top_advisor in base:
        if top_advisor in advisor: # if (base) name of top_advisors is in set append
            top_advisors.append(advisor)
            
top_advisors = sorted(top_advisors)

# Manually investigate top_advisors and append/ remove accordingly
top_advisors.remove('Mitsubishi UFJ Morgan Stanley')

In [147]:
# Find out how many top_advisors were involved in transaction

# Filling up NAs with 'Unknown'
clean.fillna('Unknown', inplace=True)

# Transforming advisor pipe(s) into lists to iterate through
list_gen = lambda x: x.split('|')

for party in parties:
    clean[party] = clean[party].apply(list_gen)

# Counting number of top_advisors per deal (acquiror_side)
def counter(advisors):
    count = 0
    for advisor in advisors:
        if advisor in top_advisors:
            count += 1
    return count

# Create new cols for acquiror and target
clean['a_fin_adv_count'] = clean.apply(lambda row: counter(row.acquiror_financial_advisor), axis=1)
clean['t_fin_adv_count'] = clean.apply(lambda row: counter(row.target_financial_advisor), axis=1)

# Drop cols
clean.drop(columns=parties, inplace=True)

# Clean-up

In [148]:
clean.drop(columns=['acquisition_technique', 'purpose'], inplace=True) # Comment if necessary

In [149]:
clean.columns

Index(['announcement_date', 'target_status', 'acquisition_count',
       'shares_at_announcement', 'shares_acquired', 'consideration_offered',
       'bidder_count', 'car_1', 'car_3', 'car_5', 'car_10', 'month',
       'rel_deal_value', 'cross_border', 'relatedness', 'economic_sector_ac',
       'business_sector_ac', 'economic_sector_target',
       'business_sector_target', 'cluster_category', 'a_fin_adv_count',
       't_fin_adv_count'],
      dtype='object')

In [150]:
old_order = clean.columns.tolist()
new_order = old_order[:7] + old_order[11:] + old_order[7:11]

In [152]:
clean = clean[new_order]

In [153]:
clean.shape

(3025, 22)

In [154]:
clean.head()

Unnamed: 0,announcement_date,target_status,acquisition_count,shares_at_announcement,shares_acquired,consideration_offered,bidder_count,month,rel_deal_value,cross_border,...,business_sector_ac,economic_sector_target,business_sector_target,cluster_category,a_fin_adv_count,t_fin_adv_count,car_1,car_3,car_5,car_10
0,2005-01-04,public,23,no,full,Cash,1,1,0.024922,cross_border,...,Food & Drug Retailing,Industrials,Transportation,divestiture,1,0,-0.009362,0.001308,0.018041,0.056428
1,2005-01-11,public,18,no,full,Other,1,1,0.008012,cross_border,...,Banking & Investment Services,Financials,Banking & Investment Services,asset_driven_op,0,0,-0.011249,0.004454,0.012696,0.044807
2,2005-01-12,public,8,no,not_full,Cash,1,1,0.382792,national,...,Mineral Resources,Basic Materials,Mineral Resources,divestiture,0,0,0.013061,0.056958,0.055917,0.016918
3,2005-01-18,public,40,no,full,Other,1,1,0.014665,cross_border,...,Banking & Investment Services,Financials,Banking & Investment Services,asset_driven_op,0,0,0.060476,-0.010042,0.002116,0.040544
4,2005-01-26,public,0,no,full,Other,1,1,2.000163,cross_border,...,Energy - Fossil Fuels,Energy,Energy - Fossil Fuels,asset_driven_op,0,0,-0.09376,-0.082288,-0.082288,-0.06174


# Uploading

In [155]:
clean.to_csv('../../MA_PREDICTOR/data/ma_detailed_data_car_clean.csv', index=False, date_format="%d/%m/%Y")