In [16]:
import pandas as pd
import numpy as np
from datetime import datetime

# Raw data

In [17]:
# Read the raw deal export from disk and preview its first two rows.
raw_csv_path = '../raw_data/210906_LeWagon_finalproject_v2.csv'
data = pd.read_csv(raw_csv_path)
data.head(n=2)

Unnamed: 0,SDC Deal No,Date Announced,"Deal Value\n(USD, Millions)",Acquiror Full Name,Acquiror Primary Ticker Symbol,Target Full Name,Target Nation,Acquiror Nation,Target Public Status,Acquiror TRBC Industry,Target TRBC Industry,Percentage of Shares Held at Announcement,Percentage of Shares Acquired in Transaction,Consideration Offered\n('|'),Deal Attitude,Acquisition Techniques\n('|'),Acquiror Financial Advisors Name\n('|'),Target Financial Advisors Name\n('|')
0,1631732020,01/01/2005,12.0,Plains All American Pipeline LP,PAA,Shell Pipeline Co LP-Crude Oil Pipeline Assets...,United States,United States,Subsidiary,Oil & Gas Refining and Marketing,Oil & Gas Transportation Services,,100.0,Cash|Cash Only,Friendly,Financial Acquiror|Divestiture,,
1,1653652020,01/01/2005,,Regis Corp,RGS,Scot Lewis Schools,United States,United States,Private,Personal Services,Professional & Business Education,,100.0,Unspecified,Friendly,Not Applicable,,


# Adjust data

## Column names

In [18]:
# Inspect the raw column labels (several contain an embedded newline and a '|' hint) before renaming.
data.columns

Index(['SDC Deal No', 'Date Announced', 'Deal Value\n(USD, Millions)',
       'Acquiror Full Name', 'Acquiror Primary Ticker Symbol',
       'Target Full Name', 'Target Nation', 'Acquiror Nation',
       'Target Public Status', 'Acquiror TRBC Industry',
       'Target TRBC Industry', 'Percentage of Shares Held at Announcement',
       'Percentage of Shares Acquired in Transaction',
       'Consideration Offered\n('|')', 'Deal Attitude',
       'Acquisition Techniques\n('|')',
       'Acquiror Financial Advisors Name\n('|')',
       'Target Financial Advisors Name\n('|')'],
      dtype='object')

In [19]:
# snake_case replacement labels, listed in the same order as the columns of the raw file.
new_columns = [
    'id',
    'announcement_date',
    'deal_value',
    'acquiror_name',
    'acquiror_ticker',
    'target_name',
    'target_nation',
    'acquiror_nation',
    'target_status',
    'acquiror_industry',
    'target_industry',
    'shares_at_announcement',
    'shares_acquired',
    'consideration_offered',
    'attitude',
    'acquisition_technique',
    'acquiror_financial_advisor',
    'target_financial_advisor',
]

In [20]:
# Replace the raw labels with the snake_case names. The assignment is positional,
# so new_columns must list one name per column, in the file's original column order.
data.columns = new_columns

## Adding TRBC (industry classifier)

In [21]:
# Load the TRBC lookup table (industry title -> hierarchical code) and preview it.
# NOTE(review): unpickling can execute arbitrary code; only load this file from a trusted source.
trbc_pickle_path = '../MA_PREDICTOR/data/trbc.pkl'
trbc = pd.read_pickle(trbc_pickle_path)
trbc.head()

Unnamed: 0,Title,Hierarchical_Code
0,Coal,50101010
1,Integrated Oil & Gas,50102010
2,Oil & Gas Exploration and Production,50102020
3,Oil & Gas Refining and Marketing,50102030
4,Oil & Gas Drilling,50103010


In [22]:
# Attach the TRBC hierarchical code twice: once matched on the acquiror's industry
# title and once on the target's. Industries without a matching title get NaN.
data = (
    data
    .join(
        trbc.set_index('Title').rename(columns={'Hierarchical_Code': 'acquiror_code'}),
        on='acquiror_industry',
    )
    .join(
        trbc.set_index('Title').rename(columns={'Hierarchical_Code': 'target_code'}),
        on='target_industry',
    )
)

## dtypes

In [23]:
# Check the inferred dtypes to see which columns still need conversion.
data.dtypes

id                              int64
announcement_date              object
deal_value                     object
acquiror_name                  object
acquiror_ticker                object
target_name                    object
target_nation                  object
acquiror_nation                object
target_status                  object
acquiror_industry              object
target_industry                object
shares_at_announcement        float64
shares_acquired               float64
consideration_offered          object
attitude                       object
acquisition_technique          object
acquiror_financial_advisor     object
target_financial_advisor       object
acquiror_code                 float64
target_code                   float64
dtype: object

In [24]:
# Parse the day/month/year strings in announcement_date into datetime values.
data = data.assign(
    announcement_date=pd.to_datetime(data['announcement_date'], format="%d/%m/%Y")
)

In [25]:
# transforming deal_value
def rem_com(x):
    """Remove every comma from a string value; return any non-string value unchanged."""
    return x.replace(',', '') if isinstance(x, str) else x


# deal_value is an object column with missing entries, so clean it element by
# element (strings lose their commas, everything else passes through) and then cast to float.
data['deal_value'] = data['deal_value'].apply(rem_com).astype(float)

In [31]:
# Transforming trbc codes

# Drop rows where either TRBC code is missing, then cast both code columns from float to int.
# Chaining dropna/astype builds a new frame rather than assigning columns into a
# boolean-filtered slice, which is what raises SettingWithCopyWarning.
trbc_code_cols = ['acquiror_code', 'target_code']
data = data.dropna(subset=trbc_code_cols).astype({col: int for col in trbc_code_cols})

# Modifying data

In [32]:
# Count missing values per column to decide how each column's NAs are handled.
data.isna().sum()

id                                0
announcement_date                 0
deal_value                    14024
acquiror_name                     0
acquiror_ticker                  35
target_name                       1
target_nation                     1
acquiror_nation                   0
target_status                     0
acquiror_industry                 0
target_industry                   0
shares_at_announcement        28425
shares_acquired                   0
consideration_offered             1
attitude                          0
acquisition_technique             0
acquiror_financial_advisor    23981
target_financial_advisor      21279
acquiror_code                     0
target_code                       0
dtype: int64

In [33]:
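# NA handling per column:
# deal_value                                 -> not considered (NAs are kept)
# acquiror_ticker                            -> remove rows with NA
# target_name/target_nation/target_industry  -> remove rows with NA
# target_status                              -> remove rows with NA
# shares_at_announcement                     -> fill NA with 0
# consideration_offered                      -> remove rows with NA
# acquiror/target_financial_advisor          -> not considered (NAs are kept)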

## Fill up missing values

In [34]:
# NA in 'shares_at_announcement' implies 0
# Fill through the frame instead of calling fillna(inplace=True) on the selected column:
# that chained in-place form emits chained-assignment warnings and, under pandas
# Copy-on-Write, leaves the frame unchanged.
data = data.fillna({'shares_at_announcement': 0})

## Removing NAs

In [35]:
# Columns in which a missing value disqualifies the whole row.
drop_na = [
    'acquiror_ticker',
    'target_name',
    'target_nation',
    'target_status',
    'target_industry',
    'consideration_offered',
]

In [36]:
# Keep only the rows that have a value in every column listed in drop_na.
data = data.dropna(subset=drop_na)

In [37]:
# Share of rows whose consideration_offered is 'Unspecified'.
# Removing them would cost about 44% of the data.
unspecified_mask = data['consideration_offered'] == 'Unspecified'
int(unspecified_mask.sum()) / len(data)

0.4448732836942414

## US only

In [38]:
# The raw data was filtered on nation of incorporation, but other acquiror nations
# are still present; list the distinct values to see which ones.
data['acquiror_nation'].unique()

array(['United States', 'Canada', 'Russia', 'Philippines', 'Poland',
       'China (Mainland)', 'Netherlands', 'France', 'Japan', 'Hong Kong',
       'Malaysia', 'United Kingdom', 'Sweden', 'Switzerland', 'Hungary',
       'Bermuda', 'Taiwan', 'Argentina', 'Israel', 'Colombia', 'Thailand',
       'Brazil', 'Peru', 'New Zealand', 'Panama', 'Australia',
       'Singapore', 'United Arab Emirates', 'Cambodia', 'Ireland',
       'Ecuador', 'Czech Republic', 'Dominican Republic', 'Fiji',
       'South Korea', 'Vietnam', 'Kenya', 'Ukraine', 'Denmark',
       'Puerto Rico', 'Indonesia', 'U.S. Virgin Islands', 'Italy',
       'Malta'], dtype=object)

In [39]:
# Restrict the data to deals whose acquiror nation is the United States.
is_us_acquiror = data['acquiror_nation'].eq('United States')
data = data.loc[is_us_acquiror]

## Empty acquisitions

In [40]:
# Shape of the subset where no shares were acquired (1305 such "empty" acquisitions at the last run).
data.loc[data['shares_acquired'].le(0.0)].shape

(1305, 20)

In [41]:
# Drop the empty acquisitions: keep only rows where a positive share was acquired.
has_shares_acquired = data['shares_acquired'].gt(0.0)
data = data.loc[has_shares_acquired]

In [43]:
# Preview the cleaned frame before it is written out.
data.head()

Unnamed: 0,id,announcement_date,deal_value,acquiror_name,acquiror_ticker,target_name,target_nation,acquiror_nation,target_status,acquiror_industry,target_industry,shares_at_announcement,shares_acquired,consideration_offered,attitude,acquisition_technique,acquiror_financial_advisor,target_financial_advisor,acquiror_code,target_code
0,1631732020,2005-01-01,12.0,Plains All American Pipeline LP,PAA,Shell Pipeline Co LP-Crude Oil Pipeline Assets...,United States,United States,Subsidiary,Oil & Gas Refining and Marketing,Oil & Gas Transportation Services,0.0,100.0,Cash|Cash Only,Friendly,Financial Acquiror|Divestiture,,,50102030,50103030
1,1653652020,2005-01-01,,Regis Corp,RGS,Scot Lewis Schools,United States,United States,Private,Personal Services,Professional & Business Education,0.0,100.0,Unspecified,Friendly,Not Applicable,,,54201030,63103010
3,1949421020,2005-01-01,,Google Inc,GOOG,PhatBits,United States,United States,Private,Online Services,Software,0.0,100.0,Unspecified,Friendly,Not Applicable,,,57201030,57201020
4,1652453040,2005-01-01,20.03,Graco Inc,GGG,Gusmer Europe SL,Spain,United States,Private,Industrial Machinery & Equipment,Commodity Chemicals,0.0,100.0,Cash|Cash Only,Friendly,Not Applicable,,,52102010,51101010
5,2721963020,2005-01-01,18220.52,Exxon Mobil Corp,XOM,Exxon Mobil Corp,United States,United States,Public,Oil & Gas Refining and Marketing,Oil & Gas Refining and Marketing,0.0,5.0,Cash|Cash Only,No Applicable,Privately Negotiated Purchase|Open Market Purc...,,,50102030,50102030


# Upload

In [46]:
# Write the cleaned deals to the package data folder, leaving the row index out of the file.
output_csv_path = '../MA_PREDICTOR/data/ma_data.csv'
data.to_csv(output_csv_path, index=False)