In [97]:
import pandas as pd
import numpy as np
from datetime import datetime

# Raw data

In [98]:
# Load the raw deals export; the path is relative to the notebook's working directory.
data = pd.read_csv('../raw_data/210906_LeWagon_finalproject_v2.csv')
# Preview the first two rows to check that the file was parsed as expected.
data.head(2)

Unnamed: 0,SDC Deal No,Date Announced,"Deal Value\n(USD, Millions)",Acquiror Full Name,Acquiror Primary Ticker Symbol,Target Full Name,Target Nation,Acquiror Nation,Target Public Status,Acquiror TRBC Industry,Target TRBC Industry,Percentage of Shares Held at Announcement,Percentage of Shares Acquired in Transaction,Consideration Offered\n('|'),Deal Attitude,Acquisition Techniques\n('|'),Acquiror Financial Advisors Name\n('|'),Target Financial Advisors Name\n('|')
0,1631732020,01/01/2005,12.0,Plains All American Pipeline LP,PAA,Shell Pipeline Co LP-Crude Oil Pipeline Assets...,United States,United States,Subsidiary,Oil & Gas Refining and Marketing,Oil & Gas Transportation Services,,100.0,Cash|Cash Only,Friendly,Financial Acquiror|Divestiture,,
1,1653652020,01/01/2005,,Regis Corp,RGS,Scot Lewis Schools,United States,United States,Private,Personal Services,Professional & Business Education,,100.0,Unspecified,Friendly,Not Applicable,,


# Adjust data

## Column names

In [99]:
# Show the column labels exactly as they come from the CSV (several contain '\n').
data.columns

Index(['SDC Deal No', 'Date Announced', 'Deal Value\n(USD, Millions)',
       'Acquiror Full Name', 'Acquiror Primary Ticker Symbol',
       'Target Full Name', 'Target Nation', 'Acquiror Nation',
       'Target Public Status', 'Acquiror TRBC Industry',
       'Target TRBC Industry', 'Percentage of Shares Held at Announcement',
       'Percentage of Shares Acquired in Transaction',
       'Consideration Offered\n('|')', 'Deal Attitude',
       'Acquisition Techniques\n('|')',
       'Acquiror Financial Advisors Name\n('|')',
       'Target Financial Advisors Name\n('|')'],
      dtype='object')

In [100]:
# Short snake_case labels, listed in the same order as the columns of the raw CSV.
# The trailing comment on each entry names the source column it stands for.
new_columns = [
    'id',                          # SDC Deal No
    'announcement_date',           # Date Announced
    'deal_value',                  # Deal Value (USD, Millions)
    'acquiror_name',               # Acquiror Full Name
    'acquiror_ticker',             # Acquiror Primary Ticker Symbol
    'target_name',                 # Target Full Name
    'target_nation',               # Target Nation
    'acquiror_nation',             # Acquiror Nation
    'target_status',               # Target Public Status
    'acquiror_industry',           # Acquiror TRBC Industry
    'target_industry',             # Target TRBC Industry
    'shares_at_announcement',      # Percentage of Shares Held at Announcement
    'shares_acquired',             # Percentage of Shares Acquired in Transaction
    'consideration_offered',       # Consideration Offered ('|')
    'attitude',                    # Deal Attitude
    'acquisition_technique',       # Acquisition Techniques ('|')
    'acquiror_financial_advisor',  # Acquiror Financial Advisors Name ('|')
    'target_financial_advisor',    # Target Financial Advisors Name ('|')
]

In [101]:
# Rename all columns at once. The assignment is positional: the i-th entry of
# new_columns replaces the i-th existing label, so the list order must match the CSV.
data.columns = new_columns

## dtypes

In [102]:
# Inspect the dtype pandas inferred for each column to see which ones need converting.
data.dtypes

id                              int64
announcement_date              object
deal_value                     object
acquiror_name                  object
acquiror_ticker                object
target_name                    object
target_nation                  object
acquiror_nation                object
target_status                  object
acquiror_industry              object
target_industry                object
shares_at_announcement        float64
shares_acquired               float64
consideration_offered          object
attitude                       object
acquisition_technique          object
acquiror_financial_advisor     object
target_financial_advisor       object
dtype: object

In [103]:
# Parse announcement_date from day/month/year text into pandas datetimes.
# The explicit format stops pandas from guessing the day/month order.
data['announcement_date'] = pd.to_datetime(
    data['announcement_date'],
    format="%d/%m/%Y",
)

In [104]:
# transforming deal_value
def rem_com(x):
    """Remove thousands-separator commas from a string value.

    Parameters
    ----------
    x : object
        A single cell value. Strings (including ``str`` subclasses) have every
        ',' removed; any other value (e.g. a float or NaN) is passed through.

    Returns
    -------
    object
        The comma-free string, or ``x`` itself when it is not a string.
    """
    # isinstance (rather than an exact type comparison) also covers str subclasses.
    return x.replace(',', '') if isinstance(x, str) else x
# Strip the commas element-wise with rem_com, then cast the whole column to float.
data['deal_value'] = (
    data['deal_value']
    .apply(rem_com)
    .astype(float)
)

## Adding TRBC (industry classifier)

# Modifying data

In [105]:
# Count the missing values in each column.
data.isna().sum()

id                                0
announcement_date                 0
deal_value                    14026
acquiror_name                     0
acquiror_ticker                  35
target_name                       2
target_nation                     2
acquiror_nation                   0
target_status                     1
acquiror_industry                 0
target_industry                   2
shares_at_announcement        28428
shares_acquired                   0
consideration_offered             1
attitude                          0
acquisition_technique             0
acquiror_financial_advisor    23983
target_financial_advisor      21281
dtype: int64

In [106]:
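# Plan for the columns that contain missing values:
# deal_value: not considered (NAs are left as they are)
# acquiror_ticker: remove rows with NA
# target_name / target_nation / target_industry: remove rows with NA
# target_status: remove rows with NA
# shares_at_announcement: fill NA with 0
# consideration_offered: remove rows with NA
# acquiror_financial_advisor / target_financial_advisor: not considered (NAs are left as they are)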

## Fill up missing values

In [107]:
# NA in 'shares_at_announcement' implies 0.
# Assign the filled column back to the frame. Calling fillna(..., inplace=True) on a
# data[...] selection is chained assignment: it mutates an intermediate Series, emits a
# FutureWarning on pandas 2.x, and leaves `data` unchanged under Copy-on-Write.
data['shares_at_announcement'] = data['shares_at_announcement'].fillna(0)

## Removing NAs

In [109]:
# Columns in which a missing value disqualifies the whole row.
drop_na = [
    'acquiror_ticker',
    'target_name',
    'target_nation',
    'target_status',
    'target_industry',
    'consideration_offered',
]

In [110]:
# Drop every row that has a missing value in at least one of the `drop_na` columns.
# A single dropna(subset=...) keeps exactly the rows that survive filtering each
# column with notna() in turn, in one pass and without leaving a loop variable behind.
data = data.dropna(subset=drop_na)

## US only

In [115]:
# The raw export was filtered on nation of incorporation, so acquirors from other
# nations are still present in 'acquiror_nation'; list the distinct values.
data['acquiror_nation'].unique()

array(['United States', 'Canada', 'Russia', 'Philippines', 'Poland',
       'China (Mainland)', 'Netherlands', 'France', 'Japan', 'Hong Kong',
       'Malaysia', 'United Kingdom', 'Sweden', 'Switzerland', 'Hungary',
       'Bermuda', 'Taiwan', 'Argentina', 'Israel', 'Colombia', 'Thailand',
       'Brazil', 'Peru', 'New Zealand', 'Panama', 'Australia',
       'Singapore', 'United Arab Emirates', 'Cambodia', 'Ireland',
       'Ecuador', 'Czech Republic', 'Dominican Republic', 'Fiji',
       'South Korea', 'Vietnam', 'Kenya', 'Ukraine', 'Denmark',
       'Puerto Rico', 'Indonesia', 'U.S. Virgin Islands', 'Italy',
       'Malta'], dtype=object)

In [116]:
# Keep only the deals whose acquiror nation is the United States.
data = data.loc[data['acquiror_nation'].eq('United States')]

## Merge with TRBC classification