# Importing Libraries


In [51]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Import and Overview the data


In [52]:
df = pd.read_csv('Acquisitions.csv')

In [53]:
df.head()

Unnamed: 0,Acquisitions ID,Acquired Company,Acquiring Company,Year of acquisition announcement,Deal announced on,Price,Status,Terms,Acquisition Profile,News,News Link
0,EMC acquired Data Domain in 2009,Data Domain,EMC,2009,8/07/2009,"$2,100,000,000",Undisclosed,Cash,http://www.crunchbase.com/acquisition/5dc676a1...,EMC acquired Data Domain,http://www.businesswire.com/news/home/20090708...
1,AOL acquired Quigo in 2007,Quigo,AOL,2007,7/11/2007,"$363,000,000",Undisclosed,Cash,http://www.crunchbase.com/acquisition/ad686848...,,
2,Cisco acquired PostPath in 2008,PostPath,Cisco Systems,2008,27/08/2008,"$215,000,000",Undisclosed,Undisclosed,http://www.crunchbase.com/acquisition/6a18cc70...,Cisco Announces Definitive Agreement to Acquir...,http://newsroom.cisco.com/dlls/2008/corp_08270...
3,Oracle acquired BigMachines in 2013,BigMachines,Oracle,2013,24/10/2013,"$400,000,000",Undisclosed,Undisclosed,http://www.crunchbase.com/acquisition/e856d7c1...,Oracle Corporation acquired BigMachines,http://www.businessinsider.in/Source-Oracle-Pa...
4,Yahoo! acquired Snip.it in 2013,Snip.it,Yahoo,2013,22/01/2013,"$10,000,000",Undisclosed,"Cash, Stock",http://www.crunchbase.com/acquisition/e2dcb91d...,Yahoo Buys Snip.it The Pinterest-Meets-News St...,http://techcrunch.com/2013/01/22/yahoo-buys-sn...


In [54]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 336 entries, 0 to 335
Data columns (total 11 columns):
 #   Column                            Non-Null Count  Dtype 
---  ------                            --------------  ----- 
 0   Acquisitions ID                   336 non-null    object
 1   Acquired Company                  336 non-null    object
 2   Acquiring Company                 336 non-null    object
 3   Year of acquisition announcement  336 non-null    int64 
 4   Deal announced on                 336 non-null    object
 5   Price                             336 non-null    object
 6   Status                            335 non-null    object
 7   Terms                             336 non-null    object
 8   Acquisition Profile               335 non-null    object
 9   News                              314 non-null    object
 10  News Link                         314 non-null    object
dtypes: int64(1), object(10)
memory usage: 29.0+ KB


In [55]:
# Check for missing values
print(df.isnull().sum())


Acquisitions ID                      0
Acquired Company                     0
Acquiring Company                    0
Year of acquisition announcement     0
Deal announced on                    0
Price                                0
Status                               1
Terms                                0
Acquisition Profile                  1
News                                22
News Link                           22
dtype: int64


In [56]:
value_counts_1 = df['Status'].value_counts()
value_counts_2 = df['Terms'].value_counts()

# Print unique values and their frequencies
print(value_counts_1)
print('------------------')
print(value_counts_2)

Status
Undisclosed    310
Complete        16
Pending          9
Name: count, dtype: int64
------------------
Terms
Undisclosed    148
Cash           128
Cash, Stock     36
Stock           24
Name: count, dtype: int64


#  Preprocessing Data

### Convert Price to numerical data

In [57]:
# Convert 'Price' column to numeric (remove $ and commas if they exist)
df['Price'] = df['Price'].replace(r'[\$,]', '', regex=True)
df['Price'] = pd.to_numeric(df['Price'], errors='coerce').astype('float64')


### Filling Null Values

In [58]:
# Get mode of the column
mode_value = df['Status'].mode()[0]

# Fill NaN values with the mode
df['Status'].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Status'].fillna(mode_value, inplace=True)


### One-Hot Encoding 

In [59]:
 #One-hot encode the 'Status' column
df = pd.get_dummies(df, columns=['Status'], drop_first=False)
# One-hot encode the 'Terms' column
df = pd.get_dummies(df, columns=['Terms'], drop_first=False)

In [60]:
df.head()

Unnamed: 0,Acquisitions ID,Acquired Company,Acquiring Company,Year of acquisition announcement,Deal announced on,Price,Acquisition Profile,News,News Link,Status_Complete,Status_Pending,Status_Undisclosed,Terms_Cash,"Terms_Cash, Stock",Terms_Stock,Terms_Undisclosed
0,EMC acquired Data Domain in 2009,Data Domain,EMC,2009,8/07/2009,2100000000.0,http://www.crunchbase.com/acquisition/5dc676a1...,EMC acquired Data Domain,http://www.businesswire.com/news/home/20090708...,False,False,True,True,False,False,False
1,AOL acquired Quigo in 2007,Quigo,AOL,2007,7/11/2007,363000000.0,http://www.crunchbase.com/acquisition/ad686848...,,,False,False,True,True,False,False,False
2,Cisco acquired PostPath in 2008,PostPath,Cisco Systems,2008,27/08/2008,215000000.0,http://www.crunchbase.com/acquisition/6a18cc70...,Cisco Announces Definitive Agreement to Acquir...,http://newsroom.cisco.com/dlls/2008/corp_08270...,False,False,True,False,False,False,True
3,Oracle acquired BigMachines in 2013,BigMachines,Oracle,2013,24/10/2013,400000000.0,http://www.crunchbase.com/acquisition/e856d7c1...,Oracle Corporation acquired BigMachines,http://www.businessinsider.in/Source-Oracle-Pa...,False,False,True,False,False,False,True
4,Yahoo! acquired Snip.it in 2013,Snip.it,Yahoo,2013,22/01/2013,10000000.0,http://www.crunchbase.com/acquisition/e2dcb91d...,Yahoo Buys Snip.it The Pinterest-Meets-News St...,http://techcrunch.com/2013/01/22/yahoo-buys-sn...,False,False,True,False,True,False,False


### Drop unnecessary columns

In [61]:
# Apply transformation to remove 'Cash,Stock' from the 'Terms' column
combined_mask = df['Terms_Cash, Stock'] == True
df.loc[combined_mask, 'Terms_Cash'] = True
df.loc[combined_mask, 'Terms_Stock'] = True
    
# Drop combined column
df = df.drop('Terms_Cash, Stock', axis=1)

In [None]:
#Drop unuseful columns that are not needed for the analysis
df.drop(columns=["Acquisition Profile", "News", "News Link"], inplace=True)

In [63]:
# Recheck after conversion
print("\n🔹 Data Types After Converting 'Price':")
print(df.dtypes)


🔹 Data Types After Converting 'Price':
Acquisitions ID                      object
Acquired Company                     object
Acquiring Company                    object
Year of acquisition announcement      int64
Deal announced on                    object
Price                               float64
Status_Complete                        bool
Status_Pending                         bool
Status_Undisclosed                     bool
Terms_Cash                             bool
Terms_Stock                            bool
Terms_Undisclosed                      bool
dtype: object


## Extract Date Features

In [64]:
# Convert 'Deal announced on' to datetime
df['Deal_date'] = pd.to_datetime(df['Deal announced on'], dayfirst=True, errors='coerce')


# Extract day, month, and day of week
df['Deal_day'] = df['Deal_date'].dt.day
df['Deal_month'] = df['Deal_date'].dt.month
df['Deal_dayofweek'] = df['Deal_date'].dt.dayofweek  # Monday=0, Sunday=6

# Drop combined column
df = df.drop('Deal announced on', axis=1)
df = df.drop('Deal_date', axis=1)

In [65]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 336 entries, 0 to 335
Data columns (total 14 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Acquisitions ID                   336 non-null    object 
 1   Acquired Company                  336 non-null    object 
 2   Acquiring Company                 336 non-null    object 
 3   Year of acquisition announcement  336 non-null    int64  
 4   Price                             336 non-null    float64
 5   Status_Complete                   336 non-null    bool   
 6   Status_Pending                    336 non-null    bool   
 7   Status_Undisclosed                336 non-null    bool   
 8   Terms_Cash                        336 non-null    bool   
 9   Terms_Stock                       336 non-null    bool   
 10  Terms_Undisclosed                 336 non-null    bool   
 11  Deal_day                          335 non-null    float64
 12  Deal_mon

In [66]:
df.head()

Unnamed: 0,Acquisitions ID,Acquired Company,Acquiring Company,Year of acquisition announcement,Price,Status_Complete,Status_Pending,Status_Undisclosed,Terms_Cash,Terms_Stock,Terms_Undisclosed,Deal_day,Deal_month,Deal_dayofweek
0,EMC acquired Data Domain in 2009,Data Domain,EMC,2009,2100000000.0,False,False,True,True,False,False,8.0,7.0,2.0
1,AOL acquired Quigo in 2007,Quigo,AOL,2007,363000000.0,False,False,True,True,False,False,7.0,11.0,2.0
2,Cisco acquired PostPath in 2008,PostPath,Cisco Systems,2008,215000000.0,False,False,True,False,False,True,27.0,8.0,2.0
3,Oracle acquired BigMachines in 2013,BigMachines,Oracle,2013,400000000.0,False,False,True,False,False,True,24.0,10.0,3.0
4,Yahoo! acquired Snip.it in 2013,Snip.it,Yahoo,2013,10000000.0,False,False,True,True,True,False,22.0,1.0,1.0
