In [1]:
import pandas as pd
import numpy as np

data = pd.read_csv("/Users/rickysoh/OneDrive - Bond University/PG/Replication/Sample_01.csv", header=0, na_values=-99.99)

In [2]:
#Added Duration_effective, Duration_unconditional and Duration_diff in days unit to create dummy variable which deals completes within one year since announcement.
data.head()

Unnamed: 0,Date Announced,Duration_Effective,Dummy_Effective,Date Effective,Duration_Unconditional,Duration_diff,Dummy_Unconditional,Date Effective/Unconditional,Target Name,Target Nation,Acquiror Name,Acquiror Nation,Status,ROCE,Tobin's Q,IDDummy,Firm Size,Tangibility
0,7/1/00,70,1,17/3/00,42,28,1,18/2/00,Univak PLC,United Kingdom,Ferraris Group PLC,United Kingdom,Completed,0.01173,1.658,0,16.266516,0.186983
1,12/1/00,37,1,18/2/00,37,0,1,18/2/00,Best Software Inc,United States,Sage Group PLC,United Kingdom,Completed,0.036074,6.279,1,18.261685,0.393505
2,13/1/00,22,1,4/2/00,8,14,1,21/1/00,Moorepay Group PLC,United Kingdom,Rebus Group Ltd,United Kingdom,Completed,0.02026,6.995,1,16.204273,0.235138
3,13/1/00,180,1,11/7/00,154,26,1,15/6/00,Racal Electronics PLC,United Kingdom,Thomson-CSF,France,Completed,0.02161,2.955,0,21.315045,0.197543
4,24/1/00,46,1,10/3/00,39,7,1,3/3/00,BTP PLC,United Kingdom,Clariant AG,Switzerland,Completed,0.038192,2.426,1,20.608865,0.120417


## Variables being used in the Paper:
### ROCE, AER, Tobin's Q, Sgrowth, Liquidity, Leverage, GRDummy, IDDummy, Firm Size, FCF, Tangibility, Firm Age 
### Yes(es) below means the variables that can be obtained from SDC platinum or at least the data I have obtained
![](Variables.png)


# Below attached Danbolt's descriptive stats
![](Danbolt_des_stats.png)

In [3]:
#From the data below, it is suggesting that all the variables are not representing well for replication since it is very different from what Danbolt has
data.describe()


Unnamed: 0,Duration_Effective,Dummy_Effective,Duration_Unconditional,Duration_diff,Dummy_Unconditional,ROCE,Tobin's Q,IDDummy,Firm Size,Tangibility
count,220.0,220.0,220.0,220.0,220.0,220.0,220.0,220.0,220.0,220.0
mean,-4521.931818,0.895455,-3004.7,-1517.231818,0.913636,0.080002,5.788305,0.686364,18.811083,0.308293
std,12387.797613,0.306665,10291.934729,7537.006458,0.281541,0.495503,32.055435,0.465029,1.851791,0.536708
min,-38797.0,0.0,-38797.0,-38547.0,0.0,-0.668509,0.0,0.0,14.978661,-1.698083
25%,41.0,1.0,29.0,0.0,1.0,0.004886,0.952,0.0,17.517401,0.135367
50%,67.5,1.0,43.5,10.0,1.0,0.015657,1.7295,1.0,18.621136,0.241589
75%,95.0,1.0,63.0,33.25,1.0,0.030297,3.4825,1.0,19.830381,0.38729
max,1179.0,1.0,486.0,1122.0,1.0,6.344606,465.8,1.0,25.941178,5.146281


In [4]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

df = data[["Status", "ROCE", "Tobin's Q", "IDDummy", "Firm Size", "Tangibility"]]
df.rename(columns={"Tobin's Q":'TobinsQ',
                    "Firm Size": "FirmSize"
                    }, inplace=True)
                    
# converting to binary data
df_one = pd.get_dummies(df["Status"])
df["Status"] = df_one

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Status"] = df_one


## Logit and Probit model for completion prediction (regardless of duration to finish)

In [5]:
model = smf.logit('Status ~ ROCE + TobinsQ + IDDummy + FirmSize + Tangibility', data=df)
logit_m = model.fit()
logit_m.summary()

Optimization terminated successfully.
         Current function value: 0.271755
         Iterations 7


0,1,2,3
Dep. Variable:,Status,No. Observations:,220.0
Model:,Logit,Df Residuals:,214.0
Method:,MLE,Df Model:,5.0
Date:,"Tue, 19 Jul 2022",Pseudo R-squ.:,0.04037
Time:,11:19:09,Log-Likelihood:,-59.786
converged:,True,LL-Null:,-62.301
Covariance Type:,nonrobust,LLR p-value:,0.4122

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,7.5161,2.495,3.012,0.003,2.626,12.406
ROCE,-0.1044,0.357,-0.292,0.770,-0.805,0.596
TobinsQ,0.0008,0.008,0.097,0.922,-0.016,0.017
IDDummy,-0.3960,0.598,-0.663,0.507,-1.567,0.775
FirmSize,-0.2497,0.126,-1.981,0.048,-0.497,-0.003
Tangibility,-0.0201,0.399,-0.050,0.960,-0.802,0.762


In [6]:
model2 = smf.probit('Status ~ ROCE + TobinsQ + IDDummy + FirmSize + Tangibility', data=df)
probit_m = model2.fit()
probit_m.summary()

Optimization terminated successfully.
         Current function value: 0.270984
         Iterations 6


0,1,2,3
Dep. Variable:,Status,No. Observations:,220.0
Model:,Probit,Df Residuals:,214.0
Method:,MLE,Df Model:,5.0
Date:,"Tue, 19 Jul 2022",Pseudo R-squ.:,0.04309
Time:,11:19:09,Log-Likelihood:,-59.616
converged:,True,LL-Null:,-62.301
Covariance Type:,nonrobust,LLR p-value:,0.3725

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4.1278,1.316,3.136,0.002,1.548,6.708
ROCE,-0.0823,0.204,-0.403,0.687,-0.483,0.318
TobinsQ,0.0005,0.005,0.108,0.914,-0.008,0.009
IDDummy,-0.1886,0.289,-0.653,0.514,-0.755,0.378
FirmSize,-0.1350,0.068,-1.999,0.046,-0.267,-0.003
Tangibility,-0.0188,0.219,-0.086,0.932,-0.448,0.410


# Model prediction for completion in 1 year (Effective)

In [7]:
#Using effective date as dependent variable
df["Status"] = data["Dummy_Effective"]

model = smf.logit('Status ~ ROCE + TobinsQ + IDDummy + FirmSize + Tangibility', data=df)
logit_m = model.fit()
print(logit_m.summary())


Optimization terminated successfully.
         Current function value: 0.321938
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                 Status   No. Observations:                  220
Model:                          Logit   Df Residuals:                      214
Method:                           MLE   Df Model:                            5
Date:                Tue, 19 Jul 2022   Pseudo R-squ.:                 0.03887
Time:                        11:19:09   Log-Likelihood:                -70.826
converged:                       True   LL-Null:                       -73.691
Covariance Type:            nonrobust   LLR p-value:                    0.3336
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept       7.2173      2.254      3.202      0.001       2.800      11.635
ROCE           -0.0365    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Status"] = data["Dummy_Effective"]


In [8]:
model2 = smf.probit('Status ~ ROCE + TobinsQ + IDDummy + FirmSize + Tangibility', data=df)
probit_m = model2.fit()
print(probit_m.summary())

Optimization terminated successfully.
         Current function value: 0.321179
         Iterations 6
                          Probit Regression Results                           
Dep. Variable:                 Status   No. Observations:                  220
Model:                         Probit   Df Residuals:                      214
Method:                           MLE   Df Model:                            5
Date:                Tue, 19 Jul 2022   Pseudo R-squ.:                 0.04113
Time:                        11:19:09   Log-Likelihood:                -70.659
converged:                       True   LL-Null:                       -73.691
Covariance Type:            nonrobust   LLR p-value:                    0.3002
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept       4.0628      1.222      3.323      0.001       1.667       6.459
ROCE           -0.0396    

# Model prediction for completion in 1 year (Unconditional)

In [9]:
#Using effective date as dependent variable
df["Status"] = data["Dummy_Unconditional"]

model = smf.logit('Status ~ ROCE + TobinsQ + IDDummy + FirmSize + Tangibility', data=df)
logit_m = model.fit()
print(logit_m.summary())

Optimization terminated successfully.
         Current function value: 0.278316
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                 Status   No. Observations:                  220
Model:                          Logit   Df Residuals:                      214
Method:                           MLE   Df Model:                            5
Date:                Tue, 19 Jul 2022   Pseudo R-squ.:                 0.05348
Time:                        11:19:09   Log-Likelihood:                -61.230
converged:                       True   LL-Null:                       -64.689
Covariance Type:            nonrobust   LLR p-value:                    0.2267
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept       8.3088      2.459      3.379      0.001       3.490      13.128
ROCE           -0.0641    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Status"] = data["Dummy_Unconditional"]


In [10]:
model2 = smf.probit('Status ~ ROCE + TobinsQ + IDDummy + FirmSize + Tangibility', data=df)
probit_m = model2.fit()
print(probit_m.summary())

Optimization terminated successfully.
         Current function value: 0.277496
         Iterations 6
                          Probit Regression Results                           
Dep. Variable:                 Status   No. Observations:                  220
Model:                         Probit   Df Residuals:                      214
Method:                           MLE   Df Model:                            5
Date:                Tue, 19 Jul 2022   Pseudo R-squ.:                 0.05627
Time:                        11:19:09   Log-Likelihood:                -61.049
converged:                       True   LL-Null:                       -64.689
Covariance Type:            nonrobust   LLR p-value:                    0.2006
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept       4.5594      1.303      3.500      0.000       2.006       7.113
ROCE           -0.0619    