In [91]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import ElasticNet
from sklearn.dummy import DummyRegressor
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.linear_model import Ridge

In [92]:
data = pd.read_csv('../../MA_PREDICTOR/data/ma_data_car_clean.csv')

In [93]:
data.drop('month', inplace=True, axis=1)

In [94]:
horizons = [1, 3, 5, 10]
for horizon in horizons:
    data = data[(abs(data[f'car_{horizon}']) <= 0.3)]

In [95]:
data

Unnamed: 0,consideration_offered,shares_acquired,shares_at_announcement,acquiror_code,target_code,target_status,acquisition_count,bidder_count,cross_border,relatedness,economic_sector_ac,business_sector_ac,economic_sector_target,business_sector_target,car_1,car_3,car_5,car_10
0,Other,full,no,55101010,55301010,others,5,1,cross_border,economic_sector,Financials,Banking & Investment Services,Financials,Insurance,-0.002947,-0.000599,-0.019639,-0.007640
1,Other,full,no,55101010,55301010,others,6,1,cross_border,economic_sector,Financials,Banking & Investment Services,Financials,Insurance,-0.002947,-0.000599,-0.019639,-0.007640
2,Other,full,no,52102050,53205020,others,22,1,cross_border,not_related,Industrials,Industrial Goods,Consumer Cyclicals,Cyclical Consumer Products,0.006169,0.000123,-0.020599,-0.001150
3,Other,full,no,54201010,56201040,others,34,1,cross_border,not_related,Consumer Non-Cyclicals,Personal & Household Products & Services,Healthcare,Pharmaceuticals & Medical Research,0.007969,0.029287,0.009896,0.028176
4,Other,full,no,52102050,53205020,others,23,1,cross_border,not_related,Industrials,Industrial Goods,Consumer Cyclicals,Cyclical Consumer Products,0.006169,0.000123,-0.020599,-0.001150
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15921,Cash,full,no,52102010,59103010,others,15,1,cross_border,not_related,Industrials,Industrial Goods,Utilities,Utilities,0.020108,0.052302,0.070522,0.093236
15922,Other,full,no,53203020,53205020,others,60,1,cross_border,business_sector,Consumer Cyclicals,Cyclical Consumer Products,Consumer Cyclicals,Cyclical Consumer Products,-0.040156,-0.034438,-0.058843,-0.028895
15923,Other,full,no,54301020,57201010,others,52,1,national,not_related,Consumer Non-Cyclicals,Food & Drug Retailing,Technology,Software & IT Services,-0.003545,-0.022961,-0.024630,-0.004876
15924,Other,full,no,55101010,52203030,others,6,1,cross_border,not_related,Financials,Banking & Investment Services,Industrials,Industrial & Commercial Services,-0.025992,-0.009654,-0.019414,-0.025331


In [96]:
data.columns

Index(['consideration_offered', 'shares_acquired', 'shares_at_announcement',
       'acquiror_code', 'target_code', 'target_status', 'acquisition_count',
       'bidder_count', 'cross_border', 'relatedness', 'economic_sector_ac',
       'business_sector_ac', 'economic_sector_target',
       'business_sector_target', 'car_1', 'car_3', 'car_5', 'car_10'],
      dtype='object')

In [97]:
y_1=data['car_1']

In [98]:
y_3=data['car_3']

In [99]:
y_5=data['car_5']

In [100]:
y_10=data['car_10']

In [101]:
X=data[['consideration_offered', 'shares_acquired', 'shares_at_announcement',
       'acquiror_code', 'target_code', 'target_status', 'acquisition_count',
       'bidder_count', 'cross_border', 'relatedness', 'economic_sector_ac',
       'business_sector_ac', 'economic_sector_target',
       'business_sector_target']]

# Car_1

## Splitting of the dataset

In [102]:
#Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y_1, test_size=0.3, random_state=0)

## Pipeline and GridSearch

In [103]:
cat_transformer = OneHotEncoder(handle_unknown='ignore')

In [104]:
num_transformer= MinMaxScaler()

In [105]:
cat_features=['consideration_offered', 'shares_acquired', 'shares_at_announcement',
       'acquiror_code', 'target_code', 'target_status', 'cross_border', 'relatedness', 'economic_sector_ac',
       'business_sector_ac', 'economic_sector_target',
       'business_sector_target']

In [106]:
preprocessor = ColumnTransformer([
    ('num_transformer', num_transformer, ['acquisition_count']),
    ('cat_transformer', cat_transformer, cat_features)], remainder='passthrough')

## Baseline

In [107]:
baseline_model_mean = DummyRegressor(strategy="mean") 

In [108]:
cross_val_score(baseline_model_mean, X_train, y_train, cv=5, scoring='r2').mean()

-0.0005246333864007635

In [109]:
cross_val_score(baseline_model_mean, X_train, y_train, cv=5, scoring='neg_mean_squared_error').mean()

-0.0018494774855108622

## ElasticNet

In [110]:
# Hyperparameter Grid
grid = {'model__alpha': [0.1, 0.2, 0.3, 0.4],
        'model__l1_ratio': [0,0.05, 0.1, 0.15, 0.2],
        'model__max_iter': [10000]}

In [111]:
# Combine preprocessor and linear model in pipeline
pipe = Pipeline([
    ('preprocessing', preprocessor),
    ('model', ElasticNet())])

In [112]:
search = GridSearchCV(pipe, 
                      grid, 
                      scoring = ['neg_mean_squared_error', 'r2','neg_mean_absolute_error'],
                      refit='neg_mean_squared_error',
                      cv = 5,
                      n_jobs=-1) 

In [113]:
# Fit data to Grid Search
search.fit(X_train,y_train);

  model = cd_fast.sparse_enet_coordinate_descent(


In [114]:
# Best score 1
search.best_score_

-0.001847028794143277

In [115]:
search.best_params_

{'model__alpha': 0.2, 'model__l1_ratio': 0, 'model__max_iter': 10000}

# Car_3

## Splitting of the dataset

In [116]:
#Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y_3, test_size=0.3, random_state=0)

## Pipeline and GridSearch

In [117]:
cat_transformer = OneHotEncoder(handle_unknown='ignore')

In [118]:
num_transformer= MinMaxScaler()

In [119]:
cat_features=['consideration_offered', 'shares_acquired', 'shares_at_announcement',
       'acquiror_code', 'target_code', 'target_status', 'cross_border', 'relatedness', 'economic_sector_ac',
       'business_sector_ac', 'economic_sector_target',
       'business_sector_target']

In [120]:
preprocessor = ColumnTransformer([
    ('num_transformer', num_transformer, ['acquisition_count']),
    ('cat_transformer', cat_transformer, cat_features)], remainder='passthrough')

## Baseline

In [121]:
baseline_model_mean = DummyRegressor(strategy="mean") 

In [122]:
cross_val_score(baseline_model_mean, X_train, y_train, cv=5, scoring='r2').mean()

-0.0006164455356379328

In [123]:
cross_val_score(baseline_model_mean, X_train, y_train, cv=5, scoring='neg_mean_squared_error').mean()

-0.0028219917186760074

## ElasticNet

In [124]:
# Hyperparameter Grid
grid = {'model__alpha': [0.1, 0.2, 0.3, 0.4],
        'model__l1_ratio': [0,0.05, 0.1, 0.15, 0.2],
        'model__max_iter': [10000]}

In [125]:
# Combine preprocessor and linear model in pipeline
pipe = Pipeline([
    ('preprocessing', preprocessor),
    ('model', ElasticNet())])

In [126]:
search = GridSearchCV(pipe, 
                      grid, 
                      scoring = ['neg_mean_squared_error', 'r2','neg_mean_absolute_error'],
                      refit='neg_mean_squared_error',
                      cv = 5,
                      n_jobs=-1) 

In [127]:
# Fit data to Grid Search
search.fit(X_train,y_train);

  model = cd_fast.sparse_enet_coordinate_descent(


In [128]:
# Best score 1
search.best_score_

-0.0028190328178341975

In [129]:
search.best_params_

{'model__alpha': 0.3, 'model__l1_ratio': 0, 'model__max_iter': 10000}

# Car_5

## Splitting of the dataset

In [130]:
#Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y_5, test_size=0.3, random_state=0)

## Pipeline and GridSearch

In [131]:
cat_transformer = OneHotEncoder(handle_unknown='ignore')

In [132]:
num_transformer= MinMaxScaler()

In [133]:
cat_features=['consideration_offered', 'shares_acquired', 'shares_at_announcement',
       'acquiror_code', 'target_code', 'target_status', 'cross_border', 'relatedness', 'economic_sector_ac',
       'business_sector_ac', 'economic_sector_target',
       'business_sector_target']

In [134]:
preprocessor = ColumnTransformer([
    ('num_transformer', num_transformer, ['acquisition_count']),
    ('cat_transformer', cat_transformer, cat_features)], remainder='passthrough')

## Baseline

In [135]:
baseline_model_mean = DummyRegressor(strategy="mean") 

In [136]:
cross_val_score(baseline_model_mean, X_train, y_train, cv=5, scoring='r2').mean()

-0.00026360045456024216

In [137]:
cross_val_score(baseline_model_mean, X_train, y_train, cv=5, scoring='neg_mean_squared_error').mean()

-0.003547537274034695

## ElasticNet

In [138]:
# Hyperparameter Grid
grid = {'model__alpha': [0.1, 0.2, 0.3, 0.4],
        'model__l1_ratio': [0,0.05, 0.1, 0.15, 0.2],
        'model__max_iter': [10000]}

In [139]:
# Combine preprocessor and linear model in pipeline
pipe = Pipeline([
    ('preprocessing', preprocessor),
    ('model', ElasticNet())])

In [140]:
search = GridSearchCV(pipe, 
                      grid, 
                      scoring = ['neg_mean_squared_error', 'r2','neg_mean_absolute_error'],
                      refit='neg_mean_squared_error',
                      cv = 5,
                      n_jobs=-1) 

In [141]:
# Fit data to Grid Search
search.fit(X_train,y_train);

  model = cd_fast.sparse_enet_coordinate_descent(


In [142]:
# Best score 1
search.best_score_

-0.0035436052687067543

In [143]:
search.best_params_

{'model__alpha': 0.3, 'model__l1_ratio': 0, 'model__max_iter': 10000}

# Car_10

## Splitting of the dataset

In [144]:
#Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y_10, test_size=0.3, random_state=0)

## Pipeline and GridSearch

In [145]:
cat_transformer = OneHotEncoder(handle_unknown='ignore')

In [146]:
num_transformer= MinMaxScaler()

In [147]:
cat_features=['consideration_offered', 'shares_acquired', 'shares_at_announcement',
       'acquiror_code', 'target_code', 'target_status', 'cross_border', 'relatedness', 'economic_sector_ac',
       'business_sector_ac', 'economic_sector_target',
       'business_sector_target']

In [148]:
preprocessor = ColumnTransformer([
    ('num_transformer', num_transformer, ['acquisition_count']),
    ('cat_transformer', cat_transformer, cat_features)], remainder='passthrough')

## Baseline

In [149]:
baseline_model_mean = DummyRegressor(strategy="mean") 

In [150]:
cross_val_score(baseline_model_mean, X_train, y_train, cv=5, scoring='r2').mean()

-0.0004421487262225465

In [151]:
cross_val_score(baseline_model_mean, X_train, y_train, cv=5, scoring='neg_mean_squared_error').mean()

-0.0056061923784172995

## ElasticNet

In [152]:
# Hyperparameter Grid
grid = {'model__alpha': [0.1, 0.2, 0.3, 0.4],
        'model__l1_ratio': [0,0.05, 0.1, 0.15, 0.2],
        'model__max_iter': [10000]}

In [153]:
# Combine preprocessor and linear model in pipeline
pipe = Pipeline([
    ('preprocessing', preprocessor),
    ('model', ElasticNet())])

In [154]:
search = GridSearchCV(pipe, 
                      grid, 
                      scoring = ['neg_mean_squared_error', 'r2','neg_mean_absolute_error'],
                      refit='neg_mean_squared_error',
                      cv = 5,
                      n_jobs=-1) 

In [155]:
# Fit data to Grid Search
search.fit(X_train,y_train);

  model = cd_fast.sparse_enet_coordinate_descent(


In [156]:
# Best score 1
search.best_score_

-0.005598300735268118

In [157]:
search.best_params_

{'model__alpha': 0.2, 'model__l1_ratio': 0, 'model__max_iter': 10000}

  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(


  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
