## Import Common Package

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import RobustScaler, PowerTransformer, PolynomialFeatures, OneHotEncoder
from sklearn.compose import ColumnTransformer

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier, XGBRegressor

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import warnings
warnings.filterwarnings('ignore')

### Import Data

In [2]:
df = pd.read_csv('InsuranceClean.csv')
pd.options.display.max_columns = 999
pd.set_option('display.float_format', lambda x: '%.2f' % x) #Change Decimal
df.head()

Unnamed: 0,Id,Age,Job,Marital,Education,Default,Balance,HHInsurance,CarLoan,Communication,LastContactDay,LastContactMonth,NoOfContacts,DaysPassed,PrevAttempts,CarInsurance,CallDuration,age_group,LastContactDayGroup,LastContactMonth_Number,NoOfContactsGroup,CallDurationGroup
0,1,32,management,single,tertiary,0,1218,1,0,telephone,28,jan,2,-1,0,0,70,31-40 Years,End of Month,1,1 - 10 times,Below Average CallDuration
1,2,32,blue-collar,married,primary,0,1156,1,0,cellular,26,may,5,-1,0,0,185,31-40 Years,End of Month,5,1 - 10 times,Below Average CallDuration
2,3,29,management,single,tertiary,0,637,1,0,cellular,3,jun,1,119,1,1,340,21-30 Years,Beginning of Month,6,One times,Below Average CallDuration
3,4,25,student,single,primary,0,373,1,0,cellular,11,may,2,-1,0,1,819,21-30 Years,Beginning of Month,5,1 - 10 times,Above Average CallDuration
4,5,30,management,married,tertiary,0,2694,0,0,cellular,3,jun,1,-1,0,0,192,21-30 Years,Beginning of Month,6,One times,Below Average CallDuration


### Data Preparation

In [3]:
df_1 = df.copy()

In [4]:
df_1.isna().sum()

Id                         0
Age                        0
Job                        0
Marital                    0
Education                  0
Default                    0
Balance                    0
HHInsurance                0
CarLoan                    0
Communication              0
LastContactDay             0
LastContactMonth           0
NoOfContacts               0
DaysPassed                 0
PrevAttempts               0
CarInsurance               0
CallDuration               0
age_group                  0
LastContactDayGroup        0
LastContactMonth_Number    0
NoOfContactsGroup          0
CallDurationGroup          0
dtype: int64

In [5]:
# Remove ID, LastContactMonth, age_group, LastContactDayGroup, NoOfContactsGroup, CallDurationGroup
df_1.drop(columns = ['Id', 'LastContactMonth', 'age_group', 'LastContactDayGroup', 'NoOfContactsGroup', 'CallDurationGroup'], inplace = True)

In [6]:
df_1.head()

Unnamed: 0,Age,Job,Marital,Education,Default,Balance,HHInsurance,CarLoan,Communication,LastContactDay,NoOfContacts,DaysPassed,PrevAttempts,CarInsurance,CallDuration,LastContactMonth_Number
0,32,management,single,tertiary,0,1218,1,0,telephone,28,2,-1,0,0,70,1
1,32,blue-collar,married,primary,0,1156,1,0,cellular,26,5,-1,0,0,185,5
2,29,management,single,tertiary,0,637,1,0,cellular,3,1,119,1,1,340,6
3,25,student,single,primary,0,373,1,0,cellular,11,2,-1,0,1,819,5
4,30,management,married,tertiary,0,2694,0,0,cellular,3,1,-1,0,0,192,6


In [7]:
### One Hot Encoding Column fuel, seller_type, transmission (don't have levelling)
df_1 =  pd.get_dummies(data=df_1, columns=['Job', 'Marital', 'Education', 'Communication'])

### Check Imbalance Data

In [8]:
df_1['CarInsurance'].value_counts()

0    2396
1    1604
Name: CarInsurance, dtype: int64

In [9]:
pd.crosstab(index=df_1['CarInsurance'], columns='count', normalize=True)*100

col_0,count
CarInsurance,Unnamed: 1_level_1
0,59.9
1,40.1


### Splitting Data

In [10]:
df_1.head()

Unnamed: 0,Age,Default,Balance,HHInsurance,CarLoan,LastContactDay,NoOfContacts,DaysPassed,PrevAttempts,CarInsurance,CallDuration,LastContactMonth_Number,Job_admin.,Job_blue-collar,Job_entrepreneur,Job_housemaid,Job_management,Job_retired,Job_self-employed,Job_services,Job_student,Job_technician,Job_unemployed,Marital_divorced,Marital_married,Marital_single,Education_primary,Education_secondary,Education_tertiary,Communication_cellular,Communication_telephone
0,32,0,1218,1,0,28,2,-1,0,0,70,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1
1,32,0,1156,1,0,26,5,-1,0,0,185,5,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0
2,29,0,637,1,0,3,1,119,1,1,340,6,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0
3,25,0,373,1,0,11,2,-1,0,1,819,5,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0
4,30,0,2694,0,0,3,1,-1,0,0,192,6,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0


In [11]:
X = df_1.drop(columns='CarInsurance')
y = df_1['CarInsurance']

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify = y, test_size = .20, random_state = 42)

Dataset is balance

We will focus to the accuracy score

## Machine Learning Modelling

## 1. Random Forest

### 1a. Random Forest - Base Algorithm

In [13]:
RF_Base = RandomForestClassifier()

In [14]:
RF_Base.fit(X_train, y_train)

RandomForestClassifier()

In [15]:
y_pred_RFBase = RF_Base.predict(X_test)

In [16]:
print(classification_report(y_test, y_pred_RFBase))

              precision    recall  f1-score   support

           0       0.86      0.86      0.86       479
           1       0.79      0.79      0.79       321

    accuracy                           0.83       800
   macro avg       0.82      0.82      0.82       800
weighted avg       0.83      0.83      0.83       800



In [17]:
cm_RF_Base = confusion_matrix(y_test, y_pred_RFBase , labels=[1,0])

In [18]:
df_RF_Base = pd.DataFrame(cm_RF_Base, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_RF_Base

Unnamed: 0,Pred 1,Pred 0
Akt 1,253,68
Akt 0,67,412


### 1b. Random Forest - Hyper Parameter Tuning

In [19]:
param_RF = {
    "n_estimators" : np.arange(100, 10000, 100),
    "max_depth" : [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    "min_samples_leaf" :np.arange(1,101),
    "max_features" : ['auto', 0.3, 0.5, 0.8],
    "class_weight" : [{0:x, 1: 1 - x} for x in [.1, .20, .35, .40]]
}

In [20]:
RF_rand = RandomForestClassifier(random_state=42)

In [21]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [22]:
RF_RS= RandomizedSearchCV(RF_rand, param_RF, cv=skf, n_iter=50, n_jobs=-1, verbose=1, random_state=42, scoring='accuracy')

In [23]:
RF_RS.fit(X_train, y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed: 10.8min finished


RandomizedSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
                   estimator=RandomForestClassifier(random_state=42), n_iter=50,
                   n_jobs=-1,
                   param_distributions={'class_weight': [{0: 0.1, 1: 0.9},
                                                         {0: 0.2, 1: 0.8},
                                                         {0: 0.35, 1: 0.65},
                                                         {0: 0.4, 1: 0.6}],
                                        'max_depth': [None, 5, 10, 25, 30, 40,
                                                      50, 60, 80, 95, 100],
                                        'max_features': ['auto', 0.3, 0.5, 0.8],
                                        'min_samples_leaf':...
       3400, 3500, 3600, 3700, 3800, 3900, 4000, 4100, 4200, 4300, 4400,
       4500, 4600, 4700, 4800, 4900, 5000, 5100, 5200, 5300, 5400, 5500,
       5600, 5700, 5800, 5900, 6000, 6100, 6200, 6300, 640

In [24]:
RF_RS.best_params_

{'n_estimators': 8100,
 'min_samples_leaf': 2,
 'max_features': 0.3,
 'max_depth': 80,
 'class_weight': {0: 0.35, 1: 0.65}}

In [25]:
Model_RF_Tuned = RF_RS.best_estimator_

In [26]:
y_pred_RF_Tuned = Model_RF_Tuned.predict(X_test)

In [27]:
print(classification_report(y_test, y_pred_RF_Tuned))

              precision    recall  f1-score   support

           0       0.88      0.85      0.86       479
           1       0.79      0.82      0.80       321

    accuracy                           0.84       800
   macro avg       0.83      0.84      0.83       800
weighted avg       0.84      0.84      0.84       800



In [28]:
cm_RF_Tuned = confusion_matrix(y_test, y_pred_RF_Tuned, labels=[1,0])

In [29]:
df_RF_Tuned = pd.DataFrame(cm_RF_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_RF_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,263,58
Akt 0,71,408


===============================================================================================================

## 2. Decison Tree

### 2a. Decison Tree - Base Algorithm

In [30]:
DT_Base = DecisionTreeClassifier()

In [31]:
DT_Base.fit(X_train, y_train)

DecisionTreeClassifier()

In [32]:
y_pred_DTBase = DT_Base.predict(X_test)

In [33]:
print(classification_report(y_test, y_pred_DTBase))

              precision    recall  f1-score   support

           0       0.78      0.84      0.81       479
           1       0.73      0.65      0.69       321

    accuracy                           0.76       800
   macro avg       0.75      0.74      0.75       800
weighted avg       0.76      0.76      0.76       800



In [34]:
cm_DT_Base = confusion_matrix(y_test, y_pred_DTBase , labels=[1,0])

In [35]:
df_DT_Base = pd.DataFrame(cm_DT_Base, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_DT_Base

Unnamed: 0,Pred 1,Pred 0
Akt 1,209,112
Akt 0,78,401


### 2b. Decision Tree - Hyper Parameter Tuning

In [36]:
param_DT = {
#     "n_estimators" : np.arange(100, 10000, 100),
     "max_depth" : [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    "min_samples_leaf" :np.arange(1,101),
    "max_features" : ['auto', 0.3, 0.5, 0.8],
    "class_weight" : [{0:x, 1: 1 - x} for x in [.1, .20, .35, .40]]
}

In [37]:
DT_rand = DecisionTreeClassifier(random_state=42)

In [38]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [39]:
DT_RS= RandomizedSearchCV(DT_rand, param_DT, cv=skf, n_iter=50, n_jobs=-1, verbose=1, random_state=42, scoring='accuracy')

In [40]:
DT_RS.fit(X_train, y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:    0.4s finished


RandomizedSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
                   estimator=DecisionTreeClassifier(random_state=42), n_iter=50,
                   n_jobs=-1,
                   param_distributions={'class_weight': [{0: 0.1, 1: 0.9},
                                                         {0: 0.2, 1: 0.8},
                                                         {0: 0.35, 1: 0.65},
                                                         {0: 0.4, 1: 0.6}],
                                        'max_depth': [None, 5, 10, 25, 30, 40,
                                                      50, 60, 80, 95, 100],
                                        'max_features': ['auto', 0.3, 0.5, 0.8],
                                        'min_samples_leaf': array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  

In [41]:
DT_RS.best_params_

{'min_samples_leaf': 28,
 'max_features': 0.5,
 'max_depth': 30,
 'class_weight': {0: 0.35, 1: 0.65}}

In [42]:
Model_DT_Tuned = DT_RS.best_estimator_

In [43]:
y_pred_DT_Tuned = Model_DT_Tuned.predict(X_test)

In [44]:
print(classification_report(y_test, y_pred_DT_Tuned))

              precision    recall  f1-score   support

           0       0.87      0.73      0.79       479
           1       0.67      0.84      0.75       321

    accuracy                           0.77       800
   macro avg       0.77      0.79      0.77       800
weighted avg       0.79      0.77      0.78       800



In [45]:
cm_DT_Tuned = confusion_matrix(y_test, y_pred_DT_Tuned, labels=[1,0])

In [46]:
df_DT_Tuned = pd.DataFrame(cm_DT_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_DT_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,271,50
Akt 0,131,348


===============================================================================================================

## 3. XGBoost

### 3a. XGBoost - Base Algorithm

In [47]:
XG_Base = XGBClassifier()

In [48]:
XG_Base.fit(X_train, y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=8, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [49]:
y_pred_XGBase = XG_Base.predict(X_test)

In [50]:
print(classification_report(y_test, y_pred_XGBase))

              precision    recall  f1-score   support

           0       0.85      0.87      0.86       479
           1       0.80      0.77      0.79       321

    accuracy                           0.83       800
   macro avg       0.83      0.82      0.82       800
weighted avg       0.83      0.83      0.83       800



In [51]:
cm_XG_Base = confusion_matrix(y_test, y_pred_XGBase , labels=[1,0])

In [52]:
df_XG_Base = pd.DataFrame(cm_XG_Base, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_XG_Base

Unnamed: 0,Pred 1,Pred 0
Akt 1,247,74
Akt 0,60,419


### 3b. XGBoost- Hyper Parameter Tuning

In [53]:
param_XG = {
    'learning_rate': [1],
    "n_estimators" : np.arange(100, 10000, 100),
 "max_depth" : [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    'min_child_weight': [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    'eta':[.3],
    'subsample': [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    'colsample_bytree': [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    # Other parameters
    'objective': ['binary:logistic'],
    'scale_pos_weight' : [0.99]
}

In [54]:
XGBClassifier().get_params()

{'objective': 'binary:logistic',
 'use_label_encoder': True,
 'base_score': None,
 'booster': None,
 'colsample_bylevel': None,
 'colsample_bynode': None,
 'colsample_bytree': None,
 'gamma': None,
 'gpu_id': None,
 'importance_type': 'gain',
 'interaction_constraints': None,
 'learning_rate': None,
 'max_delta_step': None,
 'max_depth': None,
 'min_child_weight': None,
 'missing': nan,
 'monotone_constraints': None,
 'n_estimators': 100,
 'n_jobs': None,
 'num_parallel_tree': None,
 'random_state': None,
 'reg_alpha': None,
 'reg_lambda': None,
 'scale_pos_weight': None,
 'subsample': None,
 'tree_method': None,
 'validate_parameters': None,
 'verbosity': None}

In [55]:
XG_rand = XGBClassifier(random_state=42)

In [56]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [57]:
XG_RS= RandomizedSearchCV(XG_rand, param_XG, cv=skf, n_iter=50, n_jobs=-1, verbose=1, random_state=42, scoring='accuracy')

In [58]:
XG_RS.fit(X_train, y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 135 out of 150 | elapsed:    1.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   13.0s finished




RandomizedSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
                   estimator=XGBClassifier(base_score=None, booster=None,
                                           colsample_bylevel=None,
                                           colsample_bynode=None,
                                           colsample_bytree=None, gamma=None,
                                           gpu_id=None, importance_type='gain',
                                           interaction_constraints=None,
                                           learning_rate=None,
                                           max_delta_step=None, max_depth=None,
                                           min_child_weight=None...
       5600, 5700, 5800, 5900, 6000, 6100, 6200, 6300, 6400, 6500, 6600,
       6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7600, 7700,
       7800, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8800,
       8900, 9000, 9100, 9200, 9300, 9400, 9500, 

In [59]:
XG_RS.best_params_

{'subsample': None,
 'scale_pos_weight': 0.99,
 'objective': 'binary:logistic',
 'n_estimators': 3600,
 'min_child_weight': 40,
 'max_depth': 40,
 'learning_rate': 1,
 'eta': 0.3,
 'colsample_bytree': None}

In [60]:
Model_XG_Tuned = XG_RS.best_estimator_

In [61]:
y_pred_XG_Tuned = Model_XG_Tuned.predict(X_test)

In [62]:
print(classification_report(y_test, y_pred_XG_Tuned))

              precision    recall  f1-score   support

           0       0.83      0.85      0.84       479
           1       0.77      0.74      0.75       321

    accuracy                           0.81       800
   macro avg       0.80      0.80      0.80       800
weighted avg       0.81      0.81      0.81       800



In [63]:
cm_XG_Tuned = confusion_matrix(y_test, y_pred_XG_Tuned, labels=[1,0])

In [64]:
df_XG_Tuned = pd.DataFrame(cm_XG_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_XG_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,237,84
Akt 0,70,409


## 4.Logistic Regression

### 4a. LR Base

In [65]:
LR_Base = LogisticRegression()

In [66]:
LR_Base.fit(X_train, y_train)

LogisticRegression()

In [67]:
y_pred_LRBase = LR_Base.predict(X_test)

In [68]:
print(classification_report(y_test, y_pred_LRBase))

              precision    recall  f1-score   support

           0       0.78      0.85      0.81       479
           1       0.74      0.65      0.69       321

    accuracy                           0.77       800
   macro avg       0.76      0.75      0.75       800
weighted avg       0.77      0.77      0.77       800



In [69]:
cm_LR_Base = confusion_matrix(y_test, y_pred_LRBase , labels=[1,0])

In [70]:
df_LR_Base = pd.DataFrame(cm_LR_Base, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_LR_Base

Unnamed: 0,Pred 1,Pred 0
Akt 1,210,111
Akt 0,74,405


### 4c. LR With Hyperparameter tuning

In [71]:
param_LR = {
    'penalty' : ['none', 'l1', 'l2'],
    'C' : np.logspace(-4 , 4, 14),
    'class_weight' : [{0 : x, 1 : 1 - x} for x in [.05,.1,.15, .2,]]
}

In [72]:
LR_HP = LogisticRegression()

In [73]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [74]:
LR_GS = GridSearchCV(LR_HP, param_LR, cv=skf, n_jobs=-1, verbose=1, scoring='accuracy')

In [75]:
LR_GS.fit(X_train, y_train)

Fitting 3 folds for each of 168 candidates, totalling 504 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 489 out of 504 | elapsed:    5.8s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 504 out of 504 | elapsed:    6.0s finished


GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
             estimator=LogisticRegression(), n_jobs=-1,
             param_grid={'C': array([1.00000000e-04, 4.12462638e-04, 1.70125428e-03, 7.01703829e-03,
       2.89426612e-02, 1.19377664e-01, 4.92388263e-01, 2.03091762e+00,
       8.37677640e+00, 3.45510729e+01, 1.42510267e+02, 5.87801607e+02,
       2.42446202e+03, 1.00000000e+04]),
                         'class_weight': [{0: 0.05, 1: 0.95}, {0: 0.1, 1: 0.9},
                                          {0: 0.15, 1: 0.85},
                                          {0: 0.2, 1: 0.8}],
                         'penalty': ['none', 'l1', 'l2']},
             scoring='accuracy', verbose=1)

In [76]:
LR_GS.best_params_

{'C': 34.55107294592218, 'class_weight': {0: 0.2, 1: 0.8}, 'penalty': 'l2'}

In [77]:
LR_GS_Tuned = LR_GS.best_estimator_

In [78]:
y_pred_LR_Tuned = LR_GS_Tuned.predict(X_test)

In [79]:
print(classification_report(y_test, y_pred_LR_Tuned))

              precision    recall  f1-score   support

           0       0.91      0.50      0.64       479
           1       0.55      0.93      0.69       321

    accuracy                           0.67       800
   macro avg       0.73      0.71      0.67       800
weighted avg       0.77      0.67      0.66       800



In [80]:
cm_LR_Tuned = confusion_matrix(y_test, y_pred_LR_Tuned, labels=[1,0])

In [81]:
df_LR_Tuned = pd.DataFrame(cm_LR_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_LR_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,298,23
Akt 0,241,238


# Recommendation

- Focus to create interactive marketing campaign which attract and provide 31 - 40 years old customers needs
- Create additional benefit for customers working as management such as free pick up and delivery for car repairs
- Create collaboration with House Hold Insurance Division/company to offer House Hold Insurance Package with Car Insurance 
- Create collaboration with Car Loan Division to offer Car Loan package with Car Insurance
- Do more offering on March, September, October, December
- Optimize Frontline SOP to ensure Frontline don't make call to same customers more than 10 times
- Optimize Frontline SOP to ensure providing interesting talk script to engage customers and explained the Car Insurance benefit correctly

# Summary

- We suggest to use Model Random Forest with Hyper Parameter Tuning
- Create marketing program as insight on EDA