# Import Library

In [1]:
!pip install imblearn



In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from numpy import mean
from numpy import std
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score



# Read Data

In [3]:
data = pd.read_csv("bank-full.csv", delimiter=';')

# Simple Data Visualization

In [4]:
data.shape

(45211, 17)

# Model Construction

## Label Encoding For Categorical Data Train

Label Encoding are used to transform non-numerical&nbsp;labels&nbsp;(as long as they are hashable and comparable) to numerical&nbsp;labels. Label that are being transform to numerical are categorical data column which is job, marital, education, default, housing, loan, contact, month and poutcome.

In [5]:
le = LabelEncoder()
encode_x = data.iloc[ : , :-1]
encode_x.job = le.fit_transform(encode_x.job)
encode_x.marital = le.fit_transform(encode_x.marital)
encode_x.education = le.fit_transform(encode_x.education)
encode_x.default = le.fit_transform(encode_x.default)
encode_x.housing = le.fit_transform(encode_x.housing)
encode_x.loan = le.fit_transform(encode_x.loan)
encode_x.contact = le.fit_transform(encode_x.contact)
encode_x.month = le.fit_transform(encode_x.month)
encode_x.poutcome = le.fit_transform(encode_x.poutcome)

## Split Data

Split data are used to split the original data into 2 different parts which is training data and testing data. Training data will also be divided into 2 parts training data and validation training data. This process to ensure when the model being trained we have a data to validate the predicted output of a model.

In [6]:
split_input_data = encode_x
split_output_data = data['y']

In [7]:
split_input_data

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,58,4,1,2,0,2143,1,0,2,5,8,261,1,-1,0,3
1,44,9,2,1,0,29,1,0,2,5,8,151,1,-1,0,3
2,33,2,1,1,0,2,1,1,2,5,8,76,1,-1,0,3
3,47,1,1,3,0,1506,1,0,2,5,8,92,1,-1,0,3
4,33,11,2,3,0,1,0,0,2,5,8,198,1,-1,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,51,9,1,2,0,825,0,0,0,17,9,977,3,-1,0,3
45207,71,5,0,0,0,1729,0,0,0,17,9,456,2,-1,0,3
45208,72,5,1,1,0,5715,0,0,0,17,9,1127,5,184,3,2
45209,57,1,1,1,0,668,0,0,1,17,9,508,4,-1,0,3


We divided the data into 4 parts:
- X_train is a training data that have no output.
- X_test is a testing data that have no output. This data will be use by the model to predict the output of the X_test data
- y_train is a training data that has the output data from X_train data.
- y_test is the original output data from X_test. This data will be used to validate the accuracy of predicted data that were generated by the model.

In [8]:
X_train, X_test, y_train, y_test = train_test_split(split_input_data, split_output_data, test_size=0.3, random_state=10)

In [9]:
pd.DataFrame(y_train).value_counts()

y  
no     27916
yes     3731
dtype: int64

In [11]:
y_test.value_counts()

no     12006
yes     1558
Name: y, dtype: int64

## Scale Data

Here we transform the data to fit within a specific scale using these algorithms a change of "1" in any numeric feature will give the same importance to each data. We used StandardScaler() method from Sklearn library. Define the transformation for train and test data:
- X_train will be scale using fit_transform()
- X_test will be scale using transform()

In [12]:
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train))
X_test_scaled = pd.DataFrame(scaler.transform(X_test))

In [13]:
X_train_scaled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,-1.026305,1.423354,-0.280363,-1.640840,-0.13531,-0.410155,0.893093,-0.436666,-0.710219,-0.094313,0.827351,-0.148404,-0.250493,2.575874,0.232301,-2.600080
1,-0.367918,-1.325856,1.364851,-0.298415,-0.13531,-0.440750,-1.119705,-0.436666,1.521344,1.470066,0.827351,-0.938292,3.303510,-0.411514,-0.304903,0.443520
2,0.102359,-1.325856,-0.280363,-0.298415,-0.13531,-0.440750,0.893093,-0.436666,-0.710219,-0.936671,-0.169407,2.929436,-0.573585,-0.411514,-0.304903,0.443520
3,-0.838194,-1.020388,-0.280363,-0.298415,-0.13531,-0.434309,0.893093,2.290082,1.521344,-0.936671,0.827351,-0.549185,0.072598,-0.411514,-0.304903,0.443520
4,1.513190,0.201483,-0.280363,-0.298415,-0.13531,-0.483583,-1.119705,-0.436666,-0.710219,0.387035,-1.498418,-0.237899,-0.250493,-0.411514,-0.304903,0.443520
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31642,-0.744139,-0.103985,1.364851,1.044009,-0.13531,-0.277792,-1.119705,-0.436666,-0.710219,-1.418018,0.162846,-0.319611,-0.573585,0.507682,0.769506,-0.571014
31643,0.102359,0.812419,-1.925577,-0.298415,-0.13531,-0.290675,-1.119705,-0.436666,-0.710219,1.470066,-0.501660,-0.817669,-0.573585,-0.411514,-0.304903,0.443520
31644,-0.932250,1.423354,1.364851,1.044009,-0.13531,0.082582,-1.119705,-0.436666,-0.710219,-1.658692,-0.833913,-0.389651,-0.573585,-0.411514,-0.304903,0.443520
31645,-1.308471,0.812419,1.364851,-0.298415,-0.13531,-0.331575,-1.119705,-0.436666,-0.710219,-1.418018,0.162846,-0.296265,-0.573585,-0.411514,-0.304903,0.443520


In [14]:
X_test_scaled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,1.325079,1.423354,-1.925577,-0.298415,-0.13531,-0.440750,0.893093,-0.436666,1.521344,0.507372,0.827351,0.415802,0.395689,-0.411514,-0.304903,0.44352
1,-0.932250,0.506951,1.364851,1.044009,-0.13531,0.145382,0.893093,-0.436666,1.521344,0.507372,0.827351,-0.739847,-0.573585,-0.411514,-0.304903,0.44352
2,-1.214416,-0.103985,1.364851,1.044009,-0.13531,0.284829,0.893093,-0.436666,1.521344,0.266698,0.162846,2.520873,-0.250493,-0.411514,-0.304903,0.44352
3,-1.214416,1.423354,-0.280363,-0.298415,-0.13531,1.958202,0.893093,-0.436666,-0.710219,-1.057007,0.827351,-0.253463,-0.573585,-0.411514,-0.304903,0.44352
4,0.196415,-0.714920,-0.280363,-0.298415,-0.13531,-0.421749,-1.119705,-0.436666,1.521344,-0.334986,0.827351,0.695959,-0.250493,-0.411514,-0.304903,0.44352
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13559,-1.684693,0.812419,-0.280363,-0.298415,-0.13531,-0.436241,0.893093,2.290082,0.405563,-0.334986,0.827351,-0.930510,1.688054,3.175350,0.232301,-2.60008
13560,-0.932250,1.423354,-0.280363,-0.298415,-0.13531,-0.278759,-1.119705,-0.436666,-0.710219,0.507372,1.159604,-0.311829,-0.573585,-0.411514,-0.304903,0.44352
13561,-0.650084,1.423354,1.364851,-0.298415,-0.13531,1.404920,0.893093,-0.436666,-0.710219,0.627708,-0.169407,-0.611442,0.072598,-0.411514,-0.304903,0.44352
13562,-0.179807,0.812419,-0.280363,-0.298415,-0.13531,-0.424003,-1.119705,-0.436666,0.405563,-1.658692,-0.833913,4.520886,0.072598,-0.411514,-0.304903,0.44352


## Mix Sampling using SMOTE &amp; Random Under Sampler

Here we distributed the data using SMOTE and Random Under Sampler to make the classification output data balance. We used SMOTE to duplicates and variance the 'yes' data for the model to learn more variance of yes data.  We used Random Under Sampler to delete random sample of the 'no' data to make the model not to have low bias towards 'yes' data.

In [15]:
mixSample_X = X_train_scaled

# define pipeline
over = SMOTE(sampling_strategy=0.15)
under = RandomUnderSampler(sampling_strategy=0.5)
steps = [('o', over), ('u', under)]
pipeline = Pipeline(steps=steps)

# transform the dataset
mixSample_X, mixSample_Y = pipeline.fit_resample(mixSample_X, y_train)
pd.DataFrame(mixSample_Y).value_counts()



no     8374
yes    4187
dtype: int64

# Logistic Regression from SKLEARN Library

## Model Fit with default Parameters

Here we used logistic regression without specifying the parameters 

In [10]:
model = LogisticRegression()

## Train the Model using Training Data and Validation Training Data

In [11]:
model.fit(mixSample_X, mixSample_Y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

## Predict Model using Testing Data

In [12]:
y_pred = model.predict(X_test_scaled)

### Before evaluation we need to know the value count of each class from the original output data

Here are the count of 'no' data and the count of 'yes' data from the original output data

In [16]:
y_test.value_counts()

no     12006
yes     1558
Name: y, dtype: int64

## Evaluation Metrics using Confusion Matrix

Here we used confusion matrix which is crosstab() method from pandas There are 4 terms as a representation of the result of the classification process confusion matrix. The four terms: 
- True Positive (TP): Represents positive data that is predicted to be correct.
- True Negative (TN): Represents negative data that is predicted to be correct.
- False Positive (FP) Type I Error: Represents negative data but predicted as positive data.
- False Negative (FN) Type II Error: Represents positive data but predicted as negative data.

In [14]:
confusion_matrix = pd.crosstab(y_test, y_pred)
print (confusion_matrix)

col_0     no   yes
y                 
no     10843  1163
yes      606   952


 The conclusion from the above result: 
- True Positive (TP): **952** 'Yes' predicted data is correct **from 1558** 'Yes' original data 
- True Negative (TN): **10843** 'No' predicted data is correct from **12006** 'No' original data
- False Negative (FN): **606** 'Yes' predicted data falsely predicted as 'No' data
- False Positive (FP): **1163** 'No' predicted data falsely predicted as 'Yes' data

## Evaluation Metrics using Classification Report

Here we used the classification report from sklearn library in the classification report function we have precision, recall, f1-score and support for the evaluation metrics.

### Precision

Precision is the ability of a classifier not to label an instance positive that is actually negative. For each class, it is defined as the ratio of true positives to the sum of a true positive and false positive.

### Recall

Recall is the ability of a classifier to find all positive instances. For each class it is defined as the ratio of true positives to the sum of true positives and false negatives.

### F1-Score

The F1 score is a weighted harmonic mean of precision and recall such that the best score is 1.0 and the worst is 0.0. F1 scores are lower than accuracy measures as they embed precision and recall into their computation. As a rule of thumb, the weighted average of F1 should be used to compare classifier models, not global accuracy.

### Support

Support is the number of actual occurrences of the class in the specified dataset. Imbalanced support in the training data may indicate structural weaknesses in the reported scores of the classifier and could indicate the need for stratified sampling or rebalancing. Support doesn’t change between models but instead diagnoses the evaluation process.

In [15]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

          no       0.95      0.90      0.92     12006
         yes       0.45      0.61      0.52      1558

    accuracy                           0.87     13564
   macro avg       0.70      0.76      0.72     13564
weighted avg       0.89      0.87      0.88     13564



 The conclusion from the above result:
- Precision: **95%** 'No' predicted data are correctly predict &amp; **45%** 'Yes' predicted data are correctly predict
- Recall: **90%** 'No' original data have been predicted by the model &amp; **61%** 'Yes' original data have been predicted by the model
- F1-Score: The mean from precision and recall for 'No' predicted data is **92%** and for 'Yes' predicted data is **52%**

# Model Fit with Best Parameters using Random Search CV

We used Random Search CV from to find best parameter for our Logistic regression model here we will compare the model that use default parameter and the model that will be given the best parameter from Random Search CV

## Define Parameters

In [16]:
parameters = {
    'tol': [0.00001, 0.0001, 0.001],
    'C': [0.001,0.01,0.1,1,10,100,1000],
    'class_weight': ['balanced', None],
    'random_state' : [None,10,20,30,42],
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    'max_iter' : [100],
    'verbose':[0,1,2],
}

## Find Best Parameter using RandomSearch CV

Here we want to focus the result based on the Recall evaluation metric because we would like to know if the result of the predicted output made by the model already predict most of the correct data inside the predicted output.

In [17]:
from sklearn.metrics import precision_score, recall_score, make_scorer

recall_scorer = make_scorer(recall_score,pos_label='yes')

grid_model = RandomizedSearchCV(LogisticRegression(), param_distributions = parameters,n_iter=20,verbose=2,scoring=recall_scorer)

grid_model.fit(mixSample_X, mixSample_Y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV] verbose=2, tol=0.001, solver=saga, random_state=30, max_iter=100, class_weight=None, C=1 
convergence after 9 epochs took 0 seconds
[CV]  verbose=2, tol=0.001, solver=saga, random_state=30, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=2, tol=0.001, solver=saga, random_state=30, max_iter=100, class_weight=None, C=1 
convergence after 9 epochs took 0 seconds


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


[CV]  verbose=2, tol=0.001, solver=saga, random_state=30, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=2, tol=0.001, solver=saga, random_state=30, max_iter=100, class_weight=None, C=1 
convergence after 10 epochs took 0 seconds
[CV]  verbose=2, tol=0.001, solver=saga, random_state=30, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=2, tol=0.001, solver=saga, random_state=30, max_iter=100, class_weight=None, C=1 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


convergence after 10 epochs took 0 seconds
[CV]  verbose=2, tol=0.001, solver=saga, random_state=30, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=2, tol=0.001, solver=saga, random_state=30, max_iter=100, class_weight=None, C=1 
convergence after 11 epochs took 0 seconds
[CV]  verbose=2, tol=0.001, solver=saga, random_state=30, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=1, tol=1e-05, solver=saga, random_state=None, max_iter=100, class_weight=None, C=1 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


convergence after 17 epochs took 0 seconds
[CV]  verbose=1, tol=1e-05, solver=saga, random_state=None, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=1, tol=1e-05, solver=saga, random_state=None, max_iter=100, class_weight=None, C=1 
convergence after 17 epochs took 0 seconds
[CV]  verbose=1, tol=1e-05, solver=saga, random_state=None, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=1, tol=1e-05, solver=saga, random_state=None, max_iter=100, class_weight=None, C=1 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


convergence after 18 epochs took 0 seconds
[CV]  verbose=1, tol=1e-05, solver=saga, random_state=None, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=1, tol=1e-05, solver=saga, random_state=None, max_iter=100, class_weight=None, C=1 
convergence after 16 epochs took 0 seconds
[CV]  verbose=1, tol=1e-05, solver=saga, random_state=None, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=1, tol=1e-05, solver=saga, random_state=None, max_iter=100, class_weight=None, C=1 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


convergence after 17 epochs took 0 seconds
[CV]  verbose=1, tol=1e-05, solver=saga, random_state=None, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=sag, random_state=20, max_iter=100, class_weight=None, C=0.001 
convergence after 14 epochs took 0 seconds
[CV]  verbose=2, tol=0.0001, solver=sag, random_state=20, max_iter=100, class_weight=None, C=0.001, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=sag, random_state=20, max_iter=100, class_weight=None, C=0.001 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


convergence after 13 epochs took 1 seconds
[CV]  verbose=2, tol=0.0001, solver=sag, random_state=20, max_iter=100, class_weight=None, C=0.001, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=sag, random_state=20, max_iter=100, class_weight=None, C=0.001 
convergence after 13 epochs took 0 seconds
[CV]  verbose=2, tol=0.0001, solver=sag, random_state=20, max_iter=100, class_weight=None, C=0.001, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=sag, random_state=20, max_iter=100, class_weight=None, C=0.001 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


convergence after 14 epochs took 0 seconds
[CV]  verbose=2, tol=0.0001, solver=sag, random_state=20, max_iter=100, class_weight=None, C=0.001, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=sag, random_state=20, max_iter=100, class_weight=None, C=0.001 
convergence after 12 epochs took 0 seconds
[CV]  verbose=2, tol=0.0001, solver=sag, random_state=20, max_iter=100, class_weight=None, C=0.001, total=   0.1s
[CV] verbose=1, tol=1e-05, solver=sag, random_state=20, max_iter=100, class_weight=balanced, C=100 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


convergence after 24 epochs took 0 seconds
[CV]  verbose=1, tol=1e-05, solver=sag, random_state=20, max_iter=100, class_weight=balanced, C=100, total=   0.2s
[CV] verbose=1, tol=1e-05, solver=sag, random_state=20, max_iter=100, class_weight=balanced, C=100 
convergence after 24 epochs took 0 seconds
[CV]  verbose=1, tol=1e-05, solver=sag, random_state=20, max_iter=100, class_weight=balanced, C=100, total=   0.2s
[CV] verbose=1, tol=1e-05, solver=sag, random_state=20, max_iter=100, class_weight=balanced, C=100 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


convergence after 23 epochs took 0 seconds
[CV]  verbose=1, tol=1e-05, solver=sag, random_state=20, max_iter=100, class_weight=balanced, C=100, total=   0.2s
[CV] verbose=1, tol=1e-05, solver=sag, random_state=20, max_iter=100, class_weight=balanced, C=100 
convergence after 25 epochs took 1 seconds
[CV]  verbose=1, tol=1e-05, solver=sag, random_state=20, max_iter=100, class_weight=balanced, C=100, total=   0.2s
[CV] verbose=1, tol=1e-05, solver=sag, random_state=20, max_iter=100, class_weight=balanced, C=100 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


convergence after 24 epochs took 0 seconds
[CV]  verbose=1, tol=1e-05, solver=sag, random_state=20, max_iter=100, class_weight=balanced, C=100, total=   0.2s
[CV] verbose=0, tol=0.001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1 
[CV]  verbose=0, tol=0.001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=0, tol=0.001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1 
[CV]  verbose=0, tol=0.001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=0, tol=0.001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1 
[CV]  verbose=0, tol=0.001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=0, tol=0.001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1 
[CV]  verbose=0, tol=0.001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=0, tol=0.

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


[CV]  verbose=2, tol=1e-05, solver=newton-cg, random_state=42, max_iter=100, class_weight=balanced, C=10, total=   0.2s
[CV] verbose=2, tol=1e-05, solver=newton-cg, random_state=42, max_iter=100, class_weight=balanced, C=10 
[CV]  verbose=2, tol=1e-05, solver=newton-cg, random_state=42, max_iter=100, class_weight=balanced, C=10, total=   0.2s
[CV] verbose=2, tol=1e-05, solver=newton-cg, random_state=42, max_iter=100, class_weight=balanced, C=10 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  verbose=2, tol=1e-05, solver=newton-cg, random_state=42, max_iter=100, class_weight=balanced, C=10, total=   0.2s
[CV] verbose=2, tol=1e-05, solver=newton-cg, random_state=42, max_iter=100, class_weight=balanced, C=10 
[CV]  verbose=2, tol=1e-05, solver=newton-cg, random_state=42, max_iter=100, class_weight=balanced, C=10, total=   0.2s
[CV] verbose=2, tol=0.001, solver=liblinear, random_state=42, max_iter=100, class_weight=balanced, C=1000 
[LibLinear]

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


[CV]  verbose=2, tol=0.001, solver=liblinear, random_state=42, max_iter=100, class_weight=balanced, C=1000, total=   0.1s
[CV] verbose=2, tol=0.001, solver=liblinear, random_state=42, max_iter=100, class_weight=balanced, C=1000 
[LibLinear][CV]  verbose=2, tol=0.001, solver=liblinear, random_state=42, max_iter=100, class_weight=balanced, C=1000, total=   0.1s
[CV] verbose=2, tol=0.001, solver=liblinear, random_state=42, max_iter=100, class_weight=balanced, C=1000 
[LibLinear][CV]  verbose=2, tol=0.001, solver=liblinear, random_state=42, max_iter=100, class_weight=balanced, C=1000, total=   0.1s
[CV] verbose=2, tol=0.001, solver=liblinear, random_state=42, max_iter=100, class_weight=balanced, C=1000 
[LibLinear][CV]  verbose=2, tol=0.001, solver=liblinear, random_state=42, max_iter=100, class_weight=balanced, C=1000, total=   0.1s
[CV] verbose=2, tol=0.001, solver=liblinear, random_state=42, max_iter=100, class_weight=balanced, C=1000 
[LibLinear][CV]  verbose=2, tol=0.001, solver=libli

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  verbose=2, tol=1e-05, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=0.1, total=   0.2s
[CV] verbose=2, tol=1e-05, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=0.1 
[CV]  verbose=2, tol=1e-05, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=0.1, total=   0.2s
[CV] verbose=2, tol=1e-05, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=0.1 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


[CV]  verbose=2, tol=1e-05, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=0.1, total=   0.2s
[CV] verbose=2, tol=1e-05, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=0.1 
[CV]  verbose=2, tol=1e-05, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=0.1, total=   0.1s
[CV] verbose=2, tol=1e-05, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=0.1 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


[CV]  verbose=2, tol=1e-05, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=0.1, total=   0.2s
[CV] verbose=0, tol=0.001, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=1 
[CV]  verbose=0, tol=0.001, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=0, tol=0.001, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=1 
[CV]  verbose=0, tol=0.001, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=1, total=   0.2s
[CV] verbose=0, tol=0.001, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=1 
[CV]  verbose=0, tol=0.001, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=0, tol=0.001, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=1 
[CV]  verbose=0, tol=0.001, solver=newton-cg, random_state=20, max_iter=100, class_weight=None, C=1, total=   0.1s
[CV] verbose=0, tol=0.0

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  verbose=2, tol=0.0001, solver=newton-cg, random_state=None, max_iter=100, class_weight=balanced, C=10, total=   0.2s
[CV] verbose=2, tol=0.0001, solver=newton-cg, random_state=None, max_iter=100, class_weight=balanced, C=10 
[CV]  verbose=2, tol=0.0001, solver=newton-cg, random_state=None, max_iter=100, class_weight=balanced, C=10, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=newton-cg, random_state=None, max_iter=100, class_weight=balanced, C=10 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


[CV]  verbose=2, tol=0.0001, solver=newton-cg, random_state=None, max_iter=100, class_weight=balanced, C=10, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=newton-cg, random_state=None, max_iter=100, class_weight=balanced, C=10 
[CV]  verbose=2, tol=0.0001, solver=newton-cg, random_state=None, max_iter=100, class_weight=balanced, C=10, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=newton-cg, random_state=None, max_iter=100, class_weight=balanced, C=10 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  verbose=2, tol=0.0001, solver=newton-cg, random_state=None, max_iter=100, class_weight=balanced, C=10, total=   0.2s
[CV] verbose=2, tol=0.0001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1000 
convergence after 13 epochs took 0 seconds
[CV]  verbose=2, tol=0.0001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1000, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1000 
convergence after 13 epochs took 1 seconds
[CV]  verbose=2, tol=0.0001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1000, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1000 
convergence after 13 epochs took 0 seconds
[CV]  verbose=2, tol=0.0001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1000, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1000 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


convergence after 12 epochs took 0 seconds
[CV]  verbose=2, tol=0.0001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1000, total=   0.1s
[CV] verbose=2, tol=0.0001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1000 
convergence after 12 epochs took 0 seconds
[CV]  verbose=2, tol=0.0001, solver=saga, random_state=20, max_iter=100, class_weight=None, C=1000, total=   0.1s
[CV] verbose=2, tol=1e-05, solver=liblinear, random_state=10, max_iter=100, class_weight=None, C=0.1 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


[LibLinear][CV]  verbose=2, tol=1e-05, solver=liblinear, random_state=10, max_iter=100, class_weight=None, C=0.1, total=   0.1s
[CV] verbose=2, tol=1e-05, solver=liblinear, random_state=10, max_iter=100, class_weight=None, C=0.1 
[LibLinear][CV]  verbose=2, tol=1e-05, solver=liblinear, random_state=10, max_iter=100, class_weight=None, C=0.1, total=   0.1s
[CV] verbose=2, tol=1e-05, solver=liblinear, random_state=10, max_iter=100, class_weight=None, C=0.1 
[LibLinear][CV]  verbose=2, tol=1e-05, solver=liblinear, random_state=10, max_iter=100, class_weight=None, C=0.1, total=   0.1s
[CV] verbose=2, tol=1e-05, solver=liblinear, random_state=10, max_iter=100, class_weight=None, C=0.1 
[LibLinear][CV]  verbose=2, tol=1e-05, solver=liblinear, random_state=10, max_iter=100, class_weight=None, C=0.1, total=   0.1s
[CV] verbose=2, tol=1e-05, solver=liblinear, random_state=10, max_iter=100, class_weight=None, C=0.1 
[LibLinear][CV]  verbose=2, tol=1e-05, solver=liblinear, random_state=10, max_it

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  verbose=1, tol=1e-05, solver=newton-cg, random_state=30, max_iter=100, class_weight=None, C=1000, total=   0.2s
[CV] verbose=1, tol=1e-05, solver=newton-cg, random_state=30, max_iter=100, class_weight=None, C=1000 
[CV]  verbose=1, tol=1e-05, solver=newton-cg, random_state=30, max_iter=100, class_weight=None, C=1000, total=   0.2s
[CV] verbose=1, tol=1e-05, solver=newton-cg, random_state=30, max_iter=100, class_weight=None, C=1000 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


[CV]  verbose=1, tol=1e-05, solver=newton-cg, random_state=30, max_iter=100, class_weight=None, C=1000, total=   0.2s
[CV] verbose=1, tol=1e-05, solver=newton-cg, random_state=30, max_iter=100, class_weight=None, C=1000 
[CV]  verbose=1, tol=1e-05, solver=newton-cg, random_state=30, max_iter=100, class_weight=None, C=1000, total=   0.2s
[CV] verbose=1, tol=1e-05, solver=newton-cg, random_state=30, max_iter=100, class_weight=None, C=1000 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  verbose=1, tol=1e-05, solver=newton-cg, random_state=30, max_iter=100, class_weight=None, C=1000, total=   0.2s
[CV] verbose=2, tol=0.001, solver=saga, random_state=None, max_iter=100, class_weight=None, C=10 
convergence after 10 epochs took 0 seconds
[CV]  verbose=2, tol=0.001, solver=saga, random_state=None, max_iter=100, class_weight=None, C=10, total=   0.1s
[CV] verbose=2, tol=0.001, solver=saga, random_state=None, max_iter=100, class_weight=None, C=10 
convergence after 9 epochs took 0 seconds


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  verbose=2, tol=0.001, solver=saga, random_state=None, max_iter=100, class_weight=None, C=10, total=   0.1s
[CV] verbose=2, tol=0.001, solver=saga, random_state=None, max_iter=100, class_weight=None, C=10 
convergence after 9 epochs took 0 seconds
[CV]  verbose=2, tol=0.001, solver=saga, random_state=None, max_iter=100, class_weight=None, C=10, total=   0.1s
[CV] verbose=2, tol=0.001, solver=saga, random_state=None, max_iter=100, class_weight=None, C=10 
convergence after 10 epochs took 0 seconds


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   12.8s finished


[CV]  verbose=2, tol=0.001, solver=saga, random_state=None, max_iter=100, class_weight=None, C=10, total=   0.1s
[CV] verbose=2, tol=0.001, solver=saga, random_state=None, max_iter=100, class_weight=None, C=10 
convergence after 9 epochs took 0 seconds
[CV]  verbose=2, tol=0.001, solver=saga, random_state=None, max_iter=100, class_weight=None, C=10, total=   0.1s
[LibLinear]

RandomizedSearchCV(cv=None, error_score=nan,
                   estimator=LogisticRegression(C=1.0, class_weight=None,
                                                dual=False, fit_intercept=True,
                                                intercept_scaling=1,
                                                l1_ratio=None, max_iter=100,
                                                multi_class='auto', n_jobs=None,
                                                penalty='l2', random_state=None,
                                                solver='lbfgs', tol=0.0001,
                                                verbose=0, warm_start=False),
                   iid='deprecated', n_iter=20, n_jobs=None,
                   param_distributions={'...001, 0.01, 0.1, 1, 10, 100,
                                              1000],
                                        'class_weight': ['balanced', None],
                                        'max_iter': [100],
                  

## Check Best Parameter using Random Search CV

In [18]:
grid_model.best_params_

{'C': 0.01,
 'class_weight': 'balanced',
 'max_iter': 100,
 'random_state': 20,
 'solver': 'liblinear',
 'tol': 0.001,
 'verbose': 1}

## Input Best Parameter into the Logistic Regression Function

In [25]:
model = LogisticRegression(C= 0.01,
 class_weight= 'balanced',
 max_iter= 100,
 random_state= 20,
 solver= 'liblinear',
 tol= 0.001,
 verbose= 1)

## Train the Model using Training Data and Validation Training Data

In [26]:
model.fit(mixSample_X, mixSample_Y)

[LibLinear]

LogisticRegression(C=0.01, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=20, solver='liblinear', tol=0.001, verbose=1,
                   warm_start=False)

## Predict Model using Testing Data

In [27]:
y_pred = model.predict(X_test_scaled)

## Evaluation Metrics using Confusion Matrix

In [28]:
confusion_matrix = pd.crosstab(y_test, y_pred)
print (confusion_matrix)

col_0    no   yes
y                
no     9646  2360
yes     302  1256


 The conclusion from the above result: 
- True Positive (TP): **1256** 'Yes' predicted data is correct **from 1558** 'Yes' original data 
- True Negative (TN): **9646** 'No' predicted data is correct **from 12006** 'No' original data
- False Negative (FN): **302** 'Yes' predicted data falsely predicted as 'No' data
- False Positive (FP): **2360** 'No' predicted data falsely predicted as 'Yes' data

## Evaluation Metrics using Classification Report

In [29]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

          no       0.97      0.80      0.88     12006
         yes       0.35      0.81      0.49      1558

    accuracy                           0.80     13564
   macro avg       0.66      0.80      0.68     13564
weighted avg       0.90      0.80      0.83     13564



 The conclusion from the above result:
- Precision: **97%** 'No' predicted data are correctly predict &amp; **35%** 'Yes' predicted data are correctly predict
- Recall: **80%** 'No' original data have been predicted by the model &amp; **81%** 'Yes' original data have been predicted by the model
- F1-Score: The mean from precision and recall for 'No' predicted data is **88%** and for 'Yes' predicted data is **49%**

# Evaluation Metrics using Cross Validation

In [30]:
cv = KFold(n_splits=5, shuffle=True)
scores = cross_val_score(model, split_input_data, split_output_data, cv=cv)

[LibLinear][LibLinear][LibLinear][LibLinear][LibLinear]

In [31]:
print('F1_Score: %.3f (%.3f)' % (mean(scores), std(scores)))

F1_Score: 0.798 (0.003)


In [32]:
kf= KFold(n_splits=5)
X = split_input_data.to_numpy()
y = split_output_data
le = LabelEncoder()
y = le.fit_transform(y)
F1=[]
Accuracy=[]
Recall=[]
Precision=[]
for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
    X_train = X[train_index]
    y_train = y[train_index]  # Based on your code, you might need a ravel call here, but I would look into how you're generating your y
    X_test = X[test_index]
    y_test = y[test_index]  # See comment on ravel and  y_train
    
    #Create the Dataframe
    X_train = pd.DataFrame(X_train,columns=data.drop('y',axis=1).columns)
    X_test = pd.DataFrame(X_test,columns=data.drop('y',axis=1).columns)
    

    #Standard Scalling
    ss = StandardScaler()
    X_train= ss.fit_transform(X_train)
    X_test = ss.transform(X_test)
    
    X_train = pd.DataFrame(X_train, columns = split_input_data.columns)
    X_test = pd.DataFrame(X_test , columns = split_input_data.columns)
    

    # Sampling
    over = SMOTE(sampling_strategy = 0.18)
    under = RandomUnderSampler(sampling_strategy=0.85)
    steps = [('o',over),('u',under)]
    pipeline = Pipeline(steps=steps)
    
    X_train,y_train_s =pipeline.fit_resample(X_train,y_train)
    
    #Modelling
    model1 = LogisticRegression(C= 0.001,
 class_weight= 'balanced',
 max_iter= 100,
 random_state= None,
 solver= 'liblinear',
 tol= 1e-05,
 verbose= 2)
    model1.fit(X_train,y_train_s)
    y_pred1 = model1.predict(X_test)
    
    F1.append(f1_score(y_test, y_pred1))
    Accuracy.append(accuracy_score(y_test, y_pred1))
    Recall.append(recall_score(y_test, y_pred1))
    Precision.append(precision_score(y_test, y_pred1))



[LibLinear]



[LibLinear]



[LibLinear]



[LibLinear][LibLinear]



In [33]:
print('F1 : '+str(np.mean(F1)))
print('Accuracy : '+str(np.mean(Accuracy)))
print('Recall : '+str(np.mean(Recall)))
print('Precision : '+str(np.mean(Precision)))

F1 : 0.37310730355916066
Accuracy : 0.7309680776817917
Recall : 0.7570421598604739
Precision : 0.28437546637333205
