### Logistic Regression

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

Here I will use the scaled dataset

In [11]:
data = pd.read_csv('scaleddata.csv')
data

Unnamed: 0,X1,X2,y
0,-1.529905,1.524954,1
1,-1.285864,1.309072,1
2,-1.675753,1.421765,1
3,-1.467577,1.351920,1
4,-1.529905,1.524954,1
...,...,...,...
2220,1.243449,1.431132,0
2221,1.384919,1.207115,0
2222,1.357108,1.224123,0
2223,1.210627,1.385059,0


In [13]:
X = data.drop('y', 1)
y = data['y']

Unnamed: 0,X1,X2
0,-1.529905,1.524954
1,-1.285864,1.309072
2,-1.675753,1.421765
3,-1.467577,1.351920
4,-1.529905,1.524954
...,...,...
2220,1.243449,1.431132
2221,1.384919,1.207115
2222,1.357108,1.224123
2223,1.210627,1.385059


In [14]:
# splitting the dataset into the training and test set

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [15]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(X_train, y_train)

LogisticRegression()

In [16]:
y_pred = lr.predict(X_test)

In [17]:
y_pred

array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,

In [18]:
from sklearn.metrics import *

In [19]:
accuracy_score(y_test,y_pred)

0.7235955056179775

In [20]:
confusion_matrix(y_test,y_pred)

array([[238,   2],
       [121,  84]], dtype=int64)

### Hyper Parameter Tuning

In [22]:
paralist = {
    'penalty':['l1','l2'],
    'tol':[1e-3,1e-4,1e-5],
    'solver' : ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    'multi_class' : ['auto', 'ovr', 'multinomial'],
    'max_iter':[100,150,200,250,300]
}

In [23]:
from sklearn.model_selection import RandomizedSearchCV

In [24]:
randsearch = RandomizedSearchCV(estimator=lr,param_distributions=paralist,cv=5,n_jobs=-1,verbose=1)

In [27]:
randsearch.fit(X_train,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed:   11.8s remaining:    1.8s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:   11.9s finished


RandomizedSearchCV(cv=5, estimator=LogisticRegression(), n_jobs=-1,
                   param_distributions={'max_iter': [100, 150, 200, 250, 300],
                                        'multi_class': ['auto', 'ovr',
                                                        'multinomial'],
                                        'penalty': ['l1', 'l2'],
                                        'solver': ['newton-cg', 'lbfgs',
                                                   'liblinear', 'sag', 'saga'],
                                        'tol': [0.001, 0.0001, 1e-05]},
                   verbose=1)

In [28]:
randsearch.best_params_

{'tol': 0.001,
 'solver': 'liblinear',
 'penalty': 'l2',
 'multi_class': 'auto',
 'max_iter': 100}

In [29]:
newlr = randsearch.best_estimator_
newlr

LogisticRegression(solver='liblinear', tol=0.001)

In [30]:
ny_pred = newlr.predict(X_test)

In [31]:
accuracy_score(y_test,ny_pred)

0.7235955056179775

In [32]:
confusion_matrix(y_test,ny_pred)

array([[238,   2],
       [121,  84]], dtype=int64)