In [1]:
## Lets see more complex data

# make a prediction logistic regression model
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
# define dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_redundant=5, n_classes=2, random_state=1)

In [2]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

## Logisitic Regression Hyperparameter Tuning

In [3]:
LogisticRegression()

LogisticRegression()

## Model Training Hyperparameter Tuning
### GridsearchCV

In [6]:
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

In [7]:
parameters = {
            'penalty' : ('l1','l2','elasticnet'),
             'C': [1, 10, 20, 30]
             }

In [8]:
classifier = LogisticRegression()

In [10]:
clf = GridSearchCV(classifier, param_grid = parameters,
            cv = 5)

# penalty * C * cv = 3 * 4 * 5 = 60 iterations it will run

In [11]:
clf

GridSearchCV(cv=5, estimator=LogisticRegression(),
             param_grid={'C': [1, 10, 20, 30],
                         'penalty': ('l1', 'l2', 'elasticnet')})

In [12]:
## Splitting of training data to train and validation
clf.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=LogisticRegression(),
             param_grid={'C': [1, 10, 20, 30],
                         'penalty': ('l1', 'l2', 'elasticnet')})

## Randomized Search cv

In [13]:
clf.best_params_

{'C': 1, 'penalty': 'l2'}

In [14]:
clf.best_score_

0.8087500000000001

In [15]:
clf.cv_results_

{'mean_fit_time': array([0.        , 0.00579348, 0.00043058, 0.00019917, 0.00518737,
        0.        , 0.00040021, 0.00638161, 0.00019937, 0.00059843,
        0.0053721 , 0.0007041 ]),
 'std_fit_time': array([0.        , 0.00164004, 0.0005298 , 0.00039835, 0.00150576,
        0.        , 0.00049016, 0.00176728, 0.00039873, 0.00048862,
        0.00114718, 0.00060337]),
 'mean_score_time': array([0.        , 0.00039868, 0.        , 0.        , 0.00059867,
        0.        , 0.        , 0.00059805, 0.        , 0.        ,
        0.00040245, 0.        ]),
 'std_score_time': array([0.        , 0.00048829, 0.        , 0.        , 0.00048881,
        0.        , 0.        , 0.00048831, 0.        , 0.        ,
        0.00049293, 0.        ]),
 'param_C': masked_array(data=[1, 1, 1, 10, 10, 10, 20, 20, 20, 30, 30, 30],
              mask=[False, False, False, False, False, False, False, False,
                    False, False, False, False],
        fill_value='?',
             dtype=objec

In [16]:
classifier = LogisticRegression(C = 1, penalty='l2')
classifier

LogisticRegression(C=1)

In [17]:
classifier.fit(X_train, y_train)

LogisticRegression(C=1)

In [19]:
y_pred = classifier.predict(X_test)
y_pred

array([0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       0, 1])

In [20]:
classifier.predict_proba(X_test)

array([[0.71649202, 0.28350798],
       [0.19508969, 0.80491031],
       [0.12418141, 0.87581859],
       [0.05045906, 0.94954094],
       [0.88775659, 0.11224341],
       [0.75067497, 0.24932503],
       [0.97980488, 0.02019512],
       [0.3921745 , 0.6078255 ],
       [0.59920135, 0.40079865],
       [0.39295203, 0.60704797],
       [0.20428696, 0.79571304],
       [0.80257879, 0.19742121],
       [0.86422932, 0.13577068],
       [0.92665682, 0.07334318],
       [0.00131743, 0.99868257],
       [0.04171096, 0.95828904],
       [0.56288536, 0.43711464],
       [0.89322764, 0.10677236],
       [0.29278211, 0.70721789],
       [0.00870994, 0.99129006],
       [0.71879454, 0.28120546],
       [0.5108267 , 0.4891733 ],
       [0.76230298, 0.23769702],
       [0.73170811, 0.26829189],
       [0.10155737, 0.89844263],
       [0.04046512, 0.95953488],
       [0.57926768, 0.42073232],
       [0.00526468, 0.99473532],
       [0.03101648, 0.96898352],
       [0.96093035, 0.03906965],
       [0.

In [21]:
classifier.predict_log_proba(X_test)

array([[-3.33388172e-01, -1.26051500e+00],
       [-1.63429589e+00, -2.17024420e-01],
       [-2.08601183e+00, -1.32596294e-01],
       [-2.98659297e+00, -5.17766320e-02],
       [-1.19057679e-01, -2.18708550e+00],
       [-2.86782523e-01, -1.38899787e+00],
       [-2.04018331e-02, -3.90231410e+00],
       [-9.36048381e-01, -4.97867448e-01],
       [-5.12157590e-01, -9.14296104e-01],
       [-9.34067728e-01, -4.99147468e-01],
       [-1.58822958e+00, -2.28516666e-01],
       [-2.19925251e-01, -1.62241570e+00],
       [-1.45917131e-01, -1.99678798e+00],
       [-7.61719899e-02, -2.61260572e+00],
       [-6.63207605e+00, -1.31829377e-03],
       [-3.17699125e+00, -4.26058391e-02],
       [-5.74679290e-01, -8.27559791e-01],
       [-1.12913809e-01, -2.23705623e+00],
       [-1.22832659e+00, -3.46416476e-01],
       [-4.74329094e+00, -8.74808824e-03],
       [-3.30179718e-01, -1.26866971e+00],
       [-6.71724885e-01, -7.15038455e-01],
       [-2.71411193e-01, -1.43675844e+00],
       [-3.

In [23]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[78 13]
 [29 80]]
0.79
              precision    recall  f1-score   support

           0       0.73      0.86      0.79        91
           1       0.86      0.73      0.79       109

    accuracy                           0.79       200
   macro avg       0.79      0.80      0.79       200
weighted avg       0.80      0.79      0.79       200



### Randomized Search CV

In [24]:
from sklearn.model_selection import RandomizedSearchCV

In [25]:
random_clf=RandomizedSearchCV(LogisticRegression(),param_distributions=parameters,cv=5,n_iter=20)

In [26]:
random_clf

RandomizedSearchCV(cv=5, estimator=LogisticRegression(), n_iter=20,
                   param_distributions={'C': [1, 10, 20, 30],
                                        'penalty': ('l1', 'l2', 'elasticnet')})

In [27]:
random_clf.fit(X_train,y_train)

RandomizedSearchCV(cv=5, estimator=LogisticRegression(), n_iter=20,
                   param_distributions={'C': [1, 10, 20, 30],
                                        'penalty': ('l1', 'l2', 'elasticnet')})

In [28]:
random_clf.best_params_

{'penalty': 'l2', 'C': 1}