In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [10]:
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_redundant=5, random_state=42)

In [11]:
from sklearn.model_selection import train_test_split    
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [12]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
params = {
    "penalty": ["l1", "l2", "elasticnet"],
    "C": [1,2,10,20,30,40],
}

In [13]:
params

{'penalty': ['l1', 'l2', 'elasticnet'], 'C': [1, 2, 10, 20, 30, 40]}

In [14]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier

In [15]:
clf = GridSearchCV(classifier, params, cv=5, verbose=2)
clf

In [17]:
clf.fit(X_train, y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV] END ....................................C=1, penalty=l1; total time=   0.0s
[CV] END ....................................C=1, penalty=l1; total time=   0.0s
[CV] END ....................................C=1, penalty=l1; total time=   0.0s
[CV] END ....................................C=1, penalty=l1; total time=   0.0s
[CV] END ....................................C=1, penalty=l1; total time=   0.0s
[CV] END ....................................C=1, penalty=l2; total time=   0.0s
[CV] END ....................................C=1, penalty=l2; total time=   0.0s
[CV] END ....................................C=1, penalty=l2; total time=   0.0s
[CV] END ....................................C=1, penalty=l2; total time=   0.0s
[CV] END ....................................C=1, penalty=l2; total time=   0.0s
[CV] END ............................C=1, penalty=elasticnet; total time=   0.0s
[CV] END ............................C=1, penalt

In [18]:
clf.best_params_

{'C': 1, 'penalty': 'l2'}

In [19]:
clf.best_score_

np.float64(0.8214285714285714)

In [20]:
clf.best_estimator_.predict(X_test)

array([0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0])

In [21]:
model = LogisticRegression(penalty="l2", C=1)
model.fit(X_train, y_train)
model.predict(X_test)
y_pred = model.predict(X_test)

In [22]:
y_pred

array([0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0])

In [23]:
model.predict_proba(X_test)

array([[8.23159945e-01, 1.76840055e-01],
       [8.62622747e-01, 1.37377253e-01],
       [1.22395953e-01, 8.77604047e-01],
       [3.16050393e-01, 6.83949607e-01],
       [1.65942667e-01, 8.34057333e-01],
       [7.50859497e-01, 2.49140503e-01],
       [9.34387796e-01, 6.56122043e-02],
       [9.59846791e-01, 4.01532085e-02],
       [8.91892192e-01, 1.08107808e-01],
       [9.98427804e-01, 1.57219559e-03],
       [1.49356289e-01, 8.50643711e-01],
       [4.90395181e-01, 5.09604819e-01],
       [3.16316684e-01, 6.83683316e-01],
       [5.24952815e-01, 4.75047185e-01],
       [3.32335212e-01, 6.67664788e-01],
       [8.87593853e-01, 1.12406147e-01],
       [9.20745026e-01, 7.92549744e-02],
       [6.01451815e-01, 3.98548185e-01],
       [9.33059957e-01, 6.69400430e-02],
       [1.90471781e-01, 8.09528219e-01],
       [9.35430516e-01, 6.45694837e-02],
       [1.60020039e-01, 8.39979961e-01],
       [2.70660275e-01, 7.29339725e-01],
       [3.05889727e-01, 6.94110273e-01],
       [3.606268

In [24]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.75      0.83      0.79       144
           1       0.82      0.75      0.79       156

    accuracy                           0.79       300
   macro avg       0.79      0.79      0.79       300
weighted avg       0.79      0.79      0.79       300

[[119  25]
 [ 39 117]]
0.7866666666666666


In [25]:
#randomized search
clf = RandomizedSearchCV(classifier, params, n_iter=10, cv=5)
clf.fit(X_train, y_train)

In [26]:
clf.best_score_

np.float64(0.82)

In [27]:
clf.best_params_

{'penalty': 'l2', 'C': 40}