<a href="https://colab.research.google.com/github/hasanocal7/Kur_3/blob/main/Logistic_Regression_and_Random_Search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [5]:
X, y = make_classification(n_samples = 2000, n_features = 50, n_informative = 10, random_state = 812, n_classes = 2)

X

array([[ 1.23173717,  1.10532801,  0.22776564, ..., -0.78356596,
        -0.15711703, -1.65414508],
       [ 0.48439638, -0.14303632, -2.51423517, ...,  0.60566089,
        -0.13130922, -0.21336398],
       [ 1.43086386, -0.13222191, -0.97300061, ..., -1.24373168,
        -0.8441854 , -1.68326389],
       ...,
       [ 1.31133403,  0.03317187,  2.49820238, ..., -0.42860444,
         1.11198425, -0.37050314],
       [ 1.40062972, -0.98004073, -0.89407227, ...,  1.63212064,
         0.06716772,  1.21445157],
       [ 0.44062982, -0.17520526,  3.96299506, ...,  0.25240773,
        -0.095253  ,  0.83673848]])

In [6]:
y

array([0, 0, 0, ..., 0, 1, 0])

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, train_size = 0.8)

In [9]:
vanilla_lr = LogisticRegression()

In [10]:
vanilla_lr.fit(X_train, y_train)

In [11]:
y_pred = vanilla_lr.predict(X_test)

In [12]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.69      0.76      0.72       187
           1       0.77      0.70      0.74       213

    accuracy                           0.73       400
   macro avg       0.73      0.73      0.73       400
weighted avg       0.73      0.73      0.73       400



# Hyperparameter Tuning

In [13]:
from sklearn.model_selection import RandomizedSearchCV
# GridSearch ile benzer çaışma mantığı

In [17]:
param_dist = {
    "C" :  [0.01, 0.1, 1, 10, 100],
    # Verisetin gerçek hayatı ne kadar ölçüde yansıtıyor
    # Büyük C datasetime güveniyorum
    # Küçük C güvenmiyorum
    "penalty" : ["l1","l2"],
    "solver" : ["saga", "liblinear"]
}

In [18]:
tune_model = LogisticRegression()

In [29]:
random_search = RandomizedSearchCV(tune_model, param_dist, cv = 5, n_iter = 5, verbose = 1)

In [30]:
random_search.fit(X_train, y_train)

Fitting 5 folds for each of 5 candidates, totalling 25 fits




In [31]:
random_search.best_params_

{'solver': 'saga', 'penalty': 'l1', 'C': 1}

In [32]:
best_model = random_search.best_estimator_

In [33]:
y_pred = best_model.predict(X_test)

In [34]:
print(classification_report(y_test, y_pred, digits = 3))

              precision    recall  f1-score   support

           0      0.691     0.754     0.721       187
           1      0.765     0.704     0.733       213

    accuracy                          0.728       400
   macro avg      0.728     0.729     0.727       400
weighted avg      0.731     0.728     0.728       400



In [35]:
tuned_probas = best_model.predict_proba(X_test)[:, 1]
tuned_probas

array([0.93788942, 0.66638773, 0.83202023, 0.41991771, 0.75854922,
       0.74791648, 0.37374344, 0.93841681, 0.9310206 , 0.28963968,
       0.89805008, 0.70290016, 0.61516996, 0.23478161, 0.91205055,
       0.48825326, 0.32427351, 0.42564336, 0.95963352, 0.62825019,
       0.27439291, 0.61747263, 0.04401193, 0.74380465, 0.08047815,
       0.31853303, 0.04981681, 0.17749084, 0.96200023, 0.80299462,
       0.2014121 , 0.68041909, 0.2195676 , 0.41929387, 0.73065418,
       0.07845937, 0.9831193 , 0.62912276, 0.31523576, 0.87114754,
       0.23769825, 0.22727389, 0.6950789 , 0.34037443, 0.57794353,
       0.19646675, 0.33495768, 0.99345489, 0.99391669, 0.86274764,
       0.5066727 , 0.33915629, 0.21457539, 0.53186746, 0.21672131,
       0.23912126, 0.0417351 , 0.22314976, 0.70038254, 0.1615942 ,
       0.78799706, 0.09123176, 0.40251659, 0.7207201 , 0.42979251,
       0.60212524, 0.34457993, 0.69993738, 0.34907155, 0.07933136,
       0.04607905, 0.44650912, 0.30477793, 0.55071479, 0.09215

In [39]:
np.where(tuned_probas > 0.5, 1, 0)

array([1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,