In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
from sklearn.datasets import make_classification

In [4]:
# Synthetic 3-class dataset: 1000 samples, 10 features (3 of them informative),
# generated with a fixed seed so the data is reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=3,
    n_classes=3,
    random_state=20,
)

In [6]:
# Confirm the feature matrix has the requested (n_samples, n_features) shape.
X.shape

(1000, 10)

In [11]:
# Display the generated class labels (n_classes=3, so the values are 0, 1 or 2).
y

array([1, 2, 1, 2, 2, 0, 0, 2, 0, 1, 2, 1, 2, 1, 0, 2, 1, 1, 2, 0, 2, 1,
       0, 1, 2, 1, 0, 1, 1, 0, 0, 0, 1, 0, 2, 1, 0, 0, 0, 1, 0, 2, 0, 1,
       0, 0, 1, 0, 0, 0, 2, 2, 1, 1, 2, 0, 2, 2, 1, 0, 2, 0, 1, 1, 2, 0,
       2, 0, 2, 2, 0, 1, 1, 0, 2, 1, 2, 1, 2, 0, 0, 0, 2, 2, 0, 0, 2, 1,
       0, 2, 2, 2, 0, 1, 0, 2, 2, 2, 0, 0, 0, 1, 1, 2, 2, 2, 2, 1, 0, 0,
       2, 0, 0, 2, 1, 0, 1, 0, 0, 1, 0, 1, 2, 2, 0, 2, 0, 0, 1, 0, 1, 2,
       2, 1, 2, 0, 0, 2, 1, 2, 0, 0, 1, 2, 1, 0, 0, 2, 1, 1, 2, 1, 2, 0,
       0, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 1, 2, 1, 2, 0, 0, 0, 2, 1, 2, 1,
       2, 1, 1, 2, 0, 0, 1, 2, 0, 0, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2,
       1, 0, 0, 2, 1, 1, 2, 1, 2, 2, 1, 0, 1, 2, 0, 1, 2, 0, 2, 1, 0, 1,
       0, 1, 0, 2, 0, 2, 1, 1, 0, 1, 0, 1, 0, 1, 2, 0, 1, 0, 0, 0, 1, 0,
       0, 2, 0, 0, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 0, 0, 2, 0, 2, 2, 2, 1,
       1, 2, 1, 2, 2, 2, 0, 1, 2, 2, 0, 2, 2, 1, 1, 2, 1, 0, 2, 0, 1, 2,
       0, 2, 2, 2, 1, 1, 2, 2, 0, 1, 2, 0, 1, 2, 2,

In [8]:
# Train/test split: hold out 30% of the samples for evaluation.
# The fixed random_state keeps the split reproducible across runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=10,
)

In [9]:
# Sanity check: with test_size=0.30, 70% of the 1000 samples should be in the training split.
X_train.shape

(700, 10)

## Logistic regression with a one-vs-rest (OvR) multiclass strategy

In [10]:
# model training
from sklearn.linear_model import LogisticRegression

In [12]:
# One-vs-rest logistic regression.
# The `multi_class` argument of LogisticRegression is deprecated in recent
# scikit-learn releases (FutureWarning today, removal scheduled), and the
# documented replacement for multi_class='ovr' is the OneVsRestClassifier
# wrapper. The wrapper exposes the same fit / predict / score interface that
# the following cells rely on.
from sklearn.multiclass import OneVsRestClassifier

logistic = OneVsRestClassifier(LogisticRegression())

In [13]:
# Fit the classifier on the training split.
logistic.fit(X_train,y_train)



In [14]:
# Mean accuracy of the fitted classifier on the held-out test split.
logistic.score(X_test,y_test)

0.7666666666666667

In [15]:
# Predicted class labels for the test split; reused by the metric cells that follow.
y_pred = logistic.predict(X_test)

In [16]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [17]:
# Test accuracy computed from the stored predictions.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
score

0.7666666666666667

In [19]:
# Per-class error breakdown: confusion matrix (rows = true labels,
# columns = predicted labels) followed by precision / recall / F1 per class.
matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(matrix)
print(classification_report(y_true=y_test, y_pred=y_pred))

[[71 13  9]
 [11 73 10]
 [16 11 86]]
              precision    recall  f1-score   support

           0       0.72      0.76      0.74        93
           1       0.75      0.78      0.76        94
           2       0.82      0.76      0.79       113

    accuracy                           0.77       300
   macro avg       0.77      0.77      0.77       300
weighted avg       0.77      0.77      0.77       300



## One-vs-rest with `OneVsRestClassifier`

In [20]:
from sklearn.multiclass import OneVsRestClassifier

In [21]:
# Explicit one-vs-rest wrapper around a default LogisticRegression.
model = OneVsRestClassifier(estimator=LogisticRegression())

In [22]:
# Fit the one-vs-rest wrapper on the training split.
model.fit(X_train,y_train)

In [23]:
# Mean accuracy of the one-vs-rest wrapper on the held-out test split.
model.score(X_test,y_test)

0.7666666666666667

## Hyperparameter tuning

In [24]:
from sklearn.model_selection import GridSearchCV

In [41]:
# Hyperparameter search space for LogisticRegression.
# Supersedes an earlier, narrower grid that stopped at C=100 and max_iter=500.
param_grid = {
    # Inverse regularization strength (smaller C = stronger regularization).
    "C": [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    # Regularization type.
    "penalty": ["l2"],
    # Solvers that support multi-class problems.
    "solver": ["newton-cg", "lbfgs", "saga"],
    # Optionally reweight classes to handle imbalance.
    "class_weight": [None, "balanced"],
    # Maximum number of solver iterations.
    "max_iter": [100, 200, 500, 1000],
}


In [42]:
# Base estimator handed to the grid search; the tuned hyperparameters come from param_grid.
log_reg = LogisticRegression()

In [43]:
# Exhaustive search over param_grid, scored by accuracy with 5-fold cross-validation.
grid = GridSearchCV(
    estimator=log_reg,
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
    verbose=1,
)

In [44]:
# Run the search: every combination in param_grid is cross-validated on the training split.
grid.fit(X_train,y_train)

Fitting 5 folds for each of 168 candidates, totalling 840 fits


In [45]:
# Hyperparameter combination with the best mean cross-validated accuracy.
grid.best_params_

{'C': 0.1,
 'class_weight': None,
 'max_iter': 100,
 'penalty': 'l2',
 'solver': 'newton-cg'}

In [46]:
# Accuracy of the best estimator found by the search, evaluated on the held-out test split.
grid.score(X_test,y_test)

0.7666666666666667

In [47]:
# Mean cross-validated accuracy of the best candidate (training folds only, not the test split).
grid.best_score_

0.7571428571428571

In [48]:
# Cross-check the tuned model's test accuracy via accuracy_score.
score = accuracy_score(y_true=y_test, y_pred=grid.predict(X_test))
score

0.7666666666666667

## Randomized search with `RandomizedSearchCV`

In [49]:
from sklearn.model_selection import RandomizedSearchCV

In [50]:
# Randomized search over the same param_grid: samples candidates instead of
# trying every combination (n_iter is left at its default), scored by accuracy
# with 5-fold cross-validation on all available cores.
# NOTE(review): the name `random` matches the stdlib module of the same name;
# consider renaming (together with the cells that use it) if that module is ever imported.
random = RandomizedSearchCV(
    estimator=LogisticRegression(),
    param_distributions=param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
    random_state=20,
)

In [51]:
# Run the randomized search on the training split.
random.fit(X_train,y_train)

In [52]:
# Mean cross-validated accuracy of the best sampled candidate.
random.best_score_

0.7571428571428571