In [16]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score,accuracy_score,confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import GridSearchCV

from grid_search_utils import plot_grid_search,table_grid_search

from data_prepare_func import convert_to_array

In [17]:
x_train,y_train = convert_to_array("data_train/",size=28)
x_test,y_test = convert_to_array('data_test/',size=28)

In [18]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

In [19]:
lr = LogisticRegression()

In [20]:
lr.fit(x_train,y_train)

train_yhat = lr.predict(x_train)
train_acc = accuracy_score(train_yhat,y_train)

test_yhat = lr.predict(x_test)
test_acc = accuracy_score(test_yhat,y_test)


print('Train Score\n',classification_report(y_train,train_yhat))
print('Test Score\n',classification_report(y_test,test_yhat))
print(f"AVG Accuracy-Score Train: {train_acc}\nAVG Accuracy-Score Test: {test_acc}")

Train Score
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       173
           1       1.00      1.00      1.00       175
           2       1.00      1.00      1.00       177
           3       1.00      1.00      1.00       178
           4       1.00      1.00      1.00       174
           5       1.00      1.00      1.00       175
           6       1.00      1.00      1.00       174
           7       1.00      1.00      1.00       175
           8       1.00      1.00      1.00       174
           9       1.00      1.00      1.00       178

    accuracy                           1.00      1753
   macro avg       1.00      1.00      1.00      1753
weighted avg       1.00      1.00      1.00      1753

Test Score
               precision    recall  f1-score   support

           0       0.89      0.85      0.87        91
           1       0.90      0.71      0.80        90
           2       0.58      0.48      0.52        90

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [21]:
parameters = {
    # 'penalty': ['l1', 'l2', 'elasticnet'],
    'C' : [0.001, 0.01, 0.1,1],
    'solver': [ 'newton-cg','sag','saga','lbfgs'],
    'max_iter' : [50,100],
    'multi_class' : ['auto','ovr','multinomial']
}

In [22]:
# 50m 20.9s
grid = GridSearchCV(lr,parameters,cv=5,n_jobs=-1,scoring='accuracy')
grid.fit(x_train, y_train)
print("Best parameter (CV score=%0.3f):" % grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_)

Best parameter (CV score=0.857):
{'C': 0.01, 'max_iter': 50, 'multi_class': 'auto', 'solver': 'saga'}
LogisticRegression(C=0.01, max_iter=50, solver='saga')




In [29]:
lr1 = LogisticRegression(C= 0.01, max_iter= 50, multi_class = 'auto', solver = 'saga')
# lr1 = LogisticRegression(C= 1, max_iter= 100, multi_class = 'ovr', solver = 'sag')

In [30]:
lr1

LogisticRegression(C=0.01, max_iter=50, solver='saga')

In [31]:
lr1.fit(x_train,y_train)

train_yhat = lr1.predict(x_train)
train_acc = accuracy_score(train_yhat,y_train)

test_yhat = lr1.predict(x_test)
test_acc = accuracy_score(test_yhat,y_test)


print('Train Score\n',classification_report(y_train,train_yhat))
print('Test Score\n',classification_report(y_test,test_yhat))
print(f"AVG Accuracy-Score Train: {train_acc}\nAVG Accuracy-Score Test: {test_acc}")

Train Score
               precision    recall  f1-score   support

           0       1.00      0.99      1.00       173
           1       0.99      0.99      0.99       175
           2       0.99      1.00      1.00       177
           3       0.99      0.99      0.99       178
           4       0.98      0.94      0.96       174
           5       0.97      1.00      0.98       175
           6       1.00      0.99      1.00       174
           7       0.98      0.98      0.98       175
           8       0.97      0.96      0.97       174
           9       0.97      0.99      0.98       178

    accuracy                           0.98      1753
   macro avg       0.98      0.98      0.98      1753
weighted avg       0.98      0.98      0.98      1753

Test Score
               precision    recall  f1-score   support

           0       0.86      0.84      0.85        91
           1       0.95      0.64      0.77        90
           2       0.79      0.49      0.60        90


The max_iter was reached which means the coef_ did not converge



In [24]:
plot_grid_search(grid)

In [25]:
table_grid_search(grid)


this method is deprecated in favour of `Styler.hide(axis="index")`

