In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score,accuracy_score,confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import GridSearchCV

from grid_search_utils import plot_grid_search,table_grid_search

from data_prepare_func import convert_to_array

In [2]:
x_kit,y_kit = convert_to_array("data_fr_kittinan/",size=28)
x_diy,y_diy = convert_to_array('data_writing_diy/',size=28)

In [3]:
x = np.append(x_kit,x_diy,axis=0)

y = np.append(y_kit,y_diy,axis=0)

In [6]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42,stratify=y)

In [9]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

In [10]:
lr = LogisticRegression()

In [11]:
lr.fit(x_train,y_train)

train_yhat = lr.predict(x_train)
train_acc = accuracy_score(train_yhat,y_train)

test_yhat = lr.predict(x_test)
test_acc = accuracy_score(test_yhat,y_test)


print('Train Score\n',classification_report(y_train,train_yhat))
print('Test Score\n',classification_report(y_test,test_yhat))
print(f"AVG Accuracy-Score Train: {train_acc}\nAVG Accuracy-Score Test: {test_acc}")

Train Score
               precision    recall  f1-score   support

           0       0.98      0.98      0.98       185
           1       1.00      0.99      1.00       185
           2       1.00      0.99      0.99       187
           3       0.99      0.97      0.98       188
           4       0.97      0.94      0.96       185
           5       0.93      0.93      0.93       185
           6       0.87      0.97      0.92       185
           7       0.99      0.98      0.98       185
           8       0.99      0.96      0.98       184
           9       0.98      0.97      0.97       190

    accuracy                           0.97      1859
   macro avg       0.97      0.97      0.97      1859
weighted avg       0.97      0.97      0.97      1859

Test Score
               precision    recall  f1-score   support

           0       0.88      0.91      0.89        79
           1       0.95      0.89      0.92        80
           2       0.85      0.72      0.78        80

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [12]:
parameters = {
    # 'penalty': ['l1', 'l2', 'elasticnet'],
    'C' : [0.001, 0.01, 0.1,1],
    'solver': [ 'newton-cg','sag','saga','lbfgs'],
    'max_iter' : [50,100],
    'multi_class' : ['auto','ovr','multinomial']
}

In [13]:
# 50m 20.9s
grid = GridSearchCV(lr,parameters,cv=5,n_jobs=-1,scoring='accuracy')
grid.fit(x_train, y_train)
print("Best parameter (CV score=%0.3f):" % grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_)

Best parameter (CV score=0.789):
{'C': 0.01, 'max_iter': 50, 'multi_class': 'auto', 'solver': 'sag'}
LogisticRegression(C=0.01, max_iter=50, solver='sag')




In [14]:
lr1 = LogisticRegression(C= 0.01, max_iter= 50, multi_class = 'auto', solver = 'sag')
# lr1 = LogisticRegression(C= 1, max_iter= 100, multi_class = 'ovr', solver = 'sag')

In [15]:
lr1

LogisticRegression(C=0.01, max_iter=50, solver='sag')

In [16]:
lr1.fit(x_train,y_train)

train_yhat = lr1.predict(x_train)
train_acc = accuracy_score(train_yhat,y_train)

test_yhat = lr1.predict(x_test)
test_acc = accuracy_score(test_yhat,y_test)


print('Train Score\n',classification_report(y_train,train_yhat))
print('Test Score\n',classification_report(y_test,test_yhat))
print(f"AVG Accuracy-Score Train: {train_acc}\nAVG Accuracy-Score Test: {test_acc}")

Train Score
               precision    recall  f1-score   support

           0       0.99      0.98      0.99       185
           1       0.99      0.98      0.99       185
           2       0.96      0.99      0.98       187
           3       0.98      0.97      0.98       188
           4       0.99      0.89      0.94       185
           5       0.89      0.91      0.90       185
           6       0.86      0.96      0.91       185
           7       0.95      0.93      0.94       185
           8       0.97      0.90      0.93       184
           9       0.92      0.97      0.94       190

    accuracy                           0.95      1859
   macro avg       0.95      0.95      0.95      1859
weighted avg       0.95      0.95      0.95      1859

Test Score
               precision    recall  f1-score   support

           0       0.92      0.96      0.94        79
           1       0.93      0.93      0.93        80
           2       0.93      0.81      0.87        80



In [17]:
plot_grid_search(grid)

In [25]:
table_grid_search(grid)


this method is deprecated in favour of `Styler.hide(axis="index")`

