In [None]:
import pandas as pd

# Load the training set and split it into features and target: 'cuisine' is
# the label column, and 'id' is dropped together with it so that neither
# column ends up in the feature matrix.
training_binary = pd.read_csv("../input/training_binary.csv")
x_train = training_binary.drop(columns=['cuisine', 'id'])
y_train = training_binary['cuisine']

# Load the test set and split it exactly the same way.
test_binary = pd.read_csv("../input/test_binary.csv")
x_test = test_binary.drop(columns=['cuisine', 'id'])
y_test = test_binary['cuisine']

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline model: a logistic-regression classifier with every hyper-parameter
# left at its scikit-learn default, fitted on the training features/labels.
clf = LogisticRegression()
clf.fit(X=x_train, y=y_train)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix on the training data (model fitted with default parameters).
training_predictions = clf.predict(x_train)

# Build the label list once and sort it: iterating a set of strings has no
# guaranteed order, so an unsorted list could change the row/column layout of
# the matrix from one kernel run to the next.
cuisine_labels = sorted(set(y_train))
cm = pd.DataFrame(
    confusion_matrix(y_train, training_predictions, labels=cuisine_labels),
    index=cuisine_labels,    # rows: true cuisine
    columns=cuisine_labels,  # columns: predicted cuisine
)

In [None]:
# Performance results using default parameters

from sklearn.metrics import precision_score, recall_score, accuracy_score

# Score the default-parameter model against the data it was trained on.
baseline_accuracy = accuracy_score(y_train, training_predictions)

# Number of training rows whose predicted label differs from the true label.
baseline_mismatches = (y_train != training_predictions).sum()

# Precision and recall are micro-averaged, i.e. computed from counts pooled
# over all classes rather than averaged per class.
baseline_precision = precision_score(y_train, training_predictions, average='micro')
baseline_recall = recall_score(y_train, training_predictions, average='micro')

print("Accuracy: {}".format(baseline_accuracy))
print("Number of inequalities: {}\n".format(baseline_mismatches))
print("Average precision rate: {}".format(baseline_precision))
print("Average recall rate: {}".format(baseline_recall))

In [None]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter search over the regularisation strength C and the penalty.
c_values = [0.001, 0.01, 0.1, 1, 10, 100]

# The two penalties are searched as separate sub-grids because not every
# solver accepts every penalty. scikit-learn's current default solver
# ('lbfgs') rejects 'l1', so with a single shared grid every l1 candidate
# would fail to fit and be scored as NaN. 'saga' supports l1 and is therefore
# selected for that sub-grid only; the l2 candidates keep the default solver.
grid_params = [
    {'C': c_values, 'penalty': ['l2']},
    {'C': c_values, 'penalty': ['l1'], 'solver': ['saga']},
]

gridsearch = GridSearchCV(LogisticRegression(), grid_params)
gridsearch.fit(x_train, y_train)

In [None]:
# Per-candidate cross-validation results of the grid search, shown as a table.
cv_results = pd.DataFrame(gridsearch.cv_results_)
cv_results

In [None]:
# Refit the classifier with C=1 and an l2 penalty.
# NOTE(review): these values are hard-coded rather than read from
# `gridsearch.best_params_` - confirm they still match the search result.
clf = LogisticRegression(C=1, penalty='l2')
clf.fit(x_train, y_train)

# Confusion matrix on the training data (refitted model).
# Labels are sorted so the row/column layout is the same on every run;
# iterating a set of strings has no guaranteed order.
training_predictions = clf.predict(x_train)
train_labels = sorted(set(y_train))
cm_training = pd.DataFrame(
    confusion_matrix(y_train, training_predictions, labels=train_labels),
    index=train_labels,    # rows: true cuisine
    columns=train_labels,  # columns: predicted cuisine
)

# Confusion matrix on the test data (refitted model).
# The label list is the union of the training and test labels: the model can
# predict any class it was trained on, and confusion_matrix leaves out samples
# whose label is not listed, so indexing by the labels of y_test alone could
# silently drop predictions of a class that happens to be absent from y_test.
test_predictions = clf.predict(x_test)
test_labels = sorted(set(y_train) | set(y_test))
cm_test = pd.DataFrame(
    confusion_matrix(y_test, test_predictions, labels=test_labels),
    index=test_labels,    # rows: true cuisine
    columns=test_labels,  # columns: predicted cuisine
)

In [None]:
# Training confusion matrix.
# Left as a bare expression so the notebook renders the DataFrame as the
# cell's output.
cm_training

In [None]:
# Score the refitted model on the training set. Precision and recall are
# micro-averaged, i.e. computed from counts pooled over all classes.
train_accuracy = accuracy_score(y_train, training_predictions)
train_precision = precision_score(y_train, training_predictions, average='micro')
train_recall = recall_score(y_train, training_predictions, average='micro')

print("Accuracy on training set: {}".format(train_accuracy))
print("Precision on training set: {}".format(train_precision))
print("Recall on training set: {}".format(train_recall))

In [None]:
# Test confusion matrix.
# Left as a bare expression so the notebook renders the DataFrame as the
# cell's output.
cm_test

In [None]:
# Score the refitted model on the held-out test set. Precision and recall are
# micro-averaged, i.e. computed from counts pooled over all classes.
test_accuracy = accuracy_score(y_test, test_predictions)
test_precision = precision_score(y_test, test_predictions, average='micro')
test_recall = recall_score(y_test, test_predictions, average='micro')

print("Accuracy on test set: {}".format(test_accuracy))
print("Precision on test set: {}".format(test_precision))
print("Recall on test set: {}".format(test_recall))