In [34]:
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score, KFold

In [38]:
cancer = datasets.load_breast_cancer()

logistic_regression = LogisticRegression(max_iter=10000)

X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=12, test_size=0.2)

###Create model
model = logistic_regression.fit(X_train, y_train)
    
predictions = model.predict(X_test)

###Accuracy on train and test sets
print('Train accuracy: {:.2f}'.format(model.score(X_train, y_train)))
print('Test accuracy: {:.2f}'.format(model.score(X_test, y_test)))

###Metrics
print('Accuracy: {:.2f}'.format(metrics.accuracy_score(y_test, predictions)))
print('ROC AUC: {:.2f}'.format(metrics.roc_auc_score(y_test, predictions)))
print('F1: {:.2f}'.format(metrics.f1_score(y_test, predictions)))
print('MSE: {:.2f}'.format(metrics.mean_squared_error(y_test, predictions)))

Train accuracy: 0.96
Test accuracy: 0.94
Accuracy: 0.94
ROC AUC: 0.93
F1: 0.95
MSE: 0.06


In [39]:
cv = KFold(n_splits=5) ### Train with 5 folds

cv_score = cross_val_score (logistic_regression, X_train, y_train,
                           scoring='accuracy', cv=cv)

print('Cross val score: {}'.format(cv_score))
print('Mean cross val score: {}'.format(cv_score.mean()))

Cross val score: [0.92307692 0.96703297 0.95604396 0.97802198 0.94505495]
Mean cross val score: 0.9538461538461538
