In [1]:
from sklearn.datasets import load_breast_cancer
import pandas as pd

## Load Data

In [8]:
# Load scikit-learn's bundled breast-cancer dataset into pandas objects.
bunch = load_breast_cancer()
features = pd.DataFrame(bunch['data'], columns=bunch['feature_names'])
# In this dataset label 0 corresponds to target_names[0] ('malignant') and
# label 1 to target_names[1] ('benign'). Name the Series after the class
# encoded as 1 so the name agrees with what a value of 1 actually means.
target = pd.Series(bunch['target'], name=bunch['target_names'][1])

## Split

In [9]:
from sklearn.model_selection import train_test_split

# Hold out a fraction of the rows for evaluation; the fixed random_state
# makes the split reproducible across runs.
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=test_size,
    random_state=42,
)

## Model Training

We're not aiming for the best possible model here, just a working example.

In [18]:
from sklearn.linear_model import LogisticRegression

# Raise the iteration cap well above the default so the solver is able to
# converge.
lr = LogisticRegression(max_iter=10_000)

In [19]:
# Fit the classifier on the training split.
lr.fit(X=X_train, y=y_train)

How do we do on the test set?

In [20]:
# Evaluate the fitted model on the held-out test split.
lr.score(X=X_test, y=y_test)

0.956140350877193

In [21]:
# Predicted class label for every row of the test split.
lr.predict(X=X_test)

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 0, 0])

## Save Model

In [22]:
# Display the estimator object that is about to be pickled.
lr

In [23]:
import pickle
from pathlib import Path

In [24]:
# Serialize the fitted model to a pickle file (path is relative to the
# current working directory).
pickle_path = Path("breast_cancer_regression.pickle")
with pickle_path.open("wb") as model_file:
    pickle.dump(lr, model_file)

Load it back in just to be sure it worked properly.

In [25]:
# Deserialize the model from pickle_path. Only unpickle files you trust:
# unpickling can execute arbitrary code.
model2 = pickle.loads(pickle_path.read_bytes())

In [26]:
# A matching score alone could hide differing predictions, so first check
# that the reloaded model predicts exactly what the in-memory model does.
assert (model2.predict(X_test) == lr.predict(X_test)).all(), "reloaded model differs from original"
# Then show the reloaded model's test-set score for a visual comparison.
model2.score(X_test, y_test)

0.956140350877193