# Logistic Regression with Implementation

## Introduction

Logistic regression is one of the most fundamental machine learning models for binary classification. I will summarize its methodology and implement it from scratch using NumPy.

The problem we solve is **binary classification**. For example, a doctor would like to use a patient's features, such as mean radius and mean texture, to classify a breast tumor into one of the following two cases:

- "malignant":  𝑦=1 
- "benign":  𝑦=0 

which correspond to the serious and the mild case, respectively.

We will load the breast cancer data from scikit-learn as a toy dataset, and split the data into the training and test datasets.

## Logistic Regression Model

[To be continued.]

## Numpy Implementation of Logistic Regression

In [1]:
import random
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import tensorflow as tf

In [2]:
np.random.seed(71)

In [3]:
class LogisticRegression(object):
    """NumPy implementation of binary Logistic Regression.

    The model is p(y = 1 | x) = sigmoid(x . w + b) and is trained by
    mini-batch gradient descent on the mean cross-entropy loss.

    Typical usage: construct the object, call ``get_data`` with the training
    set, then ``fit``; afterwards ``predict`` returns probabilities and
    ``get_coeff`` returns the learned parameters.
    """

    def __init__(self, batch_size=64, lr=0.01, n_epochs=1000):
        """Store the training hyper-parameters.

        Args:
            batch_size: Number of examples used per gradient step.
            lr: Learning rate (step size) of gradient descent.
            n_epochs: Number of passes over the training data.
        """
        self.batch_size = batch_size
        self.lr = lr
        self.n_epochs = n_epochs

    def get_data(self, X_train, y_train, shuffle=True):
        """Attach the training set, optionally shuffling it once.

        Args:
            X_train: Array-like of shape (n_examples, n_inputs).
            y_train: Array-like holding one 0/1 label per row of ``X_train``.
            shuffle: If True, apply a single random permutation (drawn with
                the ``random`` module) to the rows of both arrays.

        Raises:
            ValueError: If ``X_train`` is not 2-D, or if the number of labels
                differs from the number of rows of ``X_train``.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        if X_train.ndim != 2:
            raise ValueError(
                'X_train must be 2-D, got {0} dimension(s)'.format(X_train.ndim))
        if y_train.ndim == 0 or y_train.shape[0] != X_train.shape[0]:
            raise ValueError(
                'X_train and y_train must contain the same number of examples')

        # Get the numbers of examples and inputs.
        self.n_examples, self.n_inputs = X_train.shape

        if shuffle:
            idx = list(range(self.n_examples))
            random.shuffle(idx)
            # The same permutation is applied to both arrays so that every
            # row keeps its own label.
            X_train, y_train = X_train[idx], y_train[idx]

        self.X_train = X_train
        self.y_train = y_train

    def _create_weights(self):
        """Create model weights and bias, both initialized to zero."""
        self.w = np.zeros((self.n_inputs, 1))
        self.b = np.zeros((1, 1))

    def _logit(self, X):
        """Logit: unnormalized log probability, X . w + b."""
        return np.matmul(X, self.w) + self.b

    def _sigmoid(self, logit):
        """Sigmoid function by stabilization trick.

        sigmoid(z) = 1 / (1 + exp(-z))
                   = exp(z) / (1 + exp(z))
                   = exp(z - z_max) / (exp(-z_max) + exp(z - z_max)),
        where z is the logit and z_max = max(0, z). The last line multiplies
        numerator and denominator by exp(-z_max); both exponents are then
        <= 0, so exp() cannot overflow.
        """
        logit_max = np.maximum(0, logit)
        logit_stable = logit - logit_max
        exp_stable = np.exp(logit_stable)
        return exp_stable / (np.exp(-logit_max) + exp_stable)

    def _model(self, X):
        """Logistic regression model: predicted probability of y = 1."""
        return self._sigmoid(self._logit(X))

    def _loss(self, y, logit):
        """Cross entropy loss by stabilization trick.

        loss(y, z)
          = -1/n * sum_i [y_i * log p(y_i = 1|x_i)
                          + (1 - y_i) * log p(y_i = 0|x_i)]
          = -1/n * sum_i [y_i * (z_i - lse(z_i)) + (1 - y_i) * (-lse(z_i))],
        where z is the logit and lse(z) = log(1 + exp(z)), because
          log p(y = 1|x) = log(exp(z) / (1 + exp(z))) = z - lse(z),
          log p(y = 0|x) = log(1 / (1 + exp(z)))      = -lse(z).
        lse is evaluated stably with z_max = max(0, z):
          lse(z) = log(exp(0) + exp(z))
                 = z_max + log(exp(-z_max) + exp(z - z_max)).

        The per-example losses are kept in ``self.cross_entropy``; the mean
        over the batch is returned.
        """
        logit_max = np.maximum(0, logit)
        logit_stable = logit - logit_max
        logsumexp_stable = logit_max + np.log(
            np.exp(-logit_max) + np.exp(logit_stable))
        self.cross_entropy = -(y * (logit - logsumexp_stable)
                               + (1 - y) * (-logsumexp_stable))
        return np.mean(self.cross_entropy)

    def _optimize(self, X, y):
        """Take one gradient descent step on a mini-batch.

        Args:
            X: Batch features of shape (m, n_inputs).
            y: Batch labels of shape (m, 1); ``fit`` reshapes them this way
                so that they line up with the model output.
        """
        m = X.shape[0]

        # Gradient of the mean cross entropy w.r.t. the logit is (p - y).
        residual = self._model(X) - y
        dw = np.matmul(X.T, residual) / m
        db = np.mean(residual)

        self.w -= self.lr * dw
        self.b -= self.lr * db

    def _fetch_batch(self):
        """Yield consecutive (X, y) mini-batches; the last may be smaller."""
        for start in range(0, self.n_examples, self.batch_size):
            stop = start + self.batch_size
            # Slicing clamps at the end of the array, so no explicit min().
            yield self.X_train[start:stop], self.y_train[start:stop]

    def fit(self):
        """Fit model.

        Weights are re-initialized on every call. Every 100 epochs the
        example-weighted average training loss of that epoch is printed;
        each batch loss is measured right after the step on that batch.

        Returns:
            self, to allow call chaining.
        """
        self._create_weights()

        for epoch in range(1, self.n_epochs + 1):
            total_loss = 0
            for X_train_b, y_train_b in self._fetch_batch():
                # Column vector, matching the (m, 1) model output.
                y_train_b = y_train_b.reshape((y_train_b.shape[0], -1))
                self._optimize(X_train_b, y_train_b)
                train_loss = self._loss(y_train_b, self._logit(X_train_b))
                total_loss += train_loss * X_train_b.shape[0]

            if epoch % 100 == 0:
                print('epoch {0}: training loss {1}'.format(epoch, total_loss / self.n_examples))

        return self

    def get_coeff(self):
        """Return (bias of shape (1, 1), weights flattened to (n_inputs,))."""
        return self.b, self.w.reshape((-1,))

    def predict(self, X):
        """Return the predicted probabilities of y = 1 as a 1-D array."""
        return self._model(X).reshape((-1,))

## Data Preparation and Preprocessing

In [5]:
import sklearn
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression as LogisticRegressionSklearn

import sys
sys.path.append('../numpy/')
from metrics import accuracy

In [6]:
%load_ext autoreload
%autoreload 2

In [7]:
# Read breast cancer data.
X, y = load_breast_cancer(return_X_y=True)

In [8]:
X.shape, y.shape

((569, 30), (569,))

In [9]:
X[:3]

array([[1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,
        3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,
        8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,
        3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,
        1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, 1.326e+03, 8.474e-02, 7.864e-02,
        8.690e-02, 7.017e-02, 1.812e-01, 5.667e-02, 5.435e-01, 7.339e-01,
        3.398e+00, 7.408e+01, 5.225e-03, 1.308e-02, 1.860e-02, 1.340e-02,
        1.389e-02, 3.532e-03, 2.499e+01, 2.341e+01, 1.588e+02, 1.956e+03,
        1.238e-01, 1.866e-01, 2.416e-01, 1.860e-01, 2.750e-01, 8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, 1.203e+03, 1.096e-01, 1.599e-01,
        1.974e-01, 1.279e-01, 2.069e-01, 5.999e-02, 7.456e-01, 7.869e-01,
        4.585e+00, 9.403e+01, 6.150e-03, 4.006e-02, 3.832e-02, 2.058e-02,
        2.250e-02, 4.571e-03, 2.357e

In [10]:
y[:3]

array([0, 0, 0])

In [11]:
# Split data into training and test datasets.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=71, shuffle=True, stratify=y)

In [12]:
print(X_train_raw.shape, y_train.shape)
print(X_test_raw.shape, y_test.shape)

(426, 30) (426,)
(143, 30) (143,)


In [13]:
# Feature scaling: fit a min-max scaler on the training data and apply it to both splits.
min_max_scaler = MinMaxScaler()

X_train = min_max_scaler.fit_transform(X_train_raw)
X_test = min_max_scaler.transform(X_test_raw)

In [14]:
# Convert arrays to float32.
X_train, X_test, y_train, y_test = (
    np.float32(X_train), np.float32(X_test), np.float32(y_train), np.float32(y_test))

In [15]:
X_train.dtype, y_train.dtype

(dtype('float32'), dtype('float32'))

## Fitting Logistic Regression in NumPy

In [16]:
# Fit our Logistic Regression.
logreg = LogisticRegression(batch_size=64, lr=1, n_epochs=1000)

In [17]:
# Attach the (shuffled) training data to the model.
logreg.get_data(X_train, y_train, shuffle=True)

In [18]:
logreg.fit()

epoch 100: training loss 0.10443525190662527
epoch 200: training loss 0.08464973705789447
epoch 300: training loss 0.07583937135941907
epoch 400: training loss 0.0705509468784471
epoch 500: training loss 0.06690170616830086
epoch 600: training loss 0.06417783027799635
epoch 700: training loss 0.062042051819268855
epoch 800: training loss 0.060310190425810026
epoch 900: training loss 0.058870828091813716
epoch 1000: training loss 0.05765140534755181


<__main__.LogisticRegression at 0x7ff8b01a4da0>

In [19]:
# Get coefficient.
logreg.get_coeff()

(array([[16.49769365]]),
 array([-1.23763372, -3.14823343, -1.28327428, -2.66976833, -1.26073015,
         1.70359925, -4.61596622, -6.73301166, -1.78754085,  2.78448706,
        -8.13104215, -0.45917356, -6.31535977, -4.8076773 ,  1.5589128 ,
         4.22185062,  1.95796959,  1.21993   ,  3.15256459,  3.1790416 ,
        -5.71007754, -4.96241985, -5.04347022, -5.31482161, -3.47833738,
        -0.64314481, -4.26656352, -5.36190171, -3.13587351, -1.2303652 ]))

In [20]:
# Predicted probabilities for training data.
p_train_ = logreg.predict(X_train)
p_train_[:10]

array([9.95420960e-01, 9.14158593e-13, 9.60845611e-05, 9.98332667e-01,
       9.99162754e-01, 9.99462799e-01, 4.39143844e-03, 1.27917133e-03,
       9.99966843e-01, 1.38193279e-02])

In [21]:
# Predicted labels for training data.
y_train_ = (p_train_ > 0.5) * 1
y_train_[:3]

array([1, 0, 0])

In [22]:
# Prediction accuracy for training data.
accuracy(y_train, y_train_)

0.9859154929577465

In [23]:
# Predicted probabilities and labels for test data.
p_test_ = logreg.predict(X_test)
print(p_test_[:10])
y_test_ = (p_test_ > 0.5) * 1

[1.62551196e-04 9.99932646e-01 3.80863555e-03 9.97752557e-01
 5.92904880e-05 9.99873548e-01 3.75842444e-06 9.99999685e-01
 4.65653085e-01 6.78878009e-09]


In [24]:
# Prediction accuracy for test data.
accuracy(y_test, y_test_)

0.9790209790209791

## PyTorch Implementation of Logistic Regression

In [25]:
class LogisticRegressionTorch(nn.Module):
    """PyTorch implementation of Logistic Regression.

    Typical usage: construct the object, call ``get_data`` with the training
    set, ``build`` to create model/loss/optimizer, then ``fit``; afterwards
    ``predict`` returns probabilities and ``get_coeff`` the parameters.
    """

    def __init__(self, batch_size=64, lr=0.01, n_epochs=1000):
        """Store the training hyper-parameters.

        Args:
            batch_size: Number of examples used per gradient step.
            lr: Learning rate passed to the SGD optimizer.
            n_epochs: Number of passes over the training data.
        """
        super(LogisticRegressionTorch, self).__init__()
        self.batch_size = batch_size
        self.lr = lr
        self.n_epochs = n_epochs

    def get_data(self, X_train, y_train, shuffle=True):
        """Get dataset and information.

        Args:
            X_train: NumPy array of shape (n_examples, n_inputs).
            y_train: NumPy array with one 0/1 label per row of ``X_train``.
            shuffle: If True, apply a single random permutation (drawn with
                the ``random`` module) to the rows of both arrays.
        """
        self.X_train = X_train
        self.y_train = y_train

        # Get the numbers of examples and inputs.
        self.n_examples, self.n_inputs = self.X_train.shape

        if shuffle:
            idx = list(range(self.n_examples))
            random.shuffle(idx)
            self.X_train = self.X_train[idx]
            self.y_train = self.y_train[idx]

    def _create_model(self):
        """Create logistic regression model: linear layer plus sigmoid."""
        self.model = nn.Sequential(
            nn.Linear(self.n_inputs, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the predicted probability of y = 1 for each row of ``x``."""
        return self.model(x)

    def _create_loss(self):
        """Create (binary) cross entropy loss."""
        self.criterion = nn.BCELoss()

    def _create_optimizer(self):
        """Create optimizer by stochastic gradient descent."""
        self.optimizer = optim.SGD(self.parameters(), lr=self.lr)

    def build(self):
        """Build model, loss function and optimizer.

        Must be called after ``get_data`` because the input size of the
        linear layer is read from the training data.
        """
        self._create_model()
        self._create_loss()
        self._create_optimizer()

    def _fetch_batch(self):
        """Yield consecutive (X, y) mini-batches; the last may be smaller."""
        idx = list(range(self.n_examples))
        for i in range(0, self.n_examples, self.batch_size):
            idx_batch = idx[i:i + self.batch_size]
            yield (self.X_train.take(idx_batch, axis=0),
                   self.y_train.take(idx_batch, axis=0))

    def fit(self):
        """Fit model.

        Every 100 epochs the example-weighted average training loss of that
        epoch is printed; each batch loss is measured before the step on
        that batch.
        """
        for epoch in range(1, self.n_epochs + 1):
            total_loss = 0.0
            for X_train_b, y_train_b in self._fetch_batch():
                # Convert to float32 tensors (the dtype of the model's
                # parameters and the one BCELoss expects for targets) and
                # reshape ys into a column matching the model output.
                X_train_b = torch.as_tensor(X_train_b, dtype=torch.float32)
                y_train_b = torch.as_tensor(
                    y_train_b, dtype=torch.float32).reshape(-1, 1)

                y_pred_b = self.model(X_train_b)
                batch_loss = self.criterion(y_pred_b, y_train_b)
                # Accumulate a plain Python float: summing the tensor itself
                # would keep every batch's autograd graph alive until the
                # end of the epoch.
                total_loss += batch_loss.item() * X_train_b.shape[0]

                # Zero grads, performs backward pass, and update weights.
                self.optimizer.zero_grad()
                batch_loss.backward()
                self.optimizer.step()

            if epoch % 100 == 0:
                print('Epoch {0}: training loss: {1}'
                      .format(epoch, total_loss / self.n_examples))

    def get_coeff(self):
        """Get model coefficients as NumPy arrays: (bias, weight)."""
        # Detach var which require grad.
        linear = self.model[0]
        return (linear.bias.detach().numpy(),
                linear.weight.detach().numpy())

    def predict(self, X):
        """Predict for new data.

        Args:
            X: Array of shape (n, n_inputs); converted to float32.

        Returns:
            1-D NumPy array with the predicted probabilities of y = 1.
        """
        with torch.no_grad():
            X_ = torch.as_tensor(X, dtype=torch.float32)
            return self.model(X_).numpy().reshape((-1,))

## Fitting Logistic Regression in PyTorch

In [26]:
# Fit PyTorch Logistic Regression.
logreg_torch = LogisticRegressionTorch(batch_size=64, lr=0.5, n_epochs=1000)

In [27]:
logreg_torch.get_data(X_train, y_train, shuffle=True)

In [28]:
logreg_torch.build()

In [29]:
logreg_torch.model

Sequential(
  (0): Linear(in_features=30, out_features=1, bias=True)
  (1): Sigmoid()
)

In [30]:
logreg_torch.fit()

Epoch 100: training loss: 0.13479387760162354
Epoch 200: training loss: 0.10654068738222122
Epoch 300: training loss: 0.09373412281274796
Epoch 400: training loss: 0.08608490973711014
Epoch 500: training loss: 0.0808713510632515
Epoch 600: training loss: 0.077021524310112
Epoch 700: training loss: 0.07402186095714569
Epoch 800: training loss: 0.07159381359815598
Epoch 900: training loss: 0.06957228481769562
Epoch 1000: training loss: 0.06785281747579575


In [31]:
# Get coefficient.
logreg_torch.get_coeff()

(array([13.916823], dtype=float32),
 array([[-1.4268652 , -2.549197  , -1.5523304 , -2.5370297 , -0.7911085 ,
          0.5448145 , -4.0649996 , -5.915671  , -1.0603013 ,  2.8874927 ,
         -5.3981338 , -0.49421582, -4.305504  , -3.3629694 ,  0.9630922 ,
          3.2628772 ,  1.6829526 ,  1.0362617 ,  2.422982  ,  2.5293124 ,
         -4.5462875 , -4.24545   , -4.2334833 , -4.2405033 , -2.9201255 ,
         -0.96793085, -3.162557  , -4.6634545 , -2.2361047 , -0.7845473 ]],
       dtype=float32))

In [32]:
# Predicted probabilities for training data.
p_train_ = logreg_torch.predict(X_train)
p_train_[:10]

array([9.9009502e-01, 4.7314323e-11, 2.9769778e-04, 9.9505866e-01,
       9.9718696e-01, 9.9842620e-01, 7.1979985e-03, 2.7312785e-03,
       9.9983168e-01, 1.9007621e-02], dtype=float32)

In [33]:
# Predicted labels for training data.
y_train_ = (p_train_ > 0.5) * 1
y_train_[:3]

array([1, 0, 0])

In [34]:
# Prediction accuracy for training data.
accuracy(y_train, y_train_)

0.9812206572769953

In [35]:
# Predicted probabilities and labels for test data.
p_test_ = logreg_torch.predict(X_test)
print(p_test_[:10])
y_test_ = (p_test_ > 0.5) * 1

[6.8844593e-04 9.9967468e-01 1.7188739e-02 9.9625444e-01 3.1697255e-04
 9.9954104e-01 2.4362822e-05 9.9999559e-01 5.2819997e-01 1.0668931e-07]


In [36]:
# Prediction accuracy for test data.
accuracy(y_test, y_test_)

0.965034965034965

## TensorFlow Implementation of Logistic Regression

In [37]:
def reset_tf_graph(seed=71):
    """Reset default TensorFlow graph and re-seed the random generators.

    Args:
        seed: Integer seed applied to TensorFlow's graph-level seed, to
            NumPy's global generator and to Python's ``random`` module.
    """
    # Seed the new graph only after the old one has been discarded.
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    # The get_data() methods in this file shuffle with random.shuffle, so
    # the ``random`` module must be seeded too for runs to be repeatable.
    random.seed(seed)


class LogisticRegressionTF(object):
    """A TensorFlow implementation of Logistic Regression.

    Written against the graph/session API (``tf.placeholder``,
    ``tf.Session``). Typical usage: construct the object, call ``get_data``,
    ``build_graph`` and ``fit``; ``get_coeff`` and ``predict`` then read the
    trained variables back from the checkpoint written by ``fit``.
    """

    def __init__(self, batch_size=64, learning_rate=0.01, n_epochs=1000,
                 checkpoint_path='checkpoints/logreg'):
        """Store the training hyper-parameters.

        Args:
            batch_size: Number of examples used per gradient step.
            learning_rate: Step size of the gradient descent optimizer.
            n_epochs: Number of passes over the training data.
            checkpoint_path: Path prefix where ``fit`` saves the variables
                and from which ``get_coeff``/``predict`` restore them.
        """
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.learning_rate = learning_rate
        self.checkpoint_path = checkpoint_path

    def get_data(self, X_train, y_train, shuffle=True):
        """Get dataset and information.

        Args:
            X_train: NumPy array of shape (n_examples, n_inputs).
            y_train: NumPy array with one label per row of ``X_train``.
            shuffle: If True, apply a single random permutation (drawn with
                the ``random`` module) to the rows of both arrays.
        """
        self.X_train = X_train
        self.y_train = y_train

        # Get the numbers of examples and inputs.
        self.n_examples, self.n_inputs = self.X_train.shape

        idx = list(range(self.n_examples))
        if shuffle:
            random.shuffle(idx)
        # Indexing with a list copies the data even when not shuffling.
        self.X_train = self.X_train[idx]
        self.y_train = self.y_train[idx]

    def _create_placeholders(self):
        """Create placeholder for features and labels."""
        self.X = tf.placeholder(tf.float32, shape=(None, self.n_inputs), name='X')
        self.y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

    def _create_weights(self):
        """Create and initialize model weights and bias."""
        self.w = tf.get_variable(shape=[self.n_inputs, 1],
                                 initializer=tf.random_normal_initializer(),
                                 name='weights')
        self.b = tf.get_variable(shape=[1],
                                 initializer=tf.zeros_initializer(),
                                 name='bias')

    def _logit(self, X):
        """Logit: unnormalized log probability."""
        return tf.matmul(X, self.w) + self.b

    def _model(self, X):
        """Logistic regression model: sigmoid applied to the logits."""
        return tf.math.sigmoid(self._logit(X))

    def _create_model(self):
        """Create the logits of the model for the feature placeholder."""
        self.logits = self._logit(self.X)

    def _create_loss(self):
        """Create the mean sigmoid cross entropy loss on the logits."""
        self.cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=self.y,
            logits=self.logits,
            name='cross_entropy')
        self.loss = tf.reduce_mean(self.cross_entropy, name='loss')

    def _create_optimizer(self):
        """Create the gradient descent training op minimizing the loss."""
        self.optimizer = (
            tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
            .minimize(self.loss))

    def build_graph(self):
        """Build computational graph.

        Must be called after ``get_data`` because the placeholder and weight
        shapes are read from the training data.
        """
        self._create_placeholders()
        self._create_weights()
        self._create_model()
        self._create_loss()
        self._create_optimizer()

    def _fetch_batch(self):
        """Yield consecutive (X, y) mini-batches; y is an (m, 1) column."""
        for start in range(0, self.n_examples, self.batch_size):
            stop = start + self.batch_size
            # Slicing clamps at the end of the array, so no explicit min().
            yield (self.X_train[start:stop],
                   self.y_train[start:stop].reshape(-1, 1))

    def _restore(self, sess):
        """Load the variables saved by ``fit`` into ``sess``."""
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, self.checkpoint_path)

    def fit(self):
        """Fit model and save the trained variables to the checkpoint.

        Every 100 epochs the example-weighted average training loss of that
        epoch is printed.
        """
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            for epoch in range(1, self.n_epochs + 1):
                total_loss = 0
                for X_train_b, y_train_b in self._fetch_batch():
                    feed_dict = {self.X: X_train_b, self.y: y_train_b}
                    _, batch_loss = sess.run([self.optimizer, self.loss],
                                             feed_dict=feed_dict)
                    total_loss += batch_loss * X_train_b.shape[0]

                if epoch % 100 == 0:
                    print('Epoch {0}: training loss: {1}'
                          .format(epoch, total_loss / self.n_examples))

            # Save model.
            saver.save(sess, self.checkpoint_path)

    def get_coeff(self):
        """Return (bias, weights flattened to 1-D) from the checkpoint."""
        with tf.Session() as sess:
            self._restore(sess)
            return self.b.eval(), self.w.eval().reshape((-1,))

    def predict(self, X):
        """Return predicted probabilities as a 1-D array.

        Args:
            X: Features accepted by ``tf.matmul`` against the float32
                weights. NOTE(review): the notebook passes
                ``tf.cast(X, dtype=tf.float32)``; confirm other callers.

        NOTE(review): ``self._model(X)`` is built inside this call, so new
        ops appear to be added to the default graph on every call.
        """
        with tf.Session() as sess:
            self._restore(sess)
            return self._model(X).eval().reshape((-1,))

## Fitting Logistic Regression in TensorFlow

In [38]:
reset_tf_graph()
logreg_tf = LogisticRegressionTF(batch_size=64, learning_rate=0.5, n_epochs=1000)

In [39]:
logreg_tf.get_data(X_train, y_train, shuffle=True)

In [40]:
logreg_tf.build_graph()

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [41]:
logreg_tf.fit()

Epoch 100: training loss: 0.13781684776986708
Epoch 200: training loss: 0.10861393060762557
Epoch 300: training loss: 0.0952674589456527
Epoch 400: training loss: 0.08727941794994291
Epoch 500: training loss: 0.08183460252385744
Epoch 600: training loss: 0.07781627970122396
Epoch 700: training loss: 0.0746875535084608
Epoch 800: training loss: 0.07215703943385765
Epoch 900: training loss: 0.07005189100341618
Epoch 1000: training loss: 0.06826270389165118


In [42]:
logreg_tf.get_coeff()

INFO:tensorflow:Restoring parameters from checkpoints/logreg


(array([13.403018], dtype=float32),
 array([-0.54071563, -3.1991444 , -1.1178484 , -2.0254283 , -0.46014005,
         0.20824195, -3.6817396 , -7.0875072 , -1.1937386 ,  2.9473922 ,
        -4.1283927 ,  0.10490878, -4.998158  , -3.8820248 ,  0.65882015,
         2.992421  ,  2.3148916 ,  1.416513  ,  2.680786  ,  2.947883  ,
        -4.6859794 , -3.997797  , -4.4395123 , -4.3473682 , -2.385345  ,
        -1.0599583 , -3.4756687 , -4.733497  , -2.6979213 ,  0.02590139],
       dtype=float32))

In [43]:
# Predicted probabilities for training data.
p_train_ = logreg_tf.predict((tf.cast(X_train, dtype=tf.float32)))
print(p_train_[:10])

# Predicted labels for training data.
y_train_ = (p_train_ > 0.5) * 1
print(y_train_[:10])

# Prediction accuracy for training data.
accuracy(y_train, y_train_)

INFO:tensorflow:Restoring parameters from checkpoints/logreg
[9.8927605e-01 0.0000000e+00 2.9647350e-04 9.9541211e-01 9.9769449e-01
 9.9869823e-01 7.1364641e-03 3.1077266e-03 9.9985236e-01 2.0974845e-02]
[1 0 0 1 1 1 0 0 1 0]


0.9835680751173709

In [44]:
# Predicted probabilities for test data.
p_test_ = logreg_tf.predict((tf.cast(X_test, dtype=tf.float32)))
print(p_test_[:10])

# Predicted labels for test data.
y_test_ = (p_test_ > 0.5) * 1
y_test_[:3]

# Prediction accuracy for test data.
accuracy(y_test, y_test_)

INFO:tensorflow:Restoring parameters from checkpoints/logreg
[9.1996789e-04 9.9968386e-01 2.1772951e-02 9.9691284e-01 3.2475591e-04
 9.9951923e-01 2.4586916e-05 9.9999517e-01 4.9374732e-01 0.0000000e+00]


0.972027972027972

## Benchmark with Sklearn's Logistic Regression

In [53]:
# Fit sklearn's Logistic Regression.
logreg_sk = LogisticRegressionSklearn(C=1e4, solver='lbfgs', max_iter=500)

logreg_sk.fit(X_train, y_train.reshape(y_train.shape[0], ))

LogisticRegression(C=10000.0, max_iter=500)

In [54]:
# Get coefficients.
logreg_sk.intercept_, logreg_sk.coef_

(array([56.11602541]),
 array([[  53.63136958,  -27.25425984,   48.36703272,   10.51964358,
          -14.78868103,   98.61420511,  -52.60232735,  -52.14197294,
           -5.08109245,  -54.05120202,  -33.96603618,   -5.49346996,
          -19.37165788,  -44.0207036 ,   38.75866908,  -51.4588702 ,
           83.2564445 ,  -21.93862938,   14.99166539,   80.01396352,
          -59.13781692,   -3.93722124,  -63.6977547 , -103.97878785,
           -7.95987729,   20.00391142,  -21.92274902,  -21.35024576,
          -21.57544385,  -11.66180764]]))

In [55]:
# Predicted labels for training data.
p_train_ = logreg_sk.predict(X_train)
p_train_[:3]

array([1., 0., 0.], dtype=float32)

In [56]:
y_train_ = (p_train_ > 0.5) * 1

In [57]:
# Predicted label correctness for training data.
# y_pred_train == y_train

In [58]:
# Prediction accuracy for training data.
accuracy(y_train, y_train_)

1.0

In [59]:
# Predicted labels for test data.
p_test_ = logreg_sk.predict(X_test)
y_test_ = (p_test_ > 0.5) * 1

In [60]:
# Prediction accuracy for test data.
accuracy(y_test, y_test_)

0.965034965034965