# Logistic Regression

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn.metrics as metrics

from sklearn.base import BaseEstimator
from sklearn.datasets import make_blobs, make_circles, make_moons
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, OneHotEncoder

from scipy.special import expit, softmax

# Base seed used for the random_state of the dataset generators below.
random_seed = 42

In [None]:
#@title
def plot_dataset(X, y, ax=None, marker='o'):
  """Scatter the first two columns of X, colouring each point by y.

  Draws on `ax` when given, otherwise on the current matplotlib axes.
  """
  if not ax:
    ax = plt.gca()
  point_style = dict(s=150, cmap='coolwarm', linewidth=2,
                     edgecolor='k', marker=marker)
  ax.scatter(x=X[:, 0], y=X[:, 1], c=y, **point_style)


def create_domain_set(X):
  """Build a 100x100 evaluation grid over the first two columns of X.

  Each axis runs from the column minimum to the column maximum, padded by
  0.5 on both sides.

  Returns:
    x1, x2: the meshgrid coordinate arrays, each of shape (100, 100).
    X_line: the grid points as a (10000, 2) array, ordered so that values
      computed per point, reshaped to (100, 100) and transposed, line up
      with x1 / x2.
  """
  pad, steps = 0.5, 100
  ticks = [np.linspace(col.min() - pad, col.max() + pad, steps)
           for col in (X[:, 0], X[:, 1])]
  x1, x2 = np.meshgrid(*ticks)
  # Flatten the transposed grids so the first coordinate varies slowest.
  X_line = np.column_stack((x1.T.ravel(), x2.T.ravel()))
  return x1, x2, X_line


def plot_contours(X, y, model, ax=None, transforms=()):
  """Draw filled contours of the model's predicted probability.

  The model is evaluated on the grid returned by create_domain_set(X).
  Every object in `transforms` is applied to the grid, in order, through
  its .transform method before predicting.

  Args:
    X: data whose first two columns define the plotted region.
    y: unused; kept so the signature matches the other plot helpers.
    model: fitted estimator exposing predict_proba.
    ax: axes to draw on; defaults to the current matplotlib axes.
    transforms: iterable of fitted transformers applied to the grid.
  """
  ax = ax or plt.gca()
  x1, x2, X_line = create_domain_set(X)
  for t in transforms:
    X_line = t.transform(X_line)
  y_pred = np.asarray(model.predict_proba(X_line))
  if y_pred.ndim == 2 and y_pred.shape[1] == 2:
    # Estimators that return one column per class (e.g. scikit-learn's
    # binary classifiers): plot the probability of the second class.
    y_pred = y_pred[:, 1]
  y_pred = y_pred.reshape(100, 100)
  # Transposed because the grid points are ordered first-coordinate-major.
  ax.contourf(x1, x2, y_pred.T, cmap='coolwarm', levels=40)
  ax.set_title("Contour plots", size=16)


def plot_decision_boundary(X, y, model, ax=None, transforms=()):
  """Draw filled contours of the model's predicted class labels.

  The model is evaluated on the grid returned by create_domain_set(X).
  Every object in `transforms` is applied to the grid, in order, through
  its .transform method before predicting.

  Args:
    X: data whose first two columns define the plotted region.
    y: unused; kept so the signature matches the other plot helpers.
    model: fitted estimator exposing predict.
    ax: axes to draw on; defaults to the current matplotlib axes.
    transforms: iterable of fitted transformers applied to the grid.
  """
  ax = ax or plt.gca()
  x1, x2, X_line = create_domain_set(X)
  for t in transforms:
    X_line = t.transform(X_line)
  y_pred = model.predict(X_line)
  y_pred = y_pred.reshape(100, 100)
  # Transposed because the grid points are ordered first-coordinate-major.
  ax.contourf(x1, x2, y_pred.T, cmap='coolwarm', levels=40)
  ax.set_title("Decision boundary", size=16)


def plot_parameters(model, ax=None):
  """Draw a horizontal bar chart of the model's intercept and weights.

  Bars are labelled b0 (the intercept) followed by b1, b2, ... for the
  flattened coef_ entries.

  Args:
    model: fitted estimator exposing coef_ and intercept_.
    ax: axes to draw on; defaults to the current matplotlib axes.
  """
  ax = ax or plt.gca()
  coef = model.coef_.flatten()
  labels = ['b%d' % (i) for i in range(len(coef) + 1)]
  params = np.insert(coef, 0, model.intercept_)
  ax.barh(labels, params)
  # Title the axes that was drawn on; plt.title would target whichever
  # axes is current, which need not be `ax`.
  ax.set_title("Model parameters")

Binary logistic regression

In [None]:
# Two-cluster, two-feature toy dataset for the binary classifier.
X, y = make_blobs(n_samples=100, n_features=2, 
                  centers=2, random_state=random_seed)

In [None]:
# Visual check of the data before any scaling or fitting.
plt.figure(figsize=(6, 6))
plot_dataset(X, y)

In [None]:
class LogisticRegression(BaseEstimator):
  """Binary logistic regression fitted by full-batch gradient descent.

  Parameters:
    fit_intercept: when True, a bias term is learned alongside the weights.
    lr: step size multiplying the summed gradient at each iteration.
    max_iter: maximum number of gradient steps.
    tol: stop early once the L2 norm of a weight update drops below this.

  Attributes set by fit:
    coef_: weight vector, one entry per input feature.
    intercept_: learned bias (0.0 when fit_intercept is False).
  """

  def __init__(self, fit_intercept=True, lr=0.0001, max_iter=1000, tol=1e-4):
    self.fit_intercept = fit_intercept
    self.lr = lr
    self.max_iter = max_iter
    self.tol = tol

  def fit(self, X, y):
    """Learn the weights from X (n_samples, n_features) and labels y.

    y is assumed to hold 0/1 labels, one per row of X.

    Returns:
      self, so calls can be chained.
    """
    # Flatten so a column-vector y cannot broadcast `mu - y` to (n, n).
    y = np.asarray(y).ravel()
    if self.fit_intercept:
      X = self._add_bias_column(X)
    w = np.zeros(X.shape[1])

    for _ in range(self.max_iter):
      w_old = w.copy()
      mu = expit(X @ w)
      # Sum over samples of (mu_i - y_i) * x_i, as a single product.
      w = w - self.lr * (X.T @ (mu - y))

      if np.linalg.norm(w - w_old) < self.tol:
        break

    if self.fit_intercept:
      # Column 0 of the augmented X is the bias column.
      self.intercept_ = w[0]
      self.coef_ = w[1:]
    else:
      self.intercept_ = 0.0
      self.coef_ = w
    return self

  def predict_proba(self, X):
    """Return the sigmoid of the linear score for each row of X (1-D)."""
    return expit(self.intercept_ + X @ self.coef_)

  def predict(self, X):
    """Return the predicted probabilities rounded to 0.0 or 1.0."""
    return np.round(self.predict_proba(X))

  def _add_bias_column(self, X):
    """Prepend a column of ones to X."""
    bias = np.ones((X.shape[0], 1))
    return np.hstack((bias, X))


In [None]:
# Standardize the features; X is overwritten, so the cells below fit and
# plot in the scaled coordinates.
ss = StandardScaler()
X = ss.fit_transform(X)

In [None]:
# Fit the hand-written binary model on the standardized data.
lr = LogisticRegression()
lr.fit(X, y)

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, 
                               sharey=True, figsize=(10, 5))

# Left panel: predicted classes; right panel: predicted probabilities.
plot_decision_boundary(X, y, lr, ax1)
plot_dataset(X, y, ax1)
plot_contours(X, y, lr, ax2)
plot_dataset(X, y, ax2)
fig.tight_layout()

In [None]:
# Bar chart of the learned intercept (b0) and weights.
plt.figure(figsize=(6, 4))
plot_parameters(lr)

Multinomial logistic regression

In [None]:
# Three clusters at fixed centers for the multiclass example.
centers = np.array([[-4, 4], [0, -4], [4, 4]])
X, y = make_blobs(n_samples=100, n_features=2, 
                  centers=centers, random_state=random_seed)

In [None]:
# Visual check of the three-class data.
plt.figure(figsize=(6, 6))
plot_dataset(X, y)

In [None]:
class MultinomialLogisticRegression(BaseEstimator):
  """Softmax (multinomial logistic) regression fitted by gradient descent.

  Parameters:
    fit_intercept: when True, a per-class bias is learned with the weights.
    lr: step size multiplying the summed gradient at each iteration.
    max_iter: maximum number of gradient steps.
    tol: stop early once the norm of a weight update drops below this.

  Attributes set by fit:
    coef_: weights of shape (n_classes, n_features).
    intercept_: biases of shape (1, n_classes); zeros when fit_intercept
      is False.
  """

  def __init__(self, fit_intercept=True, lr=0.0001, max_iter=1000, tol=1e-4):
    self.fit_intercept = fit_intercept
    self.lr = lr
    self.max_iter = max_iter
    self.tol = tol

  def fit(self, X, y):
    """Learn the weights from X and a 2-D target y.

    y must have shape (n_samples, n_classes); it is assumed to be one-hot
    encoded.

    Returns:
      self, so calls can be chained.
    """
    if self.fit_intercept:
      X = self._add_bias_column(X)
    n_samples, n_features = X.shape
    _, n_classes = y.shape
    w = np.zeros((n_classes, n_features))

    for _ in range(self.max_iter):
      w_old = w.copy()
      # w holds one row per class, so the class scores are X @ w.T, and
      # the softmax must normalise each sample's row, not the whole array.
      mu = softmax(X @ w.T, axis=-1)
      grad = (mu - y).T @ X
      w = w - self.lr * grad

      if np.linalg.norm(w - w_old) < self.tol:
        break

    if self.fit_intercept:
      # Column 0 of the augmented X is the bias column.
      self.intercept_ = w[:, 0][np.newaxis, :]
      self.coef_ = w[:, 1:]
    else:
      self.intercept_ = np.zeros((1, n_classes))
      self.coef_ = w
    return self

  def predict_proba(self, X):
    """Return per-class probabilities, one row per sample."""
    return softmax(self.intercept_ + X @ self.coef_.T, axis=-1)

  def predict(self, X):
    """Return the index of the most probable class for each sample."""
    return np.argmax(self.predict_proba(X), axis=-1)

  def _add_bias_column(self, X):
    """Prepend a column of ones to X."""
    bias = np.ones((X.shape[0], 1))
    return np.hstack((bias, X))


In [None]:
# One-hot encode the class labels into an (n_samples, n_classes) array.
# `.toarray()` densifies the encoder's default sparse output, which avoids
# the `sparse=` keyword that newer scikit-learn releases no longer accept.
ohe = OneHotEncoder()
y_ohe = ohe.fit_transform(y[:, np.newaxis]).toarray()

In [None]:
# Standardized copy of the features; the raw X is kept for plotting.
ss = StandardScaler()
X_std = ss.fit_transform(X)

In [None]:
# Fit the softmax model on the standardized features and one-hot targets.
lr = MultinomialLogisticRegression()
lr.fit(X_std, y_ohe)

plt.figure(figsize=(6, 6))
# The model was trained on standardized inputs, so the grid built from
# the raw X must pass through the same scaler before prediction.
plot_decision_boundary(X, y, lr, transforms=[ss])
plot_dataset(X, y)

In [None]:
# Inspect the learned per-class weights and intercepts.
lr.coef_, lr.intercept_

Non-linear logistic classification

In [None]:
# Two noisy concentric circles; the inner one is scaled by factor=0.3.
X, y = make_circles(n_samples=200, noise=0.08, 
                    factor=0.3, random_state=random_seed)

In [None]:
# Visual check of the circles data.
plt.figure(figsize=(6, 6))
plot_dataset(X, y)

In [None]:
# Standardized copy of the features; the raw X is kept for plotting.
ss = StandardScaler()
X_std = ss.fit_transform(X)

In [None]:
# Baseline: a purely linear model on the standardized circles data.
lr = LogisticRegression()
lr.fit(X_std, y)

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, sharey=True, figsize=(10, 5))
# The model was trained on standardized inputs, so the grid built from
# the raw X must pass through the same scaler before prediction.
plot_decision_boundary(X, y, lr, ax1, transforms=[ss])
plot_dataset(X, y, ax1)
plot_contours(X, y, lr, ax2, transforms=[ss])
plot_dataset(X, y, ax2)
plt.tight_layout()

In [None]:
# Expand the two raw features into polynomial terms up to degree 2,
# without a constant column.
pf = PolynomialFeatures(degree=2, include_bias=False)
X_poly = pf.fit_transform(X)

In [None]:
# Re-fits the existing scaler on the polynomial features; the plotting cell
# below passes this same `ss` in `transforms`.
X_poly_std = ss.fit_transform(X_poly)

In [None]:
# Same model, now trained on the standardized polynomial features.
lr = LogisticRegression()
lr.fit(X_poly_std, y)

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, sharey=True, figsize=(10, 5))
# The raw-coordinate grid is expanded by pf and then scaled by ss, the
# same pipeline the training data went through.
plot_decision_boundary(X, y, lr, ax1, transforms=[pf, ss])
plot_dataset(X, y, ax1)
plot_contours(X, y, lr, ax2, transforms=[pf, ss])
plot_dataset(X, y, ax2)
plt.tight_layout()

In [None]:
# Bar chart of the intercept (b0) and the polynomial-feature weights.
plt.figure(figsize=(6, 4))
plot_parameters(lr)

Regularized logistic regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Small train and test samples from the same generator settings, drawn
# with different seeds.
X_train, y_train = make_moons(n_samples=30, noise=0.15, random_state=random_seed)
X_test, y_test = make_moons(n_samples=30, noise=0.15, random_state=random_seed+1)

In [None]:
# Circles mark training points, triangles mark test points.
plt.figure(figsize=(6, 6))
plot_dataset(X_train, y_train)
plot_dataset(X_test, y_test, marker='^')

In [None]:
# @title { run: "auto" }
# @markdown Select a regularization type:
penalty = "l1"  # @param ['l2', 'l1', 'none']

# NOTE(review): the 'none' option is passed through as a string; recent
# scikit-learn releases appear to expect penalty=None instead — confirm
# against the installed version.
# 'l1' is routed to liblinear; every other choice uses lbfgs.
solver = 'liblinear' if penalty == 'l1' else 'lbfgs'

# Degree-6 polynomial expansion of the training features, then scaling.
pf = PolynomialFeatures(degree=6, include_bias=False)
X_train_poly = pf.fit_transform(X_train)

ss = StandardScaler()
X_train_poly_std = ss.fit_transform(X_train_poly)

lr = LogisticRegression(penalty=penalty, solver=solver)
lr.fit(X_train_poly_std, y_train)

# Left: decision boundary over the test points; right: fitted parameters.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))
plot_decision_boundary(X_test, y_test, lr, ax=ax1, transforms=[pf, ss])
plot_dataset(X_test, y_test, ax=ax1, marker='^')
plot_parameters(lr, ax=ax2)
plt.tight_layout()


ROC/AUC

In [None]:
# Low-noise two-moons sample for the threshold demonstration.
X, y = make_moons(n_samples=200, noise=0.08, random_state=random_seed)

In [None]:
# Visual check of the two-moons data.
plt.figure(figsize=(6, 6))
plot_dataset(X, y)

In [None]:
# @title { run: "auto" }
threshold = 0.8 # @param {type: "slider", min: 0, max: 1, step: 0.01}

# Degree-15 polynomial expansion followed by standardization.
pf = PolynomialFeatures(degree=15, include_bias=False)
X_poly = pf.fit_transform(X)

ss = StandardScaler()
X_poly_std = ss.fit_transform(X_poly)

lr = LogisticRegression(max_iter=10_000)
lr.fit(X_poly_std, y)

# Reuse the shared grid helper instead of rebuilding the same 100x100
# grid by hand, then push the grid through the training pipeline.
x1, x2, X_line = create_domain_set(X)
X_line_poly = pf.transform(X_line)
X_line_poly_std = ss.transform(X_line_poly)

# Probability of class 1, and the class obtained by cutting it at the
# chosen threshold.
probs = lr.predict_proba(X_line_poly_std)[:, 1]
y_pred = np.where(probs > threshold, 1, 0)

probs = probs.reshape(100, 100)
y_pred = y_pred.reshape(100, 100)

# Left: thresholded classes; right: the underlying probabilities.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, 
                               sharey=True, figsize=(10, 5))
ax1.contourf(x1, x2, y_pred.T, cmap='coolwarm', levels=40)
ax2.contourf(x1, x2, probs.T, cmap='coolwarm', levels=40)
sns.scatterplot(x=X[:, 0], y=X[:, 1], c=y, s=150, linewidth=1, edgecolor='k', ax=ax1)
sns.scatterplot(x=X[:, 0], y=X[:, 1], c=y, s=150, linewidth=1, edgecolor='k', ax=ax2)
plt.tight_layout()


In [None]:
# Two-moons sample with noise=0.15 for the ROC curve example.
X, y = make_moons(n_samples=200, noise=0.15, random_state=random_seed)

In [None]:
# Visual check of the noisier two-moons data.
plt.figure(figsize=(6, 6))
plot_dataset(X, y)

In [None]:
# Linear model on the raw features, with default settings.
lr = LogisticRegression()
lr.fit(X, y)

In [None]:
# Decision boundary of the fitted model with the data on top.
plt.figure(figsize=(6, 6))
plot_decision_boundary(X, y, lr)
plot_dataset(X, y)

In [None]:
# Predicted probabilities; column 1 feeds the ROC computation below.
y_pred = lr.predict_proba(X)

In [None]:
# ROC points from the column-1 scores, and the area under that curve.
fpr, tpr, thr = metrics.roc_curve(y, y_pred[:, 1])
auc = metrics.auc(fpr, tpr)

In [None]:
plt.figure(figsize=(8, 6))
# The legend label previously lacked its closing parenthesis.
plt.plot(fpr, tpr, color="darkorange", lw=2, label="ROC curve (area = %0.2f)" % (auc))
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle="--")
plt.xlabel("False Positive Rate", size=14)
plt.ylabel("True Positive Rate", size=14)
plt.title("Receiver operating characteristic", size=18)
plt.legend(loc="lower right")
plt.show()

Classification metrics

In [None]:
# Noisy train and test samples from the same generator settings, drawn
# with different seeds.
X_train, y_train = make_moons(n_samples=50, noise=0.3, random_state=random_seed)
X_test, y_test = make_moons(n_samples=50, noise=0.3, random_state=random_seed+1)

In [None]:
# Visual check of the test sample (triangle markers).
plt.figure(figsize=(6, 6))
plot_dataset(X_test, y_test, marker='^')

In [None]:
# Fit on the training sample, then predict labels for the test sample.
lr = LogisticRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_test)

In [None]:
# Decision boundary over the region covered by the test points.
plt.figure(figsize=(6, 6))
plot_decision_boundary(X_test, y_test, lr)
plot_dataset(X_test, y_test, marker='^')

In [None]:
# Confusion matrix of test labels against predictions, as an annotated
# heatmap.
plt.figure(figsize=(8, 6))
cm = metrics.confusion_matrix(y_test, y_pred)
hm = sns.heatmap(cm, annot=True, cmap='Blues')
hm.set_xlabel("Predicted class", fontsize=14)
hm.set_ylabel("True class", fontsize=14)
hm.set_title('Confusion matrix', size=20)
plt.show()

#### $accuracy = \frac {TP + TN} {TP + TN + FP + FN}$

In [None]:
# Accuracy of the test predictions.
accuracy = metrics.accuracy_score(y_test, y_pred)

In [None]:
# Report the accuracy to two decimals.
print("Accuracy score: %.2f" % (accuracy))

#### $precision = \frac {TP} {TP + FP}$

In [None]:
# Precision of the test predictions.
precision = metrics.precision_score(y_test, y_pred)

In [None]:
# Report the precision to two decimals.
print("Precision score: %.2f" % (precision))

#### $recall = \frac {TP} {TP + FN}$

In [None]:
# Recall of the test predictions.
recall = metrics.recall_score(y_test, y_pred)

In [None]:
# Report the recall to two decimals.
print("Recall score: %.2f" % (recall))

#### $F1 = 2 \cdot \frac {precision \cdot recall} {precision + recall}$

In [None]:
# F1 score of the test predictions.
f1 = metrics.f1_score(y_test, y_pred)

In [None]:
# Report the F1 score to two decimals.
print("F1 score: %.2f" % (f1))