In [1]:
# imports and setup
import numpy as np
from numpy.random import default_rng

from sklearn.datasets import make_classification, load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, roc_auc_score, log_loss, confusion_matrix
from sklearn.model_selection import train_test_split

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Print NumPy floats with 3 digits of precision and without scientific
# notation, so printed arrays stay easy to read.
np.set_printoptions(suppress=True, precision=3)

In [2]:
def plot_data(x, y):
    """Scatter-plot a two-feature, two-class data set with pyplot.

    Parameters
    ----------
    x : 2-D array
        Sample matrix; column 0 is drawn on the horizontal axis and
        column 1 on the vertical axis.
    y : array
        Class labels used as a mask over the rows of ``x``. Only rows
        labelled 0 or 1 are drawn.

    The function draws through the ``plt`` state machine and returns nothing.
    """
    # View window: the data extent padded by one unit on every side.
    horizontal, vertical = x[:, 0], x[:, 1]
    x_extent = (horizontal.min() - 1, horizontal.max() + 1)
    y_extent = (vertical.min() - 1, vertical.max() + 1)

    # One scatter call per class: red crosses for class 0, blue dots for class 1.
    class_styles = (
        (0, 'r', 'x', 'Class 0'),
        (1, 'b', 'o', 'Class 1'),
    )
    for class_id, colour, symbol, legend_text in class_styles:
        members = x[y == class_id]
        plt.scatter(members[:, 0], members[:, 1],
                    c=colour, marker=symbol, label=legend_text)

    plt.xlim(*x_extent)
    plt.ylim(*y_extent)
    plt.legend()
    plt.title("Basic data")

In [3]:
def plot_model_prediction(model, x, y):
    """Plot a binary linear classifier's hard decision regions over 2-D data.

    Three layers are drawn with pyplot, in this order:

    1. the linear separator ``intercept + w1*x1 + w2*x2 == 0`` as a dashed
       magenta line,
    2. a translucent two-colour (red/blue) fill of ``model.predict``
       evaluated on a dense grid covering the view window,
    3. the data points (red crosses for class 0, blue dots for class 1).

    Parameters
    ----------
    model : fitted estimator
        Must expose ``intercept_`` (indexable, first entry used), ``coef_``
        (exactly two weights once flattened) and ``predict``.
    x : 2-D array
        Sample matrix; column 0 is the horizontal axis, column 1 the vertical.
    y : array
        Class labels used as a mask over the rows of ``x`` (0 and 1 are drawn).

    Raises
    ------
    ValueError
        If ``model.coef_`` does not flatten to exactly two weights.
    """
    # View window: the data extent padded by one unit on every side.
    plot_x_min, plot_x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
    plot_y_min, plot_y_max = x[:, 1].min() - 1, x[:, 1].max() + 1

    # plot the separator
    model_intercept = model.intercept_[0]
    # Flatten so both weights are scalars and the zero checks below are unambiguous.
    weight1, weight2 = np.ravel(model.coef_)
    if weight2 != 0:
        # Usual case: solve the separator equation for the vertical coordinate.
        line_intercept = -model_intercept / weight2
        line_slope = -weight1 / weight2
        line_x = np.array([plot_x_min, plot_x_max])
        line_y = line_slope * line_x + line_intercept
        plt.plot(line_x, line_y, 'm', lw=1, ls='--')
    elif weight1 != 0:
        # Vertical separator: dividing by weight2 would give inf/nan, so draw
        # the line x1 = -intercept / w1 across the full height of the window.
        boundary_x = -model_intercept / weight1
        plt.plot([boundary_x, boundary_x], [plot_y_min, plot_y_max],
                 'm', lw=1, ls='--')
    # Both weights zero: the prediction is constant, so there is no separator.

    # plot the decision data (two colors)
    xx, yy = np.meshgrid(np.arange(plot_x_min, plot_x_max, 0.01),
                         np.arange(plot_y_min, plot_y_max, 0.01))
    # One grid point per row, as an (n_points, 2) matrix for the model.
    grid = np.column_stack((xx.ravel(), yy.ravel()))

    positive_predictions = model.predict(grid)
    zz = positive_predictions.reshape(xx.shape)
    plt.contourf(xx, yy, zz,
                 cmap=matplotlib.colors.ListedColormap(('r', 'b')), alpha=0.25)

    # plot the basic data points
    class0_x = x[y == 0]
    class1_x = x[y == 1]
    plt.scatter(class0_x[:, 0], class0_x[:, 1], c='r', marker='x', label='Class 0')
    plt.scatter(class1_x[:, 0], class1_x[:, 1], c='b', marker='o', label='Class 1')

    plt.xlim(plot_x_min, plot_x_max)
    plt.ylim(plot_y_min, plot_y_max)
    plt.legend()
    plt.title("Classification regions")

In [4]:
def plot_model_contour(model, x, y):
    """Plot a binary linear classifier's class-1 probability surface over 2-D data.

    Three layers are drawn with pyplot, in this order:

    1. the linear separator ``intercept + w1*x1 + w2*x2 == 0`` as a dashed
       magenta line,
    2. a grey filled contour (with colorbar) of column 1 of
       ``model.predict_proba`` evaluated on a grid covering the view window,
    3. the data points (red crosses for class 0, blue dots for class 1).

    Parameters
    ----------
    model : fitted estimator
        Must expose ``intercept_`` (indexable, first entry used), ``coef_``
        (exactly two weights once flattened) and ``predict_proba``.
    x : 2-D array
        Sample matrix; column 0 is the horizontal axis, column 1 the vertical.
    y : array
        Class labels used as a mask over the rows of ``x`` (0 and 1 are drawn).

    Raises
    ------
    ValueError
        If ``model.coef_`` does not flatten to exactly two weights.
    """
    # View window: the data extent padded by one unit on every side.
    plot_x_min, plot_x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
    plot_y_min, plot_y_max = x[:, 1].min() - 1, x[:, 1].max() + 1

    # plot the separator
    model_intercept = model.intercept_[0]
    # Flatten so both weights are scalars and the zero checks below are unambiguous.
    weight1, weight2 = np.ravel(model.coef_)
    if weight2 != 0:
        # Usual case: solve the separator equation for the vertical coordinate.
        line_intercept = -model_intercept / weight2
        line_slope = -weight1 / weight2
        line_x = np.array([plot_x_min, plot_x_max])
        line_y = line_slope * line_x + line_intercept
        plt.plot(line_x, line_y, 'm', lw=1, ls='--')
    elif weight1 != 0:
        # Vertical separator: dividing by weight2 would give inf/nan, so draw
        # the line x1 = -intercept / w1 across the full height of the window.
        boundary_x = -model_intercept / weight1
        plt.plot([boundary_x, boundary_x], [plot_y_min, plot_y_max],
                 'm', lw=1, ls='--')
    # Both weights zero: the probability is constant, so there is no separator.

    # plot the probability contour (control smoothness with 'levels' parameter)
    xx, yy = np.meshgrid(np.arange(plot_x_min, plot_x_max, 0.1),
                         np.arange(plot_y_min, plot_y_max, 0.1))
    # One grid point per row, as an (n_points, 2) matrix for the model.
    grid = np.column_stack((xx.ravel(), yy.ravel()))

    # Column 1 of predict_proba is taken as the class-1 probability.
    positive_predictions = model.predict_proba(grid)[:, 1]
    zz = positive_predictions.reshape(xx.shape)
    contour = plt.contourf(xx, yy, zz, cmap='Greys', levels=10)
    plt.colorbar(contour)

    # plot the basic data points
    class0_x = x[y == 0]
    class1_x = x[y == 1]
    plt.scatter(class0_x[:, 0], class0_x[:, 1], c='r', marker='x', label='Class 0')
    plt.scatter(class1_x[:, 0], class1_x[:, 1], c='b', marker='o', label='Class 1')

    plt.xlim(plot_x_min, plot_x_max)
    plt.ylim(plot_y_min, plot_y_max)
    plt.legend()
    plt.title("Probability(x in class 1)")