# Logistic Regression

Imports and Helper Functions
---

In [1]:
import matplotlib
try:
    # Prefer the interactive ipympl widget backend.
    matplotlib.use("module://ipympl.backend_nbagg")
except Exception:
    # Catch ordinary failures only (e.g. the backend module cannot be loaded);
    # a bare ``except`` would also swallow KeyboardInterrupt and SystemExit.
    print("Using inline matplotlib. Interactive plots disabled.")
    matplotlib.use("module://matplotlib_inline.backend_inline")

from ipywidgets import interactive_output
import ipywidgets as widgets
import numpy as np
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn import datasets as skdatasets

try:
    import demo_datasets as ds
except:
    !curl https://raw.githubusercontent.com/reml-lab/MLDemos/refs/heads/main/demo_datasets.py -o demo_datasets.py
    import demo_datasets as ds

Using inline matplotlib. Interactive plots disabled.
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1854  100  1854    0     0   8842      0 --:--:-- --:--:-- --:--:--  8870


## Data Set Generation

In [2]:
# Registry of demo datasets: name -> {"train": ..., "test": ...}.
datasets = {}

def make_datasets():
    """(Re)populate the module-level ``datasets`` dict in place.

    Every generator from ``demo_datasets`` is invoked twice per entry: first
    for the "train" split, then for the "test" split.
    """
    generators = (
        ("Blobs", ds.make_blobs),
        ("XOR", ds.make_xor),
        ("Circles", ds.make_circles),
        ("Moons", ds.make_moons),
        ("Random", ds.make_random),
    )
    for name, generate in generators:
        train_split = generate()
        test_split = generate()
        datasets[name] = {"train": train_split, "test": test_split}

make_datasets()

## Define Prediction, Loss, and Learning Functions

In [3]:
def phi(X,degree):
    """Polynomial basis expansion of the first two columns of ``X``.

    For every total order ``k`` in ``1..degree`` the block of monomials
    ``X0**i * X1**j`` with ``i + j == k`` is built (``i`` descending, ``j``
    ascending) and divided by ``10**(k-1)``. The blocks are stacked
    column-wise, so the result has ``2 + 3 + ... + (degree+1)`` columns.
    """
    x0 = X[:, [0]]
    x1 = X[:, [1]]
    blocks = []
    for order in range(1, degree + 1):
        # Exponents of X1 run 0..order; exponents of X0 are the mirror image.
        pow1 = np.arange(order + 1)[np.newaxis, :]
        pow0 = pow1[:, ::-1]
        blocks.append((x0 ** pow0) * (x1 ** pow1) / (10 ** (order - 1)))
    return np.hstack(blocks)

def discriminant(X,w,b,degree):
    """Evaluate the linear score ``phi(X, degree) @ w + b``."""
    features = phi(X, degree)
    return features.dot(w) + b

def classification_error(Y,Yhat):
    """Return the fraction of examples misclassified by the sign of ``Yhat``.

    Parameters
    ----------
    Y : array-like
        True labels. Any value ``> 0`` is treated as the positive class and
        any value ``<= 0`` as the negative class, so both the {0, 1} and the
        {-1, +1} conventions are supported.
    Yhat : array-like
        Real-valued discriminant scores; a positive score predicts the
        positive class, a negative score the negative class.

    Returns
    -------
    The mean of the per-example error indicators (empirical 0-1 risk).
    """
    Y = np.asarray(Y).flatten()
    Yhat = np.asarray(Yhat).flatten()
    is_positive = Y > 0
    # The previous ``Y*Yhat < 0`` test never flagged examples labelled 0
    # (the product is always 0), so class-0 mistakes went uncounted.
    # A score of exactly 0 is still never counted as an error.
    wrong = (is_positive & (Yhat < 0)) | (~is_positive & (Yhat > 0))
    return np.mean(wrong)

def fit(X,Y,degree,log_lambda):
    """Fit logistic regression on the polynomial expansion of ``X``.

    ``log_lambda`` is exponentiated with ``np.exp`` (natural exponential) to
    get the regularization strength; its reciprocal is passed to scikit-learn
    as ``C``. Returns ``(w, b)`` where ``w`` is the coefficient array reshaped
    to a column and ``b`` is the estimator's ``intercept_``.
    """
    regularization = np.exp(log_lambda)
    model = linear_model.LogisticRegression(C=1/regularization, max_iter=5000)
    model.fit(phi(X, degree), Y)
    weights = model.coef_.reshape((-1, 1))
    return (weights, model.intercept_)


## Interaction Functions and UI

In [4]:
def _plot_points(ax1, ax2, X, Y, marker, **plot_kwargs):
    """Draw one data split on both panels (label 0 in red, label 1 in blue)."""
    pos = (Y == 1)
    neg = (Y == 0)
    # Label 0 is drawn first on ax1; the legend order in plot_model relies on it.
    ax1.plot(X[neg, 0], X[neg, 1], 'r' + marker, alpha=0.5, **plot_kwargs)
    ax1.plot(X[pos, 0], X[pos, 1], 'b' + marker, alpha=0.5, **plot_kwargs)
    # On the 3-D panel the points sit on the z=0 plane.
    ax2.scatter(X[pos, 0], X[pos, 1], 0*X[pos, 1], marker='x', color='b')
    ax2.scatter(X[neg, 0], X[neg, 1], 0*X[neg, 1], marker='x', color='r')

def plot_model(w=np.zeros((5,1)), dataset=None, N=0, learn=False, degree=1,log_lambda=0,t=0,showtrain=True,showtest=False):
    """Draw the selected data and, optionally, a fitted logistic-regression model.

    The left panel shows the points, the shaded sigmoid of the discriminant and
    its 0.5 level curve; the right panel shows the same quantity as a surface.

    Parameters
    ----------
    w : ignored on input (it is overwritten by ``fit`` whenever a model is
        learned); kept so the signature stays compatible.
    dataset : key into the module-level ``datasets`` dict, or ``None`` to draw
        empty axes.
    N : number of leading training examples to use; nothing is drawn if 0.
    learn : fit and display a model when the training subset contains both
        labels 0 and 1.
    degree, log_lambda : forwarded to ``fit`` / ``discriminant``.
    t : unused; it exists so a hidden trigger widget can force a redraw.
    showtrain, showtest : whether to draw the training / test points.
    """
    fig = plt.figure(1,figsize=(16,6))
    # Figure number 1 is reused if the backend kept it alive from an earlier
    # call; clear it so axes from previous redraws do not pile up.
    fig.clf()
    plt.rcParams.update({'font.size': 12})

    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2, projection='3d')

    Rtrain=0
    Rtest=0
    if(dataset is not None and N>0):
        Xtrain,Ytrain=datasets[dataset]["train"]
        Xtrain=Xtrain[:N]
        Ytrain=Ytrain[:N]

        Xtest, Ytest = datasets[dataset]["test"]

        # Plot original data
        if(showtest):
            _plot_points(ax1, ax2, Xtest, Ytest, 'x', markersize=5)

        if(showtrain):
            _plot_points(ax1, ax2, Xtrain, Ytrain, 's')

        # Decide whether a model can be fitted from the TRAINING labels alone.
        # The check used to read masks that were only defined inside the
        # show* branches (an error when both were off) and that described the
        # test split when only "Show Test" was ticked.
        has_both_classes = np.any(Ytrain == 1) and np.any(Ytrain == 0)

        if(learn and has_both_classes):
            w,b=fit(Xtrain,Ytrain,degree,log_lambda)
            Yhat = discriminant(Xtrain,w,b,degree)
            Rtrain = classification_error(Ytrain,Yhat)

            Yhat = discriminant(Xtest,w,b,degree)
            Rtest = classification_error(Ytest,Yhat)

            # Evaluate the sigmoid of the discriminant on a dense grid over
            # the plotted square.
            gx1, gx2 = np.meshgrid(np.arange(-10, 10,1/20.0 ),np.arange(-10, 10,1/50.0))
            gx1l = gx1.flatten()
            gx2l = gx2.flatten()
            gx   = np.vstack((gx1l,gx2l)).T
            gyhat = 1/(1+np.exp(-discriminant(gx,w,b,degree)))
            gyhat = gyhat.reshape(gx1.shape)

            # Skip the model overlay when the surface is identically zero.
            if(np.any(gyhat != 0)):
                ax1.contour(gx1, gx2, gyhat, levels=[0.5], linewidths=2,colors=["k"])
                img=ax1.imshow(gyhat, interpolation='nearest',extent=(-10, 10, -10, 10), aspect='auto', origin='lower', cmap=plt.cm.bwr_r, alpha=0.25, vmin=0,vmax=1)
                plt.colorbar(img,ax=ax1)
                ax2.plot_surface(gx1, gx2, gyhat, rstride=10, cstride=10,cmap=plt.cm.bwr_r, edgecolor='k',linewidth=0.05,alpha=0.5,norm=plt.Normalize(vmin=0, vmax=1))


    ax1.set_xlim(-10,10)
    ax1.set_ylim(-10,10)
    ax1.set_title(f"Logistic Regression (Train Error Rate: {Rtrain:0.3f}, Test Error Rate: {Rtest:0.3f})")
    ax1.set_xlabel("X0")
    ax1.set_ylabel("X1")
    ax1.grid(True)
    # Labels follow draw order: the red label-0 points are plotted first, then
    # the blue label-1 points (the previous legend had the two swapped).
    ax1.legend(["Class 0","Class 1"],loc='upper left')

    ax2.set_xlim(-10,10)
    ax2.set_ylim(-10,10)
    ax2.set_xlabel("X0")
    ax2.set_ylabel("X1")
    ax2.set_zlabel("P(y|x)")
    ax2.set_title("Logistic Regression (Error Rate: %.2f)"%(Rtrain))
    ax2.set_zlim(-0.1,1.1)

    plt.tight_layout()
    plt.show()

def regenerate_data(b):
    """Button callback: rebuild every dataset, then bump the hidden trigger.

    ``b`` is the callback argument and is not used. Changing ``wtrigger``'s
    value is what makes the plot redraw with the new data.
    """
    make_datasets()
    wtrigger.value += 1

# --- Controls -----------------------------------------------------------
wd = widgets.Dropdown(
    options=list(datasets.keys()),
    description="Dataset"
)
wn=widgets.IntSlider(value=10,min=1,max=1000,step=1,description="N",continuous_update=False)
wdeg=widgets.IntSlider(value=1,min=1,max=4,step=1,description="Degree",continuous_update=False)
# fit() converts this value with np.exp, i.e. it is a NATURAL log of lambda,
# so the slider is labelled "ln" rather than "log10".
wlambda = widgets.FloatSlider(value=-10,min=-10,max=10.0,step=1,description='ln(lam)',continuous_update=False,readout=True,readout_format='.2f',)
wl = widgets.Checkbox(value=True, description="Learn")
wrandomize = widgets.Button(description="Re-Generate Data")
# Hidden counter: regenerate_data increments it to force a redraw.
wtrigger = widgets.IntText(value=0, layout=widgets.Layout(display="none"))
wtrain = widgets.Checkbox(value=True, description='Show Train', disabled=False)
wtest = widgets.Checkbox(value=False, description='Show Test', disabled=False)

wrandomize.on_click(regenerate_data)

# --- Wiring and layout --------------------------------------------------
# Each key is a plot_model keyword argument fed by the corresponding widget.
out = interactive_output(plot_model, {"dataset":wd,"N":wn,"degree":wdeg,"log_lambda":wlambda,"learn":wl,"t":wtrigger,"showtrain":wtrain, "showtest":wtest})
box1=widgets.HBox([wd,wrandomize,wtrain, wtest, wn,wl])
box2=widgets.HBox([wdeg,wlambda])

ui=widgets.VBox([box1,box2])


## Linear Classifier Demo with Polynomial Basis Expansion

In [5]:
# Render the control panel (ui) followed by the plot output area (out).
display(ui,out)

VBox(children=(HBox(children=(Dropdown(description='Dataset', options=('Blobs', 'XOR', 'Circles', 'Moons', 'Ra…

Output()