# Generative Classifiers

Imports and Helper Functions
---

In [1]:
import matplotlib
# Prefer the interactive ipympl backend; fall back to static inline rendering
# when it cannot be activated.
try:
    matplotlib.use("module://ipympl.backend_nbagg")
except Exception:
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not swallowed by the fallback.
    print("Using inline matplotlib. Interactive plots disabled.")
    matplotlib.use("module://matplotlib_inline.backend_inline")

from ipywidgets import interactive_output
import ipywidgets as widgets
import numpy as np
from matplotlib import pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from scipy.stats import multivariate_normal
from IPython.display import clear_output

try:
    import demo_datasets as ds
except:
    !curl https://raw.githubusercontent.com/reml-lab/MLDemos/refs/heads/main/demo_datasets.py -o demo_datasets.py
    import demo_datasets as ds


Using inline matplotlib. Interactive plots disabled.
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1854  100  1854    0     0   8749      0 --:--:-- --:--:-- --:--:--  8786


## Data Set Generation

In [2]:
# Module-level registry mapping a dataset name to whatever the matching
# ``ds.make_*`` generator returns.
datasets = {}


def make_datasets():
    """Regenerate every demo dataset and store it in ``datasets`` under its name.

    The dict is updated in place (never rebound), so existing references to
    ``datasets`` see the new data after each call.
    """
    generators = (
        ("Blobs", ds.make_blobs),
        ("XOR", ds.make_xor),
        ("Circles", ds.make_circles),
        ("Moons", ds.make_moons),
        ("Random", ds.make_random),
    )
    for label, build in generators:
        datasets[label] = build()


# Populate the registry once so it is ready for use.
make_datasets()

## Plotting Functions

In [3]:
def _fit_generative_model(X, Y, method):
    """Fit the requested generative classifier and extract its Gaussian parameters.

    Parameters
    ----------
    X, Y : training inputs and labels passed straight to ``model.fit``.
    method : ``"LDA"`` or ``"NB"``.

    Returns
    -------
    (model, mu, Sigma, theta) where ``mu[k]`` is the fitted mean of class k,
    ``Sigma[k]`` the covariance matrix used for class k's density, and
    ``theta`` the class prior probabilities.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"LDA"`` nor ``"NB"``.
    """
    if method == "LDA":
        model = LDA(store_covariance=True)
        model.fit(X, Y)
        mu = model.means_
        # LDA uses a single covariance matrix for both classes.
        Sigma = [model.covariance_, model.covariance_]
        theta = model.priors_
    elif method == "NB":
        model = GaussianNB()
        model.fit(X, Y)
        mu = model.theta_
        # Naive Bayes keeps per-feature variances only, i.e. a diagonal covariance per class.
        Sigma = [np.diag(model.var_[0, :]), np.diag(model.var_[1, :])]
        theta = model.class_count_ / np.sum(model.class_count_)
    else:
        raise ValueError("Unknown method "+method)
    return model, mu, Sigma, theta


def plot(dataset='Dataset 1', N=100, method="LDA", probabilistic=False,t=0):
    """Fit a generative classifier and draw its priors, class-conditionals and decision regions.

    A three-panel figure is produced: a bar chart of the class priors, a 3D
    surface plot of the two fitted class-conditional Gaussian densities, and a
    2D map of the classifier output over the square [-10, 10) x [-10, 10) with
    the training points overlaid.

    Parameters
    ----------
    dataset : key into the module-level ``datasets`` dict. Nothing is drawn
        when it is ``None``. The default value is not one of the keys that
        ``make_datasets`` creates, so callers are expected to pass a name.
    N : number of leading samples of the dataset to use. Nothing is drawn
        when ``N <= 0``.
    method : ``"LDA"`` or ``"NB"`` (Gaussian naive Bayes).
    probabilistic : if true, the third panel shows P(Y=1|x) from
        ``predict_proba``; otherwise it shows the hard ``predict`` labels.
    t : unused by the body; it exists so that a change of this value by the
        caller (e.g. a hidden trigger widget) re-invokes the function.

    Raises
    ------
    KeyError
        If ``dataset`` is not a key of ``datasets``.
    ValueError
        If ``method`` is not recognised.

    Notes
    -----
    Labels are assumed to be 0/1 (class 0 is drawn red, class 1 blue).
    """
    clear_output(wait=True)

    # Close the figure created by the previous call. Backends that keep figures
    # alive after ``plt.show()`` would otherwise accumulate one figure per
    # widget interaction. Closing an already-closed figure is harmless.
    previous_fig = getattr(plot, "_last_fig", None)
    if previous_fig is not None:
        plt.close(previous_fig)

    plt.rcParams.update({'font.size': 12})
    fig = plt.figure(figsize=(16,5))
    plot._last_fig = fig
    ax_prior = fig.add_subplot(1, 3, 1)
    ax_class_conditional = fig.add_subplot(1, 3, 2, projection='3d')
    ax_posterior = fig.add_subplot(1, 3, 3)

    if dataset is None or N <= 0:
        return

    # Gather dataset
    X, Y = datasets[dataset]
    X = X[:N]
    Y = Y[:N]

    # Boolean masks selecting the samples of each class
    ind0 = (Y == 0)
    ind1 = (Y == 1)

    # Both models (and the two class-conditional densities below) need samples
    # from both classes; report that on the figure instead of failing inside
    # the fit or on the missing second row of the fitted parameters.
    if not (ind0.any() and ind1.any()):
        fig.suptitle(
            f"Cannot fit {method}: the first {len(Y)} samples of '{dataset}' "
            "do not contain both classes. Increase N or re-generate the data."
        )
        plt.show()
        return

    # Create the classifier object and extract the fitted parameters
    model, mu, Sigma, theta = _fit_generative_model(X, Y, method)

    pxgy0_dist = multivariate_normal(mean=mu[0,:], cov=Sigma[0])
    pxgy1_dist = multivariate_normal(mean=mu[1,:], cov=Sigma[1])

    # Calculate prediction error on the training data
    Yhat = model.predict(X)
    R = np.mean(Yhat != Y)

    # Plot class priors
    ax_prior.bar([0,1], theta, color=['r','b'])
    ax_prior.set_xticks([0, 1])
    ax_prior.set_xticklabels(['Class 0','Class 1'])
    ax_prior.set_ylim(0,1)
    ax_prior.set_title(f"{method} Class Priors: P(Y=y)")
    ax_prior.set_ylabel("P(Y=y)")
    ax_prior.grid(True)

    # Create the evaluation grid shared by the surface and posterior plots
    gx1, gx2 = np.meshgrid(np.arange(-10, 10, 1 / 20.0), np.arange(-10, 10, 1 / 20.0))
    gx = np.vstack((gx1.flatten(), gx2.flatten())).T
    if probabilistic:
        # Column 1 of predict_proba is P(Y=1|x)
        gyhat = model.predict_proba(gx)[:, 1]
    else:
        gyhat = model.predict(gx)
    gyhat = gyhat.reshape(gx1.shape)

    # Create class conditional plot
    pxgy0 = pxgy0_dist.pdf(gx).reshape(gx1.shape)
    pxgy1 = pxgy1_dist.pdf(gx).reshape(gx1.shape)
    # ``marker`` must be passed by keyword: the 4th positional argument of the
    # 3D scatter is ``zdir``, not the marker style. Class 0 is drawn first and
    # the handles are kept so the legend labels match the colours.
    h0 = ax_class_conditional.scatter(X[ind0,0], X[ind0,1], np.zeros_like(X[ind0,1]), marker='s', color='r')
    h1 = ax_class_conditional.scatter(X[ind1,0], X[ind1,1], np.zeros_like(X[ind1,1]), marker='s', color='b')
    ax_class_conditional.plot_surface(gx1, gx2, pxgy0, rstride=10, cstride=10,color="r", edgecolor='k',linewidth=0.05,alpha=0.25)
    ax_class_conditional.plot_surface(gx1, gx2, pxgy1, rstride=10, cstride=10,color="b", edgecolor='k',linewidth=0.05,alpha=0.25)
    ax_class_conditional.set_xlabel("X0")
    ax_class_conditional.set_ylabel("X1")
    ax_class_conditional.legend([h0, h1], ["Class 0", "Class 1"], loc='upper left')
    ax_class_conditional.set_title(f"{method} Class Conditional Densities: P(X=x|Y=y)")
    ax_class_conditional.set_xlim(-10, 10)
    ax_class_conditional.set_ylim(-10, 10)

    # Create class posterior plot
    (p0,) = ax_posterior.plot(X[ind0, 0], X[ind0, 1], 'rs', alpha=0.5)
    (p1,) = ax_posterior.plot(X[ind1, 0], X[ind1, 1], 'bs', alpha=0.5)

    # Only draw the 0.5 decision boundary when the grid actually crosses it;
    # np.all avoids iterating the large grid element by element in Python.
    if not np.all(gyhat < 0.5) and not np.all(gyhat >= 0.5):
        ax_posterior.contour(gx1, gx2, gyhat, levels=[0.5], linewidths=2, colors=["k"])

    im1 = ax_posterior.imshow(gyhat, interpolation='nearest', extent=(-10, 10, -10, 10), aspect='auto', origin='lower',
                cmap=plt.cm.bwr_r, alpha=0.25)
    im1.set_clim(0, 1)
    fig.colorbar(im1, ax=ax_posterior)

    ax_posterior.set_xlim(-10, 10)
    ax_posterior.set_ylim(-10, 10)
    ax_posterior.set_title(f"{method} Classifier (Train Error Rate: {R:.4g})")
    ax_posterior.set_xlabel("X0")
    ax_posterior.set_ylabel("X1")
    ax_posterior.grid(True)
    ax_posterior.legend([p0, p1], ["Class 0", "Class 1"], loc='upper left')
    plt.tight_layout()
    plt.show()

#plot(dataset='Blobs', N=100, method="NB", probabilistic=False,t=0)

In [4]:
def regenerate_data(b):
    """Button callback: rebuild all datasets, then bump the trigger widget's value.

    ``b`` is the argument supplied by the click handler and is not used.
    """
    make_datasets()
    # Changing the trigger's value is what signals that the data changed.
    wtrigger.value += 1

# --- Input controls ---------------------------------------------------------
# Dataset selector, populated from the names currently in ``datasets``.
wd = widgets.Dropdown(options=datasets.keys(), description="Dataset")

# Number of samples to use; the plot only refreshes when the slider is released.
wn = widgets.IntSlider(
    value=10,
    min=10,
    max=1000,
    step=1,
    description="N",
    continuous_update=False,
)

# Toggle between hard labels and class probabilities.
wprob = widgets.Checkbox(value=False, description='Probabilistic', disabled=False)

# NOTE(review): wclass1 is created but not referenced anywhere else in the
# visible cells, and it repeats the 'Probabilistic' label -- confirm whether
# it can be removed.
wclass1 = widgets.Checkbox(value=False, description='Probabilistic', disabled=False)

# Classifier selector.
wmethod = widgets.Dropdown(
    options=['LDA', 'NB'],
    value='LDA',
    description='Method:',
    disabled=False,
)

# Button that regenerates the datasets, plus a hidden integer widget whose
# value is fed to ``plot`` as ``t``.
wrandomize = widgets.Button(description="Re-Generate Data")
wtrigger = widgets.IntText(value=0, layout=widgets.Layout(display="none"))
wrandomize.on_click(regenerate_data)

# --- Wiring -----------------------------------------------------------------
# Bind each widget's value to the matching ``plot`` keyword argument.
out = interactive_output(
    plot,
    dict(dataset=wd, N=wn, method=wmethod, probabilistic=wprob, t=wtrigger),
)

# --- Layout -----------------------------------------------------------------
box1 = widgets.HBox([wd, wrandomize, wmethod, wprob])
box2 = widgets.HBox([wn])
ui = widgets.VBox([box1, box2])

display(ui, out)

VBox(children=(HBox(children=(Dropdown(description='Dataset', options=('Blobs', 'XOR', 'Circles', 'Moons', 'Ra…

Output()