
**Cell type : TextRead**


# Problem 2 : Learning Multiclass Bayes Classifiers from Data with Maximum Likelihood

Derive the Bayes classifiers under the assumptions below, and use maximum-likelihood (ML) estimators of the parameters to compute and return the predictions on a test set. The $4\times 4$ loss matrix $L$ below gives the loss incurred: entry $L_{ij}$ is the loss for predicting class $i$ when the true class is $j$.

$L=\begin{bmatrix} 0 &1 & 2& 3\\ 1 &0 & 1& 2\\ 2 &1 & 0& 1\\ 3 &2 & 1& 0 \end{bmatrix}$ 

2a) Assume $X|Y=a$ is distributed as Normal with mean $\mu_a$ and covariance matrix $I$ (the identity).

2b) Assume $X|Y=a$ is distributed as Normal with mean $\mu_a$ and covariance matrix $\Sigma$ (shared by all classes).

2c) Assume $X|Y=a$ is distributed as Normal with mean $\mu_a$ and covariance matrix $\Sigma_a$ (one per class).



In [None]:
# Cell type : CodeWrite
# Fill in functions in this cell


def Bayes2a(X_train, Y_train, X_test):
    """ Give Bayes classifier prediction for test instances 
    using assumption 2a (X|Y=a ~ Normal(mu_a, I)).

    Class means and class priors are estimated by maximum likelihood
    (per-class sample mean and relative class frequency). For every test
    point the class minimising the posterior expected loss under the 4x4
    loss matrix is returned.

    The class scores are computed in log space and shifted by their
    per-row maximum before exponentiating, so test points far away from
    every class mean do not underflow to an all-zero score vector (which
    would always be resolved to class 1 by argmin).

    Arguments:
    X_train: numpy array of shape (n,d)
    Y_train: {1,2,3,4} numpy array of shape (n,)
    X_test : numpy array of shape (m,d)

    Returns:
    Y_test_pred : {1,2,3,4} numpy array of shape (m,) (float dtype)

    Raises:
    ValueError: if the training set is empty.
    """
    # L[i, j] is the loss for predicting class i+1 when the truth is class j+1.
    L = np.array([[0,1,2,3],[1,0,1,2],[2,1,0,1],[3,2,1,0]])

    X_train = np.asarray(X_train, dtype=float)
    Y_train = np.asarray(Y_train)
    X_test = np.asarray(X_test, dtype=float)

    n_samples = len(Y_train)
    if n_samples == 0:
        raise ValueError("Bayes2a needs at least one training sample")
    n_classes = L.shape[0]

    # log P(Y=k) + log p(x | Y=k), up to a constant shared by all classes.
    # Classes without training samples keep -inf, i.e. zero posterior mass.
    log_joint = np.full((X_test.shape[0], n_classes), -np.inf)
    for k in range(n_classes):
        members = X_train[Y_train == k + 1]
        if len(members) == 0:
            continue
        log_prior = np.log(len(members) / n_samples)
        diff = X_test - members.mean(axis=0)
        log_joint[:, k] = log_prior - 0.5 * np.sum(diff ** 2, axis=1)

    # Subtracting the row maximum rescales each row by a positive factor:
    # the argmin of the expected loss is unchanged, but the largest score
    # becomes exp(0) = 1 so the row can never be all zeros.
    log_joint -= log_joint.max(axis=1, keepdims=True)
    posterior = np.exp(log_joint)

    # risk[m, i] = sum_j L[i, j] * posterior[m, j]
    risk = posterior @ L.T

    # + 1.0 maps indices to labels 1..4 and keeps the float dtype callers see.
    return np.argmin(risk, axis=1) + 1.0

def gauss(x, mean):
    """Unnormalised identity-covariance Gaussian kernel.

    Arguments:
    x    : numpy array, the point at which to evaluate the kernel
    mean : numpy array broadcastable against x, the centre of the kernel

    Returns:
    exp(-||x - mean||^2 / 2); the (2*pi)^(-d/2) normalising constant is
    not included.
    """
    offset = x - mean
    distance = np.linalg.norm(offset)
    return np.exp(-0.5 * distance ** 2)
            
def multivariate_normal(x, mean, covariance_matrix):
    """
    Calculate the PDF of a multivariate normal distribution at each row of x.

    The density is
        (2*pi)^(-k/2) * det(Sigma)^(-1/2) * exp(-0.5 * (x-mu)^T Sigma^-1 (x-mu)).
    The square root of the determinant is taken exactly once; taking it
    twice would scale the density by det(Sigma)^(1/4) and bias any
    comparison between classes that have different covariance matrices.

    Parameters:
    - x: Array of shape (m, k); each row is a point at which to evaluate the PDF.
    - mean: Mean vector of the distribution, shape (k,).
    - covariance_matrix: Covariance matrix of the distribution, shape (k, k).
      A scalar is accepted for k = 1.

    Returns:
    - pdf_values: Array of shape (m,) with the PDF value at each row of x.

    Raises:
    - numpy.linalg.LinAlgError: if covariance_matrix is singular.
    """
    x = np.asarray(x, dtype=float)
    mean = np.atleast_1d(np.asarray(mean, dtype=float))
    # np.cov returns a 0-d array for one-dimensional data; promote it so the
    # determinant / inverse below also work for k = 1.
    covariance_matrix = np.atleast_2d(np.asarray(covariance_matrix, dtype=float))

    k = mean.shape[0]
    det_cov = np.linalg.det(covariance_matrix)
    inv_cov = np.linalg.inv(covariance_matrix)
    prefactor = 1.0 / np.sqrt((2 * np.pi) ** k * det_cov)

    # Squared Mahalanobis distance of every row in a single vectorised pass.
    diff = x - mean
    mahalanobis_sq = np.einsum('ij,jk,ik->i', diff, inv_cov, diff)

    return prefactor * np.exp(-0.5 * mahalanobis_sq)


def Bayes2b(X_train, Y_train, X_test):
    """ Give Bayes classifier prediction for test instances 
    using assumption 2b (X|Y=a ~ Normal(mu_a, Sigma), Sigma shared).

    Maximum-likelihood estimates are used throughout:
      * prior of class a : n_a / n
      * mu_a             : sample mean of class a
      * Sigma            : pooled within-class covariance,
                           (1/n) * sum_i (x_i - mu_{y_i}) (x_i - mu_{y_i})^T
    For every test point the class minimising the posterior expected loss
    under the 4x4 loss matrix is returned.

    Scores are evaluated in log space and shifted by their per-row maximum
    before exponentiating, so far-away test points do not turn into 0/0 = NaN
    posteriors. The Gaussian normalising constant is the same for every
    class here and is therefore dropped.

    Arguments:
    X_train: numpy array of shape (n,d)
    Y_train: {1,2,3,4} numpy array of shape (n,)
    X_test : numpy array of shape (m,d)

    Returns:
    Y_test_pred : {1,2,3,4} numpy array of shape (m,) (float dtype)

    Raises:
    ValueError: if the training set is empty.
    numpy.linalg.LinAlgError: if the pooled covariance is singular.
    """
    # L[i, j] is the loss for predicting class i+1 when the truth is class j+1.
    L = np.array([[0,1,2,3],[1,0,1,2],[2,1,0,1],[3,2,1,0]])

    X_train = np.asarray(X_train, dtype=float)
    Y_train = np.asarray(Y_train)
    X_test = np.asarray(X_test, dtype=float)

    n_samples, n_features = X_train.shape
    if n_samples == 0:
        raise ValueError("Bayes2b needs at least one training sample")
    n_classes = L.shape[0]

    means = np.zeros((n_classes, n_features))
    # Classes without training samples keep log-prior -inf (zero posterior).
    log_prior = np.full(n_classes, -np.inf)
    scatter = np.zeros((n_features, n_features))
    for k in range(n_classes):
        members = X_train[Y_train == k + 1]
        if len(members) == 0:
            continue
        means[k] = members.mean(axis=0)
        log_prior[k] = np.log(len(members) / n_samples)
        centered = members - means[k]
        # Summing raw scatter matrices weights each class by its sample
        # count, which is what the ML estimate of a shared Sigma requires.
        scatter += centered.T @ centered
    precision = np.linalg.inv(scatter / n_samples)

    # diff[m, k, :] = X_test[m] - means[k]
    diff = X_test[:, None, :] - means[None, :, :]
    mahalanobis_sq = np.einsum('mkd,de,mke->mk', diff, precision, diff)
    log_joint = log_prior - 0.5 * mahalanobis_sq

    # Positive per-row rescaling: leaves the argmin of the risk unchanged
    # while guaranteeing the largest score is exp(0) = 1.
    log_joint -= log_joint.max(axis=1, keepdims=True)
    posterior = np.exp(log_joint)

    # risk[m, i] = sum_j L[i, j] * posterior[m, j]
    risk = posterior @ L.T

    # + 1.0 maps indices to labels 1..4 and keeps the float dtype callers see.
    return np.argmin(risk, axis=1) + 1.0
    
    

def Bayes2c(X_train, Y_train, X_test):
    """ Give Bayes classifier prediction for test instances 
    using assumption 2c (X|Y=a ~ Normal(mu_a, Sigma_a), one Sigma per class).

    Maximum-likelihood estimates are used throughout:
      * prior of class a : n_a / n
      * mu_a             : sample mean of class a
      * Sigma_a          : biased (1/n_a) sample covariance of class a
    For every test point the class minimising the posterior expected loss
    under the 4x4 loss matrix is returned.

    Because the covariances differ between classes, the -0.5*log det(Sigma_a)
    term of the Gaussian log-density is kept; only the (2*pi)^(-d/2) factor,
    which is common to all classes, is dropped. Scores are evaluated in log
    space and shifted by their per-row maximum before exponentiating, so
    far-away test points do not turn into 0/0 = NaN posteriors.

    Arguments:
    X_train: numpy array of shape (n,d)
    Y_train: {1,2,3,4} numpy array of shape (n,)
    X_test : numpy array of shape (m,d)

    Returns:
    Y_test_pred : {1,2,3,4} numpy array of shape (m,) (float dtype)

    Raises:
    ValueError: if the training set is empty.
    numpy.linalg.LinAlgError: if a class covariance is singular.
    """
    # L[i, j] is the loss for predicting class i+1 when the truth is class j+1.
    L = np.array([[0,1,2,3],[1,0,1,2],[2,1,0,1],[3,2,1,0]])

    X_train = np.asarray(X_train, dtype=float)
    Y_train = np.asarray(Y_train)
    X_test = np.asarray(X_test, dtype=float)

    n_samples = len(Y_train)
    if n_samples == 0:
        raise ValueError("Bayes2c needs at least one training sample")
    n_classes = L.shape[0]

    # Classes without training samples keep -inf, i.e. zero posterior mass.
    log_joint = np.full((X_test.shape[0], n_classes), -np.inf)
    for k in range(n_classes):
        members = X_train[Y_train == k + 1]
        if len(members) == 0:
            continue
        mu = members.mean(axis=0)
        centered = members - mu
        sigma = centered.T @ centered / len(members)

        # slogdet avoids overflow/underflow of the determinant itself.
        _, log_det = np.linalg.slogdet(sigma)
        precision = np.linalg.inv(sigma)

        diff = X_test - mu
        mahalanobis_sq = np.einsum('ij,jk,ik->i', diff, precision, diff)
        log_joint[:, k] = (np.log(len(members) / n_samples)
                           - 0.5 * log_det
                           - 0.5 * mahalanobis_sq)

    # Positive per-row rescaling: leaves the argmin of the risk unchanged
    # while guaranteeing the largest score is exp(0) = 1.
    log_joint -= log_joint.max(axis=1, keepdims=True)
    posterior = np.exp(log_joint)

    # risk[m, i] = sum_j L[i, j] * posterior[m, j]
    risk = posterior @ L.T

    # + 1.0 maps indices to labels 1..4 and keeps the float dtype callers see.
    return np.argmin(risk, axis=1) + 1.0
    
    


    


**Cell type : TextRead**

# Problem 2

2d) Run the above three algorithms (Bayes2a,2b and 2c), for the two datasets given (dataset2_1.npz, dataset2_2.npz) in the cell below.

In the next CodeWrite cell, plot all the classifiers (3 classification algorithms on 2 datasets = 6 plots) as 2D plots, colouring the four regions classified as 1, 2, 3 and 4 differently. Also add the training data points to each plot. Organise the plots as follows: one figure per dataset, with three subplots in each for the three classifiers. Label all 6 plots appropriately.

In the next TextWrite cell, summarise your observations regarding the six learnt classifiers. Give the *expected loss* (using the loss matrix given in the problem) of the three classifiers on the two datasets (use X_test and Y_test) as a 2x3 table with appropriately named rows and columns. Also give the 4x4 confusion matrix of the final classifier for all three algorithms and both datasets.


In [None]:
# Cell type : CodeWrite
# write the code for loading the data, running the three algos, and plotting here. 
# (Use the functions written previously.)

# Driver for Problem 2d: load both datasets, run the three Bayes classifiers,
# report expected loss and confusion matrices on the test split, and plot the
# decision regions together with the training points.

# L[i, j] is the loss for predicting class i+1 when the truth is class j+1.
L = np.array([[0,1,2,3],[1,0,1,2],[2,1,0,1],[3,2,1,0]])

_CLASS_LABELS = (1, 2, 3, 4)
_CLASSIFIERS = (('Bayes2a', Bayes2a), ('Bayes2b', Bayes2b), ('Bayes2c', Bayes2c))
_REGION_COLORS = ['#ff796c', '#5ca904', '#fdaa48', '#75bbfd']
_POINT_COLORS = ['r', '#f8481c', 'g', 'b']
# Explicit level boundaries pin region colour k to predicted class k even
# when one of the classes never appears on the grid.
_REGION_LEVELS = [0.5, 1.5, 2.5, 3.5, 4.5]
_NPZ_KEYS = ('arr_0', 'arr_1', 'arr_2', 'arr_3')


def _expected_loss(Y_true, Y_pred, loss_matrix):
    """Return the mean loss of predictions Y_pred against labels Y_true."""
    pred_idx = np.asarray(Y_pred).astype(int) - 1
    true_idx = np.asarray(Y_true).astype(int) - 1
    return loss_matrix[pred_idx, true_idx].mean()


def _confusion_matrix(Y_true, Y_pred, n_classes=4):
    """Return the n_classes x n_classes count matrix (rows: truth, cols: prediction)."""
    counts = np.zeros((n_classes, n_classes), dtype=int)
    true_idx = np.asarray(Y_true).astype(int) - 1
    pred_idx = np.asarray(Y_pred).astype(int) - 1
    # np.add.at accumulates correctly when the same (row, col) pair repeats.
    np.add.at(counts, (true_idx, pred_idx), 1)
    return counts


def _report(dataset_name, Y_true, predictions):
    """Print confusion matrices and return {classifier name: expected loss}."""
    losses = {}
    for name, Y_pred in predictions:
        losses[name] = _expected_loss(Y_true, Y_pred, L)
        print('{} on {}: confusion matrix (rows = truth, columns = prediction)'
              .format(name, dataset_name))
        print(_confusion_matrix(Y_true, Y_pred))
    return losses


def _plot_decision_regions(X_train, Y_train, dataset_name, margin=0.5, step=0.1):
    """Draw one figure with the decision regions of the three classifiers."""
    lower = X_train.min(axis=0) - margin
    upper = X_train.max(axis=0) + margin
    grid_x, grid_y = np.meshgrid(np.arange(lower[0], upper[0], step),
                                 np.arange(lower[1], upper[1], step))
    grid_points = np.column_stack([grid_x.ravel(), grid_y.ravel()])

    # plt.subplots creates its own figure; no separate plt.figure() call,
    # which would only leave an empty figure behind.
    fig, axes = plt.subplots(1, 3, sharex=False, sharey=True, figsize=(15, 5))
    for ax, (name, classify) in zip(axes, _CLASSIFIERS):
        regions = classify(X_train, Y_train, grid_points).reshape(grid_x.shape)
        ax.contourf(grid_x, grid_y, regions,
                    levels=_REGION_LEVELS, colors=_REGION_COLORS)
        for label, color in zip(_CLASS_LABELS, _POINT_COLORS):
            members = X_train[Y_train == label]
            ax.scatter(members[:, 0], members[:, 1], s=2, c=color,
                       label='Class {}'.format(label))
        ax.set_title('{} classifier on {}'.format(name, dataset_name))
        ax.legend()
        ax.set_xlabel(r'$x_{1}\rightarrow$', size=15)
        ax.set_ylabel(r'$x_{2}\rightarrow$', size=15)
    plt.show()


# ---- dataset2_1 ----
with np.load('dataset2_1.npz') as data2_1:
    X_train, Y_train, X_test, Y_test = [data2_1[key] for key in _NPZ_KEYS]
Y_pred_21a = Bayes2a(X_train,Y_train,X_test)
Y_pred_21b = Bayes2b(X_train,Y_train,X_test)
Y_pred_21c = Bayes2c(X_train,Y_train,X_test)
loss_21 = _report('dataset2_1', Y_test, [('Bayes2a', Y_pred_21a),
                                         ('Bayes2b', Y_pred_21b),
                                         ('Bayes2c', Y_pred_21c)])
_plot_decision_regions(X_train, Y_train, 'dataset2_1')

# ---- dataset2_2 ----
with np.load('dataset2_2.npz') as data2_2:
    X_train, Y_train, X_test, Y_test = [data2_2[key] for key in _NPZ_KEYS]
Y_pred_22a = Bayes2a(X_train,Y_train,X_test)
Y_pred_22b = Bayes2b(X_train,Y_train,X_test)
Y_pred_22c = Bayes2c(X_train,Y_train,X_test)
loss_22 = _report('dataset2_2', Y_test, [('Bayes2a', Y_pred_22a),
                                         ('Bayes2b', Y_pred_22b),
                                         ('Bayes2c', Y_pred_22c)])
_plot_decision_regions(X_train, Y_train, 'dataset2_2')

# ---- 2x3 expected-loss table (rows: datasets, columns: classifiers) ----
print('{:<12}{:>10}{:>10}{:>10}'.format('', 'Bayes2a', 'Bayes2b', 'Bayes2c'))
for dataset_name, losses in (('dataset2_1', loss_21), ('dataset2_2', loss_22)):
    print('{:<12}{:>10.4f}{:>10.4f}{:>10.4f}'.format(
        dataset_name, losses['Bayes2a'], losses['Bayes2b'], losses['Bayes2c']))


