In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

In [2]:
x_train = np.loadtxt('x_train.gz')
x_test = np.loadtxt('x_test.gz')
y_train = np.loadtxt('y_train.gz')
y_test = np.loadtxt('y_test.gz')

In [3]:
pca = PCA(n_components=350)

pca.fit(x_train)
x_train = pca.transform(x_train)
x_test = pca.transform(x_test)

In [4]:
# Class for MAP model
class MAP:  
    # Fit Function for the MAP Model
    def fit(self,x_train,y_train):
        # initialize lists to store category means, Covarience matrix and Inverse Covarience matrix 
        cat_specimen = []
        cat_covarience = []
        cat_inv_covarience = []
        cat_prob = []

        # Store the possible Category Results as model attributes 
        self.categories = np.unique(y_train)

        # For each category calculate and store specimen, covarience and inverse covarience matrix
        for cat in np.unique(y_train).tolist():
            # Filter the samples for each category
            idx = (y_train==cat)
            cat_samples = x_train[idx]

            # Calculate mean of the samples for features
            cat_mean = cat_samples.mean(axis=0)

            # Calculate the Cavarience Matrix for the category
            cat_cov = np.cov(cat_samples.T)

            # Calculate the inverse covarience matrix by matrix multiplication
            cat_inv_cov = np.linalg.inv(cat_cov)

            # Store the matrixs in corresponding lists
            cat_covarience.append(cat_cov)
            cat_inv_covarience.append(cat_inv_cov)
            cat_specimen.append(cat_samples.mean(axis=0))
            cat_prob.append(len(cat_samples)/len(x_train))

        # Store means, covarience matrix and inverse covariance matrix as class attributes
        self.cat_means = np.array(cat_specimen)
        self.cat_cov = np.array(cat_covarience)
        self.cat_inv_cov = np.array(cat_inv_covarience)
        self.cat_prior_prob = np.array(cat_prob)
    # Distance function to help predict category of test vector
    def dist(self,cat,image):
        const = np.linalg.det(self.cat_cov[cat]*((2*np.pi)**len(self.cat_means[cat])))**-0.5
        variable = np.exp(np.matmul(np.matmul(np.transpose(np.subtract(image,self.cat_means[cat])),self.cat_inv_cov[cat]),np.subtract(image,self.cat_means[cat]))*-0.5)
        return const*variable*(self.cat_prior_prob[cat])
    # Predict Function for MAP model
    def predict(self, x_test):
        # Initialize a list for prediction results
        predictions = []

        # For each image calcute the distances from category specimens and make prediction using them
        for image in x_test:
            distances = [self.dist(i,image) for i in range(self.cat_means.shape[0])]
            predictions.append(self.categories[np.argmax(distances)])
        
        # store the predictions as class attribute and return the predictions
        self.predictions = predictions
        return predictions
    # Fuction to find the incorrectly categorized images (a replacement of confusion matrix for internal testing)
    def error_vals(self,y_test):
        # make a list of incorrectly identified test points and return them
        errors = [(x_test[i],self.predictions[i],y_test[i]) for i in range(len(y_test)) if self.predictions[i] != y_test[i]]
        return ([('prediction','y_val')] + errors)
    # Function for plotting Decision Boundary in 2 Dimensions
    def plot(self,x_train,y_train):
        # Calculate the min and max value for each dimension
        x_min, x_max = x_train[:, 0].min() - 100, x_train[:, 0].max() + 100
        y_min, y_max = x_train[:, 1].min() - 100, x_train[:, 1].max() + 100

        # Create a meshgrid using min and max values 
        # with intervals optimized for performance and fineness of boundary
        xx, yy = np.meshgrid(np.arange(x_min, x_max, 10),np.arange(y_min, y_max, 10))

        # Predictions to obtain the classification results
        Z = np.array(self.predict(np.c_[xx.ravel(), yy.ravel()])).reshape(xx.shape)

        # colour parameter labeling 0 as red and 1 as blue
        col = ListedColormap(['red', 'blue'])

        # Plotting of the boundary
        plt.contourf(xx, yy, Z,cmap =col, alpha=0.3)
        scatter = plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap=col, alpha=0.6, s=1)
        plt.legend(handles = scatter.legend_elements()[0], labels = ['Class 0', 'Class 1'])
        plt.xlabel("Feature-1")
        plt.ylabel("Feature-2")

        # return the plt function to ease making customizations before plotting
        return plt

In [5]:
# Function to calculate the Prediction error using predictions and expected classification
def prediction_accu(prediction, y_test):
    if len(prediction) != len(y_test):
        print("you are trying to get prediction of lists of unequal size")
        return 0
    errors = sum([0 if prediction[i] != y_test[i] else 1 for i in range(len(y_test))])
    return (errors/len(y_test))*100

In [6]:
# Function to calculate the confusion matrix using expected and predicted results
def confusion_matrix(y_pred,y_test):
    cm = np.empty([np.unique(y_test).shape[0],np.unique(y_test).shape[0]], dtype=int)
    for cat in np.unique(y_test):
        idx = (y_test==np.unique(y_test)[cat])
        pred_event = np.array(y_pred)[idx]

        cm[cat,1] = sum(pred_event)
        cm[cat,0] = pred_event.shape[0] - cm[cat,1]
    return cm

In [7]:
MAP_model = MAP()
MAP_model.fit(x_train,y_train)
pred_MAP = MAP_model.predict(x_test)
MAP_accu = prediction_accu(pred_MAP,y_test)
MAP_accu

  r = _umath_linalg.det(a, signature=signature)


32.87671232876712