In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

In [2]:
# Load the train/test feature matrices and label vectors from the gzip-compressed
# text files (np.loadtxt decompresses .gz files transparently).
x_train, x_test, y_train, y_test = (
    np.loadtxt(path)
    for path in ('x_train.gz', 'x_test.gz', 'y_train.gz', 'y_test.gz')
)

In [3]:
# Reduce the feature space with PCA before classification.
# The projection is learned from the training split only and then applied to both splits.
N_COMPONENTS = 350  # number of principal components kept
PCA_SEED = 0        # PCA may pick a randomized SVD solver depending on the data shape;
                    # a fixed seed keeps reruns of this cell reproducible

pca = PCA(n_components=N_COMPONENTS, random_state=PCA_SEED)

pca.fit(x_train)
# NOTE: x_train / x_test are overwritten with their projections. Rerun the loading
# cell before rerunning this one, otherwise PCA is applied to already-projected data.
x_train = pca.transform(x_train)
x_test = pca.transform(x_test)

In [4]:
# Class for MED model
class MED:
    """Minimum Euclidean Distance (MED) classifier.

    Every category is represented by a specimen: the mean feature vector of its
    training samples. A sample is assigned to the category whose specimen is the
    nearest one in Euclidean distance.

    Attributes set by the methods:
        categories: sorted unique labels seen by ``fit``.
        cat_specimens: one specimen per category, in ``categories`` order.
        predictions: list of labels produced by the latest ``predict`` call.
        coefficients, constant: boundary parameters set by ``decision_boundary``.
    """

    # Fit Function for the MED Model
    def fit(self, x_train, y_train):
        """Compute one specimen (mean feature vector) per category.

        Args:
            x_train: training samples, one row per sample.
            y_train: label of each training sample, same length as ``x_train``.
        """
        # Accept lists as well as arrays: the boolean masks below need ndarrays
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)

        # Save classification categories as a model attribute
        self.categories = np.unique(y_train)

        # The specimen of a category is the mean of the samples carrying its label
        self.cat_specimens = np.array(
            [x_train[y_train == cat].mean(axis=0) for cat in self.categories]
        )

    # Shared nearest-specimen lookup used by predict, predictone and plot
    def _nearest_categories(self, samples):
        """Return an array holding the label of the nearest specimen for each sample.

        ``samples`` is normally 2-D (one row per sample). A 1-D input is treated as a
        single feature vector, unless the model itself was fitted on 1-D
        (single-feature) data, in which case every scalar is one sample.
        Nothing is stored on the instance.
        """
        specimens = np.asarray(self.cat_specimens)
        samples = np.asarray(samples)

        if samples.size == 0:
            return np.empty(0, dtype=self.categories.dtype)

        if specimens.ndim == 1:
            # Model fitted on single-feature data: specimens are scalars
            specimens = specimens[:, np.newaxis]
            samples = samples.reshape(-1, 1)
        elif samples.ndim == 1:
            samples = samples[np.newaxis, :]

        # One column of distances per category; looping over the (few) categories
        # keeps memory at n_samples x n_features while vectorising over the samples
        distances = np.stack(
            [np.linalg.norm(samples - specimen, axis=1) for specimen in specimens],
            axis=1,
        )

        # np.argmin keeps the first minimum, so ties go to the lowest category
        return self.categories[np.argmin(distances, axis=1)]

    # Predict function for the MED model
    def predict(self, x_test):
        """Classify every row of ``x_test``.

        Returns:
            A list with one predicted label per sample. The same list is stored in
            ``self.predictions`` for later use by ``error_vals``.
        """
        predictions = list(self._nearest_categories(x_test))

        # store the predictions as class attribute and return the predictions
        self.predictions = predictions
        return predictions

    # Prediction function to calculate prediction for one image (for internal test purposes)
    def predictone(self, X):
        """Return the predicted label of the single sample ``X``.

        Does not modify ``self.predictions``.
        """
        return self._nearest_categories(X)[0]

    # Function to find the incorrectly categorized images (a replacement of confusion matrix for internal testing)
    def error_vals(self, y_test):
        """List the misclassified samples of the latest ``predict`` call.

        Args:
            y_test: expected labels, aligned with ``self.predictions``.

        Returns:
            A list starting with the header tuple ``('prediction', 'y_val')`` followed
            by one ``(predicted, expected)`` tuple per misclassified sample.
        """
        errors = [
            (self.predictions[i], y_test[i])
            for i in range(len(y_test))
            if self.predictions[i] != y_test[i]
        ]
        return [('prediction', 'y_val')] + errors

    # Function for plotting Decision Boundary in 2 Dimensions
    def plot(self, x_train, y_train, margin=100, step=10):
        """Draw the decision regions and the training samples in two dimensions.

        The model must have been fitted on exactly two features, because the grid
        points being classified are 2-D.

        Args:
            x_train: samples to scatter; the first two columns are used.
            y_train: label of each sample; label 1 is drawn blue, any other label red.
            margin: padding added around the data range on both axes.
            step: grid spacing; smaller is finer but slower.

        Returns:
            The ``matplotlib.pyplot`` module, so the caller can customise the figure
            before showing it.
        """
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)

        # Calculate the min and max value for each dimension
        x_min, x_max = x_train[:, 0].min() - margin, x_train[:, 0].max() + margin
        y_min, y_max = x_train[:, 1].min() - margin, x_train[:, 1].max() + margin

        # Create a meshgrid using min and max values
        xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))

        # Classify the grid through the helper rather than predict(), so that
        # self.predictions (read by error_vals) is not overwritten by grid labels
        grid_labels = self._nearest_categories(np.c_[xx.ravel(), yy.ravel()])

        # 1 where the predicted label is 1, else 0: the same rule as the scatter colours
        Z = (grid_labels == 1).astype(int).reshape(xx.shape)

        # colour parameter labeling 1 as blue and everything else as red
        col = ['b' if label == 1 else 'r' for label in y_train]

        # Explicit levels pin one colour per region; passing the per-sample colour
        # list here would colour the regions by training-sample order instead
        plt.contourf(xx, yy, Z, levels=[-0.5, 0.5, 1.5], colors=['r', 'b'], alpha=0.3)
        plt.scatter(x_train[:, 0], x_train[:, 1], c=col, alpha=0.6, s=1)
        plt.xlabel("Feature-1")
        plt.ylabel("Feature-2")

        # return the plt function to ease making customizations before plotting
        return plt

    # Function for calculating the parameters of MED Decision Boundary
    def decision_boundary(self):
        """Compute and print the linear boundary between the first two categories.

        With specimens m0 and m1, the points equidistant from both satisfy
        ||x - m0||^2 = ||x - m1||^2, which expands to
        (m0 - m1) . x + (m1 . m1 - m0 . m0) / 2 = 0.

        Sets ``self.coefficients`` (= m0 - m1) and ``self.constant`` and prints the
        equation. Only the first two specimens are used.
        """
        first, second = self.cat_specimens[0], self.cat_specimens[1]

        # Calculate the Coefficients of variables in the Decision Boundary
        self.coefficients = first - second

        # Calculate the Constant; its sign must pair with coefficients = m0 - m1
        self.constant = (np.dot(second, second) - np.dot(first, first)) / 2

        # Print the Decision Boundary
        for i in range(self.cat_specimens.shape[1]):
            print(f"({self.coefficients[i]:+g})x{i+1}", end=' ')
        print(f"{self.constant:+g} = 0")

In [5]:
# Function to calculate the prediction accuracy using predictions and expected classification
def prediction_accu(prediction, y_test):
    """Return the percentage (0-100) of predictions that equal the expected label.

    Args:
        prediction: sequence of predicted labels.
        y_test: sequence of expected labels, same length as ``prediction``.

    Returns:
        The accuracy as a percentage. When the inputs differ in length, or are
        empty, a message is printed and 0 is returned.
    """
    total = len(y_test)
    if len(prediction) != total:
        print("you are trying to get prediction of lists of unequal size")
        return 0
    if total == 0:
        # Accuracy is undefined without samples; avoid the division by zero
        print("you are trying to get prediction accuracy of empty lists")
        return 0
    correct = sum(1 for predicted, expected in zip(prediction, y_test) if predicted == expected)
    return (correct/total)*100

In [6]:
# Function to calculate the confusion matrix using expected and predicted results
def confusion_matrix(y_pred, y_test):
    """Count how often each expected label was predicted as each label.

    Args:
        y_pred: predicted labels (note the argument order: predictions first).
        y_test: expected labels, same length as ``y_pred``.

    Returns:
        An integer array ``cm`` where ``cm[i, j]`` is the number of samples whose
        expected label is the i-th label and whose predicted label is the j-th
        label. Labels are the sorted unique values found in either input.

    Raises:
        ValueError: if the two inputs differ in length.
    """
    expected = np.asarray(y_test).ravel()
    predicted = np.asarray(y_pred).ravel()
    if expected.shape[0] != predicted.shape[0]:
        raise ValueError("y_pred and y_test must have the same length")

    # Labels may be floats or any sortable values, so they are mapped to row and
    # column positions instead of being used as array indices directly
    labels = np.unique(np.concatenate([expected, predicted]))
    rows = np.searchsorted(labels, expected)
    cols = np.searchsorted(labels, predicted)

    # Start from zeros so every cell is defined, whatever the number of labels
    cm = np.zeros((labels.shape[0], labels.shape[0]), dtype=int)
    # np.add.at accumulates repeated (row, col) pairs, unlike cm[rows, cols] += 1
    np.add.at(cm, (rows, cols), 1)
    return cm

In [8]:
# Train the MED classifier on the training split and evaluate it on the test split.
# x_train / x_test are the arrays reassigned by the PCA cell, provided the cells
# were run in order.
med_model = MED()
med_model.fit(x_train,y_train)
pred_med = med_model.predict(x_test)
# Percentage (0-100) of test samples whose predicted label matches y_test.
# NOTE(review): the recorded output (~52.4) would be close to chance if this is a
# balanced two-class problem; confirm the labels and features are as expected.
accu_med = prediction_accu(pred_med,y_test)
# Bare last expression so the notebook displays the value.
accu_med

52.3972602739726