In [1]:
#importing relevant libraries
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

In [2]:
#Train images and labels




#loading training data
trainingimages = pd.read_csv('trainingimages', sep = ' ', header = None, on_bad_lines='skip', engine='python')

#dropping the last column to make the figure 28x28 as mentioned in the description
trainingimages = trainingimages.iloc[:,:-1]

#converting the data frame into 0 and 1 
#  +,# = 1 else 0
arr =  np.where(trainingimages.isnull(),0,1)

#coverting array into dataframe again
trainingimages = pd.DataFrame(arr)

#separating each image from the dataframe on the basisi of its size (28x28)
train = []
for i in range(0, len(trainingimages), 28):
    train.append(np.array(trainingimages.iloc[i:i+28,:]).reshape(28*28))
    
#making a dataframe for the train data
train = pd.DataFrame(train)
    
    
#training labels
traininglabels =  pd.read_csv('traininglabels', header = None)
traininglabels.columns = ['label']

In [3]:
#Validation images and labels




#loading validation data
validationimages = pd.read_csv('validationimages', sep = ' ', header = None, on_bad_lines='skip', engine='python')

#dropping the last column to make the figure 28x28 as mentioned in the description
validationimages = validationimages.iloc[:,:-1]

#converting the data frame into 0 and 1 
#  +,# = 1 else 0
arr =  np.where(validationimages.isnull(),0,1)

#coverting array into dataframe again
validationimages = pd.DataFrame(arr)

#separating each image from the dataframe on the basisi of its size (28x28)
valid = []
for i in range(0, len(validationimages), 28):
    valid.append(np.array(validationimages.iloc[i:i+28,:]).reshape(28*28))
    
#making a dataframe for the train data
valid = pd.DataFrame(valid)
    
    
#validation labels
validationlabels =  pd.read_csv('validationlabels', header = None)
validationlabels.columns = ['label']

In [4]:
#test images and labels




#loading test data
testimages = pd.read_csv('testimages', sep = ' ', header = None, on_bad_lines='skip', engine='python')

#dropping the last column to make the figure 28x28 as mentioned in the description
testimages = testimages.iloc[:,:-1]

#converting the data frame into 0 and 1 
#  +,# = 1 else 0
arr =  np.where(testimages.isnull(),0,1)

#coverting array into dataframe again
testimages = pd.DataFrame(arr)

#separating each image from the dataframe on the basisi of its size (28x28)
test = []
for i in range(0, len(testimages), 28):
    test.append(np.array(testimages.iloc[i:i+28,:]).reshape(28*28))
    
#making a dataframe for the train data
test = pd.DataFrame(test)
    
    
#validation labels
testlabels =  pd.read_csv('testlabels', header = None)
testlabels.columns = ['label']

# Naive Bayes

## Prior Probabilities Pr(C)

In [5]:
prior_prob = pd.DataFrame(traininglabels.value_counts()/len(traininglabels))

## Conditional Probabilities Pr (fij/C)

In [6]:
_train = train.copy()
_train['label'] = traininglabels

In [7]:
#list to grab a conditional probability table for each class
post = []
#looping through the classes
for i in range(10):
    #appending a CPT for each class
     post.append(_train[_train['label']==i].drop('label', axis = 1))

In [8]:
# function to calculate conditional probabilties with laplace smoothening
def cond_prop_laplace(k):
    # lists to grab conditional proabilities for class 0 and 1
    probability_0 = []
    probability_1 = []
    #looping through all the classes (0-9)
    for i in range(10):
        #counting the '1' in the feature for a particular class
        sum_ = post[i].sum()
        #conditional probability of 1
        probability_1.append(( sum_ + k)/(len(post[i])+k))
        #conditional probability of 0
        probability_0.append(1 - ( sum_ + k)/(len(post[i])+k))
    #returning the conditional pribabilities
    return probability_0, probability_1

## Functon for Predictions

In [9]:
#fucction for making prediction
def prediction(data, prob_0_, prob_1_):
    #list to grab predictions
    pred = []
    #loping through the data
    for i in range(len(data)):
        # grabbing the ith tuple
        a =  data.iloc[i,:]
        #grabbing probability for each class
        probability =[]
        # looping through all the 10 classes (0-9)
        for j in range(10):
            #grabbing conditionsl probablities
            b = prob_0_[j]
            c = prob_1_[j]
            p = 1
            #looping through all the features
            for l in range(28*28):
                #multiplying the conditional probabilities as per the condition
                if a[l] ==0:
                    p = p*b[l]
                else:
                    p = p*c[l]
            # taking log and adding the probabilities as mentioned in the description
            p = np.log(p)+ np.log(prior_prob.loc[j].values)
            #appending probability od each class
            probability.append(p)
        #the class that shows maximum probability for any tuple becomes the prediction for that tuple
        pred.append(np.argmax(probability))
    # returing the predictions
    return pred

### Validation

In [10]:
# k = 1
prob_0, prob_1 = cond_prop_laplace(1)
pred_ = prediction(valid, prob_0, prob_1)
accuracy_score(validationlabels, pred_)

0.764

In [11]:
# k = 2
prob_0, prob_1 = cond_prop_laplace(2)
pred_ = prediction(valid, prob_0, prob_1)
accuracy_score(validationlabels, pred_)

0.759

In [12]:
# k = 3
prob_0, prob_1 = cond_prop_laplace(3)
pred_ = prediction(valid, prob_0, prob_1)
accuracy_score(validationlabels, pred_)

0.749

In [13]:
# k = 4
prob_0, prob_1 = cond_prop_laplace(4)
pred_ = prediction(valid, prob_0, prob_1)
accuracy_score(validationlabels, pred_)

0.744

In [14]:
# k = 5
prob_0, prob_1 = cond_prop_laplace(5)
pred_ = prediction(valid, prob_0, prob_1)
accuracy_score(validationlabels, pred_)

0.741

## Test

In [15]:
# As the best validation accuracy was achived at k = 1
# k = 1
prob_0, prob_1 = cond_prop_laplace(1)
pred_ = prediction(test, prob_0, prob_1)
accuracy_score(testlabels, pred_)

0.718

# Train

In [16]:
# As the best validation accuracy was achived at k = 1
# k = 1
prob_0, prob_1 = cond_prop_laplace(1)
pred_ = prediction(train, prob_0, prob_1)
accuracy_score(traininglabels, pred_)

0.798

Recorded errors are not zero because if the train accuracy becomes 100% then it means that the model has overfit. 

The recorded accuracy for training subset is better than that of the test subset.The reason being, train data is already seen by the model during training. However, test data is unseen data.

# Perceptron

In [17]:
#one hot encoding the labels for perceptron training
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
enc.fit(traininglabels)
traininglabels_enc = enc.transform(traininglabels).toarray()
#converting labels to -1 and 1
traininglabels_enc = np.where(traininglabels_enc==0,-1,1)

In [18]:
#activation function
def activation_func(value):    #sigmoid
    return (1/(1+np.exp(-value)))

In [19]:
# function for perceptron training
def perceptron(in_data,labels, epochs):
    #list to grab weight vector for each class
    w = []
    #looping through all the classes (0-9)
    for i in range(traininglabels_enc.shape[1]):
        #converting data into array
        X=np.array(in_data)
        y=np.array(labels[:,i])
        #initialization of weight vector for each class as zero
        weights=np.zeros(X.shape[1])
        original=weights
        #looping through the epochs
        for epoch in range(epochs):
            #looping through the tuples
            for key in range(X.shape[0]):
                #calling activation function
                a=activation_func(np.matmul(np.transpose(weights),X[key]))
                # 1 or -1 classification for each class
                if a>=0.5:
                    yn=1
                else:
                    yn=-1
                #weights updation
                weights=weights+(yn-y[key])*X[key] 
        #appending weight of each class to the list of weights
        w.append(weights)
    #returning weight vector
    return w

In [20]:
def perceptron_test(in_data,weights):
    #making the data as array
    X=np.array(in_data)
    #list to grab predictions
    y_pred = []
    #looping through the data
    for i in range(len(in_data)):
        #list to grab scores for each tuple
        y_ = []
        #ith tuple
        x = X[i]
        #looping through the classes (0 - 9)
        for j in range(len(weights)):
            #grabbind the weight vector for the said class
            w_ = weights[j]
            # calculating the score
            score = np.abs(activation_func(w_*x)).sum()
            #appending the score
            y_.append(score)
        #appending the class with the maximum score
        y_pred.append(np.argmax(y_))
    #returning the predictions
    return y_pred

## When epochs = 3

In [21]:
list_of_weights = perceptron(train,traininglabels_enc, 3)

# train accuracy
prediction =  perceptron_test(train,list_of_weights)
print(f'Train Accuracy: {accuracy_score(traininglabels, prediction)}')

# test accuracy
prediction =  perceptron_test(test,list_of_weights)
accuracy_score(testlabels, prediction)
print(f'Test Accuracy: {accuracy_score(testlabels, prediction)}')

Train Accuracy: 0.0946
Test Accuracy: 0.09


## When epochs = 1

In [22]:
list_of_weights = perceptron(train,traininglabels_enc, 1)

# train accuracy
prediction =  perceptron_test(train,list_of_weights)
print(f'Train Accuracy: {accuracy_score(traininglabels, prediction)}')

# test accuracy
prediction =  perceptron_test(test,list_of_weights)
accuracy_score(testlabels, prediction)
print(f'Test Accuracy: {accuracy_score(testlabels, prediction)}')

Train Accuracy: 0.0912
Test Accuracy: 0.081


## When epoch = 2

In [23]:
list_of_weights = perceptron(train,traininglabels_enc, 2)

# train accuracy
prediction =  perceptron_test(train,list_of_weights)
print(f'Train Accuracy: {accuracy_score(traininglabels, prediction)}')

# test accuracy
prediction =  perceptron_test(test,list_of_weights)
accuracy_score(testlabels, prediction)
print(f'Test Accuracy: {accuracy_score(testlabels, prediction)}')

Train Accuracy: 0.0938
Test Accuracy: 0.088


## When epoch = 4

In [24]:
list_of_weights = perceptron(train,traininglabels_enc, 4)

# train accuracy
prediction =  perceptron_test(train,list_of_weights)
print(f'Train Accuracy: {accuracy_score(traininglabels, prediction)}')

# test accuracy
prediction =  perceptron_test(test,list_of_weights)
accuracy_score(testlabels, prediction)
print(f'Test Accuracy: {accuracy_score(testlabels, prediction)}')

Train Accuracy: 0.095
Test Accuracy: 0.09


## When epoch = 5

In [25]:
list_of_weights = perceptron(train,traininglabels_enc, 5)

# train accuracy
prediction =  perceptron_test(train,list_of_weights)
print(f'Train Accuracy: {accuracy_score(traininglabels, prediction)}')

# test accuracy
prediction =  perceptron_test(test,list_of_weights)
accuracy_score(testlabels, prediction)
print(f'Test Accuracy: {accuracy_score(testlabels, prediction)}')

Train Accuracy: 0.095
Test Accuracy: 0.09


# Perceptron vs Naive Bayes

**Perceptron** laid the basis of the cutting edge **deep learning models** which are able to model any non linearities in the data with the help of complex activation functions and deep stacked layers. However, perceptron it self is not a very efficient way of solving a multi class classification. Additioally, it may require large number of epochs (computational power) to update the weights. We can see in this example that the perceptron performed poorly. *However, the accuracy, was increasing with the increase of number of epochs yet the results are not acceptable*.

**Naive Bayes** is a conventional and very powerful algorithm that is able to perform exceptional on many business problems. It is extensively used in image classification, natural language processing, etc. *In this example, we can see that Naive Bayes performed well and was able to achieve accetable accuracy on validation and the test dataset.*


However, it must be noted that while solving any **image related problem, information in spatial locations play a vital part. Neither Naive Bayes, nor Perceptron has the capability to keep the information related to spatial locations. CNN is most suited for solving such image related problems**.