In [1]:
import numpy as np

In [2]:
# Assumes the training labels (y) are sorted in ascending order of class.
def refineTrainingClasses(filename):
    """Load the training labels and split them into class-2 and class-4 runs.

    The labels are read from ``filename`` as ``np.uint8``.  The split is done
    by position, not by value: the first ``n2`` labels are returned as the
    class-2 group and the following ``n4`` labels as the class-4 group, where
    ``n2`` and ``n4`` are the number of labels equal to 2 and 4.  This only
    yields the intended groups when the file lists all class-2 labels first,
    immediately followed by all class-4 labels.

    Returns:
        A 4-tuple ``(class2_labels, class4_labels, end_class2, end_class4)``
        where ``end_class2`` is ``n2`` and ``end_class4`` is ``n2 + n4``; the
        two integers are the exclusive end positions of each group.
    """
    labels = np.genfromtxt(filename, np.uint8)

    # Count the occurrences of each class; the counts double as slice bounds.
    (positions_class2,) = np.where(labels == 2)
    (positions_class4,) = np.where(labels == 4)
    end_class2 = len(positions_class2)
    end_class4 = len(positions_class4) + end_class2

    class2_labels = labels[:end_class2]
    class4_labels = labels[end_class2:end_class4]
    return class2_labels, class4_labels, end_class2, end_class4

# Assumes the training images (X) are sorted in ascending order of class.
def refineTrainingImages(filename, last_idx2, last_idx4):
    """Load the training feature rows and split them by row position.

    The features are read from ``filename`` as ``np.uint8``.  Rows
    ``[0, last_idx2)`` are returned as the class-2 group and rows
    ``[last_idx2, last_idx4)`` as the class-4 group, so the rows must be
    stored in the same order as the labels the bounds were derived from.

    Returns:
        A pair ``(class2_rows, class4_rows)`` of array slices.
    """
    features = np.genfromtxt(filename, np.uint8)

    rows_class2 = slice(None, last_idx2)
    rows_class4 = slice(last_idx2, last_idx4)

    # The class-2 selection indexes two axes, so the data must be 2-D.
    return features[rows_class2, :], features[rows_class4]

def getActualClasses(filename):
    """Load the ground-truth class labels stored in ``filename``.

    The file is parsed with ``np.genfromtxt`` and every value is read as
    ``np.uint8``.
    """
    actual_labels = np.genfromtxt(filename, dtype=np.uint8)
    return actual_labels

def calculateProbability(img, classx_0_prob, classx_1_prob, classx_prob):
    """Return the unnormalised Naive Bayes score of one image for one class.

    Args:
        img: iterable of feature values for a single image.
        classx_0_prob: per-feature probabilities used when a feature equals 0.
        classx_1_prob: per-feature probabilities used when a feature equals 1.
        classx_prob: prior probability of the class.

    Returns:
        The product of the selected per-feature probabilities multiplied by
        ``classx_prob``.  Features that are neither 0 nor 1 contribute nothing
        to the product.
    """
    likelihood = 1
    for position, value in enumerate(img):
        # Pick the table that matches the observed feature value.
        if value == 0:
            table = classx_0_prob
        elif value == 1:
            table = classx_1_prob
        else:
            continue
        likelihood *= table[position]
    return likelihood * classx_prob

def getPredictedClasses(filename, actual_classes_shape, prob_list):
    """Predict class 2 or class 4 for every image stored in ``filename``.

    Args:
        filename: text file of image features, read as ``np.uint8``; iterating
            over the loaded array must yield one image per step.
        actual_classes_shape: shape of the returned prediction array (the
            caller passes the shape of the ground-truth labels).
        prob_list: six entries ordered as
            ``[class2_prior, class2_P(0), class2_P(1),
               class4_prior, class4_P(0), class4_P(1)]``.

    Returns:
        A ``np.uint8`` array holding 2 or 4 at the position of each scored
        image; positions that are never scored stay 0.
    """
    imgs_features = np.genfromtxt(filename, np.uint8)
    p_classes = np.zeros((actual_classes_shape), np.uint8)
    for idx, img in enumerate(imgs_features):
        prob2 = calculateProbability(img, prob_list[1], prob_list[2], prob_list[0])
        prob4 = calculateProbability(img, prob_list[4], prob_list[5], prob_list[3])
        # Compare the scores by value rather than by object identity (`is`):
        # class 4 wins only when its score is strictly larger, so ties still
        # resolve to class 2 exactly as max(prob2, prob4) did.
        p_classes[idx] = 4 if prob4 > prob2 else 2
    return p_classes

def calculateAccuracy(actual, predicted):
    """Return the classification accuracy as a percentage.

    Class 2 is treated as the positive class and class 4 as the negative
    class.  A pair is tallied only when both its actual and predicted label
    are 2 or 4; any other pair is left out of the computation.

    Args:
        actual: iterable of ground-truth labels.
        predicted: iterable of predicted labels, paired with ``actual`` via
            ``zip`` (extra items in the longer one are ignored).

    Returns:
        ``(tp + tn) / (tp + tn + fp + fn) * 100`` as a float, or ``0.0`` when
        no pair was tallied (for example on empty input), which previously
        raised ``ZeroDivisionError``.
    """
    positive, negative = (2, 4)
    tp, tn, fp, fn = (0, 0, 0, 0)
    for a, p in zip(actual, predicted):
        if a == positive and p == positive:
            tp += 1
        elif a == positive and p == negative:
            fn += 1
        elif a == negative and p == negative:
            tn += 1
        elif a == negative and p == positive:
            fp += 1

    total = tp + tn + fp + fn
    if total == 0:
        # Nothing to score: avoid dividing by zero.
        return 0.0
    return (tp + tn) / total * 100

In [3]:
# TRAINING
# Split the training labels and the training images into their class-2 and
# class-4 groups; the label split provides the row bounds for the image split.
(trn_c2, trn_c4,
 trn_c2_last_idx, trn_c4_last_idx) = refineTrainingClasses("trainY.txt")
trn_i2, trn_i4 = refineTrainingImages("trainX.txt", trn_c2_last_idx, trn_c4_last_idx)

# CALCULATING PROBABILITIES
# Class priors: share of class-2 labels among the class-2 and class-4 labels.
trn_c2_count = np.count_nonzero(trn_c2 == 2)
trn_c4_count = np.count_nonzero(trn_c4 == 4)
class2_prob = trn_c2_count / (trn_c2_count + trn_c4_count)
class4_prob = 1 - class2_prob

# Per-feature likelihoods: column sum divided by the number of rows in the
# group (for 0/1 features, the fraction of images with that feature set).
# No smoothing is applied, so a column sum of 0 gives a probability of 0.
class2_1_prob = trn_i2.sum(axis=0) / trn_i2.shape[0]
class2_0_prob = 1 - class2_1_prob
class4_1_prob = trn_i4.sum(axis=0) / trn_i4.shape[0]
class4_0_prob = 1 - class4_1_prob

# Order matters: getPredictedClasses reads these entries by index.
prob_list = [
    class2_prob, class2_0_prob, class2_1_prob,
    class4_prob, class4_0_prob, class4_1_prob,
]

# TESTING
actual_classes = getActualClasses("testY.txt")
predicted_classes = getPredictedClasses("testX.txt", actual_classes.shape, prob_list)

# CLASSIFICATION ACCURACY
accuracy = calculateAccuracy(actual_classes, predicted_classes)

In [4]:
# Pair every ground-truth label with its prediction, then report both the
# pairs and the overall accuracy.
class_pairs = list(zip(actual_classes, predicted_classes))
pairs_message = "Actual and Predicted Classes are following respectively:\n{0}.".format(class_pairs)
accuracy_message = "Accuraccy for Actual and Predicted classes is: {0}%.".format(accuracy)
print(pairs_message)
print(accuracy_message)

Actual and Predicted Classes are following respectively:
[(2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4)].
Accuraccy for Actual and Predicted classes is: 100.0%.
