In [None]:
import pickle
import random
import numpy as np
import cv2

In [None]:
# Load the class list, then the tagged feature data, from their pickle files.
with open('classes.pkl', 'rb') as classes_file:
    classes = pickle.load(classes_file)
with open('feature_data.pkl', 'rb') as feature_file:
    feature_data = pickle.load(feature_file)

In [None]:
# Constants
BIAS = 1                # Constant dummy feature appended to each input vector (acts as the offset term).
TRAIN_TEST_RATIO = 0.6  # Default fraction of the data used for training; the remainder is used for testing.
ITERATIONS = 20         # Default number of training iterations.
LR = 0.1                # Learning rate.

In [None]:
class MultiClassPerceptron:
    """
    A Multi-Class Perceptron Model object, with functions for loading feature data, training the algorithm,
    and running analytics on model performance.

    Every sample is a ``(category, feature_dict)`` pair whose ``feature_dict['path']`` names an image file.
    An image is turned into a feature vector by resizing it to ``image_size``, averaging the channels of
    each pixel, dividing by 255 and appending the constant BIAS feature.

    :param  classes           List of categories/classes (match tags in tagged data).
    :param  feature_data      Sequence of (category, feature_dict) pairs. It is copied before shuffling,
                              so the caller's sequence is left untouched.
    :param  train_test_ratio  Ratio of data to be used in training vs. testing. Defaults to TRAIN_TEST_RATIO.
    :param  iterations        Number of iterations to run training data through. Defaults to ITERATIONS.
    :param  learning_rate     Step size applied to every weight correction. Defaults to LR.
    :param  image_size        (width, height) every image is resized to before use. Defaults to (30, 30).
    """

    def __init__(self, classes, feature_data, train_test_ratio=TRAIN_TEST_RATIO, iterations=ITERATIONS,
                 learning_rate=LR, image_size=(30, 30)):
        self.classes = classes
        self.ratio = train_test_ratio
        self.iterations = iterations
        self.learning_rate = learning_rate
        self.image_size = tuple(image_size)

        # Analytics values. Kept per instance: class-level dicts would be shared by every model.
        self.precision, self.recall, self.accuracy = {}, {}, 0

        # Shuffle a private copy so the caller's list is not reordered as a side effect.
        self.feature_data = list(feature_data)
        random.shuffle(self.feature_data)

        # Split feature data into train set, and test set
        split = int(len(self.feature_data) * self.ratio)
        self.train_set = self.feature_data[:split]
        self.test_set = self.feature_data[split:]

        # Initialize empty weight vectors, with extra BIAS term (one weight per pixel plus one).
        n_features = self.image_size[0] * self.image_size[1] + 1
        self.weight_vectors = {c: np.zeros(n_features) for c in self.classes}

    def _load_feature_vector(self, feature_dict):
        """
        Read the image named by ``feature_dict['path']`` and convert it to a feature vector.

        :param  feature_dict        Dictionary holding the image location under the 'path' key.
        :return                     1-D float array: one channel-mean per pixel (row-major order),
                                    divided by 255, followed by BIAS.
        :raises OSError             If OpenCV could not read the image.
        """
        path = feature_dict['path']
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise OSError("Could not read image: %r" % (path,))

        # The interpolation flag must be passed by keyword: the third positional argument of
        # cv2.resize is `dst`, so a positional flag never selects the interpolation mode.
        resized = cv2.resize(img, self.image_size, interpolation=cv2.INTER_AREA)

        # One row per pixel, averaged over its channels.
        pixels = resized.reshape(resized.shape[0] * resized.shape[1], -1).mean(axis=1) / 255
        return np.append(pixels, BIAS)

    def _classify(self, vector):
        """
        Apply the Multi-Class Decision Rule: return the class whose weight vector gives the highest
        activation for ``vector``. Ties go to the class that appears later in ``self.classes``.
        """
        # Start from -inf so the best class still wins when every activation is negative.
        best_score, best_class = float('-inf'), self.classes[0]
        for c in self.classes:
            score = np.dot(vector, self.weight_vectors[c])
            if score >= best_score:
                best_score, best_class = score, c
        return best_class

    def _evaluate(self):
        """Return a list of (actual_class, predicted_class) pairs, one per sample of the test set."""
        return [(actual, self.predict(feature_dict)) for actual, feature_dict in self.test_set]

    def train(self):
        """
        Train the Multi-Class Perceptron algorithm using the following method:

        During each iteration of training, the data (formatted as a feature vector) is read in, and the dot
        product is taken with each unique weight vector (which are all initially set to 0). The class that
        yields the highest product is the class to which the data belongs. In the case this class is the
        correct value (matches with the actual category to which the data belongs), nothing happens, and the
        next data point is read in. However, in the case that the predicted value is wrong, the weight vectors
        are corrected as follows: The feature vector (scaled by the learning rate) is subtracted from the
        predicted weight vector, and added to the actual (correct) weight vector. This makes sense, as we want
        to reject the wrong answer, and accept the correct one.
        """
        # The feature vectors do not change between iterations, so load every image only once.
        samples = [(category, self._load_feature_vector(feature_dict))
                   for category, feature_dict in self.train_set]

        for _ in range(self.iterations):
            for category, vector in samples:
                predicted_class = self._classify(vector)
                # Update Rule:
                if predicted_class != category:
                    step = self.learning_rate * vector
                    self.weight_vectors[category] += step
                    self.weight_vectors[predicted_class] -= step

    def predict(self, feature_dict):
        """
        Categorize a brand-new, unseen data point based on the existing collected data.

        :param  feature_dict        Dictionary of the same form as the training feature data.
        :return                     Return the predicted category for the data point.
        """
        return self._classify(self._load_feature_vector(feature_dict))

    def test_random(self):
        """Pick one random sample from the test set and print its actual and predicted class."""
        item = random.choice(self.test_set)
        print("Actual class: " + item[0])
        pred_class = self.predict(item[1])
        print("Perdicted class: "+ pred_class)

    def run_analytics(self):
        """
        Runs analytics on the classifier, returning data on precision, recall, accuracy.

        :return: Prints statistics to screen.
        """
        print ("CLASSIFIER ANALYSIS: ")
        print ("")
        self.calculate_precision()
        print ("")
        self.calculate_recall()
        print ("")
        self.calculate_accuracy()

    def calculate_precision(self):
        """
        Calculates the precision of the classifier by running algorithm against test set and comparing
        the output to the actual categorization.

        Precision of a class is the number of correct predictions of that class divided by the number of
        times the class was predicted; a class that was never predicted gets 0.0.

        :return: Dictionary mapping class -> precision (also stored in self.precision and printed).
        """
        results = self._evaluate()

        # Count over the model's classes as well as the labels seen in the test set: the model may
        # predict a class that has no sample in the test set.
        labels = dict.fromkeys(list(self.classes) + [actual for actual, _ in results])
        correct_counts = {c: 0 for c in labels}
        total_counts = {c: 0 for c in labels}
        for actual_class, predicted_class in results:
            total_counts[predicted_class] += 1
            if actual_class == predicted_class:
                correct_counts[actual_class] += 1

        print ("PRECISION STATISTICS:")

        self.precision = {}
        for c in labels:
            if total_counts[c]:
                self.precision[c] = correct_counts[c] / total_counts[c]
            else:
                self.precision[c] = 0.0
            print ("%s Class Precision:" % (c.upper()), self.precision[c])
        return self.precision

    def calculate_recall(self):
        """
        Calculates the recall of the classifier by running algorithm against test set and comparing
        the output to the actual categorization.

        Recall of a class is the number of correct predictions of that class divided by the number of
        test samples that actually belong to it. Only classes present in the test set are reported.

        :return: Dictionary mapping class -> recall (also stored in self.recall and printed).
        """
        correct_counts, total_counts = {}, {}
        for actual_class, predicted_class in self._evaluate():
            total_counts[actual_class] = total_counts.get(actual_class, 0) + 1
            correct_counts[actual_class] = (correct_counts.get(actual_class, 0)
                                            + (1 if actual_class == predicted_class else 0))

        print ("RECALL STATISTICS:")

        self.recall = {}
        for c in total_counts:
            self.recall[c] = correct_counts[c] / total_counts[c]
            print ("%s Class Recall:" % (c.upper()), self.recall[c])
        return self.recall

    def calculate_accuracy(self):
        """
        Calculates the accuracy of the classifier by running algorithm against test set and comparing
        the output to the actual categorization.

        The held-out test set created in the constructor is used as is. Re-shuffling the data here
        would mix training samples into the evaluation and inflate the score.

        :return: Fraction of test samples classified correctly (0.0 when the test set is empty).
                 The value is also stored in self.accuracy and printed.
        """
        results = self._evaluate()

        print ("ACCURACY:")
        if not results:
            self.accuracy = 0.0
            print ("Model Accuracy: undefined (test set is empty)")
            return self.accuracy

        correct = sum(1 for actual_class, predicted_class in results if actual_class == predicted_class)
        self.accuracy = correct / len(results)
        print ("Model Accuracy:", self.accuracy)
        return self.accuracy
        

In [None]:
# Sandbox script demonstrating the pipeline: build the classifier from the loaded data,
# train it, and print its accuracy.
if __name__ == "__main__":
    classifier = MultiClassPerceptron(classes=classes, feature_data=feature_data)
    classifier.train()
    classifier.calculate_accuracy()