In [4]:
import numpy as np
import pickle
import random

# Constants
BIAS = 1                            # Dummy feature appended to every feature vector; its weight acts as the constant term.
TRAIN_TEST_RATIO = .75              # Default fraction of the data used for training (the rest is the test set).
ITERATIONS = 100                    # Default number of passes over the training set.
OUTPUT_PATH = "classifier_models/"  # Directory prefix under which pickled models are saved.


class MultiClassPerceptron():
    """Multi-class perceptron over dictionary-encoded features.

    One weight vector (with an extra trailing BIAS component) is kept per class.
    A sample is assigned to the class whose weight vector has the largest dot
    product with the sample's feature vector.

    Attributes:
        classes: sequence of class labels.
        feature_list: ordered feature names; fixes the layout of feature vectors.
        feature_data: shuffled copy of the data passed to the constructor.
        train_set / test_set: leading / trailing slices of ``feature_data``.
        weight_vectors: dict mapping each class label to its float weight array.
    """

    # Analytics values. These class-level defaults are kept so attribute access
    # still resolves on instances that never set them (e.g. objects unpickled
    # from older saves); __init__ gives every new instance its own containers
    # so results are not shared between classifiers.
    precision, recall, accuracy, fbeta_score = {}, {}, 0, {}

    def __init__(self, classes, feature_list, feature_data, train_test_ratio=TRAIN_TEST_RATIO, iterations=ITERATIONS):
        """Split the data and initialise zero weights.

        Args:
            classes: sequence of class labels.
            feature_list: ordered names of the features to read from each sample.
            feature_data: sequence of ``(category, feature_dict)`` pairs, where
                ``feature_dict`` maps every name in ``feature_list`` to a number.
            train_test_ratio: fraction of the data placed in the training set.
            iterations: number of passes ``train`` makes over the training set.
        """
        self.classes = classes
        self.feature_list = feature_list
        # Shallow copy: the shuffle below must not reorder the caller's sequence.
        self.feature_data = list(feature_data)
        self.ratio = train_test_ratio
        self.iterations = iterations

        # Per-instance analytics containers (shadow the shared class-level ones).
        self.precision, self.recall, self.accuracy, self.fbeta_score = {}, {}, 0, {}

        # Split feature data into train set, and test set
        random.shuffle(self.feature_data)
        split_index = int(len(self.feature_data) * self.ratio)
        self.train_set = self.feature_data[:split_index]
        self.test_set = self.feature_data[split_index:]

        # Initialize empty weight vectors, with extra BIAS term.
        # Float dtype so in-place updates with real-valued features neither
        # fail on casting nor get truncated.
        self.weight_vectors = {c: np.zeros(len(feature_list) + 1) for c in self.classes}

    def _feature_vector(self, feature_dict):
        """Return the sample's values in ``feature_list`` order, followed by BIAS."""
        values = [feature_dict[k] for k in self.feature_list]
        values.append(BIAS)
        return np.array(values)

    def _best_class(self, feature_vector):
        """Return the class with the largest activation for ``feature_vector``.

        The running maximum starts at -inf (not 0) so the true arg-max is found
        even when every activation is negative. ``>=`` keeps the original
        tie-breaking: among equal activations the later class wins.
        """
        best_activation, best_class = float("-inf"), self.classes[0]
        for c in self.classes:
            activation = np.dot(feature_vector, self.weight_vectors[c])
            if activation >= best_activation:
                best_activation, best_class = activation, c
        return best_class

    def train(self):
        """Run ``iterations`` passes of the perceptron update over ``train_set``.

        On each misclassified sample the feature vector is added to the true
        class's weights and subtracted from the predicted class's weights.
        """
        for _ in range(self.iterations):
            for category, feature_dict in self.train_set:
                feature_vector = self._feature_vector(feature_dict)
                predicted_class = self._best_class(feature_vector)

                # Update Rule:
                if category != predicted_class:
                    self.weight_vectors[category] += feature_vector
                    self.weight_vectors[predicted_class] -= feature_vector

    def predict(self, feature_dict):
        """Return the predicted class label for one sample.

        Args:
            feature_dict: mapping containing a value for every name in
                ``feature_list``.

        Raises:
            KeyError: if a feature named in ``feature_list`` is missing.
        """
        return self._best_class(self._feature_vector(feature_dict))

    def save_classifier(self, classifier_name):
        """Pickle this classifier to ``OUTPUT_PATH + classifier_name + ".pik"``."""
        with open(OUTPUT_PATH + classifier_name + ".pik", 'wb') as f:
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)

    @staticmethod
    def load_classifier(classifier_name):
        """Load and return the classifier pickled under ``classifier_name``.

        SECURITY: unpickling can execute arbitrary code; only load files that
        were written by ``save_classifier`` from a trusted source.
        """
        with open(OUTPUT_PATH + classifier_name + ".pik", 'rb') as f:
            return pickle.load(f)

In [None]:
# Read the space-delimited, header-less file and label its four columns.
# NOTE(review): read_csv is not imported in the visible cells -- presumably
# pandas.read_csv; confirm it is imported before this cell runs.
data = read_csv('data0.txt', sep=' ', header=None)
data.columns = ["col1", "col2", "col3", "col4"]
# New frame without the fourth column; `data` itself is left unchanged.
data0 = data.drop(["col4"], axis=1)

In [5]:
# Sandbox driver: build a perceptron for the shape data, train it, then pickle it.
# NOTE(review): shape_classes, shape_feature_list and shape_feature_data are not
# defined in the visible cells -- they must exist before this cell is run.
if __name__ == "__main__":
    shape_classifier = MultiClassPerceptron(
        classes=shape_classes,
        feature_list=shape_feature_list,
        feature_data=shape_feature_data,
    )
    shape_classifier.train()
    shape_classifier.save_classifier(classifier_name="shape_classifier")

NameError: name 'shape_classes' is not defined