# In [11]:
import logging
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from itertools import groupby

class SVC(BaseEstimator, ClassifierMixin):
    """Binary kernel classifier (work in progress).

    ``fit`` currently builds the kernel (Gram) matrix of the training
    samples, prints it and keeps it on the instance. No decision function is
    trained yet and this class does not define ``predict``.

    Args:
        kernel: Callable ``kernel(x, y)`` returning the similarity of two
            samples. Any falsy value (the default ``None``) selects
            ``SVC.linear_kernel``.
    """

    @staticmethod
    def linear_kernel(x, y):
        """Return the linear kernel of two samples, i.e. their dot product.

        Args:
            x: First sample (1-D array-like of feature values).
            y: Second sample, same length as ``x``.

        Returns:
            The inner product of ``x`` and ``y``.
        """
        return np.dot(x, y)

    def __init__(self, kernel=None):
        # Same fallback rule as before: a falsy kernel means "use linear".
        self.kernel = kernel or SVC.linear_kernel

    def fit(self, X, y):
        """Compute the kernel matrix of the training data.

        Args:
            X: Array-like of shape (sample_count, feature_count); one row
                per sample.
            y: Sequence of labels, one per row of ``X``. This is a BINARY
                classifier: every label is converted with ``bool(label)``,
                so Python truthiness decides the class (``None``, ``0``,
                ``False`` and empty values are False, everything else is
                True).

        Returns:
            self, following the scikit-learn estimator convention.

        Raises:
            ValueError: If ``X`` is not 2-dimensional or ``y`` does not hold
                exactly one label per sample.
        """
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (samples, features), "
                "got shape {}".format(X.shape)
            )
        sample_count = X.shape[0]

        # Force the labels to be binary (True / False).
        y = np.array([bool(label) for label in y], dtype=bool)
        if len(y) != sample_count:
            raise ValueError(
                "y has {} labels but X has {} samples".format(
                    len(y), sample_count
                )
            )

        # The kernel matrix holds one entry per PAIR of samples, so it is
        # square in the number of samples (not samples x features).
        K = np.zeros((sample_count, sample_count))
        for j in range(sample_count):
            for k in range(sample_count):
                K[j, k] = self.kernel(X[j], X[k])

        # Keep the results on the instance instead of discarding them.
        self.y_ = y
        self.K_ = K

        # Parenthesised single-argument form runs on Python 2 and Python 3.
        print(K)

        return self


class MulticlassSVC(BaseEstimator, ClassifierMixin):
    """One-vs-rest multiclass classifier built from binary ``SVC`` instances.

    One binary classifier is trained per distinct label; each one decides
    between "is this label" and "is not this label".

    NOTE(review): ``predict`` calls ``predict`` on every per-label
    classifier, so the ``SVC`` class must provide that method - confirm.
    """

    def __init__(self):
        # Maps each label to the binary classifier trained to recognise it.
        self.classifiers = {}

    def fit(self, X, y):
        """Fit one binary classifier per distinct label in ``y``.

        Args:
            X: Array-like of shape (sample_count, feature_count); one row
                per sample.
            y: Sequence of labels, one per row of ``X``.

        Returns:
            self, following the scikit-learn estimator convention.
        """
        X = np.asarray(X)
        labels = MulticlassSVC.labels(y)
        sample_count, feature_count = X.shape

        # Parenthesised single-argument form runs on Python 2 and Python 3.
        print("Fitting {} samples with {} features and {} labels: {}".format(
            sample_count, feature_count, len(labels), labels))

        # Start from a clean slate so a refit never keeps classifiers for
        # labels that are absent from the new data.
        self.classifiers = {}

        # Train one "is label" / "is not label" classifier per label.
        for label in labels:
            # Boolean target vector: True where the sample carries `label`.
            ny = np.array([candidate == label for candidate in y])

            classifier = SVC()
            # Train on the per-label boolean targets, not on the raw labels.
            classifier.fit(X, ny)

            # Saved for voting in the prediction.
            self.classifiers[label] = classifier

        return self

    def predict(self, X):
        """Predict the label for each sample in ``X``.

        Each sample receives the label of the first classifier that claims
        it. A sample no classifier claims receives ``None`` so the result
        always has exactly one entry per sample.

        Args:
            X: Iterable of samples to predict labels for.

        Returns:
            numpy.ndarray with one entry per sample in ``X``.
        """
        y = []

        for sample in X:
            for label, classifier in self.classifiers.items():
                # Each classifier answers for a batch; we send a batch of one.
                [is_label] = classifier.predict([sample])

                if is_label:
                    y.append(label)
                    # Stop at the first classifier that accepts the sample.
                    break
            else:
                # No classifier accepted the sample; keep output aligned.
                y.append(None)

        return np.array(y)

    @staticmethod
    def labels(y):
        """Return the distinct labels in ``y`` in sorted order.

        Args:
            y: Iterable of hashable, mutually comparable labels.

        Returns:
            list of the unique labels, sorted ascending.
        """
        return sorted(set(y))
    
import pandas

# Load the data set; the path is resolved against the working directory.
data = pandas.read_csv("../data/owls.csv")

# Feature matrix: one row per sample, one column per measurement.
feature_columns = ["body-length", "body-width", "wing-length", "wing-width"]
X = data[feature_columns].values

# Target vector: the species label of every sample.
y = data["species"].values

# Build the multiclass classifier and train it on the whole data set.
owl_classifier = MulticlassSVC()
owl_classifier.fit(X, y)