# Machine Learning and Data Mining - Lab 2 - Davide Gallitelli

## Task 1 - Build a majority class classifier

In [3]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels

class MajorityClassifier(BaseEstimator, ClassifierMixin):
    """Baseline classifier that always predicts the most frequent training label.

    The features are validated but otherwise ignored: every sample passed to
    ``predict`` receives the label that occurred most often in the ``y`` seen
    by ``fit``.

    Attributes set by ``fit``
    -------------------------
    classes_ : ndarray
        Sorted distinct labels found in ``y``.
    X_, y_ : ndarray
        The validated training data, stored as-is.
    majVote_ : scalar
        The most frequent label in ``y``. When several labels tie, the one
        that sorts first is chosen.
    """

    def __init__(self):
        # No hyper-parameters. The message is kept so that the notebook's
        # recorded output ("init Classifier") stays reproducible.
        print("init Classifier")

    def fit(self, X, y):
        """Memorise the majority label of ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self
        """
        # Check that X and y have correct, mutually consistent shapes
        X, y = check_X_y(X, y)

        # Count occurrences per distinct label. Unlike np.bincount this also
        # works for string, negative or otherwise non-index labels; for
        # non-negative integers it selects the same class as before because
        # np.unique sorts the labels and argmax returns the first maximum.
        labels, counts = np.unique(y, return_counts=True)

        self.classes_ = labels
        self.X_ = X
        self.y_ = y
        self.majVote_ = labels[counts.argmax()]
        return self

    def predict(self, X):
        """Return the majority label once for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
        """
        # Check that fit has been called
        check_is_fitted(self, ['X_', 'y_', 'majVote_'])

        # Input validation
        X = check_array(X)

        # One copy of the majority label per input row
        return np.full(X.shape[0], self.majVote_)

## Task 2 - Test the majority vote classifier and justify the evaluation result

In [21]:
# Import the necessary classes
import numpy as np
from sklearn import datasets

# Load the iris dataset and pull out the feature matrix and the labels
iris = datasets.load_iris()
iris_X, iris_Y = iris.data, iris.target
# Distinct labels (value is not echoed: it is not the cell's last expression)
np.unique(iris_Y)

# Shuffle the sample indices with a fixed seed so the split is repeatable
np.random.seed(0)
indices = np.random.permutation(len(iris_X))

# The last 10 shuffled samples form the test set, everything else is training
train_idx, test_idx = indices[:-10], indices[-10:]
iris_X_train, iris_Y_train = iris_X[train_idx], iris_Y[train_idx]
iris_X_test, iris_Y_test = iris_X[test_idx], iris_Y[test_idx]

iris_Y_test

array([1, 1, 1, 0, 0, 0, 2, 1, 2, 0])

In [19]:
# Fit the majority-vote baseline on the training split (fit returns the
# estimator itself, so construction and fitting can be chained) ...
mjclass = MajorityClassifier().fit(iris_X_train, iris_Y_train)
# ... and label the held-out samples with it
prediction = mjclass.predict(iris_X_test)
prediction

init Classifier


array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [20]:
# Fraction of held-out samples whose predicted label matches the true label
from sklearn.metrics import accuracy_score

accuracy_score(y_true=iris_Y_test, y_pred=prediction)

0.20000000000000001

## Task 3 - A better classifier

In [7]:
class MyClassifier(BaseEstimator, ClassifierMixin):
    """1-nearest-neighbour classifier (Euclidean distance).

    NOTE(review): the original cell was an unfinished stub -- ``fit`` referred
    to an undefined ``Y`` and ``predict`` to an undefined ``value``, so both
    raised ``NameError``. The intended prediction rule is not visible in the
    notebook; nearest-neighbour was chosen because it uses exactly the state
    ``fit`` already stored (``X_``, ``Y_``). Confirm this matches the task.

    Attributes set by ``fit``
    -------------------------
    X_ : ndarray of shape (n_samples, n_features)
        Validated training features.
    Y_ : ndarray of shape (n_samples,)
        Validated training labels.
    classes_ : ndarray
        Sorted distinct labels found in ``y``.
    """

    def __init__(self):
        # No hyper-parameters to store
        pass

    def fit(self, X, y):
        """Store the training set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self
        """
        # Validate shapes/dtypes the same way MajorityClassifier does
        X, y = check_X_y(X, y)
        self.X_ = X
        self.Y_ = y  # was `Y` (undefined name) in the original
        self.classes_ = unique_labels(y)
        return self

    def predict(self, X):
        """Label each row of ``X`` with the label of its closest training row.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
        """
        check_is_fitted(self, ['X_', 'Y_'])
        X = check_array(X)

        # Pairwise squared Euclidean distances, shape (n_query, n_train).
        # Broadcasting builds an (n_query, n_train, n_features) temporary,
        # which is fine for lab-sized data sets.
        diffs = X[:, np.newaxis, :] - self.X_[np.newaxis, :, :]
        sq_dist = (diffs ** 2).sum(axis=2)

        # Index of the nearest training sample for every query row; on exact
        # distance ties argmin keeps the first training sample.
        nearest = sq_dist.argmin(axis=1)
        return self.Y_[nearest]