## Import Packages Etc

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.ensemble import BaggingClassifier
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from imblearn.under_sampling import RandomUnderSampler
# import other useful packages

  from numpy.core.umath_tests import inner1d


## Task 0: Load the Yeast Dataset

In [66]:
# Read the yeast CSV, then split its columns at index 103:
# columns 0-102 go into `data`, columns 103 onward go into `functions`
# (presumably features and label indicators respectively — confirm against the CSV header).
dataset = pd.read_csv('yeast.csv')
feature_frame = dataset.iloc[:, :103]
label_frame = dataset.iloc[:, 103:]
data = np.array(feature_frame)
functions = np.array(label_frame)
# Show one row of the second block as a quick sanity check
functions[39]

array([1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0], dtype=int64)

## Task 1: Implement the Binary Relevance Algorithm

In [76]:
# Create a new classifier which is based on the scikit-learn BaseEstimator and ClassifierMixin classes
class BinaryRelevanceClassifier(BaseEstimator, ClassifierMixin):
    """Multi-label classifier that trains one independent binary model per label.

    Every column of the label matrix is learned by its own
    ``BaggingClassifier(n_estimators=10, random_state=0)``. At prediction time
    the per-label predictions are stacked column-wise.

    Parameters
    ----------
    add_noise : bool, default=False
        Stored on the instance; it is not read by ``fit`` or ``predict``.

    Attributes
    ----------
    relevances_ : dict
        Maps each label column index to the model fitted for that column.
    """

    # Constructor for the classifier object
    def __init__(self, add_noise = False):
        self.add_noise = add_noise

    # The fit function to train a classifier
    def fit(self, data, functions):
        """Fit one binary model per column of ``functions``.

        Parameters
        ----------
        data : array-like of shape (n_samples, n_features)
            Training inputs, passed unchanged to every per-label model.
        functions : array-like of shape (n_samples, n_labels) or (n_samples,)
            Label matrix. A 1-D vector is treated as a single label column.

        Returns
        -------
        self : BinaryRelevanceClassifier
            The fitted estimator.
        """
        # Accept DataFrames / lists as well as arrays, and a lone label vector
        functions = np.asarray(functions)
        if functions.ndim == 1:
            functions = functions.reshape(-1, 1)

        # Create a new empty dictionary into which we will store one model per label
        self.relevances_ = dict()

        # Iterate over however many label columns were supplied
        for i in range(functions.shape[1]):
            status = functions[:, i]
            self.relevances_[i] = BaggingClassifier(n_estimators=10, random_state=0).fit(data, status)

        # Return the classifier
        return self

    # The predict function to make a set of predictions for a set of query instances
    def predict(self, X):
        """Predict every label for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Query instances.

        Returns
        -------
        numpy.ndarray of shape (n_samples, n_labels)
            Column ``i`` holds the predictions of the model fitted for label ``i``.
        """
        # Check that fit has been called by confirming the relevances_ dictionary exists
        check_is_fitted(self, ['relevances_'])

        # One prediction vector per fitted label model, in label order
        pos_functions = [
            self.relevances_[i].predict(X) for i in range(len(self.relevances_))
        ]

        # Rows of the stacked array are labels; transpose so rows are samples
        return np.array(pos_functions).T

In [77]:
# Instantiate the binary relevance model with its default setting spelled out
my_model = BinaryRelevanceClassifier(add_noise=False)

In [78]:
# Fit on rows 0-39: columns 0-102 as inputs, columns 103 onward as targets
train_rows = dataset.iloc[:40]
my_model.fit(np.array(train_rows.iloc[:, :103]), np.array(train_rows.iloc[:, 103:]))

BinaryRelevanceClassifier(add_noise=False)

In [79]:
# Predict label vectors for rows 41-48 (row 40 is in neither the fitted nor the queried slice)
query_rows = dataset.iloc[41:49]
my_model.predict(np.array(query_rows.iloc[:, :103]))

array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
       [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
       [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int64)

## Task 2: Implement the Binary Relevance Algorithm with Under-Sampling

In [80]:
class BRUnderSample(BaseEstimator, ClassifierMixin):
    """Binary relevance classifier with optional per-label random under-sampling.

    One ``BaggingClassifier(n_estimators=10, random_state=0)`` is trained per
    label column. When under-sampling is enabled, each label's training set is
    first resampled with ``RandomUnderSampler(random_state=0)``.

    Parameters
    ----------
    under_sampling : str, default='undersampling'
        ``'undersampling'`` resamples the training data for every label before
        fitting; any other value trains every label model on the data as given.

    Attributes
    ----------
    relevances_ : dict
        Maps each label column index to the model fitted for that column.
    """

    # Constructor for the classifier object
    def __init__(self, under_sampling='undersampling'):
        self.under_sampling = under_sampling

    # The fit function to train a classifier
    def fit(self, data, functions):
        """Fit one binary model per column of ``functions``.

        Parameters
        ----------
        data : array-like of shape (n_samples, n_features)
            Training inputs.
        functions : array-like of shape (n_samples, n_labels) or (n_samples,)
            Label matrix. A 1-D vector is treated as a single label column.

        Returns
        -------
        self : BRUnderSample
            The fitted estimator.
        """
        # Accept DataFrames / lists as well as arrays, and a lone label vector
        functions = np.asarray(functions)
        if functions.ndim == 1:
            functions = functions.reshape(-1, 1)

        # Create a new empty dictionary into which we will store one model per label
        self.relevances_ = dict()

        # Only build the sampler when the option asks for it
        use_sampler = self.under_sampling == 'undersampling'
        if use_sampler:
            rus = RandomUnderSampler(random_state=0)
            # Newer imbalanced-learn releases expose fit_resample; older ones only
            # have fit_sample. Prefer the new name and fall back to the old one.
            resample = getattr(rus, 'fit_resample', None) or rus.fit_sample

        # Iterate over however many label columns were supplied
        for i in range(functions.shape[1]):
            status = functions[:, i]

            # Each label gets its own (possibly resampled) training set. Without
            # under-sampling the original data is used as-is; previously this path
            # referenced an unassigned variable and raised NameError.
            if use_sampler:
                train_data, train_status = resample(data, status)
            else:
                train_data, train_status = data, status

            self.relevances_[i] = BaggingClassifier(n_estimators=10, random_state=0).fit(train_data, train_status)

        # Return the classifier
        return self

    # The predict function to make a set of predictions for a set of query instances
    def predict(self, X):
        """Predict every label for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Query instances.

        Returns
        -------
        numpy.ndarray of shape (n_samples, n_labels)
            Column ``i`` holds the predictions of the model fitted for label ``i``.
        """
        # Check that fit has been called by confirming the relevances_ dictionary exists
        check_is_fitted(self, ['relevances_'])

        # One prediction vector per fitted label model, in label order
        pos_functions = [
            self.relevances_[i].predict(X) for i in range(len(self.relevances_))
        ]

        # Rows of the stacked array are labels; transpose so rows are samples
        return np.array(pos_functions).T

In [81]:
# Fit the under-sampling variant on rows 0-999, then predict rows 1001-1008
# (row 1000 is in neither slice)
my_model = BRUnderSample(under_sampling='undersampling')
fit_rows = dataset.iloc[:1000]
my_model.fit(np.array(fit_rows.iloc[:, :103]), np.array(fit_rows.iloc[:, 103:]))
eval_rows = dataset.iloc[1001:1009]
my_model.predict(np.array(eval_rows.iloc[:, :103]))

array([[1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1],
       [0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1],
       [1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0],
       [1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0],
       [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0],
       [0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0]], dtype=int64)

## Task 3: Compare the Performance of Different Binary Relevance Approaches

In [None]:
# Write your code here


## Task 4: Implement the Classifier Chains Algorithm

In [None]:
# Write your code here
class ClassChainsClassifier(BaseEstimator, ClassifierMixin):
    """Classifier chain: one binary model per label, each fed the earlier labels.

    The model for label ``i`` is a
    ``BaggingClassifier(n_estimators=10, random_state=0)`` trained on the input
    features followed by the true values of labels ``0 .. i-1``. At prediction
    time the true labels are unknown, so each model receives the features
    followed by the predictions of the models before it in the chain.

    Parameters
    ----------
    add_noise : bool, default=False
        Stored on the instance; it is not read by ``fit`` or ``predict``.

    Attributes
    ----------
    relevances_ : dict
        Maps each label column index to the chain model fitted for that column.
    n_features_in_ : int
        Number of input feature columns seen during ``fit``.
    """

    # Constructor for the classifier object
    def __init__(self, add_noise = False):
        self.add_noise = add_noise

    # The fit function to train a classifier
    def fit(self, data, functions):
        """Fit the chain of per-label models.

        Parameters
        ----------
        data : array-like of shape (n_samples, n_features)
            Training inputs.
        functions : array-like of shape (n_samples, n_labels) or (n_samples,)
            Label matrix. A 1-D vector is treated as a single label column.

        Returns
        -------
        self : ClassChainsClassifier
            The fitted estimator.
        """
        data = np.asarray(data)
        functions = np.asarray(functions)
        if functions.ndim == 1:
            functions = functions.reshape(-1, 1)

        # Remember the raw feature width so predict can rebuild the same chain inputs
        self.n_features_in_ = data.shape[1]

        # Create a new empty dictionary into which we will store one model per label
        self.relevances_ = dict()

        # Walk the labels in order, widening the inputs by one column per link
        for i in range(functions.shape[1]):
            status = functions[:, i]
            self.relevances_[i] = BaggingClassifier(n_estimators=10, random_state=0).fit(data, status)
            # The next link also sees this label's true values as an extra feature
            data = np.column_stack((data, status))

        # Return the classifier
        return self

    # The predict function to make a set of predictions for a set of query instances
    def predict(self, X):
        """Predict every label for each row of ``X`` by running the chain.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_columns)
            Query instances. Only the first ``n_features_in_`` columns are used;
            any further columns are ignored, so a matrix that still carries
            label columns after the features can be passed without leaking them.

        Returns
        -------
        numpy.ndarray of shape (n_samples, n_labels)
            Column ``i`` holds the predictions of chain link ``i``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or has fewer columns than were seen in ``fit``.
        """
        # Check that fit has been called by confirming the relevances_ dictionary exists
        check_is_fitted(self, ['relevances_'])

        X = np.asarray(X)
        if X.ndim != 2 or X.shape[1] < self.n_features_in_:
            raise ValueError(
                "X must be 2-D with at least %d feature columns, got shape %s"
                % (self.n_features_in_, X.shape)
            )

        # Start from the raw features only; predicted labels are appended below
        chain_input = X[:, :self.n_features_in_]

        # Initialise an empty list to store the predictions made
        pos_functions = list()

        # Each link predicts its label, and that prediction feeds the next link
        for i in range(len(self.relevances_)):
            label_pred = self.relevances_[i].predict(chain_input)
            pos_functions.append(label_pred)
            chain_input = np.column_stack((chain_input, label_pred))

        # Rows of the stacked array are labels; transpose so rows are samples
        return np.array(pos_functions).T

## Task 5: Evaluate the Performance of the Classifier Chains Algorithm

In [None]:
# Write your code here


## Task 6: Reflect on the Performance of the Different Models Evaluated

*Write your reflection here (max 300 words)*