To run this properly, mount the Multi-Class-Classification folder to your drive and move into the directory using the below snippet:

In [None]:
# %cd path/to/the/drive/folder/of/MLC 

In [None]:
import os
  
# Working folder for the project inside the mounted Google Drive.
path = os.path.join("drive/MyDrive", "MLC")

# Only create the folder when it is missing so the cell can be re-run safely.
# os.mkdir (not makedirs) is kept on purpose: it still fails loudly when the
# parent "drive/MyDrive" does not exist instead of silently creating it.
if not os.path.isdir(path):
    os.mkdir(path)

In [None]:
# Fetch the project sources; git creates a 'Multi-Class-Classification' folder in the current directory.
!git clone https://github.com/CR1502/Multi-Class-Classification.git

Cloning into 'Multi-Class-Classification'...
remote: Enumerating objects: 135, done.[K
remote: Counting objects: 100% (135/135), done.[K
remote: Compressing objects: 100% (102/102), done.[K
remote: Total 135 (delta 48), reused 117 (delta 30), pack-reused 0[K
Receiving objects: 100% (135/135), 1.01 MiB | 2.58 MiB/s, done.
Resolving deltas: 100% (48/48), done.


In [None]:
# Install before running
! pip install scikit-multilearn

# Previous MLC class
## Changes:

1. fit() method:

  It calculates the number of labels **nLabels** in y and then computes the frequency of each label using **y.mean(axis=0)**. The labels are sorted by frequency, and the top **k** labels are selected and stored in the **labelsAssign** attribute.


2. predict() method:

  It initializes a sparse matrix **prediction** with zeros of the appropriate shape. For each instance in **X**, it iterates over the **labelsAssign** list and checks if the corresponding feature value in X is greater than 0. If it is, the corresponding entry in prediction is set to 1. Finally, it returns the prediction matrix.

3. predict_proba() method:

  It returns the **probability estimates** instead of binary predictions. It initializes a sparse matrix called **probabilities** with zeros. It iterates over the **labelsAssign** list and assigns the corresponding feature value in X to the corresponding entry in probabilities. Finally, it returns the probabilities matrix.

# Do not run this, it is for your reference

In [None]:
# Program that create a classifier chain and perform multilabel classification.

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import check_X_y, check_array
from sklearn.utils.multiclass import unique_labels


class ClassifierChain:
    """Chain of binary classifiers for multilabel classification.

    One copy of ``base_classifier`` is trained per label column. The classifier
    for label ``i`` sees the original features plus the labels ``0 .. i-1``:
    the true labels while fitting, the chain's own predictions while predicting.

    Parameters
    ----------
    base_classifier : estimator
        Unfitted scikit-learn style estimator providing ``fit``, ``predict``
        and (for ``predict_proba``) ``predict_proba`` and ``classes_``.

    Attributes
    ----------
    classifiers : list
        Fitted estimators, one per label, in chain order.
    classes_ : ndarray
        Result of ``unique_labels(y)`` for the label matrix passed to ``fit``;
        its length is used as the number of output columns.
    """

    def __init__(self, base_classifier):
        self.base_classifier = base_classifier
        self.classifiers = []

    @staticmethod
    def _as_dense(a):
        """Return ``a`` as a dense array (objects exposing ``toarray`` are converted)."""
        return a.toarray() if hasattr(a, "toarray") else a

    @staticmethod
    def _positive_proba(classifier, X):
        """Return the probability of class ``1`` for every row of ``X``.

        Falls back to zeros when the classifier never saw class ``1`` during
        training (its ``predict_proba`` then has no column for that class).
        """
        proba = classifier.predict_proba(X)
        hit = np.flatnonzero(np.asarray(classifier.classes_) == 1)
        if hit.size == 0:
            return np.zeros(X.shape[0], dtype=float)
        return proba[:, hit[0]]

    def fit(self, X, y):
        """Fit one classifier per column of the 2-D label matrix ``y``.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
        y : array-like or sparse matrix, shape (n_samples, n_labels)

        Returns
        -------
        self
        """
        # Imported locally because `clone` is not among the file-level imports.
        from sklearn.base import clone

        # multi_output=True is required for a 2-D label matrix; without it
        # check_X_y rejects y.
        X, y = check_X_y(X, y, accept_sparse=True, multi_output=True)
        # Column stacking below needs dense arrays.
        X = self._as_dense(X)
        y = self._as_dense(y)
        self.classes_ = unique_labels(y)

        # Start from an empty chain so that re-fitting does not accumulate models.
        self.classifiers = []
        for i in range(y.shape[1]):
            # Features for link i: original features + true labels 0..i-1.
            X_aug = np.hstack((X, y[:, :i]))
            # A fresh clone per label; reusing one estimator object would leave
            # every link pointing at the model fitted last.
            classifier = clone(self.base_classifier)
            classifier.fit(X_aug, y[:, i])
            self.classifiers.append(classifier)

        return self

    def predict(self, X):
        """Predict every label for ``X``, feeding earlier predictions down the chain.

        Returns
        -------
        ndarray of int, shape (n_samples, len(self.classes_))
        """
        X = self._as_dense(check_array(X, accept_sparse=True))

        Y_pred = np.zeros((X.shape[0], len(self.classes_)), dtype=int)
        for i, classifier in enumerate(self.classifiers):
            # Same layout as in fit: features + labels 0..i-1 (predicted here).
            Y_pred[:, i] = classifier.predict(np.hstack((X, Y_pred[:, :i])))

        return Y_pred

    def predict_proba(self, X):
        """Return, per label, the estimated probability of class ``1``.

        The chain is advanced with hard predictions (not probabilities) so each
        link receives the same kind of label features it was trained on.

        Returns
        -------
        ndarray of float, shape (n_samples, len(self.classes_))
        """
        X = self._as_dense(check_array(X, accept_sparse=True))

        n_outputs = len(self.classes_)
        Y_chain = np.zeros((X.shape[0], n_outputs), dtype=int)
        Y_pred_proba = np.zeros((X.shape[0], n_outputs), dtype=float)
        for i, classifier in enumerate(self.classifiers):
            X_aug = np.hstack((X, Y_chain[:, :i]))
            Y_pred_proba[:, i] = self._positive_proba(classifier, X_aug)
            Y_chain[:, i] = classifier.predict(X_aug)

        return Y_pred_proba


# Load the emotions dataset from CSV
emotions_data = pd.read_csv('emotions.csv')

# Extract features (X) and labels (y): the last 6 columns are used as labels,
# everything before them as features.
X = emotions_data.iloc[:, :-6].values
y = emotions_data.iloc[:, -6:].values
# Show only the shapes; printing the full arrays floods the notebook output.
print(X.shape, y.shape)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the base classifier (seeded so repeated runs give the same model)
base_classifier = RandomForestClassifier(random_state=42)

# Build the classifier chain
classifier_chain = ClassifierChain(base_classifier)

# Train the classifier chain
classifier_chain.fit(X_train, y_train)

# Make predictions on the test set
y_pred = classifier_chain.predict(X_test)

# Calculate accuracy score (for multilabel targets this is subset accuracy:
# a sample counts as correct only when all of its labels match)
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)


# Run this

In [28]:
from skmultilearn.base import MLClassifierBase
from scipy.sparse import lil_matrix
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import check_X_y, check_array
from sklearn.utils.multiclass import unique_labels

class MLC(MLClassifierBase):
    """Frequency-based multilabel classifier built on scikit-multilearn's base class.

    ``fit`` ranks the labels by how often they occur in ``y`` and keeps the
    ``k`` most frequent ones. ``predict`` / ``predict_proba`` then fill only
    those label columns, reading column ``j`` of ``X`` for label ``j``.

    Parameters
    ----------
    k : int or None
        Number of most frequent labels to keep; ``None`` keeps all labels.
    """

    def __init__(self, k = None):
        super(MLC, self).__init__()
        self.k = k
        # scikit-multilearn's get_params()/set_params() read `copyable_attrs`;
        # without it `k` is lost when the estimator is cloned.
        self.copyable_attrs = ['k']
        # Old camelCase name kept as an alias for code that may still read it.
        self.copyableAttrs = self.copyable_attrs

    def fit(self, X, y):
        """Select the ``k`` most frequent labels of ``y``.

        Parameters
        ----------
        X : ignored by this method.
        y : 2-D array or sparse matrix, shape (n_samples, n_labels)

        Returns
        -------
        self
        """
        self.nLabels = y.shape[1]
        # y.mean(axis=0) is a 1 x n matrix for sparse input; flatten it so that
        # frequencies[i] is always a scalar.
        frequencies = np.asarray(y.mean(axis=0)).ravel()
        # Most frequent first, so the slice below keeps the top-k labels.
        labelsSortedByFrequency = sorted(
            range(self.nLabels), key=lambda i: frequencies[i], reverse=True
        )
        self.labelsAssign = labelsSortedByFrequency[:self.k]

        return self

    def predict(self, X):
        """Return a sparse 0/1 matrix of shape (n_samples, nLabels).

        Entry ``(i, j)`` is 1 when ``j`` is a selected label and ``X[i, j] > 0``.
        ``X`` must therefore have a column for every selected label index.
        """
        prediction = lil_matrix((X.shape[0], self.nLabels), dtype=int)
        for i in range(X.shape[0]):
            for j in self.labelsAssign:
                if X[i, j] > 0:
                    prediction[i, j] = 1

        return prediction

    def predict_proba(self, X):
        """Return a sparse float matrix of shape (n_samples, nLabels).

        Entry ``(i, j)`` is ``X[i, j]`` for every selected label ``j`` and 0
        elsewhere. The values are copied as-is; they are not rescaled to [0, 1].
        """
        probabilities = lil_matrix((X.shape[0], self.nLabels), dtype=float)
        for i in range(X.shape[0]):
            for j in self.labelsAssign:
                probabilities[i, j] = X[i, j]

        return probabilities

# Load the yeast dataset from CSV
df = pd.read_csv('yeast.csv')

# Extract features (X) and labels (y)
# NOTE(review): this assumes the last 6 columns of yeast.csv are the labels;
# the same split is used for emotions.csv above — confirm it matches this file.
X = df.iloc[:, :-6].values
y = df.iloc[:, -6:].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the base classifier (seeded so repeated runs give the same model)
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
prediction = clf.predict(X_test)

# Estimating the Accuracy Score (for multilabel targets this is subset
# accuracy: a sample counts as correct only when all of its labels match)
print('Accuracy:', accuracy_score(y_test, prediction))


Accuracy: 0.6053719008264463
