# Understanding Binary and Non-binary Classification

In this demonstration, you’ll build and test a multi-class classification model and a multi-label classification model. You will measure the accuracy, precision, recall and AUC for both models.

The first part of this demonstration uses an edited version of the Iris dataset. This dataset was used in R.A. Fisher's classic 1936 paper, *The Use of Multiple Measurements in Taxonomic Problems*, and can also be found on the UCI Machine Learning Repository.

It includes three iris species with 50 samples each as well as some properties about each flower. One flower species is linearly separable from the other two, but the other two are not linearly separable from each other.

The columns in this dataset are:

- SepalLengthCm
- SepalWidthCm
- PetalLengthCm
- PetalWidthCm
- Species


The second part of this demonstration uses the Emotions dataset ([Tsoumakas et al. 2008](https://www.uco.es/kdis/mllresources/#TsoumakasEtAl2008)), also called Music in [Read 2010](https://www.uco.es/kdis/mllresources/#Read2010). This is a small dataset for classifying music by the emotions that it evokes, according to the Tellegen-Watson-Clark model of mood: amazed-suprised, happy-pleased, relaxing-calm, quiet-still, sad-lonely and angry-aggresive (the label names are spelled here exactly as they appear in the dataset). It consists of 593 songs with 6 classes (labels). The same piece of music can evoke several emotions, so each song can carry more than one label.

The 72 features in the dataset measure the rhythm and timbre of the music, the properties that elicit the emotional responses in the listener.

# Part I - Multi-class Classification
# Upload and prepare the data

Load the Iris dataset

In [None]:
# Download the iris.csv file from the course repository into the notebook's working directory
# (the leading "!" makes the notebook run the line as a shell command rather than as Python)

!wget 'https://raw.githubusercontent.com/cm-int/machine-learning-fundamentals/main/module_2/Democode/iris.csv'

In [None]:
import numpy as np
import pandas as pd

# Read the downloaded CSV into a DataFrame and print it so the columns and values can be inspected
iris = pd.read_csv('iris.csv')
print(iris)

In [None]:
# Hold out roughly a third of the rows as a test set; train on the rest

from sklearn.model_selection import train_test_split

# 'Species' is the target column; every other column is an input feature
features = iris.drop(columns=['Species'])
predictions = iris['Species']

# A fixed random_state makes the shuffle, and therefore the split, repeatable
(features_train,
 features_test,
 predictions_train,
 predictions_test) = train_test_split(
    features,
    predictions,
    test_size=0.33,
    random_state=13,
)

# Create a K-Nearest Neighbors model to classify the data

The K-Nearest Neighbors algorithm is inherently multi-class.

**Note: You don't need to convert the strings in the Species column into numbers**

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Each prediction is based on the 5 training samples closest to the query point
model = KNeighborsClassifier(n_neighbors=5)
# Train on the string species labels directly; binding the return value to "_" stops the notebook echoing it
_= model.fit(features_train, predictions_train)

# Test the model using the test dataset

Display the results of the predictions, generate the Confusion Matrix and ROC curve, and measure the AUC, accuracy, precision, and recall

In [None]:
# Make predictions

results = model.predict(features_test)
print(results) # Note that the predictions have three possible values (there are three species of iris in the dataset)
print('\n')

# predict_proba() returns one row per test sample with one probability per class
probabilities = model.predict_proba(features_test)
print(probabilities[0:100]) # Display up to the first 100 sets of probabilities (fewer if the test set is smaller).
                            # This is a small, clean dataset, so expect most predictions to be made with a probability of 1,
                            # although several predictions may have a probability below this value.
                            # NOTE(review): the original note claimed 100% accuracy on this data - confirm against the metrics

In [None]:
# Generate the confusion matrix from the predictions
# Expect a small number of FPs and FNs (the off-diagonal cells)

from sklearn.metrics import ConfusionMatrixDisplay

# First argument is the true species, second is the species predicted by the model
_ = ConfusionMatrixDisplay.from_predictions(predictions_test, results)


In [None]:
# Binarize the predictions test data to match the probabilities data
# (one 0/1 column per species, so each column can be compared with one probability column)

# sklearn's label_binarize(..., classes=...) would also accept these string labels;
# here we use pd.get_dummies() instead and convert the result into a numpy array
# NOTE(review): get_dummies presumably emits its columns in sorted label order, which is what lines them up
# with the predict_proba columns - confirm on the installed pandas version

print(f'Before binarization:\n{predictions_test}\n')

binarized_predictions_test = np.array(pd.get_dummies(predictions_test))
print(f'After binarization:\n{binarized_predictions_test}')

In [None]:
# Score the KNN predictions against the true species: accuracy, precision, and recall
# Expect all three to be reasonably high

from sklearn.metrics import accuracy_score, precision_score, recall_score

# With a multi-class model the 'average' parameter must be given explicitly;
# average=None reports one precision/recall value per species instead of a single number
knn_accuracy = accuracy_score(predictions_test, results)
knn_precision = precision_score(predictions_test, results, average=None)
knn_recall = recall_score(predictions_test, results, average=None)

print(f'Accuracy: {knn_accuracy}')
print(f'Precision: {knn_precision}')
print(f'Recall: {knn_recall}')

In [None]:
# Display one ROC curve per species (one-vs-rest) for the KNN model
# Expect the AUC to be close to 1.0 for all species

from sklearn.metrics import roc_curve, roc_auc_score 
import matplotlib.pyplot as plt

fpr = dict()
tpr = dict()

plt.figure(figsize=(10, 10))

# Take the class count from the fitted model rather than hard-coding 3, so the loop
# always matches the number of probability columns and the legend names
num_classes = len(model.classes_)
for i in range(num_classes):
    # Column i of the one-hot test labels is the 0/1 truth for class i;
    # column i of the probabilities is the model's score for that same class
    true_column = binarized_predictions_test[:, i]
    score_column = probabilities[:, i]
    fpr[i], tpr[i], _ = roc_curve(true_column, score_column)
    auc = roc_auc_score(true_column, score_column)
    plt.plot(fpr[i], tpr[i], lw=2, label=f'class {model.classes_[i]}: AUC {auc}')

# Diagonal reference line: the curve a random classifier would produce
plt.plot((0, 1), (0,1), label="Random Guess", c='red', linewidth=5)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc="best")
plt.title("ROC Curves")
plt.show()

# Repeat the exercise to create a Logistic Regression model

We will use the One-vs-One (OvO) strategy, which requires wrapping the Logistic Regression model in scikit-learn's OneVsOneClassifier.

In [None]:
# Create the model
# Note: this rebinds 'model', replacing the KNN classifier created earlier

from sklearn.multiclass import OneVsOneClassifier
from sklearn.linear_model import LogisticRegression

# The wrapper trains a separate LogisticRegression for each pair of classes
model = OneVsOneClassifier(LogisticRegression(max_iter=500))
_ = model.fit(features_train, predictions_train)

# Display the estimators created by the model (there should be three: one per pair of species)
print(model.estimators_)

In [None]:
# Make test predictions

# predictions_test holds the true species from the held-out split, so it is the "actual" data.
# (The captions were previously swapped: the true labels were printed as "Predicted results".)
print(f'Actual results:\n{predictions_test}\n')

# results holds what the model predicts for the same rows
results = model.predict(features_test)
print(f'Predicted results:\n{results}') # Note that the predictions have three possible values (there are three species of iris in the dataset)

# Note: The function predict_proba() isn't available for the OneVsOne classifier.
# If uncommented, the following statement will cause an error
# probabilities = model.predict_proba(features_test)

In [None]:
# Score the OvO Logistic Regression predictions: accuracy, precision, and recall
# Compare these figures with those of the KNN model (expected to be higher)

from sklearn.metrics import accuracy_score, precision_score, recall_score

# average=None gives one precision/recall value per species
ovo_accuracy = accuracy_score(predictions_test, results)
ovo_precision = precision_score(predictions_test, results, average=None)
ovo_recall = recall_score(predictions_test, results, average=None)

print(f'Accuracy: {ovo_accuracy}')
print(f'Precision: {ovo_precision}')
print(f'Recall: {ovo_recall}')

In [None]:
# This time we need to binarize the predictions test data and the actual results (they are both currently string data)

# Use pd.get_dummies() and convert the result into a numpy array.
#
# pd.get_dummies() on raw strings only creates a column for each label it actually sees, so a species
# that is missing from the test split, or that the model never predicts, would silently drop a column
# and break the column-per-class indexing used when the arrays are compared class by class.
# Wrapping the values in a Categorical with the model's full class list guarantees one column per
# class, in model.classes_ order, for both arrays.

class_categories = list(model.classes_)

print(f'Test data before binarization:\n{predictions_test}\n')

binarized_predictions_test = np.array(
    pd.get_dummies(pd.Categorical(predictions_test, categories=class_categories))
)
print(f'Test data after binarization:\n{binarized_predictions_test}\n')

print(f'Results before binarization:\n{results}\n')

binarized_results = np.array(
    pd.get_dummies(pd.Categorical(results, categories=class_categories))
)
print(f'Results after binarization:\n{binarized_results}')

In [None]:
# Plot the confusion matrix for the OvO model from the original string labels
_ = ConfusionMatrixDisplay.from_predictions(y_true=predictions_test, y_pred=results)

# Repeat the scoring, this time on the one-hot encoded versions of the labels and predictions
binarized_accuracy = accuracy_score(binarized_predictions_test, binarized_results)
binarized_precision = precision_score(binarized_predictions_test, binarized_results, average=None)
binarized_recall = recall_score(binarized_predictions_test, binarized_results, average=None)

print(f'Accuracy: {binarized_accuracy}')
print(f'Precision: {binarized_precision}')
print(f'Recall: {binarized_recall}')

# Expect results close to 100%

In [None]:
# Display one ROC curve per species for the OvO model
# Expect the AUC to be close to 1.0 for all species
# Note: the curves are built from the hard 0/1 predictions (binarized_results), not from
# probabilities, because predict_proba() isn't available for the OneVsOne classifier

from sklearn.metrics import roc_curve, roc_auc_score 
import matplotlib.pyplot as plt

fpr = dict()
tpr = dict()

plt.figure(figsize=(10, 10))

# Take the class count from the fitted model rather than hard-coding 3, so the loop
# always matches the legend names taken from model.classes_
num_classes = len(model.classes_)
for i in range(num_classes):
    # Column i of each one-hot array is the 0/1 indicator for class i
    true_column = binarized_predictions_test[:, i]
    predicted_column = binarized_results[:, i]
    fpr[i], tpr[i], _ = roc_curve(true_column, predicted_column)
    auc = roc_auc_score(true_column, predicted_column)
    plt.plot(fpr[i], tpr[i], lw=2, label=f'class {model.classes_[i]}: AUC {auc}')

# Diagonal reference line: the curve a random classifier would produce
plt.plot((0, 1), (0,1), label="Random Guess", c='red', linewidth=5)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc="best")
plt.title("ROC Curves")
plt.show()

# Part II - Multi-label Classification
# Upload and prepare the data

The Emotions dataset is available for download as part of the Scikit-multilearn library.

In [None]:
 # Install scikit-multilearn and arff (Attribute-Relation File Format)
 # These are only required to retrieve the Emotions dataset
 # (the leading "!" makes the notebook run each line as a shell command)

!pip install scikit-multilearn
!pip install arff

In [None]:
# Download the Emotions dataset. The data has already been split into features and labels
import skmultilearn.dataset as sk

# 'undivided' requests the whole dataset; the train/test split is done later in this notebook
features, labels, feature_names, label_names = sk.load_dataset('emotions', 'undivided')

# Show the metadata describing the feature columns and the label columns
print(f'Features:\n{feature_names}\n\n')
print(f'Labels:\n{label_names}\n')

In [None]:
# The data has to be massaged into a format suitable for use with sklearn classifiers
# Create DataFrames for holding features and labels

import pandas as pd

# Empty frame with one column per feature; x[0] is the first element of each feature_names entry
# NOTE(review): presumably each entry is a (name, type) pair - confirm against the printed metadata
pd_features = pd.DataFrame(columns=[x[0] for x in feature_names])
print(f'{pd_features}\n')

# Empty frame with one column per label, built the same way from label_names
pd_labels = pd.DataFrame(columns=[x[0] for x in label_names])
print(f'{pd_labels}\n')

In [None]:
# Copy the data from the sparse array into the features DataFrame

import numpy as np

# Densify the whole sparse matrix once and build the DataFrame in a single step.
# The previous row-by-row DataFrame.append() loop no longer runs on pandas 2.x (append was removed)
# and re-copied the growing frame on every iteration.
# The result is the same: one row per song, a 0..n-1 index, float64 columns named after the features.
pd_features = pd.DataFrame(
    features.toarray(order='C'),
    columns=[x[0] for x in feature_names],
).astype('float64')

pd_features

In [None]:
# Copy the label data from the sparse array into the labels DataFrame.
# Densify the whole sparse matrix once and build the DataFrame in a single step.
# The previous row-by-row DataFrame.append() loop no longer runs on pandas 2.x (append was removed)
# and re-copied the growing frame on every iteration.
# The result is the same: one row per song, a 0..n-1 index, float64 columns named after the labels.
pd_labels = pd.DataFrame(
    labels.toarray(order='C'),
    columns=[x[0] for x in label_names],
).astype('float64')

pd_labels

In [None]:
# Hold out roughly a third of the songs as a test set; train on the rest
# Note: this rebinds features_train / features_test, replacing the Iris splits from Part I

from sklearn.model_selection import train_test_split

# Same test fraction and random_state as the Iris split, so the split is repeatable
(features_train,
 features_test,
 labels_train,
 labels_test) = train_test_split(
    pd_features,
    pd_labels,
    test_size=0.33,
    random_state=13,
)

In [None]:
# Create a Multi Output Classifier wrapped around a Gradient Boosted Tree to classify the data

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.multioutput import MultiOutputClassifier

# The wrapper fits a separate copy of the estimator to each label column
multi_model = MultiOutputClassifier(estimator=GradientBoostingClassifier())
_ = multi_model.fit(features_train, labels_train)

# Show the estimators created for the model. There should be one classifier per label (6)
print(multi_model.estimators_)

In [None]:
# Make test predictions

# labels_test holds the true labels from the held-out split, so it is the "actual" data.
# (The captions were previously swapped: the true labels were printed as "Predicted results".)
print(f'Actual results:\n{labels_test}\n')

# results holds what the model predicts for the same songs: one row per song, one column per label
results = multi_model.predict(features_test)
print(f'Predicted results:\n{results}') # The predictions have up to six labels

In [None]:
# Generate the confusion matrices from the predictions
# Note: One confusion matrix per label

from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
import matplotlib.pyplot as plt

# (column name in labels_test, title shown above the plot).
# The column names keep the dataset's own spelling because they are lookup keys;
# the titles are display text only and use the correct spelling.
label_titles = [
    ("amazed-suprised", 'Amazed-Surprised'),
    ("happy-pleased", 'Happy-Pleased'),
    ("relaxing-calm", 'Relaxing-Calm'),
    ("quiet-still", 'Quiet-Still'),
    ("sad-lonely", 'Sad-Lonely'),
    ("angry-aggresive", 'Angry-Aggressive'),
]

for column, title in label_titles:
    # The prediction columns follow the label column order, so look the position up
    # instead of maintaining a hand-written index next to every column name
    position = labels_test.columns.get_loc(column)

    # labels=[0.0, 1.0] forces a 2x2 matrix even if one value never occurs for this label,
    # so the matrix always matches the two display labels
    cm = confusion_matrix(labels_test[column], results[:, position], labels=[0.0, 1.0])
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['No', 'Yes'])
    disp.plot()
    plt.title(title)
    plt.show()

In [None]:
# Score the multi-label predictions: accuracy, precision, and recall

from sklearn.metrics import accuracy_score, precision_score, recall_score

# average=None gives one precision/recall value per label
multi_accuracy = accuracy_score(labels_test, results)
multi_precision = precision_score(labels_test, results, average=None)
multi_recall = recall_score(labels_test, results, average=None)

print(f'Accuracy: {multi_accuracy}')
print(f'Precision: {multi_precision}')
print(f'Recall: {multi_recall}')

# Precision is good, but accuracy is poor
# (for multi-label data accuracy_score is the subset accuracy: a song only counts as correct
# when all six of its labels are predicted correctly)

In [None]:
# Find the probabilities for each test prediction.
# The result holds one entry per label; the ROC code below indexes it as probabilities[i][:, 1],
# i.e. for label i it takes column 1 (the score for the label being present) across all test songs

probabilities = multi_model.predict_proba(features_test)
print(f'Probabilities: {probabilities}')

In [None]:
# Generate the ROC curves for each label

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score

fpr = dict()
tpr = dict()
plt.figure(figsize=(10, 10))

# probabilities holds one entry per label, so take the label count from it
# rather than hard-coding 6
num_labels = len(probabilities)
for i in range(num_labels):
    # True 0/1 values for label i, and the model's score for that label being present
    true_column = labels_test.iloc[:, i]
    positive_scores = probabilities[i][:, 1]
    fpr[i], tpr[i], _ = roc_curve(true_column, positive_scores)
    auc = roc_auc_score(true_column, positive_scores)
    plt.plot(fpr[i], tpr[i], lw=2, label=f'Label {i + 1}: AUC {auc}')

# Diagonal reference line: the curve a random classifier would produce
plt.plot((0, 1), (0,1), label="Random Guess", c='red', linewidth=5)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc="best")
plt.title("ROC Curves")
plt.show()