# Multiclass Classification

In [1]:
from pycaret.datasets import get_data
# Load the 'iris' sample dataset; the preview below shows four measurement
# columns plus the 'species' label.
dataset = get_data('iris')

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [6]:
# pandas is imported here because no earlier visible cell brings `pd` into
# scope; without it this cell raises NameError on a fresh kernel.
import pandas as pd

# Count observations per species to check class balance in the target.
pd.crosstab(index=dataset['species'], columns='# observations')

col_0,# observations
species,Unnamed: 1_level_1
Iris-setosa,50
Iris-versicolor,50
Iris-virginica,50


In [7]:
# Keep 90% of the rows for modeling and hold the remainder back as "unseen"
# data for the final prediction demo.
data = dataset.sample(frac=0.9, random_state=786)

# The unseen rows must be derived while `data` still carries the original
# index labels, so its index is reset only afterwards.
data_unseen = dataset.drop(data.index).reset_index(drop=True)
data = data.reset_index(drop=True)

print(f'Data for Modeling: {data.shape}')
print(f'Unseen Data For Predictions: {data_unseen.shape}')

Data for Modeling: (135, 5)
Unseen Data For Predictions: (15, 5)


In [8]:
# NOTE(review): the wildcard import supplies setup() and the other PyCaret
# helpers called in later cells (compare_models, tune_model, evaluate_model,
# predict_model, finalize_model); prefer explicit names once the full
# notebook's usage is known.
from pycaret.classification import *

# Initialise the experiment on the modeling split with 'species' as the target;
# session_id pins the experiment's randomness so results are repeatable.
exp_mclf101 = setup(
    data=data,
    target='species',
    session_id=123,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,species
2,Target Type,Multiclass
3,Label Encoded,"Iris-setosa: 0, Iris-versicolor: 1, Iris-virginica: 2"
4,Original Data,"(135, 5)"
5,Missing Values,False
6,Numeric Features,4
7,Categorical Features,0
8,Ordinal Features,False
9,High Cardinality Features,False


In [9]:
# Score the candidate estimators and keep the top-ranked one
# (Linear Discriminant Analysis in the leaderboard below).
best = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lda,Linear Discriminant Analysis,0.9678,0.9963,0.9667,0.9758,0.9669,0.9515,0.956,0.008
nb,Naive Bayes,0.9578,0.9897,0.9556,0.9713,0.9546,0.9364,0.9442,0.008
qda,Quadratic Discriminant Analysis,0.9567,1.0,0.9556,0.9708,0.9533,0.9348,0.9433,0.011
lr,Logistic Regression,0.9478,0.9963,0.9444,0.9638,0.9444,0.9212,0.9304,1.404
knn,K Neighbors Classifier,0.9467,0.9926,0.9444,0.963,0.9432,0.9197,0.9291,0.055
lightgbm,Light Gradient Boosting Machine,0.9456,0.9852,0.9444,0.9625,0.9419,0.9182,0.9282,0.026
ada,Ada Boost Classifier,0.9256,0.9809,0.9222,0.9505,0.9194,0.8879,0.9026,0.038
gbc,Gradient Boosting Classifier,0.9256,0.9815,0.9222,0.9505,0.9194,0.8879,0.9026,0.07
et,Extra Trees Classifier,0.9256,0.9926,0.9222,0.9505,0.9194,0.8879,0.9026,0.188
dt,Decision Tree Classifier,0.9144,0.9369,0.9111,0.9366,0.9086,0.8712,0.8843,0.008


Note: In earlier PyCaret releases the AUC metric was not available for Multiclass classification, and the column was shown with zero values to maintain consistency between the Binary Classification and Multiclass Classification display grids. In the run shown above, the AUC column is populated with actual values.

## Model Tuning

In [10]:
# Search hyperparameters for the selected model; the grid below appears to list
# one row of metrics per cross-validation fold.
tuned_best = tune_model(best)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,0.9,1.0,0.8889,0.925,0.8971,0.8485,0.8616
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,0.8889,1.0,0.8889,0.9167,0.8857,0.8333,0.8492
6,0.8889,1.0,0.8889,0.9167,0.8857,0.8333,0.8492
7,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,1.0,1.0,1.0,1.0,1.0,1.0,1.0
9,1.0,1.0,1.0,1.0,1.0,1.0,1.0


The tune_model() function performs a random grid search of hyperparameters over a pre-defined search space. By default, it optimizes Accuracy, but this can be changed using the optimize parameter.

In [11]:
# Open the interactive plot selector for the tuned model (renders as a widget).
evaluate_model(tuned_best)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

Before finalizing the model, use predict_model() to check it against the portion of the data that was set aside as a test/hold-out sample for evaluation. Using the tuned model stored in the tuned_best variable, we will predict against the hold-out sample and evaluate the metrics to see if they are materially different from the CV results.

In [12]:
# No `data` argument is passed, so the tuned model is scored on the hold-out
# sample that was set aside for evaluation.
predict_model(tuned_best)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Linear Discriminant Analysis,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,Label,Score
0,5.7,2.6,3.5,1.0,Iris-versicolor,Iris-versicolor,1.0
1,5.2,3.5,1.5,0.2,Iris-setosa,Iris-setosa,1.0
2,6.5,3.0,5.2,2.0,Iris-virginica,Iris-virginica,0.9967
3,4.8,3.4,1.9,0.2,Iris-setosa,Iris-setosa,1.0
4,7.0,3.2,4.7,1.4,Iris-versicolor,Iris-versicolor,0.9999
5,5.5,2.6,4.4,1.2,Iris-versicolor,Iris-versicolor,0.9998
6,4.9,3.0,1.4,0.2,Iris-setosa,Iris-setosa,1.0
7,4.6,3.6,1.0,0.2,Iris-setosa,Iris-setosa,1.0
8,5.1,3.8,1.6,0.2,Iris-setosa,Iris-setosa,1.0
9,5.8,2.7,3.9,1.2,Iris-versicolor,Iris-versicolor,1.0


The finalize_model() function fits the model onto the complete dataset including the test/hold-out sample (30% in this case). The purpose of this function is to train the model on the complete dataset before it is deployed in production.

In [15]:
# Refit the tuned model on the complete modeling dataset (including the
# hold-out sample) ahead of deployment, then show the fitted estimator.
final_mod = finalize_model(tuned_best)
print(final_mod)

LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=0.01,
                           solver='eigen', store_covariance=False, tol=0.0001)


### Predict on unseen data

In [16]:
# Score the rows that were held back before setup(); the returned frame carries
# the original columns plus Label and Score.
unseen_predictions = predict_model(final_mod, data=data_unseen)
unseen_predictions.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,Label,Score
0,5.4,3.9,1.7,0.4,Iris-setosa,Iris-setosa,1.0
1,5.4,3.4,1.7,0.2,Iris-setosa,Iris-setosa,1.0
2,5.1,3.3,1.7,0.5,Iris-setosa,Iris-setosa,1.0
3,4.8,3.1,1.6,0.2,Iris-setosa,Iris-setosa,1.0
4,6.9,3.1,4.9,1.5,Iris-versicolor,Iris-versicolor,0.9942


The Label and Score columns are added to the data_unseen rows in the returned unseen_predictions DataFrame. Label is the prediction and Score is the probability of the prediction. Notice that the predicted results are concatenated to the original dataset while all the transformations are automatically performed in the background.