In [1]:
import pandas as pd
import numpy as np

In [3]:
from sklearn.datasets import make_classification

# Synthetic 3-class problem: 1000 samples, 10 features, 3 of them informative.
# The fixed random_state keeps the generated data identical across runs.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=3,
    n_classes=3,
    random_state=42,
)

In [None]:
# Wrap the feature matrix in a DataFrame with columns f1..fN (N = number of
# feature columns in X) and attach the class labels as 'target'.
feature_names = [f'f{i}' for i in range(1, X.shape[1] + 1)]
df = pd.DataFrame(X, columns=feature_names)
df['target'] = y

# index=False keeps the positional index out of the file; without it the
# index is written as an extra, unnamed first column.
df.to_csv('random_multiclass_classification_data.csv', index=False)

# Kept as the last expression of the cell so the distinct labels are displayed
# (mid-cell, the result was silently discarded).
df['target'].unique()

In [9]:
# Hold out 30% of the samples for evaluation; the fixed seed makes the split
# reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [10]:
# Model training
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

# One-vs-rest logistic regression: one binary classifier is fitted per class.
# The `multi_class` argument of LogisticRegression is deprecated since
# scikit-learn 1.5, so the one-vs-rest scheme is requested through the
# explicit OneVsRestClassifier wrapper instead.
logistic_model = OneVsRestClassifier(LogisticRegression())
logistic_model.fit(X_train, y_train)



In [11]:
# Model testing: predict a class label for every held-out sample.
y_pred = logistic_model.predict(X=X_test)
print(y_pred)

[2 0 1 2 0 1 2 0 0 2 2 2 2 2 2 2 2 1 0 0 0 0 2 0 1 0 2 1 0 2 2 0 1 0 0 2 2
 2 1 0 0 1 2 0 1 2 1 0 1 1 2 0 1 0 2 2 2 2 1 2 0 2 2 2 1 1 0 1 0 1 0 2 2 0
 0 0 2 1 1 2 2 0 2 1 0 1 1 2 1 1 2 2 1 2 2 2 1 0 0 0 0 2 1 0 2 1 1 0 0 2 0
 1 2 0 0 0 1 1 2 2 1 0 0 1 2 0 0 1 0 2 1 0 2 2 2 0 0 1 2 1 2 1 2 1 2 2 2 0
 0 1 2 0 0 2 1 0 0 0 2 0 0 0 2 2 2 2 2 1 2 1 0 2 1 0 0 2 0 1 1 2 2 0 0 2 0
 0 0 2 0 0 0 0 2 1 2 2 1 2 0 1 2 2 1 1 0 2 1 2 2 2 2 2 0 0 0 0 0 0 1 0 1 2
 2 2 0 0 0 0 0 2 2 0 0 2 2 1 0 1 2 0 0 2 0 2 0 0 2 2 0 2 2 0 1 2 2 0 1 1 0
 1 2 0 2 2 0 0 0 2 1 2 0 2 2 2 0 2 0 2 2 0 0 1 0 2 2 2 0 2 2 0 2 2 0 2 0 0
 2 2 0 1]


In [12]:
# Per-class predicted probabilities (one column per class) for each test
# sample, rather than only the final predicted label.
logistic_model.predict_proba(X=X_test)

array([[9.22522618e-03, 2.44034837e-01, 7.46739937e-01],
       [4.89544732e-01, 2.45683892e-01, 2.64771376e-01],
       [3.54058728e-01, 5.90982894e-01, 5.49583777e-02],
       [3.61503652e-02, 1.82525799e-01, 7.81323836e-01],
       [4.01755652e-01, 3.69771801e-01, 2.28472547e-01],
       [3.83231093e-01, 5.21315790e-01, 9.54531172e-02],
       [2.80650254e-01, 2.18835528e-01, 5.00514218e-01],
       [6.86748730e-01, 3.10472372e-01, 2.77889720e-03],
       [5.72702266e-01, 4.24212176e-01, 3.08555794e-03],
       [2.69546511e-02, 1.49732219e-01, 8.23313130e-01],
       [7.81732426e-02, 2.40515273e-01, 6.81311484e-01],
       [1.25869742e-02, 1.72900197e-01, 8.14512828e-01],
       [4.99079983e-02, 1.76510922e-01, 7.73581080e-01],
       [3.95794074e-02, 3.56708621e-01, 6.03711972e-01],
       [2.01209577e-02, 2.98735180e-01, 6.81143862e-01],
       [7.35993162e-03, 2.18049789e-01, 7.74590279e-01],
       [2.71868855e-02, 2.81421225e-01, 6.91391890e-01],
       [2.36865580e-01, 4.86991

In [13]:
# Performance metrics: overall accuracy, per-class precision/recall/F1, and
# the confusion matrix of true vs. predicted labels on the test split.
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)

report = classification_report(y_test, y_pred)
con_matrix = confusion_matrix(y_test, y_pred)
acc_score = accuracy_score(y_test, y_pred)

print(
    f"Accuracy of the model : {acc_score}",
    f"Classification report : \n{report}",
    f"Confusion matrix is given by : \n{con_matrix}",
    sep="\n",
)

Accuracy of the model : 0.68
Classification report : 
              precision    recall  f1-score   support

           0       0.70      0.77      0.73       103
           1       0.58      0.40      0.47        96
           2       0.71      0.86      0.78       101

    accuracy                           0.68       300
   macro avg       0.67      0.67      0.66       300
weighted avg       0.67      0.68      0.66       300

Confusion matrix is given by : 
[[79 16  8]
 [31 38 27]
 [ 3 11 87]]
