In [1]:
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import numpy as np
import cv2




In [2]:
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

In [3]:
x_train.shape,x_test.shape

((50000, 32, 32, 3), (10000, 32, 32, 3))

In [4]:
classes=['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']

In [5]:
# Normalization
x_train = x_train/255.0
x_test = x_test/255.0

In [6]:
#sklearn expects i/p to be 2d array-model.fit(x_train,y_train)=>reshape to 2d array
nsamples, nx, ny, nrgb = x_train.shape
x_train2 = x_train.reshape((nsamples,nx*ny*nrgb))
y_train=y_train.reshape(-1,)

In [7]:
#so,eventually,model.predict() should also be a 2d input
nsamples, nx, ny, nrgb = x_test.shape
x_test2 = x_test.reshape((nsamples,nx*ny*nrgb))

## Implementing Random Forest Classifier

In [8]:
from sklearn.ensemble import RandomForestClassifier

In [9]:
model=RandomForestClassifier()

In [10]:
model.fit(x_train2,y_train)

  model.fit(x_train2,y_train)


In [11]:
y_pred=model.predict(x_test2)
y_pred

array([3, 8, 8, ..., 3, 5, 7], dtype=uint8)

In [12]:
accuracy_score(y_pred,y_test)
print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.57      0.56      0.56      1025
           1       0.56      0.52      0.54      1061
           2       0.34      0.38      0.36       886
           3       0.26      0.32      0.29       824
           4       0.40      0.41      0.40       979
           5       0.38      0.40      0.39       933
           6       0.57      0.47      0.52      1223
           7       0.46      0.51      0.49       904
           8       0.61      0.58      0.60      1046
           9       0.54      0.49      0.51      1119

    accuracy                           0.47     10000
   macro avg       0.47      0.46      0.47     10000
weighted avg       0.48      0.47      0.47     10000



In [13]:
confusion_matrix(y_pred,y_test)

array([[570,  26, 104,  46,  57,  38,  11,  42,  79,  52],
       [ 33, 556,  49,  39,  17,  29,  31,  42,  94, 171],
       [ 63,  20, 337,  71, 145,  85,  86,  49,  13,  17],
       [ 19,  30,  83, 260,  56, 175,  73,  58,  30,  40],
       [ 31,  19, 136,  73, 397,  75, 102, 102,  25,  19],
       [ 26,  32,  67, 201,  39, 377,  50,  91,  28,  22],
       [ 21,  38, 109, 148, 158,  91, 575,  45,  14,  24],
       [ 25,  41,  60,  68,  86,  70,  28, 464,  22,  40],
       [161,  63,  30,  27,  25,  29,  10,  20, 610,  71],
       [ 51, 175,  25,  67,  20,  31,  34,  87,  85, 544]], dtype=int64)

## Implementing a KNN

In [14]:
from sklearn.neighbors import KNeighborsClassifier

In [15]:
knn=KNeighborsClassifier(n_neighbors=7)

In [16]:
knn.fit(x_train2,y_train)

  return self._fit(X, y)


In [17]:
y_pred_knn=knn.predict(x_test2)
y_pred_knn

array([2, 8, 8, ..., 5, 6, 4], dtype=uint8)

In [18]:
accuracy_score(y_pred_knn,y_test)
print(classification_report(y_pred_knn,y_test))

              precision    recall  f1-score   support

           0       0.52      0.40      0.45      1299
           1       0.20      0.70      0.30       280
           2       0.44      0.22      0.29      2013
           3       0.20      0.28      0.23       707
           4       0.52      0.23      0.32      2222
           5       0.20      0.40      0.27       504
           6       0.27      0.35      0.30       761
           7       0.20      0.70      0.31       281
           8       0.69      0.39      0.50      1749
           9       0.14      0.73      0.23       184

    accuracy                           0.34     10000
   macro avg       0.34      0.44      0.32     10000
weighted avg       0.45      0.34      0.35     10000



In [19]:
confusion_matrix(y_pred_knn,y_test)

array([[518, 118, 110,  66,  60,  63,  22,  78, 112, 152],
       [  7, 195,   2,   5,   0,   2,   1,   8,  10,  50],
       [118, 124, 437, 243, 268, 225, 271, 185,  47,  95],
       [ 12,  51,  50, 199,  33, 156,  56,  57,  34,  59],
       [ 46, 173, 250, 214, 521, 201, 319, 294,  69, 135],
       [  5,  32,  24, 102,  14, 204,  32,  60,  12,  19],
       [ 30,  55,  65, 108,  44,  80, 265,  48,  14,  52],
       [  5,   8,   7,  11,  11,  10,   1, 196,   7,  25],
       [255, 223,  53,  51,  48,  53,  32,  68, 688, 278],
       [  4,  21,   2,   1,   1,   6,   1,   6,   7, 135]], dtype=int64)

## Implementing a Decision Tree

In [20]:
from sklearn.tree import DecisionTreeClassifier

In [21]:
dtc=DecisionTreeClassifier()

In [22]:
dtc.fit(x_train2,y_train)

In [23]:
y_pred_dtc=dtc.predict(x_test2)
y_pred_dtc

array([7, 1, 9, ..., 2, 2, 1], dtype=uint8)

In [24]:
accuracy_score(y_pred_dtc,y_test)
print(classification_report(y_pred_dtc,y_test))

              precision    recall  f1-score   support

           0       0.36      0.33      0.34      1077
           1       0.28      0.28      0.28       984
           2       0.21      0.20      0.21      1061
           3       0.18      0.19      0.18       992
           4       0.22      0.21      0.22      1047
           5       0.23      0.23      0.23       993
           6       0.28      0.29      0.29       951
           7       0.27      0.28      0.28       955
           8       0.37      0.38      0.37       962
           9       0.30      0.30      0.30       978

    accuracy                           0.27     10000
   macro avg       0.27      0.27      0.27     10000
weighted avg       0.27      0.27      0.27     10000



In [25]:
confusion_matrix(y_pred_dtc,y_test)

array([[357,  67,  98,  68,  71,  51,  48,  81, 153,  83],
       [ 81, 278,  53,  59,  44,  57,  44,  86, 114, 168],
       [ 83,  65, 215, 119, 161, 106, 122,  87,  41,  62],
       [ 50,  77,  91, 184, 100, 155, 126,  91,  53,  65],
       [ 54,  67, 155, 119, 222,  88, 140, 109,  48,  45],
       [ 55,  53, 119, 136,  89, 233, 108,  98,  45,  57],
       [ 32,  50, 102, 127, 119, 102, 279,  57,  32,  51],
       [ 54,  76,  79,  86, 106,  93,  71, 269,  37,  84],
       [160, 105,  38,  46,  34,  53,  21,  50, 366,  89],
       [ 74, 162,  50,  56,  54,  62,  41,  72, 111, 296]], dtype=int64)

## Implementing a Naive Bayes classifier

In [26]:
from sklearn.naive_bayes import GaussianNB

In [27]:
nb=GaussianNB()

In [28]:
nb.fit(x_train2,y_train)

  y = column_or_1d(y, warn=True)


In [29]:
y_pred_nb=nb.predict(x_test2)
y_pred_nb

array([6, 8, 8, ..., 6, 5, 4], dtype=uint8)

In [30]:
accuracy_score(y_pred_nb,y_test)
print(classification_report(y_pred_nb,y_test))

              precision    recall  f1-score   support

           0       0.49      0.27      0.35      1817
           1       0.17      0.41      0.24       405
           2       0.08      0.19      0.12       443
           3       0.08      0.25      0.12       305
           4       0.42      0.24      0.30      1737
           5       0.26      0.31      0.29       840
           6       0.47      0.25      0.33      1846
           7       0.13      0.42      0.20       310
           8       0.47      0.39      0.42      1219
           9       0.41      0.38      0.39      1078

    accuracy                           0.30     10000
   macro avg       0.30      0.31      0.28     10000
weighted avg       0.39      0.30      0.32     10000



In [31]:
confusion_matrix(y_pred_nb,y_test)

array([[494, 141, 225, 163,  86, 156, 106, 134, 168, 144],
       [ 20, 166,  24,  36,   8,  17,   2,  24,  41,  67],
       [ 39,  24,  83,  54,  57,  55,  60,  36,  18,  17],
       [ 10,  31,  15,  76,  26,  51,  18,  41,  17,  20],
       [ 84,  66, 292, 151, 417, 167, 228, 228,  56,  48],
       [ 34,  72,  48, 129,  38, 264,  46,  94,  83,  32],
       [ 50, 192, 209, 262, 265, 159, 467, 102,  39, 101],
       [  9,  19,  21,  26,  22,  36,  15, 131,   8,  23],
       [200, 121,  54,  34,  50,  57,  19,  72, 471, 141],
       [ 60, 168,  29,  69,  31,  38,  39, 138,  99, 407]], dtype=int64)

## Implementing a Logistic Regression classifier

In [32]:
from sklearn.linear_model import LogisticRegression

In [33]:
lr=LogisticRegression()

In [34]:
lr.fit(x_train2,y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [36]:
y_pred_lr=lr.predict(x_test2)

In [37]:
accuracy_score(y_pred_nb,y_test)
print(classification_report(y_pred_lr,y_test))

              precision    recall  f1-score   support

           0       0.47      0.43      0.45      1094
           1       0.49      0.47      0.48      1046
           2       0.28      0.32      0.30       863
           3       0.25      0.31      0.28       814
           4       0.29      0.36      0.33       810
           5       0.37      0.34      0.35      1084
           6       0.49      0.42      0.45      1158
           7       0.44      0.46      0.45       957
           8       0.52      0.47      0.49      1114
           9       0.46      0.43      0.44      1060

    accuracy                           0.41     10000
   macro avg       0.41      0.40      0.40     10000
weighted avg       0.42      0.41      0.41     10000



In [38]:
confusion_matrix(y_pred_lr,y_test)

array([[474,  57, 109,  39,  55,  41,  15,  46, 178,  80],
       [ 42, 492,  47,  59,  28,  45,  32,  44,  67, 190],
       [ 59,  21, 275,  97, 135,  91,  71,  73,  20,  21],
       [ 36,  30,  87, 252,  51, 144, 116,  54,  24,  20],
       [ 20,  22, 118,  55, 295,  81, 101,  92,   9,  17],
       [ 32,  42,  86, 206,  98, 369,  87,  84,  50,  30],
       [ 26,  37, 138, 124, 159,  86, 490,  49,   8,  41],
       [ 54,  52,  67,  48, 122,  70,  39, 438,  19,  48],
       [186,  80,  49,  47,  32,  40,  22,  41, 521,  96],
       [ 71, 167,  24,  73,  25,  33,  27,  79, 104, 457]], dtype=int64)

#  Testing on random image

In [39]:
img_path='bird.jpeg'
img_arr=cv2.imread(img_path)
img_arr=cv2.resize(img_arr,(32,32))

In [40]:
#so,eventually,model.predict() should also be a 2d input
nx, ny, nrgb = img_arr.shape
img_arr2 = img_arr.reshape(1,(nx*ny*nrgb))

In [41]:
ans=model.predict(img_arr2)
print(classes[ans[0]])
#RandomForestClassifier

bird
