<a href="https://colab.research.google.com/github/ali-hamad-bakar/1st-pro/blob/main/assignmet_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Normalize image data
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Define the CNN model
model = keras.Sequential(
    [
        layers.Conv2D(32, (3, 3), activation="relu", input_shape=(32, 32, 3)),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, (3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ]
)

# Compile the model
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Train the model
model.fit(x_train, y_train, batch_size=64, epochs=10, validation_split=0.1)

# Evaluate the model on the test set
_, test_acc = model.evaluate(x_test, y_test)
print("Test accuracy:", test_acc)


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.6995000243186951


In [2]:
# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Print the shape of the data and labels
print("Train data shape:", x_train.shape)
print("Train labels shape:", y_train.shape)
print("Test data shape:", x_test.shape)
print("Test labels shape:", y_test.shape)

Train data shape: (50000, 32, 32, 3)
Train labels shape: (50000, 1)
Test data shape: (10000, 32, 32, 3)
Test labels shape: (10000, 1)


In [3]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Load CIFAR-10 dataset
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Normalize image data
x_train_full = x_train_full.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Split training set into training and validation sets
validation_split = 0.1
num_samples = len(x_train_full)
num_validation_samples = int(num_samples * validation_split)

x_train = x_train_full[:-num_validation_samples]
y_train = y_train_full[:-num_validation_samples]
x_val = x_train_full[-num_validation_samples:]
y_val = y_train_full[-num_validation_samples:]

# Print the shape of the data and labels
print("Train data shape:", x_train.shape)
print("Train labels shape:", y_train.shape)
print("Validation data shape:", x_val.shape)
print("Validation labels shape:", y_val.shape)
print("Test data shape:", x_test.shape)
print("Test labels shape:", y_test.shape)

Train data shape: (45000, 32, 32, 3)
Train labels shape: (45000, 1)
Validation data shape: (5000, 32, 32, 3)
Validation labels shape: (5000, 1)
Test data shape: (10000, 32, 32, 3)
Test labels shape: (10000, 1)


In [4]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Load CIFAR-10 dataset
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Normalize image data
x_train_full = x_train_full.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0





In [5]:
# (a) No validation set
x_train_a = x_train_full
y_train_a = y_train_full

In [6]:
# (b) 1 validation set with 20% of 80%
validation_split_b = 0.2
num_samples_b = len(x_train_full)
num_validation_samples_b = int(num_samples_b * validation_split_b)

x_train_b = x_train_full[:-num_validation_samples_b]
y_train_b = y_train_full[:-num_validation_samples_b]
x_val_b = x_train_full[-num_validation_samples_b:]
y_val_b = y_train_full[-num_validation_samples_b:]

In [7]:
# (c) 3-fold cross-validation set
k_fold_c = 3
num_samples_c = len(x_train_full)
fold_size_c = int(num_samples_c / k_fold_c)

x_train_c = []
y_train_c = []
x_val_c = []
y_val_c = []

for fold in range(k_fold_c):
    start = fold * fold_size_c
    end = (fold + 1) * fold_size_c

    x_val_fold = x_train_full[start:end]
    y_val_fold = y_train_full[start:end]

    x_train_fold = np.concatenate([x_train_full[:start], x_train_full[end:]], axis=0)
    y_train_fold = np.concatenate([y_train_full[:start], y_train_full[end:]], axis=0)
    x_train_c.append(x_train_fold)
    y_train_c.append(y_train_fold)
    x_val_c.append(x_val_fold)
    y_val_c.append(y_val_fold)


In [8]:
# (d) 5-fold cross-validation set
k_fold_d = 5
num_samples_d = len(x_train_full)
fold_size_d = int(num_samples_d / k_fold_d)

x_train_d = []
y_train_d = []
x_val_d = []
y_val_d = []

for fold in range(k_fold_d):
    start = fold * fold_size_d
    end = (fold + 1) * fold_size_d

    x_val_fold = x_train_full[start:end]
    y_val_fold = y_train_full[start:end]

    x_train_fold = np.concatenate([x_train_full[:start], x_train_full[end:]], axis=0)
    y_train_fold = np.concatenate([y_train_full[:start], y_train_full[end:]], axis=0)
    x_train_d.append(x_train_fold)
    y_train_d.append(y_train_fold)
    x_val_d.append(x_val_fold)
    y_val_d.append(y_val_fold)

In [9]:
# (e) 10-fold cross-validation set
k_fold_e = 10
num_samples_e = len(x_train_full)
fold_size_e = int(num_samples_e / k_fold_e)

x_train_e = []
y_train_e = []
x_val_e = []
y_val_e = []

for fold in range(k_fold_e):
    start = fold * fold_size_e
    end = (fold + 1) * fold_size_e

    x_val_fold = x_train_full[start:end]
    y_val_fold = y_train_full[start:end]

    x_train_fold = np.concatenate([x_train_full[:start], x_train_full[end:]], axis=0)
    y_train_fold = np.concatenate([y_train_full[:start], y_train_full[end:]], axis=0)
    x_train_e.append(x_train_fold)
    y_train_e.append(y_train_fold)
    x_val_e.append(x_val_fold)
    y_val_e.append(y_val_fold)

In [10]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Load CIFAR-10 dataset
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Normalize image data
x_train_full = x_train_full.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# (a) No validation set
x_train_a = x_train_full
y_train_a = y_train_full

# (b) 1 validation set with 20% of 80%
validation_split_b = 0.2
num_samples_b = len(x_train_full)
num_validation_samples_b = int(num_samples_b * validation_split_b)

x_train_b = x_train_full[:-num_validation_samples_b]
y_train_b = y_train_full[:-num_validation_samples_b]
x_val_b = x_train_full[-num_validation_samples_b:]
y_val_b = y_train_full[-num_validation_samples_b:]

# (c) 3-fold cross-validation set
k_fold_c = 3
num_samples_c = len(x_train_full)
fold_size_c = int(num_samples_c / k_fold_c)

x_train_c = []
y_train_c = []
x_val_c = []
y_val_c = []

for fold in range(k_fold_c):
    start = fold * fold_size_c
    end = (fold + 1) * fold_size_c

    x_val_fold = x_train_full[start:end]
    y_val_fold = y_train_full[start:end]

    x_train_fold = np.concatenate([x_train_full[:start], x_train_full[end:]], axis=0)
    y_train_fold = np.concatenate([y_train_full[:start], y_train_full[end:]], axis=0)

    x_train_c.append(x_train_fold)
    y_train_c.append(y_train_fold)
    x_val_c.append(x_val_fold)
    y_val_c.append(y_val_fold)


#1


In [11]:
# (d) 5-fold cross-validation set
k_fold_d = 5
num_samples_d = len(x_train_full)
fold_size_d = int(num_samples_d / k_fold_d)

x_train_d = []
y_train_d = []
x_val_d = []
y_val_d = []

for fold in range(k_fold_d):
    start = fold * fold_size_d
    end = (fold + 1) * fold_size_d

    x_val_fold = x_train_full[start:end]
    y_val_fold = y_train_full[start:end]

    x_train_fold = np.concatenate([x_train_full[:start], x_train_full[end:]], axis=0)
    y_train_fold = np.concatenate([y_train_full[:start], y_train_full[end:]], axis=0)

    x_train_d.append(x_train_fold)
    y_train_d.append(y_train_fold)
    x_val_d.append(x_val_fold)
    y_val_d.append(y_val_fold)

# (e) 10-fold cross-validation set
k_fold_e = 10
num_samples_e = len(x_train_full)
fold_size_e = int(num_samples_e / k_fold_e)

x_train_e = []
y_train_e = []
x_val_e = []
y_val_e = []

for fold in range(k_fold_e):
    start = fold * fold_size_e
    end = (fold + 1) * fold_size_e

    x_val_fold = x_train_full[start:end]
    y_val_fold = y_train_full[start:end]

    x_train_fold = np.concatenate([x_train_full[:start], x_train_full[end:]], axis=0)
    y_train_fold = np.concatenate([y_train_full[:start], y_train_full[end:]], axis=0)

    x_train_e.append(x_train_fold)
    y_train_e.append(y_train_fold)
    x_val_e.append(x_val_fold)
    y_val_e.append(y_val_fold)
#2

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Load CIFAR-10 dataset
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Normalize image data
x_train_full = x_train_full.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# (a) No validation set
x_train_a = x_train_full
y_train_a = y_train_full

# (b) 1 validation set with 20% of 80%
validation_split_b = 0.2
num_samples_b = len(x_train_full)
num_validation_samples_b = int(num_samples_b * validation_split_b)

x_train_b = x_train_full[:-num_validation_samples_b]
y_train_b = y_train_full[:-num_validation_samples_b]
x_val_b = x_train_full[-num_validation_samples_b:]
y_val_b = y_train_full[-num_validation_samples_b:]

# (c) 3-fold cross-validation set
k_fold_c = 3
num_samples_c = len(x_train_full)
fold_size_c = int(num_samples_c / k_fold_c)

x_train_c = []
y_train_c = []
x_val_c = []
y_val_c = []

for fold in range(k_fold_c):
    start = fold * fold_size_c
    end = (fold + 1) * fold_size_c

    x_val_fold = x_train_full[start:end]
    y_val_fold = y_train_full[start:end]

    x_train_fold = np.concatenate([x_train_full[:start], x_train_full[end:]], axis=0)
    y_train_fold = np.concatenate([y_train_full[:start], y_train_full[end:]], axis=0)

    x_train_c.append(x_train_fold)
    y_train_c.append(y_train_fold)
    x_val_c.append(x_val_fold)
    y_val_c.append(y_val_fold)

# (d) 5-fold cross-validation set
k_fold_d = 5
num_samples_d = len(x_train_full)
fold_size_d = int(num_samples_d / k_fold_d)

x_train_d = []
y_train_d = []
x_val_d = []
y_val_d = []

for fold in range(k_fold_d):
    start = fold * fold_size_d
    end = (fold + 1) * fold_size_d

    x_val_fold = x_train_full[start:end]
    y_val_fold = y_train_full[start:end]

    x_train_fold = np.concatenate([x_train_full[:start], x_train_full[end:]], axis=0)
    y_train_fold = np.concatenate([y_train_full[:start], y_train_full[end:]], axis=0)

    x_train_d.append(x_train_fold)
    y_train_d.append(y_train_fold)
    x_val_d.append(x_val_fold)
    y_val_d.append(y_val_fold)

# (e) 10-fold cross-validation set
k_fold_e = 10
num_samples_e = len(x_train_full)
fold_size_e = int(num_samples_e / k_fold_e)

x_train_e = []
y_train_e = []
x_val_e = []
y_val_e = []

for fold in range(k_fold_e):
    start = fold * fold_size_e
    end = (fold + 1) * fold_size_e

    x_val_fold = x_train_full[start:end]
    y_val_fold = y_train_full[start:end]

    x_train_fold = np.concatenate([x_train_full[:start], x_train_full[end:]], axis=0)
    y_train_fold = np.concatenate([y_train_full[:start], y_train_full[end:]], axis=0)

    x_train_e.append(x_train_fold)
    y_train_e.append(y_train_fold)
    x_val_e.append(x_val_fold)
    y_val_e.append(y_val_fold)

# (f) Leave-one-out cross-validation set
k_fold_f = num_samples_e
x_train_f = []
y_train_f = []
x_val_f = []
y_val_f = []

for fold in range(k_fold_f):
    x_val_fold = x_train_full[fold]
    y_val_fold = y_train_full[fold]

    x_train_fold = np.concatenate([x_train_full[:fold], x_train_full[fold + 1:]], axis=0)
    y_train_fold = np.concatenate([y_train_full[:fold], y_train_full[fold + 1:]], axis=0)

    x_train_f.append(x_train_fold)
    y_train_f.append(y_train_fold)
    x_val_f.append(x_val_fold)
    y_val_f.append(y_val_fold)


# Print the shape of the data and labels for each scenario
print("(a) No validation set")
print("Sorry, but it seems that")
print("Train data shape:", x_train_a.shape)
print("Train labels shape:", y_train_a.shape)

print("(b) 1 validation set with 20% of 80%")
print("Train data shape:", x_train_b.shape)
print("Train labels shape:", y_train_b.shape)
print("Validation data shape:", x_val_b.shape)
print("Validation labels shape:", y_val_b.shape)

print("(c) 3-fold cross-validation set")
for fold in range(k_fold_c):
    print(f"Fold {fold+1}")
    print("Train data shape:", x_train_c[fold].shape)
    print("Train labels shape:", y_train_c[fold].shape)
    print("Validation data shape:", x_val_c[fold].shape)
    print("Validation labels shape:", y_val_c[fold].shape)

print("(d) 5-fold cross-validation set")
for fold in range(k_fold_d):
    print(f"Fold {fold+1}")
    print("Train data shape:", x_train_d[fold].shape)
    print("Train labels shape:", y_train_d[fold].shape)
    print("Validation data shape:", x_val_d[fold].shape)
    print("Validation labels shape:", y_val_d[fold].shape)

print("(e) 10-fold cross-validation set")
for fold in range(k_fold_e):
    print(f"Fold {fold+1}")
    print("Train data shape:", x_train_e[fold].shape)
    print("Train labels shape:", y_train_e[fold].shape)
    print("Validation data shape:", x_val_e[fold].shape)
    print("Validation labels shape:", y_val_e[fold].shape)

print("(f) Leave-one-out cross-validation set")
for fold in range(k_fold_f):
    print(f"Fold {fold+1}")
    print("Train data shape:", x_train_f[fold].shape)
    print("Train labels shape:", y_train_f[fold].shape)
    print("Validation data shape:", x_val_f[fold].shape)
    print("Validation labels shape:", y_val_f[fold].shape)

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random Forest
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)
rf_predictions = rf_classifier.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)

# k-Nearest Neighbors (k-NN)
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(X_train, y_train)
knn_predictions = knn_classifier.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_predictions)
print("k-NN Accuracy:", knn_accuracy)

# Naive Bayes
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
nb_predictions = nb_classifier.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_predictions)
print("Naive Bayes Accuracy:", nb_accuracy)

# Decision Tree
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train, y_train)
dt_predictions = dt_classifier.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)
print("Decision Tree Accuracy:", dt_accuracy)

Random Forest Accuracy: 0.9722222222222222
k-NN Accuracy: 0.9861111111111112
Naive Bayes Accuracy: 0.8472222222222222
Decision Tree Accuracy: 0.8416666666666667


In [3]:
from sklearn.metrics import classification_report

# Evaluate Random Forest
rf_predictions = rf_classifier.predict(X_test)
rf_report = classification_report(y_test, rf_predictions)
print("Random Forest Metrics:")
print(rf_report)

# Evaluate k-Nearest Neighbors (k-NN)
knn_predictions = knn_classifier.predict(X_test)
knn_report = classification_report(y_test, knn_predictions)
print("k-NN Metrics:")
print(knn_report)

# Evaluate Naive Bayes
nb_predictions = nb_classifier.predict(X_test)
nb_report = classification_report(y_test, nb_predictions)
print("Naive Bayes Metrics:")
print(nb_report)

# Evaluate Decision Tree
dt_predictions = dt_classifier.predict(X_test)
dt_report = classification_report(y_test, dt_predictions)
print("Decision Tree Metrics:")
print(dt_report)

Random Forest Metrics:
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        33
           1       0.97      1.00      0.98        28
           2       1.00      1.00      1.00        33
           3       1.00      0.94      0.97        34
           4       0.98      1.00      0.99        46
           5       0.94      0.96      0.95        47
           6       0.97      0.97      0.97        35
           7       0.97      0.97      0.97        34
           8       0.97      0.97      0.97        30
           9       0.95      0.95      0.95        40

    accuracy                           0.97       360
   macro avg       0.97      0.97      0.97       360
weighted avg       0.97      0.97      0.97       360

k-NN Metrics:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        28
           2       1.00      1.00      1.

In [4]:
from sklearn.metrics import confusion_matrix

# Confusion matrix for Random Forest
rf_cm = confusion_matrix(y_test, rf_predictions)
print("Random Forest Confusion Matrix:")
print(rf_cm)

# Confusion matrix for k-Nearest Neighbors (k-NN)
knn_cm = confusion_matrix(y_test, knn_predictions)
print("k-NN Confusion Matrix:")
print(knn_cm)

# Confusion matrix for Naive Bayes
nb_cm = confusion_matrix(y_test, nb_predictions)
print("Naive Bayes Confusion Matrix:")
print(nb_cm)

# Confusion matrix for Decision Tree
dt_cm = confusion_matrix(y_test, dt_predictions)
print("Decision Tree Confusion Matrix:")
print(dt_cm)

Random Forest Confusion Matrix:
[[32  0  0  0  1  0  0  0  0  0]
 [ 0 28  0  0  0  0  0  0  0  0]
 [ 0  0 33  0  0  0  0  0  0  0]
 [ 0  0  0 32  0  1  0  0  1  0]
 [ 0  0  0  0 46  0  0  0  0  0]
 [ 0  0  0  0  0 45  1  0  0  1]
 [ 0  0  0  0  0  1 34  0  0  0]
 [ 0  0  0  0  0  0  0 33  0  1]
 [ 0  1  0  0  0  0  0  0 29  0]
 [ 0  0  0  0  0  1  0  1  0 38]]
k-NN Confusion Matrix:
[[33  0  0  0  0  0  0  0  0  0]
 [ 0 28  0  0  0  0  0  0  0  0]
 [ 0  0 33  0  0  0  0  0  0  0]
 [ 0  0  0 34  0  0  0  0  0  0]
 [ 0  0  0  0 46  0  0  0  0  0]
 [ 0  0  0  0  0 45  1  0  0  1]
 [ 0  0  0  0  0  0 35  0  0  0]
 [ 0  0  0  0  0  0  0 33  0  1]
 [ 0  0  0  0  0  0  0  0 30  0]
 [ 0  0  0  0  1  1  0  0  0 38]]
Naive Bayes Confusion Matrix:
[[31  0  0  0  0  1  0  1  0  0]
 [ 0 24  0  0  0  0  0  0  3  1]
 [ 0  2 20  0  0  0  1  0 10  0]
 [ 0  0  1 29  0  1  0  0  3  0]
 [ 0  0  0  0 38  0  1  7  0  0]
 [ 0  0  0  1  0 44  1  1  0  0]
 [ 0  0  0  0  1  0 34  0  0  0]
 [ 0  0  0  0  0  1  0

In [5]:
import numpy as np

# Define class labels
class_labels = np.unique(y_test)

# Analyze performance for each class
for class_label in class_labels:
    # Random Forest
    rf_class_predictions = rf_predictions[y_test == class_label]
    rf_class_true = y_test[y_test == class_label]
    rf_class_accuracy = accuracy_score(rf_class_true, rf_class_predictions)
    rf_class_report = classification_report(rf_class_true, rf_class_predictions)
    print("Random Forest - Class:", class_label)
    print("Accuracy:", rf_class_accuracy)
    print(rf_class_report)
    print("----------------------")

    # k-Nearest Neighbors (k-NN)
    knn_class_predictions = knn_predictions[y_test == class_label]
    knn_class_true = y_test[y_test == class_label]
    knn_class_accuracy = accuracy_score(knn_class_true, knn_class_predictions)
    knn_class_report = classification_report(knn_class_true, knn_class_predictions)
    print("k-NN - Class:", class_label)
    print("Accuracy:", knn_class_accuracy)
    print(knn_class_report)
    print("----------------------")

    # Naive Bayes
    nb_class_predictions = nb_predictions[y_test == class_label]
    nb_class_true = y_test[y_test == class_label]
    nb_class_accuracy = accuracy_score(nb_class_true, nb_class_predictions)
    nb_class_report = classification_report(nb_class_true, nb_class_predictions)
    print("Naive Bayes - Class:", class_label)
    print("Accuracy:", nb_class_accuracy)
    print(nb_class_report)
    print("----------------------")

    # Decision Tree
    dt_class_predictions = dt_predictions[y_test == class_label]
    dt_class_true = y_test[y_test == class_label]
    dt_class_accuracy = accuracy_score(dt_class_true, dt_class_predictions)
    dt_class_report = classification_report(dt_class_true, dt_class_predictions)
    print("Decision Tree - Class:", class_label)
    print("Accuracy:", dt_class_accuracy)
    print(dt_class_report)
    print("----------------------")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Random Forest - Class: 0
Accuracy: 0.9696969696969697
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        33
           4       0.00      0.00      0.00         0

    accuracy                           0.97        33
   macro avg       0.50      0.48      0.49        33
weighted avg       1.00      0.97      0.98        33

----------------------
k-NN - Class: 0
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33

    accuracy                           1.00        33
   macro avg       1.00      1.00      1.00        33
weighted avg       1.00      1.00      1.00        33

----------------------
Naive Bayes - Class: 0
Accuracy: 0.9393939393939394
              precision    recall  f1-score   support

           0       1.00      0.94      0.97        33
           5       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Naive Bayes - Class: 1
Accuracy: 0.8571428571428571
              precision    recall  f1-score   support

           1       1.00      0.86      0.92        28
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0

    accuracy                           0.86        28
   macro avg       0.33      0.29      0.31        28
weighted avg       1.00      0.86      0.92        28

----------------------
Decision Tree - Class: 1
Accuracy: 0.7857142857142857
              precision    recall  f1-score   support

           1       1.00      0.79      0.88        28
           2       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0

    accuracy                           0.79        28
   macro avg       0.17      0.13      0.15        28
we

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9411764705882353
              precision    recall  f1-score   support

           3       1.00      0.94      0.97        34
           5       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0

    accuracy                           0.94        34
   macro avg       0.33      0.31      0.32        34
weighted avg       1.00      0.94      0.97        34

----------------------
k-NN - Class: 3
Accuracy: 1.0
              precision    recall  f1-score   support

           3       1.00      1.00      1.00        34

    accuracy                           1.00        34
   macro avg       1.00      1.00      1.00        34
weighted avg       1.00      1.00      1.00        34

----------------------
Naive Bayes - Class: 3
Accuracy: 0.8529411764705882
              precision    recall  f1-score   support

           2       0.00      0.00      0.00         0
           3       1.00      0.85      0.92        34
           5       0.00   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


k-NN - Class: 5
Accuracy: 0.9574468085106383
              precision    recall  f1-score   support

           5       1.00      0.96      0.98        47
           6       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0

    accuracy                           0.96        47
   macro avg       0.33      0.32      0.33        47
weighted avg       1.00      0.96      0.98        47

----------------------
Naive Bayes - Class: 5
Accuracy: 0.9361702127659575
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         0
           5       1.00      0.94      0.97        47
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0

    accuracy                           0.94        47
   macro avg       0.25      0.23      0.24        47
weighted avg       1.00      0.94      0.97        47

----------------------
Decision Tree - Class: 5
Accuracy: 0.8510

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Random Forest - Class: 6
Accuracy: 0.9714285714285714
              precision    recall  f1-score   support

           5       0.00      0.00      0.00         0
           6       1.00      0.97      0.99        35

    accuracy                           0.97        35
   macro avg       0.50      0.49      0.49        35
weighted avg       1.00      0.97      0.99        35

----------------------
k-NN - Class: 6
Accuracy: 1.0
              precision    recall  f1-score   support

           6       1.00      1.00      1.00        35

    accuracy                           1.00        35
   macro avg       1.00      1.00      1.00        35
weighted avg       1.00      1.00      1.00        35

----------------------
Naive Bayes - Class: 6
Accuracy: 0.9714285714285714
              precision    recall  f1-score   support

           4       0.00      0.00      0.00         0
           6       1.00      0.97      0.99        35

    accuracy                           0.97        35


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Random Forest - Class: 7
Accuracy: 0.9705882352941176
              precision    recall  f1-score   support

           7       1.00      0.97      0.99        34
           9       0.00      0.00      0.00         0

    accuracy                           0.97        34
   macro avg       0.50      0.49      0.49        34
weighted avg       1.00      0.97      0.99        34

----------------------
k-NN - Class: 7
Accuracy: 0.9705882352941176
              precision    recall  f1-score   support

           7       1.00      0.97      0.99        34
           9       0.00      0.00      0.00         0

    accuracy                           0.97        34
   macro avg       0.50      0.49      0.49        34
weighted avg       1.00      0.97      0.99        34

----------------------
Naive Bayes - Class: 7
Accuracy: 0.9705882352941176
              precision    recall  f1-score   support

           5       0.00      0.00      0.00         0
           7       1.00      0.97      0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Naive Bayes - Class: 8
Accuracy: 0.8666666666666667
              precision    recall  f1-score   support

           1       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       1.00      0.87      0.93        30

    accuracy                           0.87        30
   macro avg       0.33      0.29      0.31        30
weighted avg       1.00      0.87      0.93        30

----------------------
Decision Tree - Class: 8
Accuracy: 0.7
              precision    recall  f1-score   support

           1       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           8       1.00      0.70      0.82        30
           9       0.00      0.00      0.00         0

    accuracy                           0.70        30
   macro avg       0.20      0.14      0.16        30
weighted avg       1.00      0.70      0.82        30

----------------

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [6]:
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from keras.datasets import cifar10

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Convert color images to grayscale
x_train_gray = np.array([cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) for img in x_train])
x_test_gray = np.array([cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) for img in x_test])

# Resize grayscale images to match the dimensions of color images
x_train_gray = np.resize(x_train_gray, (x_train_gray.shape[0], 32, 32))
x_test_gray = np.resize(x_test_gray, (x_test_gray.shape[0], 32, 32))

# Split grayscale dataset into training and testing sets
x_train_gray, x_val_gray, y_train, y_val = train_test_split(x_train_gray, y_train, test_size=0.2, random_state=42)

# Reshape grayscale images to flatten them
x_train_gray = x_train_gray.reshape(x_train_gray.shape[0], -1)
x_val_gray = x_val_gray.reshape(x_val_gray.shape[0], -1)
x_test_gray = x_test_gray.reshape(x_test_gray.shape[0], -1)

# Train Random Forest model on grayscale images
rf_model_gray = RandomForestClassifier()
rf_model_gray.fit(x_train_gray, y_train)
rf_predictions_gray = rf_model_gray.predict(x_val_gray)

# Train k-Nearest Neighbors model on grayscale images
knn_model_gray = KNeighborsClassifier()
knn_model_gray.fit(x_train_gray, y_train)
knn_predictions_gray = knn_model_gray.predict(x_val_gray)

# Train Naive Bayes model on grayscale images
nb_model_gray = GaussianNB()
nb_model_gray.fit(x_train_gray, y_train)
nb_predictions_gray = nb_model_gray.predict(x_val_gray)

# Train Decision Tree model on grayscale images
dt_model_gray = DecisionTreeClassifier()
dt_model_gray.fit(x_train_gray, y_train)
dt_predictions_gray = dt_model_gray.predict(x_val_gray)

# Evaluate performance on grayscale images
print("Grayscale Image Results:")
print("Random Forest Accuracy:", accuracy_score(y_val, rf_predictions_gray))
print("Random Forest Classification Report:")
print(classification_report(y_val, rf_predictions_gray))
print("-----------------------------------------")
print("k-Nearest Neighbors Accuracy:", accuracy_score(y_val, knn_predictions_gray))
print("k-Nearest Neighbors Classification Report:")
print(classification_report(y_val, knn_predictions_gray))
print("-----------------------------------------")
print("Naive Bayes Accuracy:", accuracy_score(y_val, nb_predictions_gray))
print("Naive Bayes Classification Report:")
print(classification_report(y_val, nb_predictions_gray))
print("-----------------------------------------")
print("Decision Tree Accuracy:", accuracy_score(y_val, dt_predictions_gray))
print("Decision Tree Classification Report:")
print(classification_report(y_val, dt_predictions_gray))
print("-----------------------------------------")

# Train Random Forest model on color images
rf_model_color = RandomForestClassifier()
rf_model_color.fit(x_train.reshape(x_train.shape[0], -1), y_train)
rf_predictions_color = rf_model_color.predict(x_val.reshape(x_val.shape[0], -1))

# Train k-Nearest Neighbors model on color images
knn_model_color = KNeighborsClassifier()
knn_model_color.fit(x_train.reshape(x_train.shape[0], -1), y_train)
knn_predictions_color = knn_model_color.predict(x_val.reshape(x_val.shape[0], -1))

# Train Naive Bayes model on color images
nb_model_color = GaussianNB()
nb_model_color.fit(x_train.reshape(x_train.shape[0], -1), y_train)
nb_predictions_color = nb_model_color.predict(x_val.reshape(x_val.shape[0], -1))

# Train Decision Tree model on color images
dt_model_color = DecisionTreeClassifier()
dt_model_color.fit(x_train.reshape(x_train.shape[0], -1), y_train)
dt_predictions_color = dt_model_color.predict(x_val.reshape(x_val.shape[0], -1))

# Evaluate performance on color images
print("Color Image Results:")
print("Random Forest Accuracy:", accuracy_score(y_val, rf_predictions_color))
print("Random Forest Classification Report:")
print(classification_report(y_val, rf_predictions_color))
print("-----------------------------------------")
print("k-Nearest Neighbors Accuracy:", accuracy_score(y_val, knn_predictions_color))
print("k-Nearest Neighbors Classification Report:")
print(classification_report(y_val, knn_predictions_color))
print("-----------------------------------------")
print("Naive Bayes Accuracy:", accuracy_score(y_val, nb_predictions_color))
print("Naive Bayes Classification Report:")
print(classification_report(y_val, nb_predictions_color))
print("-----------------------------------------")
print("Decision Tree Accuracy:", accuracy_score(y_val, dt_predictions_color))
print("Decision Tree Classification Report:")
print(classification_report(y_val, dt_predictions_color))
print("-----------------------------------------")

  rf_model_gray.fit(x_train_gray, y_train)
  return self._fit(X, y)
  y = column_or_1d(y, warn=True)


Grayscale Image Results:
Random Forest Accuracy: 0.407
Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.44      0.41      0.42       973
           1       0.43      0.49      0.46       979
           2       0.34      0.30      0.32      1030
           3       0.32      0.23      0.27      1023
           4       0.32      0.39      0.35       933
           5       0.38      0.34      0.36      1015
           6       0.41      0.42      0.41       996
           7       0.46      0.43      0.45       994
           8       0.46      0.52      0.49      1017
           9       0.48      0.54      0.51      1040

    accuracy                           0.41     10000
   macro avg       0.40      0.41      0.40     10000
weighted avg       0.40      0.41      0.40     10000

-----------------------------------------
k-Nearest Neighbors Accuracy: 0.2769
k-Nearest Neighbors Classification Report:
              precision    

ValueError: Found input variables with inconsistent numbers of samples: [50000, 40000]