In [2]:
import numpy as np
import tensorflow as tf
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Define evaluate_model function
def evaluate_model(model, x_test, y_test):
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    return accuracy, report, confusion

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Generate a balanced training dataset with 10 classes and 10,000 total images
balanced_x_train = []
balanced_y_train = []
num_images_per_class = 1000

for class_label in range(10):
    indices = np.where(y_train == class_label)[0]
    np.random.shuffle(indices)
    selected_indices = indices[:num_images_per_class]
    balanced_x_train.extend(x_train[selected_indices])
    balanced_y_train.extend(y_train[selected_indices])

balanced_x_train = np.array(balanced_x_train)
balanced_y_train = np.array(balanced_y_train)

# Flatten images and scale pixel values to the range [0, 1]
balanced_x_train_flat = balanced_x_train.reshape(balanced_x_train.shape[0], -1) / 255.0
x_test_flat = x_test.reshape(x_test.shape[0], -1) / 255.0

# Convert images to grayscale
balanced_x_train_gray = np.mean(balanced_x_train, axis=3, keepdims=True) / 255.0
x_test_gray = np.mean(x_test, axis=3, keepdims=True) / 255.0

# Initialize classifiers
decision_tree_clf = DecisionTreeClassifier()
random_forest_clf = RandomForestClassifier()
logistic_regression_clf = LogisticRegression(max_iter=1000)
svm_clf = SVC()

# Train and evaluate models for color images
print("Color Images:")
print("==========================")
# Train Decision Tree
decision_tree_clf.fit(balanced_x_train_flat, balanced_y_train)
# Evaluate Decision Tree
dt_accuracy, dt_report, dt_confusion = evaluate_model(decision_tree_clf, x_test_flat, y_test)

# Train Random Forest
random_forest_clf.fit(balanced_x_train_flat, balanced_y_train)
# Evaluate Random Forest
rf_accuracy, rf_report, rf_confusion = evaluate_model(random_forest_clf, x_test_flat, y_test)

# Train Logistic Regression
logistic_regression_clf.fit(balanced_x_train_flat, balanced_y_train)
# Evaluate Logistic Regression
lr_accuracy, lr_report, lr_confusion = evaluate_model(logistic_regression_clf, x_test_flat, y_test)

# Scale features for SVM
scaler = StandardScaler()
balanced_x_train_scaled = scaler.fit_transform(balanced_x_train_flat)
x_test_scaled = scaler.transform(x_test_flat)

# Train SVM
svm_clf.fit(balanced_x_train_scaled, balanced_y_train)
# Evaluate SVM
svm_accuracy, svm_report, svm_confusion = evaluate_model(svm_clf, x_test_scaled, y_test)

# Print results for color images
print("Decision Tree Accuracy:", dt_accuracy)
print("Decision Tree Classification Report:\n", dt_report)
print("Decision Tree Confusion Matrix:\n", dt_confusion)

print("Random Forest Accuracy:", rf_accuracy)
print("Random Forest Classification Report:\n", rf_report)
print("Random Forest Confusion Matrix:\n", rf_confusion)

print("Logistic Regression Accuracy:", lr_accuracy)
print("Logistic Regression Classification Report:\n", lr_report)
print("Logistic Regression Confusion Matrix:\n", lr_confusion)

print("SVM Accuracy:", svm_accuracy)
print("SVM Classification Report:\n", svm_report)
print("SVM Confusion Matrix:\n", svm_confusion)

# Train and evaluate models for grayscale images
print("\nGrayscale Images:")
print("==========================")
# Train Decision Tree
decision_tree_clf.fit(balanced_x_train_gray.reshape(balanced_x_train_gray.shape[0], -1), balanced_y_train)
# Evaluate Decision Tree
dt_accuracy_gray, dt_report_gray, dt_confusion_gray = evaluate_model(decision_tree_clf, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test)

# Train Random Forest
random_forest_clf.fit(balanced_x_train_gray.reshape(balanced_x_train_gray.shape[0], -1), balanced_y_train)
# Evaluate Random Forest
rf_accuracy_gray, rf_report_gray, rf_confusion_gray = evaluate_model(random_forest_clf, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test)

# Train Logistic Regression
logistic_regression_clf.fit(balanced_x_train_gray.reshape(balanced_x_train_gray.shape[0], -1), balanced_y_train)
# Evaluate Logistic Regression
lr_accuracy_gray, lr_report_gray, lr_confusion_gray = evaluate_model(logistic_regression_clf, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test)

# Scale features for SVM
scaler_gray = StandardScaler()
balanced_x_train_gray_scaled = scaler_gray.fit_transform(balanced_x_train_gray.reshape(balanced_x_train_gray.shape[0], -1))
x_test_gray_scaled = scaler_gray.transform(x_test_gray.reshape(x_test_gray.shape[0], -1))

# Train SVM
svm_clf.fit(balanced_x_train_gray_scaled, balanced_y_train)
# Evaluate SVM
svm_accuracy_gray, svm_report_gray, svm_confusion_gray = evaluate_model(svm_clf, x_test_gray_scaled, y_test)

# Print results for grayscale images
print("Decision Tree Accuracy (Grayscale):", dt_accuracy_gray)
print("Decision Tree Classification Report (Grayscale):\n", dt_report_gray)
print("Decision Tree Confusion Matrix (Grayscale):\n", dt_confusion_gray)

print("Random Forest Accuracy (Grayscale):", rf_accuracy_gray)
print("Random Forest Classification Report (Grayscale):\n", rf_report_gray)
print("Random Forest Confusion Matrix (Grayscale):\n", rf_confusion_gray)

print("Logistic Regression Accuracy (Grayscale):", lr_accuracy_gray)
print("Logistic Regression Classification Report (Grayscale):\n", lr_report_gray)
print("Logistic Regression Confusion Matrix (Grayscale):\n", lr_confusion_gray)

print("SVM Accuracy (Grayscale):", svm_accuracy_gray)
print("SVM Classification Report (Grayscale):\n", svm_report_gray)
print("SVM Confusion Matrix (Grayscale):\n", svm_confusion_gray)


Color Images:


  random_forest_clf.fit(balanced_x_train_flat, balanced_y_train)
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)


Decision Tree Accuracy: 0.24
Decision Tree Classification Report:
               precision    recall  f1-score   support

           0       0.32      0.33      0.32      1000
           1       0.25      0.22      0.23      1000
           2       0.18      0.20      0.19      1000
           3       0.16      0.15      0.15      1000
           4       0.20      0.22      0.21      1000
           5       0.22      0.20      0.21      1000
           6       0.25      0.25      0.25      1000
           7       0.23      0.23      0.23      1000
           8       0.33      0.36      0.35      1000
           9       0.25      0.24      0.25      1000

    accuracy                           0.24     10000
   macro avg       0.24      0.24      0.24     10000
weighted avg       0.24      0.24      0.24     10000

Decision Tree Confusion Matrix:
 [[328  67  92  51  67  48  42  64 161  80]
 [ 87 221  66  74  54  59  77  77 123 162]
 [101  56 196  89 157  87 115  97  66  36]
 [ 69  69 10

  random_forest_clf.fit(balanced_x_train_gray.reshape(balanced_x_train_gray.shape[0], -1), balanced_y_train)
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)


Decision Tree Accuracy (Grayscale): 0.205
Decision Tree Classification Report (Grayscale):
               precision    recall  f1-score   support

           0       0.24      0.25      0.24      1000
           1       0.23      0.21      0.22      1000
           2       0.17      0.18      0.18      1000
           3       0.15      0.14      0.15      1000
           4       0.17      0.17      0.17      1000
           5       0.18      0.18      0.18      1000
           6       0.19      0.19      0.19      1000
           7       0.18      0.19      0.18      1000
           8       0.26      0.27      0.26      1000
           9       0.28      0.25      0.26      1000

    accuracy                           0.20     10000
   macro avg       0.21      0.20      0.21     10000
weighted avg       0.21      0.20      0.21     10000

Decision Tree Confusion Matrix (Grayscale):
 [[252  59 106  60  74  75  79  83 140  72]
 [ 90 214  56  78  64  83  82  73 127 133]
 [135  62 182  89 

In [3]:
import numpy as np
import tensorflow as tf
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Define evaluate_model function
def evaluate_model(model, x_test, y_test):
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    return accuracy, report, confusion

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Generate a balanced training dataset with 10 classes and 10,000 total images
balanced_x_train = []
balanced_y_train = []
num_images_per_class = 1000

for class_label in range(10):
    indices = np.where(y_train == class_label)[0]
    np.random.shuffle(indices)
    selected_indices = indices[:num_images_per_class]
    balanced_x_train.extend(x_train[selected_indices])
    balanced_y_train.extend(y_train[selected_indices])

balanced_x_train = np.array(balanced_x_train)
balanced_y_train = np.array(balanced_y_train)

# Split training data into 80% training and 20% validation
x_train_balanced, x_val_balanced, y_train_balanced, y_val_balanced = train_test_split(balanced_x_train, balanced_y_train, test_size=0.2, random_state=42)

# Flatten images and scale pixel values to the range [0, 1]
balanced_x_train_flat = x_train_balanced.reshape(x_train_balanced.shape[0], -1) / 255.0
x_val_flat = x_val_balanced.reshape(x_val_balanced.shape[0], -1) / 255.0
x_test_flat = x_test.reshape(x_test.shape[0], -1) / 255.0

# Convert images to grayscale
balanced_x_train_gray = np.mean(x_train_balanced, axis=3, keepdims=True) / 255.0
x_val_gray = np.mean(x_val_balanced, axis=3, keepdims=True) / 255.0
x_test_gray = np.mean(x_test, axis=3, keepdims=True) / 255.0

# Initialize classifiers
decision_tree_clf = DecisionTreeClassifier()
random_forest_clf = RandomForestClassifier()
logistic_regression_clf = LogisticRegression(max_iter=1000)
svm_clf = SVC()

# Train and evaluate models for color images
print("Color Images (20% Validation of Training Dataset):")
print("==========================")
# Train Decision Tree
decision_tree_clf.fit(balanced_x_train_flat, y_train_balanced)
# Evaluate Decision Tree
dt_accuracy, dt_report, dt_confusion = evaluate_model(decision_tree_clf, x_val_flat, y_val_balanced)

# Train Random Forest
random_forest_clf.fit(balanced_x_train_flat, y_train_balanced)
# Evaluate Random Forest
rf_accuracy, rf_report, rf_confusion = evaluate_model(random_forest_clf, x_val_flat, y_val_balanced)

# Train Logistic Regression
logistic_regression_clf.fit(balanced_x_train_flat, y_train_balanced)
# Evaluate Logistic Regression
lr_accuracy, lr_report, lr_confusion = evaluate_model(logistic_regression_clf, x_val_flat, y_val_balanced)

# Scale features for SVM
scaler = StandardScaler()
balanced_x_train_scaled = scaler.fit_transform(balanced_x_train_flat)
x_val_scaled = scaler.transform(x_val_flat)
x_test_scaled = scaler.transform(x_test_flat)

# Train SVM
svm_clf.fit(balanced_x_train_scaled, y_train_balanced)
# Evaluate SVM
svm_accuracy, svm_report, svm_confusion = evaluate_model(svm_clf, x_val_scaled, y_val_balanced)

# Print results for color images
print("Decision Tree Accuracy:", dt_accuracy)
print("Decision Tree Classification Report:\n", dt_report)
print("Decision Tree Confusion Matrix:\n", dt_confusion)

print("Random Forest Accuracy:", rf_accuracy)
print("Random Forest Classification Report:\n", rf_report)
print("Random Forest Confusion Matrix:\n", rf_confusion)

print("Logistic Regression Accuracy:", lr_accuracy)
print("Logistic Regression Classification Report:\n", lr_report)
print("Logistic Regression Confusion Matrix:\n", lr_confusion)

print("SVM Accuracy:", svm_accuracy)
print("SVM Classification Report:\n", svm_report)
print("SVM Confusion Matrix:\n", svm_confusion)

# Train and evaluate models for grayscale images
print("\nGrayscale Images (20% Validation of Training Dataset):")
print("==========================")
# Train Decision Tree
decision_tree_clf.fit(balanced_x_train_gray.reshape(balanced_x_train_gray.shape[0], -1), y_train_balanced)
# Evaluate Decision Tree
dt_accuracy_gray, dt_report_gray, dt_confusion_gray = evaluate_model(decision_tree_clf, x_val_gray.reshape(x_val_gray.shape[0], -1), y_val_balanced)

# Train Random Forest
random_forest_clf.fit(balanced_x_train_gray.reshape(balanced_x_train_gray.shape[0], -1), y_train_balanced)
# Evaluate Random Forest
rf_accuracy_gray, rf_report_gray, rf_confusion_gray = evaluate_model(random_forest_clf, x_val_gray.reshape(x_val_gray.shape[0], -1), y_val_balanced)

# Train Logistic Regression
logistic_regression_clf.fit(balanced_x_train_gray.reshape(balanced_x_train_gray.shape[0], -1), y_train_balanced)
# Evaluate Logistic Regression
lr_accuracy_gray, lr_report_gray, lr_confusion_gray = evaluate_model(logistic_regression_clf, x_val_gray.reshape(x_val_gray.shape[0], -1), y_val_balanced)

# Scale features for SVM
scaler_gray = StandardScaler()
balanced_x_train_gray_scaled = scaler_gray.fit_transform(balanced_x_train_gray.reshape(balanced_x_train_gray.shape[0], -1))
x_val_gray_scaled = scaler_gray.transform(x_val_gray.reshape(x_val_gray.shape[0], -1))
x_test_gray_scaled = scaler_gray.transform(x_test_gray.reshape(x_test_gray.shape[0], -1))

# Train SVM
svm_clf.fit(balanced_x_train_gray_scaled, y_train_balanced)
# Evaluate SVM
svm_accuracy_gray, svm_report_gray, svm_confusion_gray = evaluate_model(svm_clf, x_val_gray_scaled, y_val_balanced)

# Print results for grayscale images
print("Decision Tree Accuracy (Grayscale):", dt_accuracy_gray)
print("Decision Tree Classification Report (Grayscale):\n", dt_report_gray)
print("Decision Tree Confusion Matrix (Grayscale):\n", dt_confusion_gray)

print("Random Forest Accuracy (Grayscale):", rf_accuracy_gray)
print("Random Forest Classification Report (Grayscale):\n", rf_report_gray)
print("Random Forest Confusion Matrix (Grayscale):\n", rf_confusion_gray)

print("Logistic Regression Accuracy (Grayscale):", lr_accuracy_gray)
print("Logistic Regression Classification Report (Grayscale):\n", lr_report_gray)
print("Logistic Regression Confusion Matrix (Grayscale):\n", lr_confusion_gray)

print("SVM Accuracy (Grayscale):", svm_accuracy_gray)
print("SVM Classification Report (Grayscale):\n", svm_report_gray)
print("SVM Confusion Matrix (Grayscale):\n", svm_confusion_gray)


Color Images (20% Validation of Training Dataset):


  random_forest_clf.fit(balanced_x_train_flat, y_train_balanced)
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)


Decision Tree Accuracy: 0.231
Decision Tree Classification Report:
               precision    recall  f1-score   support

           0       0.30      0.30      0.30       208
           1       0.24      0.21      0.22       202
           2       0.16      0.21      0.19       192
           3       0.14      0.13      0.14       201
           4       0.19      0.18      0.18       209
           5       0.20      0.18      0.19       186
           6       0.23      0.23      0.23       211
           7       0.23      0.20      0.21       204
           8       0.35      0.34      0.35       212
           9       0.28      0.31      0.30       175

    accuracy                           0.23      2000
   macro avg       0.23      0.23      0.23      2000
weighted avg       0.23      0.23      0.23      2000

Decision Tree Confusion Matrix:
 [[63 18 24  9 14 10  5 11 29 25]
 [17 43 15 14 12 12 19 12 25 33]
 [23  8 41 26 25 13 24 14 14  4]
 [13 20 22 27 26 23 24 23 10 13]
 [15  7 

  random_forest_clf.fit(balanced_x_train_gray.reshape(balanced_x_train_gray.shape[0], -1), y_train_balanced)
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)


Decision Tree Accuracy (Grayscale): 0.201
Decision Tree Classification Report (Grayscale):
               precision    recall  f1-score   support

           0       0.26      0.26      0.26       208
           1       0.24      0.24      0.24       202
           2       0.19      0.21      0.20       192
           3       0.12      0.11      0.12       201
           4       0.16      0.17      0.17       209
           5       0.17      0.15      0.16       186
           6       0.18      0.18      0.18       211
           7       0.20      0.18      0.19       204
           8       0.25      0.25      0.25       212
           9       0.22      0.26      0.24       175

    accuracy                           0.20      2000
   macro avg       0.20      0.20      0.20      2000
weighted avg       0.20      0.20      0.20      2000

Decision Tree Confusion Matrix (Grayscale):
 [[54  7 17 15 23  8 14 19 38 13]
 [17 48 10 14 13 11 21 21 18 29]
 [24 13 40 12 27 17 23 11 12 13]
 [14 

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler

# Define evaluate_model function
def evaluate_model(model, x_train, y_train, x_test, y_test):
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    accuracy_scores = []
    classification_reports = []
    confusion_matrices = []

    for train_index, val_index in skf.split(x_train, y_train):
        x_train_fold, x_val_fold = x_train[train_index], x_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]
        model.fit(x_train_fold, y_train_fold)
        y_pred = model.predict(x_val_fold)
        accuracy_scores.append(accuracy_score(y_val_fold, y_pred))
        classification_reports.append(classification_report(y_val_fold, y_pred, output_dict=True))
        confusion_matrices.append(confusion_matrix(y_val_fold, y_pred))

    # Fit the model on the whole training data
    model.fit(x_train, y_train)
    # Evaluate on testing data
    y_pred_test = model.predict(x_test)
    test_accuracy = accuracy_score(y_test, y_pred_test)
    test_classification_report = classification_report(y_test, y_pred_test)
    test_confusion_matrix = confusion_matrix(y_test, y_pred_test)

    return np.mean(accuracy_scores), classification_reports, confusion_matrices, test_accuracy, test_classification_report, test_confusion_matrix

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Generate a balanced training dataset with 10 classes and 10,000 total images
balanced_x_train = []
balanced_y_train = []
num_images_per_class = 1000

for class_label in range(10):
    indices = np.where(y_train == class_label)[0]
    np.random.shuffle(indices)
    selected_indices = indices[:num_images_per_class]
    balanced_x_train.extend(x_train[selected_indices])
    balanced_y_train.extend(y_train[selected_indices])

balanced_x_train = np.array(balanced_x_train)
balanced_y_train = np.array(balanced_y_train)

# Flatten images and scale pixel values to the range [0, 1]
x_train_flat = balanced_x_train.reshape(balanced_x_train.shape[0], -1) / 255.0
x_test_flat = x_test.reshape(x_test.shape[0], -1) / 255.0

# Convert images to grayscale
x_train_gray = np.mean(balanced_x_train, axis=3, keepdims=True) / 255.0
x_test_gray = np.mean(x_test, axis=3, keepdims=True) / 255.0

# Initialize classifiers
decision_tree_clf = DecisionTreeClassifier()
random_forest_clf = RandomForestClassifier()
logistic_regression_clf = LogisticRegression(max_iter=1000)
svm_clf = SVC()

# Train and evaluate models for color images
print("Color Images (3-fold Cross-Validation):")
print("==========================")
# Evaluate Decision Tree
dt_accuracy, dt_classification_reports, dt_confusion_matrices, dt_test_accuracy, dt_test_classification_report, dt_test_confusion_matrix = evaluate_model(decision_tree_clf, x_train_flat, balanced_y_train, x_test_flat, y_test)
print("Decision Tree Mean Cross-Validation Accuracy:", dt_accuracy)
print("Decision Tree Test Accuracy:", dt_test_accuracy)
print("Decision Tree Test Classification Report:\n", dt_test_classification_report)
print("Decision Tree Test Confusion Matrix:\n", dt_test_confusion_matrix)

# Evaluate Random Forest
rf_accuracy, rf_classification_reports, rf_confusion_matrices, rf_test_accuracy, rf_test_classification_report, rf_test_confusion_matrix = evaluate_model(random_forest_clf, x_train_flat, balanced_y_train, x_test_flat, y_test)
print("Random Forest Mean Cross-Validation Accuracy:", rf_accuracy)
print("Random Forest Test Accuracy:", rf_test_accuracy)
print("Random Forest Test Classification Report:\n", rf_test_classification_report)
print("Random Forest Test Confusion Matrix:\n", rf_test_confusion_matrix)

# Evaluate Logistic Regression
lr_accuracy, lr_classification_reports, lr_confusion_matrices, lr_test_accuracy, lr_test_classification_report, lr_test_confusion_matrix = evaluate_model(logistic_regression_clf, x_train_flat, balanced_y_train, x_test_flat, y_test)
print("Logistic Regression Mean Cross-Validation Accuracy:", lr_accuracy)
print("Logistic Regression Test Accuracy:", lr_test_accuracy)
print("Logistic Regression Test Classification Report:\n", lr_test_classification_report)
print("Logistic Regression Test Confusion Matrix:\n", lr_test_confusion_matrix)

# Scale features for SVM
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_flat)
x_test_scaled = scaler.transform(x_test_flat)

# Evaluate SVM
svm_accuracy, svm_classification_reports, svm_confusion_matrices, svm_test_accuracy, svm_test_classification_report, svm_test_confusion_matrix = evaluate_model(svm_clf, x_train_scaled, balanced_y_train, x_test_scaled, y_test)
print("SVM Mean Cross-Validation Accuracy:", svm_accuracy)
print("SVM Test Accuracy:", svm_test_accuracy)
print("SVM Test Classification Report:\n", svm_test_classification_report)
print("SVM Test Confusion Matrix:\n", svm_test_confusion_matrix)

# Train and evaluate models for grayscale images
print("\nGrayscale Images (3-fold Cross-Validation):")
print("==========================")
# Evaluate Decision Tree
dt_accuracy_gray, dt_classification_reports_gray, dt_confusion_matrices_gray, dt_test_accuracy_gray, dt_test_classification_report_gray, dt_test_confusion_matrix_gray = evaluate_model(decision_tree_clf, x_train_gray.reshape(x_train_gray.shape[0], -1), balanced_y_train, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test)
print("Decision Tree Mean Cross-Validation Accuracy (Grayscale):", dt_accuracy_gray)
print("Decision Tree Test Accuracy (Grayscale):", dt_test_accuracy_gray)
print("Decision Tree Test Classification Report (Grayscale):\n", dt_test_classification_report_gray)
print("Decision Tree Test Confusion Matrix (Grayscale):\n", dt_test_confusion_matrix_gray)

# Evaluate Random Forest
rf_accuracy_gray, rf_classification_reports_gray, rf_confusion_matrices_gray, rf_test_accuracy_gray, rf_test_classification_report_gray, rf_test_confusion_matrix_gray = evaluate_model(random_forest_clf, x_train_gray.reshape(x_train_gray.shape[0], -1), balanced_y_train, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test)
print("Random Forest Mean Cross-Validation Accuracy (Grayscale):", rf_accuracy_gray)
print("Random Forest Test Accuracy (Grayscale):", rf_test_accuracy_gray)
print("Random Forest Test Classification Report (Grayscale):\n", rf_test_classification_report_gray)
print("Random Forest Test Confusion Matrix (Grayscale):\n", rf_test_confusion_matrix_gray)

# Evaluate Logistic Regression
lr_accuracy_gray, lr_classification_reports_gray, lr_confusion_matrices_gray, lr_test_accuracy_gray, lr_test_classification_report_gray, lr_test_confusion_matrix_gray = evaluate_model(logistic_regression_clf, x_train_gray.reshape(x_train_gray.shape[0], -1), balanced_y_train, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test)
print("Logistic Regression Mean Cross-Validation Accuracy (Grayscale):", lr_accuracy_gray)
print("Logistic Regression Test Accuracy (Grayscale):", lr_test_accuracy_gray)
print("Logistic Regression Test Classification Report (Grayscale):\n", lr_test_classification_report_gray)
print("Logistic Regression Test Confusion Matrix (Grayscale):\n", lr_test_confusion_matrix_gray)

# Scale features for SVM
scaler_gray = StandardScaler()
x_train_gray_scaled = scaler_gray.fit_transform(x_train_gray.reshape(x_train_gray.shape[0], -1))
x_test_gray_scaled = scaler_gray.transform(x_test_gray.reshape(x_test_gray.shape[0], -1))

# Evaluate SVM
svm_accuracy_gray, svm_classification_reports_gray, svm_confusion_matrices_gray, svm_test_accuracy_gray, svm_test_classification_report_gray, svm_test_confusion_matrix_gray = evaluate_model(svm_clf, x_train_gray_scaled, balanced_y_train, x_test_gray_scaled, y_test)
print("SVM Mean Cross-Validation Accuracy (Grayscale):", svm_accuracy_gray)
print("SVM Test Accuracy (Grayscale):", svm_test_accuracy_gray)
print("SVM Test Classification Report (Grayscale):\n", svm_test_classification_report_gray)
print("SVM Test Confusion Matrix (Grayscale):\n", svm_test_confusion_matrix_gray)


Color Images (3-fold Cross-Validation):
Decision Tree Mean Cross-Validation Accuracy: 0.22290072438557593
Decision Tree Test Accuracy: 0.2362
Decision Tree Test Classification Report:
               precision    recall  f1-score   support

           0       0.30      0.32      0.31      1000
           1       0.26      0.26      0.26      1000
           2       0.18      0.20      0.19      1000
           3       0.16      0.14      0.15      1000
           4       0.19      0.20      0.19      1000
           5       0.20      0.20      0.20      1000
           6       0.24      0.24      0.24      1000
           7       0.23      0.21      0.22      1000
           8       0.34      0.34      0.34      1000
           9       0.27      0.26      0.26      1000

    accuracy                           0.24     10000
   macro avg       0.24      0.24      0.24     10000
weighted avg       0.24      0.24      0.24     10000

Decision Tree Test Confusion Matrix:
 [[318  70 109  50 

  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train, y_train)


Random Forest Mean Cross-Validation Accuracy: 0.4008996280551871
Random Forest Test Accuracy: 0.4196
Random Forest Test Classification Report:
               precision    recall  f1-score   support

           0       0.47      0.53      0.50      1000
           1       0.46      0.48      0.47      1000
           2       0.32      0.29      0.30      1000
           3       0.28      0.22      0.25      1000
           4       0.36      0.36      0.36      1000
           5       0.39      0.36      0.37      1000
           6       0.41      0.48      0.44      1000
           7       0.49      0.39      0.43      1000
           8       0.54      0.57      0.55      1000
           9       0.44      0.51      0.48      1000

    accuracy                           0.42     10000
   macro avg       0.41      0.42      0.42     10000
weighted avg       0.41      0.42      0.42     10000

Random Forest Test Confusion Matrix:
 [[526  42  55  26  23  20  28  28 188  64]
 [ 43 478  26  4

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

Logistic Regression Mean Cross-Validation Accuracy: 0.315000066293372
Logistic Regression Test Accuracy: 0.3288
Logistic Regression Test Classification Report:
               precision    recall  f1-score   support

           0       0.38      0.38      0.38      1000
           1       0.38      0.37      0.37      1000
           2       0.25      0.25      0.25      1000
           3       0.23      0.20      0.21      1000
           4       0.29      0.29      0.29      1000
           5       0.24      0.25      0.24      1000
           6       0.35      0.34      0.35      1000
           7       0.37      0.34      0.36      1000
           8       0.41      0.48      0.44      1000
           9       0.39      0.38      0.38      1000

    accuracy                           0.33     10000
   macro avg       0.33      0.33      0.33     10000
weighted avg       0.33      0.33      0.33     10000

Logistic Regression Test Confusion Matrix:
 [[377  49  68  38  37  43  24  52 23

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


SVM Mean Cross-Validation Accuracy: 0.45139972905508907
SVM Test Accuracy: 0.4753
SVM Test Classification Report:
               precision    recall  f1-score   support

           0       0.54      0.53      0.54      1000
           1       0.56      0.58      0.57      1000
           2       0.35      0.34      0.35      1000
           3       0.31      0.32      0.32      1000
           4       0.42      0.38      0.40      1000
           5       0.44      0.36      0.40      1000
           6       0.45      0.58      0.51      1000
           7       0.57      0.46      0.51      1000
           8       0.58      0.64      0.61      1000
           9       0.53      0.55      0.54      1000

    accuracy                           0.48     10000
   macro avg       0.47      0.48      0.47     10000
weighted avg       0.47      0.48      0.47     10000

SVM Test Confusion Matrix:
 [[529  36  65  24  22  14  29  27 186  68]
 [ 31 584  22  52  11  23  28  29  62 158]
 [101  27 34

  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train, y_train)


Random Forest Mean Cross-Validation Accuracy (Grayscale): 0.3503997070372904
Random Forest Test Accuracy (Grayscale): 0.3704
Random Forest Test Classification Report (Grayscale):
               precision    recall  f1-score   support

           0       0.39      0.44      0.41      1000
           1       0.42      0.44      0.43      1000
           2       0.28      0.27      0.28      1000
           3       0.24      0.18      0.21      1000
           4       0.31      0.34      0.33      1000
           5       0.38      0.33      0.35      1000
           6       0.35      0.39      0.37      1000
           7       0.42      0.34      0.38      1000
           8       0.47      0.50      0.48      1000
           9       0.41      0.46      0.43      1000

    accuracy                           0.37     10000
   macro avg       0.37      0.37      0.37     10000
weighted avg       0.37      0.37      0.37     10000

Random Forest Test Confusion Matrix (Grayscale):
 [[439  29  

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

Logistic Regression Mean Cross-Validation Accuracy (Grayscale): 0.24530041486452184
Logistic Regression Test Accuracy (Grayscale): 0.2486
Logistic Regression Test Classification Report (Grayscale):
               precision    recall  f1-score   support

           0       0.30      0.31      0.30      1000
           1       0.30      0.30      0.30      1000
           2       0.17      0.17      0.17      1000
           3       0.17      0.14      0.15      1000
           4       0.19      0.18      0.18      1000
           5       0.24      0.23      0.23      1000
           6       0.22      0.23      0.23      1000
           7       0.24      0.22      0.23      1000
           8       0.31      0.35      0.33      1000
           9       0.33      0.35      0.34      1000

    accuracy                           0.25     10000
   macro avg       0.25      0.25      0.25     10000
weighted avg       0.25      0.25      0.25     10000

Logistic Regression Test Confusion Matrix 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [6]:
import numpy as np
import tensorflow as tf
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler

# Define evaluate_model function
def evaluate_model(model, x_train, y_train, x_test, y_test):
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    accuracy_scores = []
    classification_reports = []
    confusion_matrices = []

    for train_index, val_index in skf.split(x_train, y_train):
        x_train_fold, x_val_fold = x_train[train_index], x_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]
        model.fit(x_train_fold, y_train_fold)
        y_pred = model.predict(x_val_fold)
        accuracy_scores.append(accuracy_score(y_val_fold, y_pred))
        classification_reports.append(classification_report(y_val_fold, y_pred, output_dict=True))
        confusion_matrices.append(confusion_matrix(y_val_fold, y_pred))

    # Fit the model on the whole training data
    model.fit(x_train, y_train)
    # Evaluate on testing data
    y_pred_test = model.predict(x_test)
    test_accuracy = accuracy_score(y_test, y_pred_test)
    test_classification_report = classification_report(y_test, y_pred_test)
    test_confusion_matrix = confusion_matrix(y_test, y_pred_test)

    return np.mean(accuracy_scores), classification_reports, confusion_matrices, test_accuracy, test_classification_report, test_confusion_matrix

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Generate a balanced training dataset with 10 classes and 10,000 total images
balanced_x_train = []
balanced_y_train = []
num_images_per_class = 1000

for class_label in range(10):
    indices = np.where(y_train == class_label)[0]
    np.random.shuffle(indices)
    selected_indices = indices[:num_images_per_class]
    balanced_x_train.extend(x_train[selected_indices])
    balanced_y_train.extend(y_train[selected_indices])

balanced_x_train = np.array(balanced_x_train)
balanced_y_train = np.array(balanced_y_train)

# Flatten images and scale pixel values to the range [0, 1]
x_train_flat = balanced_x_train.reshape(balanced_x_train.shape[0], -1) / 255.0
x_test_flat = x_test.reshape(x_test.shape[0], -1) / 255.0

# Convert images to grayscale
x_train_gray = np.mean(balanced_x_train, axis=3, keepdims=True) / 255.0
x_test_gray = np.mean(x_test, axis=3, keepdims=True) / 255.0

# Initialize classifiers
decision_tree_clf = DecisionTreeClassifier()
random_forest_clf = RandomForestClassifier()
logistic_regression_clf = LogisticRegression(max_iter=1000)
svm_clf = SVC()

# Train and evaluate models for color images
print("Color Images (5-fold Cross-Validation):")
print("==========================")
# Evaluate Decision Tree
dt_accuracy, dt_classification_reports, dt_confusion_matrices, dt_test_accuracy, dt_test_classification_report, dt_test_confusion_matrix = evaluate_model(decision_tree_clf, x_train_flat, balanced_y_train, x_test_flat, y_test)
print("Decision Tree Mean Cross-Validation Accuracy:", dt_accuracy)
print("Decision Tree Test Accuracy:", dt_test_accuracy)
print("Decision Tree Test Classification Report:\n", dt_test_classification_report)
print("Decision Tree Test Confusion Matrix:\n", dt_test_confusion_matrix)

# Evaluate Random Forest
rf_accuracy, rf_classification_reports, rf_confusion_matrices, rf_test_accuracy, rf_test_classification_report, rf_test_confusion_matrix = evaluate_model(random_forest_clf, x_train_flat, balanced_y_train, x_test_flat, y_test)
print("Random Forest Mean Cross-Validation Accuracy:", rf_accuracy)
print("Random Forest Test Accuracy:", rf_test_accuracy)
print("Random Forest Test Classification Report:\n", rf_test_classification_report)
print("Random Forest Test Confusion Matrix:\n", rf_test_confusion_matrix)

# Evaluate Logistic Regression
lr_accuracy, lr_classification_reports, lr_confusion_matrices, lr_test_accuracy, lr_test_classification_report, lr_test_confusion_matrix = evaluate_model(logistic_regression_clf, x_train_flat, balanced_y_train, x_test_flat, y_test)
print("Logistic Regression Mean Cross-Validation Accuracy:", lr_accuracy)
print("Logistic Regression Test Accuracy:", lr_test_accuracy)
print("Logistic Regression Test Classification Report:\n", lr_test_classification_report)
print("Logistic Regression Test Confusion Matrix:\n", lr_test_confusion_matrix)

# Scale features for SVM
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_flat)
x_test_scaled = scaler.transform(x_test_flat)

# Evaluate SVM
svm_accuracy, svm_classification_reports, svm_confusion_matrices, svm_test_accuracy, svm_test_classification_report, svm_test_confusion_matrix = evaluate_model(svm_clf, x_train_scaled, balanced_y_train, x_test_scaled, y_test)
print("SVM Mean Cross-Validation Accuracy:", svm_accuracy)
print("SVM Test Accuracy:", svm_test_accuracy)
print("SVM Test Classification Report:\n", svm_test_classification_report)
print("SVM Test Confusion Matrix:\n", svm_test_confusion_matrix)

# Train and evaluate models for grayscale images
print("\nGrayscale Images (5-fold Cross-Validation):")
print("==========================")
# Evaluate Decision Tree
dt_accuracy_gray, dt_classification_reports_gray, dt_confusion_matrices_gray, dt_test_accuracy_gray, dt_test_classification_report_gray, dt_test_confusion_matrix_gray = evaluate_model(decision_tree_clf, x_train_gray.reshape(x_train_gray.shape[0], -1), balanced_y_train, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test)
print("Decision Tree Mean Cross-Validation Accuracy (Grayscale):", dt_accuracy_gray)
print("Decision Tree Test Accuracy (Grayscale):", dt_test_accuracy_gray)
print("Decision Tree Test Classification Report (Grayscale):\n", dt_test_classification_report_gray)
print("Decision Tree Test Confusion Matrix (Grayscale):\n", dt_test_confusion_matrix_gray)

# Evaluate Random Forest
rf_accuracy_gray, rf_classification_reports_gray, rf_confusion_matrices_gray, rf_test_accuracy_gray, rf_test_classification_report_gray, rf_test_confusion_matrix_gray = evaluate_model(random_forest_clf, x_train_gray.reshape(x_train_gray.shape[0], -1), balanced_y_train, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test)
print("Random Forest Mean Cross-Validation Accuracy (Grayscale):", rf_accuracy_gray)
print("Random Forest Test Accuracy (Grayscale):", rf_test_accuracy_gray)
print("Random Forest Test Classification Report (Grayscale):\n", rf_test_classification_report_gray)
print("Random Forest Test Confusion Matrix (Grayscale):\n", rf_test_confusion_matrix_gray)

# Evaluate Logistic Regression
lr_accuracy_gray, lr_classification_reports_gray, lr_confusion_matrices_gray, lr_test_accuracy_gray, lr_test_classification_report_gray, lr_test_confusion_matrix_gray = evaluate_model(logistic_regression_clf, x_train_gray.reshape(x_train_gray.shape[0], -1), balanced_y_train, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test)
print("Logistic Regression Mean Cross-Validation Accuracy (Grayscale):", lr_accuracy_gray)
print("Logistic Regression Test Accuracy (Grayscale):", lr_test_accuracy_gray)
print("Logistic Regression Test Classification Report (Grayscale):\n", lr_test_classification_report_gray)
print("Logistic Regression Test Confusion Matrix (Grayscale):\n", lr_test_confusion_matrix_gray)

# Scale features for SVM
scaler_gray = StandardScaler()
x_train_gray_scaled = scaler_gray.fit_transform(x_train_gray.reshape(x_train_gray.shape[0], -1))
x_test_gray_scaled = scaler_gray.transform(x_test_gray.reshape(x_test_gray.shape[0], -1))

# Evaluate SVM
svm_accuracy_gray, svm_classification_reports_gray, svm_confusion_matrices_gray, svm_test_accuracy_gray, svm_test_classification_report_gray, svm_test_confusion_matrix_gray = evaluate_model(svm_clf, x_train_gray_scaled, balanced_y_train, x_test_gray_scaled, y_test)
print("SVM Mean Cross-Validation Accuracy (Grayscale):", svm_accuracy_gray)
print("SVM Test Accuracy (Grayscale):", svm_test_accuracy_gray)
print("SVM Test Classification Report (Grayscale):\n", svm_test_classification_report_gray)
print("SVM Test Confusion Matrix (Grayscale):\n", svm_test_confusion_matrix_gray)


Color Images (5-fold Cross-Validation):
Decision Tree Mean Cross-Validation Accuracy: 0.2311
Decision Tree Test Accuracy: 0.2341
Decision Tree Test Classification Report:
               precision    recall  f1-score   support

           0       0.31      0.35      0.33      1000
           1       0.25      0.23      0.24      1000
           2       0.15      0.17      0.16      1000
           3       0.15      0.14      0.14      1000
           4       0.20      0.20      0.20      1000
           5       0.22      0.21      0.22      1000
           6       0.24      0.25      0.25      1000
           7       0.24      0.23      0.23      1000
           8       0.32      0.33      0.32      1000
           9       0.26      0.23      0.24      1000

    accuracy                           0.23     10000
   macro avg       0.23      0.23      0.23     10000
weighted avg       0.23      0.23      0.23     10000

Decision Tree Test Confusion Matrix:
 [[354  76 104  45  58  57  37  

  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train, y_train)


Random Forest Mean Cross-Validation Accuracy: 0.41129999999999994
Random Forest Test Accuracy: 0.4177
Random Forest Test Classification Report:
               precision    recall  f1-score   support

           0       0.49      0.50      0.50      1000
           1       0.49      0.49      0.49      1000
           2       0.31      0.25      0.28      1000
           3       0.28      0.20      0.24      1000
           4       0.35      0.39      0.37      1000
           5       0.35      0.35      0.35      1000
           6       0.45      0.53      0.49      1000
           7       0.42      0.38      0.40      1000
           8       0.54      0.58      0.56      1000
           9       0.43      0.51      0.47      1000

    accuracy                           0.42     10000
   macro avg       0.41      0.42      0.41     10000
weighted avg       0.41      0.42      0.41     10000

Random Forest Test Confusion Matrix:
 [[504  37  59  21  31  28  25  36 183  76]
 [ 33 490  19  

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

Logistic Regression Mean Cross-Validation Accuracy: 0.3142
Logistic Regression Test Accuracy: 0.3278
Logistic Regression Test Classification Report:
               precision    recall  f1-score   support

           0       0.40      0.41      0.40      1000
           1       0.39      0.38      0.39      1000
           2       0.25      0.24      0.25      1000
           3       0.22      0.22      0.22      1000
           4       0.27      0.26      0.26      1000
           5       0.24      0.25      0.24      1000
           6       0.34      0.36      0.35      1000
           7       0.36      0.34      0.35      1000
           8       0.42      0.45      0.43      1000
           9       0.38      0.36      0.37      1000

    accuracy                           0.33     10000
   macro avg       0.33      0.33      0.33     10000
weighted avg       0.33      0.33      0.33     10000

Logistic Regression Test Confusion Matrix:
 [[409  67  71  41  29  46  29  52 188  68]
 [ 7

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


SVM Mean Cross-Validation Accuracy: 0.4572
SVM Test Accuracy: 0.479
SVM Test Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.54      0.55      1000
           1       0.56      0.56      0.56      1000
           2       0.37      0.31      0.34      1000
           3       0.34      0.33      0.34      1000
           4       0.39      0.41      0.40      1000
           5       0.43      0.38      0.40      1000
           6       0.46      0.58      0.51      1000
           7       0.56      0.47      0.51      1000
           8       0.58      0.63      0.61      1000
           9       0.51      0.59      0.55      1000

    accuracy                           0.48     10000
   macro avg       0.48      0.48      0.48     10000
weighted avg       0.48      0.48      0.48     10000

SVM Test Confusion Matrix:
 [[538  44  54  28  32  15  26  33 162  68]
 [ 31 557  18  34  22  27  34  29  69 179]
 [ 96  29 308  91 178  67 

  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train_fold, y_train_fold)
  model.fit(x_train, y_train)


Random Forest Mean Cross-Validation Accuracy (Grayscale): 0.3596
Random Forest Test Accuracy (Grayscale): 0.3737
Random Forest Test Classification Report (Grayscale):
               precision    recall  f1-score   support

           0       0.41      0.40      0.41      1000
           1       0.44      0.42      0.43      1000
           2       0.32      0.29      0.30      1000
           3       0.27      0.17      0.21      1000
           4       0.30      0.36      0.33      1000
           5       0.36      0.33      0.34      1000
           6       0.38      0.40      0.39      1000
           7       0.39      0.36      0.37      1000
           8       0.45      0.50      0.47      1000
           9       0.40      0.49      0.44      1000

    accuracy                           0.37     10000
   macro avg       0.37      0.37      0.37     10000
weighted avg       0.37      0.37      0.37     10000

Random Forest Test Confusion Matrix (Grayscale):
 [[404  38  92  20  73  

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

Logistic Regression Mean Cross-Validation Accuracy (Grayscale): 0.2464
Logistic Regression Test Accuracy (Grayscale): 0.2449
Logistic Regression Test Classification Report (Grayscale):
               precision    recall  f1-score   support

           0       0.29      0.29      0.29      1000
           1       0.30      0.29      0.30      1000
           2       0.17      0.15      0.16      1000
           3       0.18      0.16      0.17      1000
           4       0.18      0.17      0.17      1000
           5       0.23      0.23      0.23      1000
           6       0.22      0.23      0.23      1000
           7       0.22      0.22      0.22      1000
           8       0.29      0.34      0.31      1000
           9       0.33      0.36      0.35      1000

    accuracy                           0.24     10000
   macro avg       0.24      0.24      0.24     10000
weighted avg       0.24      0.24      0.24     10000

Logistic Regression Test Confusion Matrix (Grayscale):


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


SVM Mean Cross-Validation Accuracy (Grayscale): 0.3889
SVM Test Accuracy (Grayscale): 0.4031
SVM Test Classification Report (Grayscale):
               precision    recall  f1-score   support

           0       0.42      0.37      0.39      1000
           1       0.50      0.47      0.48      1000
           2       0.31      0.23      0.26      1000
           3       0.31      0.24      0.27      1000
           4       0.32      0.42      0.36      1000
           5       0.41      0.34      0.37      1000
           6       0.38      0.49      0.43      1000
           7       0.47      0.38      0.42      1000
           8       0.45      0.56      0.50      1000
           9       0.46      0.53      0.49      1000

    accuracy                           0.40     10000
   macro avg       0.40      0.40      0.40     10000
weighted avg       0.40      0.40      0.40     10000

SVM Test Confusion Matrix (Grayscale):
 [[373  40  82  26  98  16  62  40 200  63]
 [ 26 465  11  39  3

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import StandardScaler

# Define evaluate_model function
def evaluate_model(model, x_train, y_train, x_test, y_test):
    loo = LeaveOneOut()
    accuracy_scores = []
    classification_reports = []
    confusion_matrices = []

    for train_index, val_index in loo.split(x_train):
        x_train_fold, x_val_fold = x_train[train_index], x_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]
        model.fit(x_train_fold, y_train_fold)
        y_pred = model.predict(x_val_fold)
        accuracy_scores.append(accuracy_score(y_val_fold, y_pred))
        classification_reports.append(classification_report(y_val_fold, y_pred, output_dict=True))
        confusion_matrices.append(confusion_matrix(y_val_fold, y_pred))

    # Fit the model on the whole training data
    model.fit(x_train, y_train)
    # Evaluate on testing data
    y_pred_test = model.predict(x_test)
    test_accuracy = accuracy_score(y_test, y_pred_test)
    test_classification_report = classification_report(y_test, y_pred_test)
    test_confusion_matrix = confusion_matrix(y_test, y_pred_test)

    return np.mean(accuracy_scores), classification_reports, confusion_matrices, test_accuracy, test_classification_report, test_confusion_matrix

def main():
    # Load CIFAR-10 dataset
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

    # Select 5000 training images randomly
    np.random.seed(42)  # For reproducibility
    indices = np.random.choice(len(x_train), 5000, replace=False)
    x_train_selected = x_train[indices]
    y_train_selected = y_train[indices]

    # Flatten images and scale pixel values to the range [0, 1]
    x_train_flat = x_train_selected.reshape(x_train_selected.shape[0], -1) / 255.0
    x_test_flat = x_test.reshape(x_test.shape[0], -1) / 255.0

    # Initialize classifiers
    decision_tree_clf = DecisionTreeClassifier()
    random_forest_clf = RandomForestClassifier()
    logistic_regression_clf = LogisticRegression(max_iter=1000)
    svm_clf = SVC()

    # Train and evaluate models for color images
    print("Color Images (LOOCV - 5000 training images):")
    print("==========================")
    evaluate_and_print_results(decision_tree_clf, x_train_flat, y_train_selected, x_test_flat, y_test, "Decision Tree")
    evaluate_and_print_results(random_forest_clf, x_train_flat, y_train_selected, x_test_flat, y_test, "Random Forest")
    evaluate_and_print_results(logistic_regression_clf, x_train_flat, y_train_selected, x_test_flat, y_test, "Logistic Regression")
    evaluate_and_print_results(svm_clf, x_train_flat, y_train_selected, x_test_flat, y_test, "SVM")

    # Convert images to grayscale
    x_train_gray = np.mean(x_train_selected, axis=3, keepdims=True) / 255.0
    x_test_gray = np.mean(x_test, axis=3, keepdims=True) / 255.0

    # Train and evaluate models for grayscale images
    print("\nGrayscale Images (LOOCV - 5000 training images):")
    print("==========================")
    evaluate_and_print_results(decision_tree_clf, x_train_gray.reshape(x_train_gray.shape[0], -1), y_train_selected, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test, "Decision Tree (Grayscale)")
    evaluate_and_print_results(random_forest_clf, x_train_gray.reshape(x_train_gray.shape[0], -1), y_train_selected, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test, "Random Forest (Grayscale)")
    evaluate_and_print_results(logistic_regression_clf, x_train_gray.reshape(x_train_gray.shape[0], -1), y_train_selected, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test, "Logistic Regression (Grayscale)")
    evaluate_and_print_results(svm_clf, x_train_gray.reshape(x_train_gray.shape[0], -1), y_train_selected, x_test_gray.reshape(x_test_gray.shape[0], -1), y_test, "SVM (Grayscale)")

def evaluate_and_print_results(model, x_train, y_train, x_test, y_test, model_name):
    accuracy, _, _, test_accuracy, test_classification_report, test_confusion_matrix = evaluate_model(model, x_train, y_train, x_test, y_test)
    print(f"{model_name} Mean LOOCV Accuracy:", accuracy)
    print(f"{model_name} Test Accuracy:", test_accuracy)
    print(f"{model_name} Test Classification Report:\n", test_classification_report)
    print(f"{model_name} Test Confusion Matrix:\n", test_confusion_matrix)

if __name__ == "__main__":
    main()


Color Images (LOOCV - 5000 training images):


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr