In [None]:
from google.colab import drive
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.transform import resize
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import zipfile

# Dataset locations (absolute paths on the Colab runtime).
zip_file = "/content/train.zip"
extract_folder = "/content/trainimages"
csv_file = "/content/trainLabels.csv"
# Directory the PNG files are read from; presumably the archive unpacks into a
# top-level "train/" folder inside `extract_folder` -- verify against the zip.
image_folder = "/content/trainimages/train"

# Unzipping the dataset.
# Extraction is skipped when the image directory already exists, so re-running
# this cell does not unpack every file again. Delete `image_folder` to force a
# fresh extraction (e.g. after an interrupted run left it incomplete).
if not os.path.isdir(image_folder):
    os.makedirs(extract_folder, exist_ok=True)
    with zipfile.ZipFile(zip_file, 'r') as archive:
        archive.extractall(extract_folder)

# Label table; its "id" and "label" columns drive the image-loading loop.
df = pd.read_csv(csv_file)

# Function to load and preprocess images
def load_image(image_id, img_size=(28, 28)):
    """Load one image from ``image_folder`` as a resized grayscale array.

    The file ``<image_folder>/<image_id>.png`` is read, resized to
    ``img_size`` and reduced to a single channel.

    Args:
        image_id: Identifier used as the PNG file stem.
        img_size: Target output shape handed to ``skimage.transform.resize``.

    Returns:
        The resized single-channel image, or ``None`` (after printing a
        notice) when the file does not exist.
    """
    img_path = os.path.join(image_folder, f"{image_id}.png")
    if not os.path.exists(img_path):
        print(f"Image {image_id}.png not found!")
        return None

    img_resized = resize(imread(img_path), img_size)

    if img_resized.ndim == 2:
        # Already single-channel: recent scikit-image versions make rgb2gray
        # reject 2-D input, so return the resized image as-is.
        return img_resized

    if img_resized.shape[-1] == 4:
        # RGBA input: rgb2gray requires exactly three channels, so blend the
        # alpha channel away first.
        from skimage.color import rgba2rgb
        img_resized = rgba2rgb(img_resized)

    return rgb2gray(img_resized)

# Load every image listed in the label table, skipping ids whose file is missing.
# The two columns are iterated directly instead of via DataFrame.iterrows():
# iterrows() converts each row to a single dtype, which can turn an integer id
# into a float (and therefore a wrong file name) when the other columns are
# numeric, and it is considerably slower.
X_data, y_data = [], []
for image_id, label in zip(df["id"], df["label"]):
    img = load_image(image_id)
    if img is not None:
        X_data.append(img)
        y_data.append(label)

X_data = np.array(X_data).reshape(len(X_data), 28, 28, 1)  # Add channel dimension

# Encode labels as integer codes in order of first appearance. `class_names[i]`
# is the original label for code i. The list is wrapped in a Series because
# newer pandas releases are understood to deprecate passing a plain Python
# list to factorize -- TODO confirm against the pandas changelog.
y_data, class_names = pd.factorize(pd.Series(y_data))
y_data = np.array(y_data)

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2, random_state=42)

# CNN Model with Regularization
def create_cnn(num_classes=None, input_shape=(28, 28, 1)):
    """Build and compile the regularized CNN classifier.

    Architecture: two Conv2D(64, 3x3) -> BatchNormalization -> MaxPooling2D(2x2)
    -> Dropout(0.25) stages, then Flatten -> Dense(64) -> Dropout(0.5) ->
    softmax output. Compiled with Adam and sparse categorical cross-entropy,
    so labels are expected as integer class ids.

    Args:
        num_classes: Width of the softmax output layer. When ``None`` it is
            taken from the module-level ``y_data`` as ``max(label id) + 1``,
            i.e. the full label range rather than only the classes that
            happen to appear in the training split.
        input_shape: Shape of one input sample (height, width, channels).

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    if num_classes is None:
        # Counting distinct labels in y_train alone would under-size the output
        # layer whenever a class is missing from the training split, leaving
        # validation labels outside the valid index range of the loss.
        num_classes = int(np.max(y_data)) + 1

    model = Sequential([
        Conv2D(64, (3,3), input_shape=input_shape, activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2,2)),
        Dropout(0.25),
        Conv2D(64, (3,3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2,2)),
        Dropout(0.25),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Build a fresh network and train it for 10 epochs in mini-batches of 32,
# reporting loss/accuracy on the held-out split after every epoch.
model = create_cnn()
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)




In [None]:

# Print all layer names for reference
for layer in model.layers:
    print(layer.name)

# Use the last hidden Dense layer (the one before the softmax output) as the
# feature layer. It is located by type and position rather than by name:
# Keras auto-generates names such as "dense_4" from a counter that depends on
# how many layers were created earlier in the session, so a hard-coded name
# breaks after a kernel restart or after re-running the training cell.
dense_layers = [layer for layer in model.layers if isinstance(layer, Dense)]
feature_layer = dense_layers[-2]
feature_extractor = Model(inputs=model.inputs, outputs=feature_layer.output)

# Extract features
X_train_features = feature_extractor.predict(X_train)
X_test_features = feature_extractor.predict(X_test)

print("Feature extraction completed! Feature shape:", X_train_features.shape)

# Classifier using extracted features
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_features, y_train)
y_pred = clf.predict(X_test_features)
print("Random Forest Classifier Accuracy:", accuracy_score(y_test, y_pred))

# Using SVM for classification
svm = SVC(kernel='linear')
svm.fit(X_train_features, y_train)
y_pred_svm = svm.predict(X_test_features)
print("SVM Classifier Accuracy:", accuracy_score(y_test, y_pred_svm))

# Plot Training & Validation Loss (one point per epoch)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.title("Training vs Validation Loss with Regularization")
plt.show()

In [None]:
import numpy as np
from sklearn.metrics import classification_report
import tensorflow as tf

# ✅ Evaluate the model on test data
score = model.evaluate(X_test, y_test, verbose=1)
print('\n✅ Regularized CNN Model Loss:', score[0])
print('✅ Regularized CNN Model Accuracy:', score[1])

# ✅ Predict class probabilities
y_pred_probs = model.predict(X_test)

# ✅ Convert predicted probabilities to class labels
y_pred = np.argmax(y_pred_probs, axis=1)

# ✅ Ensure `y_test` is properly formatted
if len(y_test.shape) == 1:  # Already a 1-D vector of integer class ids
    y_true = y_test
else:  # If one-hot encoded, convert back to class labels
    y_true = np.argmax(y_test, axis=1)

# ✅ Generate classification report
# The label set is the union of true and predicted ids. Deriving it from y_true
# alone makes classification_report raise when the model predicts a class that
# does not occur in y_true (target_names would be shorter than the label set).
labels = np.unique(np.concatenate([y_true, y_pred]))
num_classes = len(labels)  # Number of classes covered by the report
report = classification_report(
    y_true,
    y_pred,
    labels=labels,
    target_names=[str(i) for i in labels],
    zero_division=0,  # score classes with no predictions as 0 without warnings
)

# ✅ Print results
print("\n✅ Classification Report:\n", report)
