In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only); the original dataset
# paths used later in this notebook live under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import shutil
import random
import numpy as np
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from tqdm import tqdm
import cv2
from imblearn.over_sampling import SMOTE

# Seed Python's, NumPy's and PyTorch's random number generators with one value
SEED = 42
for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_rng(SEED)

# Dataset locations: originals on the mounted Drive, working copies on local disk
ORIGINAL_TRAIN_PATH = "/content/drive/MyDrive/Cavity Dataset/train/"
ORIGINAL_TEST_PATH = "/content/drive/MyDrive/Cavity Dataset/test/"
COPIED_TRAIN_PATH = "dataset_cv/train/"
COPIED_TEST_PATH = "dataset_cv/test/"

# Report whether OpenCV can decode the file at a given path
def is_valid_image(img_path):
    """Return True when ``cv2.imread`` yields an image for ``img_path``.

    ``cv2.imread`` returning ``None`` and any exception raised while reading
    are both reported as ``False``.
    """
    try:
        decoded = cv2.imread(img_path)
    except Exception:
        return False
    return decoded is not None

# Function to copy dataset while removing corrupted images
def copy_dataset(src, dest):
    """Copy a class-per-folder image dataset from ``src`` to ``dest``.

    Each sub-directory of ``src`` is treated as one class folder. Files inside
    it are copied to the matching folder under ``dest`` only when
    ``is_valid_image`` accepts them. Any existing ``dest`` is removed first.

    Args:
        src: Directory whose immediate sub-directories are the class folders.
        dest: Directory to (re)create with the filtered copy.

    Raises:
        ValueError: If ``src`` and ``dest`` resolve to the same directory
            (removing ``dest`` would delete the source data).
        OSError: Propagated from ``os.listdir`` / ``shutil`` (e.g. ``src``
            does not exist).
    """
    if os.path.realpath(src) == os.path.realpath(dest):
        raise ValueError("src and dest must be different directories")

    # Read the source listing BEFORE wiping dest, so a missing or unreadable
    # src fails without destroying a previously created copy. Stray files at
    # the top level (e.g. .DS_Store, notes) are not class folders: skip them
    # instead of crashing in os.listdir below.
    class_folders = [
        name for name in os.listdir(src)
        if os.path.isdir(os.path.join(src, name))
    ]

    if os.path.exists(dest):
        shutil.rmtree(dest)  # Remove existing copy
    os.makedirs(dest, exist_ok=True)

    for class_folder in class_folders:
        src_class_path = os.path.join(src, class_folder)
        dest_class_path = os.path.join(dest, class_folder)
        os.makedirs(dest_class_path, exist_ok=True)

        for filename in os.listdir(src_class_path):
            img_path = os.path.join(src_class_path, filename)
            # Only regular files can be copied; nested directories are ignored.
            if os.path.isfile(img_path) and is_valid_image(img_path):
                shutil.copy(img_path, dest_class_path)

# Build working copies of both splits; images that fail validation are left out
copy_dataset(src=ORIGINAL_TRAIN_PATH, dest=COPIED_TRAIN_PATH)
copy_dataset(src=ORIGINAL_TEST_PATH, dest=COPIED_TEST_PATH)

# Preprocessing: resize to 224x224, convert to a tensor, normalise each channel
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

# Training split, read from the copied folder
train_dataset = ImageFolder(COPIED_TRAIN_PATH, transform=transform)

# Turn an (image, label) dataset into flat arrays suitable for SMOTE
def extract_features(dataset):
    """Flatten every image in ``dataset`` and collect it with its label.

    Args:
        dataset: Iterable yielding ``(image_tensor, label)`` pairs.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays: one flattened image per
        entry in ``data`` and the corresponding label in ``labels``.
    """
    # Single pass over the dataset: each item is loaded exactly once.
    samples = [(image.view(-1).numpy(), label) for image, label in dataset]
    flat_images = [vector for vector, _ in samples]
    targets = [target for _, target in samples]
    return np.array(flat_images), np.array(targets)

# Apply SMOTE
def apply_smote(dataset):
    """Oversample ``dataset`` with SMOTE and return the resampled arrays.

    The images are flattened with ``extract_features(dataset)`` and passed to
    ``SMOTE(random_state=SEED).fit_resample``. The original and resampled
    sample counts are printed.

    NOTE: this relies on the dataset-based ``extract_features`` defined in
    this cell. A later cell defines a loader-based function with the same
    name, so re-running this function after that cell will not work.

    Args:
        dataset: Iterable of ``(image_tensor, label)`` pairs.

    Returns:
        Tuple ``(data_resampled, labels_resampled)`` as returned by
        ``fit_resample``. Previously the arrays were computed and discarded.
    """
    data, labels = extract_features(dataset)
    smote = SMOTE(random_state=SEED)
    data_resampled, labels_resampled = smote.fit_resample(data, labels)
    print(f"SMOTE applied: Original count {len(labels)}, Resampled count {len(labels_resampled)}")
    return data_resampled, labels_resampled

# Keep the resampled arrays so the oversampling result is actually available
# to later cells (assigning also avoids echoing the arrays as cell output).
smote_data, smote_labels = apply_smote(train_dataset)

SMOTE applied: Original count 707, Resampled count 776


In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import numpy as np
from torch.utils.data import DataLoader
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from tqdm import tqdm

# Pick the GPU when one is available, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Pre-trained backbones (ImageNet weights)
resnet18 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
inception = models.inception_v3(weights=models.Inception_V3_Weights.IMAGENET1K_V1)

# Strip the classification heads so both networks emit feature vectors
resnet_backbone_layers = list(resnet18.children())[:-1]
resnet18 = nn.Sequential(*resnet_backbone_layers)  # ResNet outputs (batch, 512, 1, 1)
inception.fc = nn.Identity()  # Remove Inception's classification layer

# Place both networks on the chosen device and switch them to eval mode
for backbone in (resnet18, inception):
    backbone.to(device)
    backbone.eval()

# Preprocessing shared by both networks (InceptionNet requires 299x299 input size)
transform = transforms.Compose(
    [
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Datasets read from the copied folders
train_dataset = datasets.ImageFolder("dataset_cv/train/", transform=transform)
test_dataset = datasets.ImageFolder("dataset_cv/test/", transform=transform)

# Unshuffled loaders: features come out in dataset order
loader_options = dict(batch_size=32, shuffle=False)
train_loader = DataLoader(train_dataset, **loader_options)
test_loader = DataLoader(test_dataset, **loader_options)

# Run every batch through both backbones and stack the two feature vectors
def extract_features(loader):
    """Compute concatenated ResNet + Inception features for every sample in ``loader``.

    Uses the module-level ``resnet18``, ``inception`` and ``device`` objects
    and runs under ``torch.no_grad()``.

    Args:
        loader: Iterable yielding ``(images, targets)`` batches.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays; each feature row is the
        ResNet output followed by the Inception output for one sample.
    """
    feature_rows, label_values = [], []

    with torch.no_grad():
        for batch_images, batch_targets in tqdm(loader, desc="Extracting Features"):
            batch_images = batch_images.to(device)

            # Drop the two trailing singleton dims of the ResNet output: (batch, 512)
            resnet_out = resnet18(batch_images).squeeze(-1).squeeze(-1).cpu().numpy()
            # Inception output with its head removed: (batch, 2048)
            inception_out = inception(batch_images).cpu().numpy()

            # One row per sample: ResNet features followed by Inception features
            feature_rows.extend(np.hstack((resnet_out, inception_out)))
            label_values.extend(batch_targets.cpu().numpy())

    return np.array(feature_rows), np.array(label_values)

# ImageFolder assigns integer labels per dataset from its folder names. The
# test labels are only comparable to the classifier's predictions (and to
# train_dataset.classes below) if both splits share the same mapping, so fail
# fast before the slow feature extraction rather than report wrong metrics.
assert train_dataset.class_to_idx == test_dataset.class_to_idx, (
    "Train and test folders define different classes: "
    f"{train_dataset.class_to_idx} vs {test_dataset.class_to_idx}"
)

# Extract features for training and testing
X_train, y_train = extract_features(train_loader)
X_test, y_test = extract_features(test_loader)

# Train Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Make Predictions
y_pred = rf_classifier.predict(X_test)

# Generate Classification Report
# Passing `labels` explicitly keeps target_names aligned with the label
# indices even if some class never appears in y_test / y_pred (otherwise
# classification_report raises on the length mismatch).
report = classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(train_dataset.classes)),
    target_names=train_dataset.classes,
)
print("\nClassification Report:\n", report)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 100MB/s]
Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:01<00:00, 87.2MB/s]
Extracting Features: 100%|██████████| 23/23 [07:24<00:00, 19.34s/it]
Extracting Features: 100%|██████████| 6/6 [01:51<00:00, 18.63s/it]



Classification Report:
               precision    recall  f1-score   support

      cavity       0.82      0.92      0.87        97
   no_cavity       0.88      0.76      0.82        79

    accuracy                           0.85       176
   macro avg       0.85      0.84      0.84       176
weighted avg       0.85      0.85      0.84       176

