## **Required Imports**

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import os
import time
import copy
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
import numpy as np
from PIL import Image
import torch.nn.functional as F

## **Configuration**

In [9]:
DATA_DIR = '/kaggle/input/my-mc-defect-dataset/my_defect_dataset' 

BATCH_SIZE = 32
NUM_EPOCHS = 25 
LEARNING_RATE = 0.001
MODEL_SAVE_PATH = 'best_defect_classifier_multi_category.pth'

# --- 1. Define Image Transformations ---
# ImageNet statistics for normalization
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Transformations for the training set (with data augmentation)
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)), # Random crop to 224x224, varying scale
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(15), # Rotate by +/- 15 degrees
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05), # Random color jitter
    transforms.ToTensor(), # Convert PIL Image to PyTorch Tensor
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD) # Normalize pixels
])

# Transformations for the validation and test sets (no augmentation, only resizing and normalization)
val_test_transforms = transforms.Compose([
    transforms.Resize(256), # Resize to 256
    transforms.CenterCrop(224), # Crop the center 224x224
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

# --- 2. Create Dataset and DataLoader ---
print("Loading datasets...")

# Create dictionaries to hold datasets and dataloaders
image_datasets = {
    'train': datasets.ImageFolder(os.path.join(DATA_DIR, 'train'), train_transforms),
    'val': datasets.ImageFolder(os.path.join(DATA_DIR, 'val'), val_test_transforms),
    'test': datasets.ImageFolder(os.path.join(DATA_DIR, 'test'), val_test_transforms)
}

dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=BATCH_SIZE, shuffle=True, num_workers=4),
    'val': DataLoader(image_datasets['val'], batch_size=BATCH_SIZE, shuffle=False, num_workers=4),
    'test': DataLoader(image_datasets['test'], batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes # This will be ['defective_product', 'good_product'] or vice versa
num_classes = len(class_names)

print(f"Dataset sizes: {dataset_sizes}")
print(f"Class names: {class_names}")

# Map class names to their numerical labels (e.g., 'good_product': 0, 'defective_product': 1)
class_to_idx = image_datasets['train'].class_to_idx
print(f"Class to index mapping: {class_to_idx}")

# --- 3. Define Model Architecture (Transfer Learning) ---
print("\nSetting up model...")

# Use GPU if available, else CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load a pre-trained ResNet50 model
# We use ResNet50_Weights.IMAGENET1K_V1 for consistency with common practices
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Freeze all parameters in the pre-trained model (optional, but good for initial training)
# This means only the final layer's weights will be updated
for param in model.parameters():
    param.requires_grad = False

# Get the number of in_features for the last fully connected layer
num_ftrs = model.fc.in_features
# Replace the classifier head with a new one for our 2 classes
model.fc = nn.Linear(num_ftrs, num_classes)

# Move the model to the selected device (GPU or CPU)
model = model.to(device)

# --- 4. Set up Loss Function and Optimizer ---
# CrossEntropyLoss automatically handles softmax and NLLLoss internally
criterion = nn.CrossEntropyLoss()

# Only optimize the parameters of the new (unfrozen) final layer
optimizer = optim.Adam(model.fc.parameters(), lr=LEARNING_RATE)

print("Setup complete. Ready for training.")

Loading datasets...
Dataset sizes: {'train': 405, 'val': 276, 'test': 84}
Class names: ['defective_product', 'good_product']
Class to index mapping: {'defective_product': 0, 'good_product': 1}

Setting up model...
Using device: cuda:0


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 177MB/s] 


Setup complete. Ready for training.


## **Training and Validation**

In [10]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=NUM_EPOCHS):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass
                # Track history only in train phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward + optimize only in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Deep copy the model if it's the best validation accuracy
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        
        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model

# Call the training function
model_ft = train_model(model, dataloaders, criterion, optimizer, num_epochs=NUM_EPOCHS)

# Save the best model
torch.save(model_ft.state_dict(), MODEL_SAVE_PATH)
print(f"Model saved to: {MODEL_SAVE_PATH}")

Epoch 1/25
----------
train Loss: 0.3204 Acc: 0.8864
val Loss: 0.3967 Acc: 0.9022

Epoch 2/25
----------
train Loss: 0.2426 Acc: 0.9333
val Loss: 0.3880 Acc: 0.9022

Epoch 3/25
----------
train Loss: 0.2411 Acc: 0.9333
val Loss: 0.3451 Acc: 0.9022

Epoch 4/25
----------
train Loss: 0.2268 Acc: 0.9333
val Loss: 0.3111 Acc: 0.9022

Epoch 5/25
----------
train Loss: 0.2308 Acc: 0.9333
val Loss: 0.3244 Acc: 0.9022

Epoch 6/25
----------
train Loss: 0.2264 Acc: 0.9333
val Loss: 0.3338 Acc: 0.9022

Epoch 7/25
----------
train Loss: 0.1973 Acc: 0.9333
val Loss: 0.3286 Acc: 0.9022

Epoch 8/25
----------
train Loss: 0.2016 Acc: 0.9358
val Loss: 0.3055 Acc: 0.9022

Epoch 9/25
----------
train Loss: 0.1984 Acc: 0.9333
val Loss: 0.2954 Acc: 0.9058

Epoch 10/25
----------
train Loss: 0.2088 Acc: 0.9284
val Loss: 0.3441 Acc: 0.8913

Epoch 11/25
----------
train Loss: 0.2259 Acc: 0.9333
val Loss: 0.3249 Acc: 0.9094

Epoch 12/25
----------
train Loss: 0.1963 Acc: 0.9309
val Loss: 0.2989 Acc: 0.9022

E

## **Model Evaluation**

In [11]:
# Define the model architecture again to load weights into it
model_ft_eval = models.resnet50(weights=None) # No pre-trained weights needed here
num_ftrs = model_ft_eval.fc.in_features
model_ft_eval.fc = nn.Linear(num_ftrs, num_classes)

# Load the saved best weights
model_ft_eval.load_state_dict(torch.load(MODEL_SAVE_PATH))
model_ft_eval = model_ft_eval.to(device)

def evaluate_model(model, dataloader, class_names):
    model.eval()  # Set model to evaluation mode
    all_preds = []
    all_labels = []

    print("\nStarting final evaluation on test set...")
    with torch.no_grad():
        for inputs, labels in dataloader['test']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate metrics
    cm = confusion_matrix(all_labels, all_preds)
    
    # Precision, Recall, F1-score per class
    precision, recall, f1_score, _ = precision_recall_fscore_support(all_labels, all_preds, average=None, labels=np.arange(len(class_names)))
    
    print("\n--- Evaluation Results on Test Set ---")
    print("Confusion Matrix:")
    print(cm)
    
    print("\nMetrics per class:")
    for i, class_name in enumerate(class_names):
        print(f"Class: {class_name}")
        print(f"  Precision: {precision[i]:.4f}")
        print(f"  Recall:    {recall[i]:.4f}")
        print(f"  F1-score:  {f1_score[i]:.4f}")

    # Overall accuracy
    overall_accuracy = np.sum(np.array(all_preds) == np.array(all_labels)) / len(all_labels)
    print(f"\nOverall Test Accuracy: {overall_accuracy:.4f}")

# Call the evaluation function on the best model
evaluate_model(model_ft_eval, dataloaders, class_names)


Starting final evaluation on test set...

--- Evaluation Results on Test Set ---
Confusion Matrix:
[[ 0 24]
 [ 0 60]]

Metrics per class:
Class: defective_product
  Precision: 0.0000
  Recall:    0.0000
  F1-score:  0.0000
Class: good_product
  Precision: 0.7143
  Recall:    1.0000
  F1-score:  0.8333

Overall Test Accuracy: 0.7143


  _warn_prf(average, modifier, msg_start, len(result))


## **Model Retraining**

In [12]:
print("Recalculating weights for imbalanced classes...")

# Calculate class weights
# We can use the train dataset to get a representative count
train_counts = [0] * num_classes
for _, label in dataloaders['train'].dataset.samples:
    train_counts[label] += 1

total_samples = sum(train_counts)
class_weights = total_samples / torch.Tensor(train_counts)

# Invert the weights for CrossEntropyLoss (higher weight for smaller class)
# Let's say defective_product is index 0 and good_product is index 1
# This ensures that the loss for a defective product is penalized more
class_weights = class_weights.to(device)

print(f"Original class counts: {train_counts}")
print(f"Calculated class weights: {class_weights}")

# Use weighted CrossEntropyLoss
criterion_weighted = nn.CrossEntropyLoss(weight=class_weights)

# --- Fine-tuning the entire network ---
print("\nUnfreezing all layers for fine-tuning...")

# Unfreeze all model parameters
for param in model.parameters():
    param.requires_grad = True

# Now, we will optimize ALL parameters, but with a much lower learning rate
# This helps prevent catastrophic forgetting of the pre-trained weights
optimizer_fine_tune = optim.Adam(model.parameters(), lr=0.0001)

# --- Re-run the Training Loop with the new setup ---

print("\nStarting fine-tuning...")
NUM_EPOCHS_FINE_TUNE = 15 # Train for fewer epochs now
model_ft_tuned = train_model(model, dataloaders, criterion_weighted, optimizer_fine_tune, num_epochs=NUM_EPOCHS_FINE_TUNE)

# Save the newly fine-tuned model
torch.save(model_ft_tuned.state_dict(), 'best_fine_tuned_classifier.pth')
print(f"Fine-tuned model saved to: best_fine_tuned_classifier.pth")

# --- Final Evaluation on the Fine-Tuned Model ---

# We need to reload the model with the best fine-tuned weights
model_ft_tuned.load_state_dict(torch.load('best_fine_tuned_classifier.pth'))

# Run the evaluation function
evaluate_model(model_ft_tuned, dataloaders, class_names)

Recalculating weights for imbalanced classes...
Original class counts: [27, 378]
Calculated class weights: tensor([15.0000,  1.0714], device='cuda:0')

Unfreezing all layers for fine-tuning...

Starting fine-tuning...
Epoch 1/15
----------
train Loss: 1.0597 Acc: 0.7457
val Loss: 0.7887 Acc: 0.7065

Epoch 2/15
----------
train Loss: 0.5778 Acc: 0.6840
val Loss: 0.5341 Acc: 0.7464

Epoch 3/15
----------
train Loss: 0.4736 Acc: 0.7062
val Loss: 0.5717 Acc: 0.7609

Epoch 4/15
----------
train Loss: 0.4321 Acc: 0.7481
val Loss: 0.4921 Acc: 0.8043

Epoch 5/15
----------
train Loss: 0.3811 Acc: 0.7012
val Loss: 0.5532 Acc: 0.7500

Epoch 6/15
----------
train Loss: 0.4765 Acc: 0.8543
val Loss: 0.5260 Acc: 0.7101

Epoch 7/15
----------
train Loss: 0.3335 Acc: 0.7333
val Loss: 0.5836 Acc: 0.7428

Epoch 8/15
----------
train Loss: 0.3823 Acc: 0.8173
val Loss: 0.6543 Acc: 0.6993

Epoch 9/15
----------
train Loss: 0.3950 Acc: 0.8370
val Loss: 0.7962 Acc: 0.6775

Epoch 10/15
----------
train Loss: 

## **Model Testing**

In [13]:
# Path to a sample image from your test set (e.g., a defective product)
sample_image_path = '/kaggle/input/my-mc-defect-dataset/my_defect_dataset/test/defective_product/000.png' 

# Load the saved model
loaded_model = models.resnet50(weights=None)
num_ftrs = loaded_model.fc.in_features
loaded_model.fc = nn.Linear(num_ftrs, num_classes)
loaded_model.load_state_dict(torch.load('best_fine_tuned_classifier.pth'))
loaded_model = loaded_model.to(device)
loaded_model.eval()

# The same transformations used for the test set must be applied to new images
inference_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

def predict_image(image_path, model, transform, class_names):
    """
    Predicts the class of a single image.
    
    Args:
        image_path (str): The path to the image file.
        model (nn.Module): The trained model.
        transform (transforms.Compose): The image transformations.
        class_names (list): The list of class names.
    
    Returns:
        tuple: The predicted class name and confidence score.
    """
    # Load the image
    image = Image.open(image_path).convert('RGB')
    
    # Apply transformations and add a batch dimension
    image_tensor = transform(image).unsqueeze(0).to(device)
    
    # Make a prediction
    with torch.no_grad():
        output = model(image_tensor)
        
    # Get the confidence score and predicted class
    probabilities = F.softmax(output, dim=1)
    confidence, predicted_idx = torch.max(probabilities, 1)
    predicted_class = class_names[predicted_idx.item()]
    
    return predicted_class, confidence.item()

# Run the prediction on the sample image
predicted_class, confidence = predict_image(sample_image_path, loaded_model, inference_transforms, class_names)

print(f"Sample Image Path: {sample_image_path}")
print(f"Predicted Class: {predicted_class}")
print(f"Confidence: {confidence:.4f}")

Sample Image Path: /kaggle/input/my-mc-defect-dataset/my_defect_dataset/test/defective_product/000.png
Predicted Class: good_product
Confidence: 0.8028
