## Grid search code

In [None]:
import os
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from sklearn.metrics import classification_report
import numpy as np
import pandas as pd
import random
from datetime import datetime

def set_seed(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy's global generator, the PyTorch
    CPU generator and all CUDA generators with the same ``seed``, then asks
    cuDNN to use deterministic kernels.

    Args:
        seed: Integer seed shared by all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_rng in seeders:
        seed_rng(seed)
    torch.backends.cudnn.deterministic = True

class FruitCNN(nn.Module):
    """Small three-stage convolutional classifier for 100x100 RGB images.

    Each stage is ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)``, taking the
    channel count 3 -> 32 -> 64 -> 128 while halving the spatial size
    (100 -> 50 -> 25 -> 12). The flattened ``128 * 12 * 12`` features feed a
    256-unit hidden layer with dropout and a final linear layer producing one
    logit per class.

    The first linear layer is sized for 100x100 inputs; other input sizes will
    fail with a shape mismatch in that layer.

    Args:
        num_classes: Number of output logits. Defaults to 4, the value that was
            previously hard-coded, so ``FruitCNN()`` builds the same network.
    """

    def __init__(self, num_classes=4):
        super().__init__()
        # Layer order (and therefore state-dict keys and parameter
        # initialisation order) is kept identical to the original definition.
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(128 * 12 * 12, 256), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return the raw class logits for a batch of images ``x``."""
        return self.model(x)

_SUPPORTED_OPTIMIZERS = ('SGD', 'Adam', 'AdamW')


def _build_optimizer(opt_name, params, lr):
    """Create the optimizer named ``opt_name`` over ``params`` with learning rate ``lr``."""
    if opt_name == 'SGD':
        return optim.SGD(params, lr=lr, momentum=0.9)
    if opt_name == 'Adam':
        return optim.Adam(params, lr=lr)
    if opt_name == 'AdamW':
        return optim.AdamW(params, lr=lr)
    raise ValueError(f"Unknown optimizer: {opt_name}")


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader``; train when ``optimizer`` is given, else evaluate.

    Returns ``(accuracy, summed_loss)`` where ``summed_loss`` is the sum of the
    per-batch loss values (not a per-sample average).
    """
    training = optimizer is not None
    model.train(training)
    correct, total, running_loss = 0, 0, 0.0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()
            running_loss += loss.item()
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    return correct / total, running_loss


def _predict(model, loader, device):
    """Return ``(y_true, y_pred)`` lists of class indices for every sample in ``loader``."""
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images.to(device))
            y_pred.extend(outputs.argmax(dim=1).cpu().tolist())
            y_true.extend(labels.tolist())
    return y_true, y_pred


def run_grid_search(train_path, test_path, optimizer_lr_dict, lrd_list, batch_size=16, epochs=10, seed=42):
    """Train a fresh ``FruitCNN`` for every optimizer / learning-rate / decay combination.

    The images under ``train_path`` are split 70/30 into training and
    validation subsets. For each combination the model is trained for
    ``epochs`` epochs with a ``StepLR`` schedule (``step_size=5``,
    ``gamma=lr_decay``), then evaluated on the images under ``test_path``.

    Outputs (both inside ``results/``, named with the start timestamp):
        * ``results_log_<timestamp>.csv`` - one row per combination with the
          final-epoch train/validation accuracy and loss. Losses are the sum
          of the per-batch losses of the last epoch.
        * ``classification_reports_<timestamp>.txt`` - the sklearn
          classification report on the test set for each combination.

    Args:
        train_path: Root folder of the training images (one sub-folder per class).
        test_path: Root folder of the test images, with the same class folders.
        optimizer_lr_dict: Maps an optimizer name (``'SGD'``, ``'Adam'`` or
            ``'AdamW'``) to the learning rates to try with it.
        lrd_list: ``StepLR`` gamma values tried for every optimizer/lr pair.
        batch_size: Batch size for all data loaders.
        epochs: Number of training epochs per combination (at least 1).
        seed: Seed used for the data split and re-applied before every
            combination.

    Raises:
        ValueError: If an optimizer name is not supported, ``epochs`` is less
            than 1, the training split would be empty, or the train and test
            folders do not contain the same classes.
    """
    # Function-scope import: the file's top-level imports only bring in
    # DataLoader and random_split from torch.utils.data.
    from torch.utils.data import Subset

    # Fail fast on configuration errors instead of part-way through the grid.
    unknown = [name for name in optimizer_lr_dict if name not in _SUPPORTED_OPTIMIZERS]
    if unknown:
        raise ValueError(f"Unknown optimizer: {unknown[0]}")
    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs}")

    set_seed(seed)

    transform_train = transforms.Compose([
        transforms.Resize((100, 100)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])
    transform_test = transforms.Compose([
        transforms.Resize((100, 100)),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])

    train_data = ImageFolder(train_path, transform=transform_train)
    # Second view of the same folder with the deterministic transform, so that
    # validation images are not randomly flipped/rotated.
    val_source = ImageFolder(train_path, transform=transform_test)
    test_data = ImageFolder(test_path, transform=transform_test)

    class_names = train_data.classes
    if test_data.classes != class_names:
        # Different folder sets would give the same index a different meaning
        # in the two datasets and silently corrupt the test metrics.
        raise ValueError(
            f"Class folders differ between train ({class_names}) and test ({test_data.classes})"
        )

    train_size = int(0.7 * len(train_data))
    val_size = len(train_data) - train_size
    if train_size == 0:
        raise ValueError("Not enough training images to create a 70/30 train/validation split")
    train_dataset, val_split = random_split(train_data, [train_size, val_size])
    val_dataset = Subset(val_source, val_split.indices)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Classes:", class_names)
    print("Using device:", device)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    os.makedirs("results", exist_ok=True)
    csv_path = f"results/results_log_{timestamp}.csv"
    report_path = f"results/classification_reports_{timestamp}.txt"

    grid = [
        (opt_name, lr, lrd)
        for opt_name, lr_list in optimizer_lr_dict.items()
        for lr in lr_list
        for lrd in lrd_list
    ]
    label_ids = list(range(len(class_names)))

    all_results = []
    with open(report_path, "w", encoding="utf-8") as report_file:
        for opt_name, lr, lrd in grid:
            print(f"\n=== Training with {opt_name}, lr={lr}, lr_decay={lrd} ===")

            # Re-seed so every combination starts from the same initial weights
            # and sees the same shuffle order; differences between runs then
            # come from the hyperparameters rather than from RNG state.
            set_seed(seed)
            model = FruitCNN().to(device)
            criterion = nn.CrossEntropyLoss()
            optimizer = _build_optimizer(opt_name, model.parameters(), lr)
            scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=lrd)

            train_acc, val_acc, train_loss, val_loss = [], [], [], []
            for epoch in range(epochs):
                epoch_acc, epoch_loss = _run_epoch(model, train_loader, criterion, device, optimizer)
                train_acc.append(epoch_acc)
                train_loss.append(epoch_loss)

                epoch_acc, epoch_loss = _run_epoch(model, val_loader, criterion, device)
                val_acc.append(epoch_acc)
                val_loss.append(epoch_loss)
                scheduler.step()

                print(f"Epoch {epoch+1}, Train Acc: {train_acc[-1]:.4f}, Val Acc: {val_acc[-1]:.4f}")

            y_true, y_pred = _predict(model, test_loader, device)
            # Explicit labels keep the report aligned with class_names even if
            # a class is absent from the test data; zero_division=0 keeps the
            # reported 0.0 values without the UndefinedMetricWarning spam.
            report = classification_report(
                y_true, y_pred,
                labels=label_ids,
                target_names=class_names,
                zero_division=0,
            )
            print("\nClassification Report:")
            print(report)

            report_file.write(
                f"=== Optimizer: {opt_name}, LR: {lr}, Decay: {lrd} ===\n{report}\n\n"
            )
            report_file.flush()

            all_results.append({
                'optimizer': opt_name,
                'lr': lr,
                'lr_decay': lrd,
                'final_train_acc': train_acc[-1],
                'final_val_acc': val_acc[-1],
                'final_train_loss': train_loss[-1],
                'final_val_loss': val_loss[-1]
            })
            # Persist after every combination so an interrupted search keeps
            # the results gathered so far.
            pd.DataFrame(all_results).to_csv(csv_path, index=False)

            del model
            torch.cuda.empty_cache()

    pd.DataFrame(all_results).to_csv(csv_path, index=False)
    print(f"\nSaved training results to {csv_path}")
    print(f"Saved classification reports to {report_path}")


# Hyperparameter grid: every optimizer name maps to the learning rates tried with it.
optimizer_lr_dict = dict(
    SGD=[0.1, 0.05, 0.01, 0.001, 0.0001],
    Adam=[0.01, 0.005, 0.001, 0.0005, 0.0001],
    AdamW=[0.01, 0.005, 0.001, 0.0005, 0.0001],
)
# Learning-rate decay factors swept for every optimizer / learning-rate pair.
lrd_list = [0.9, 0.7, 0.5]

# Launch the full sweep on the dataset folders one level above the notebook.
run_grid_search(
    "../train",
    "../test",
    optimizer_lr_dict,
    lrd_list,
    batch_size=32,
    epochs=15,
    seed=42,
)


Classes: ['apple', 'banana', 'mixed', 'orange']
Using device: cpu

=== Training with SGD, lr=0.1, lr_decay=0.9 ===
Epoch 1, Train Acc: 0.3054, Val Acc: 0.3792
Epoch 2, Train Acc: 0.4375, Val Acc: 0.2542
Epoch 3, Train Acc: 0.3071, Val Acc: 0.2583
Epoch 4, Train Acc: 0.2554, Val Acc: 0.2875
Epoch 5, Train Acc: 0.2964, Val Acc: 0.2542
Epoch 6, Train Acc: 0.2625, Val Acc: 0.2750
Epoch 7, Train Acc: 0.2393, Val Acc: 0.2208
Epoch 8, Train Acc: 0.2768, Val Acc: 0.2208
Epoch 9, Train Acc: 0.2393, Val Acc: 0.2750
Epoch 10, Train Acc: 0.2411, Val Acc: 0.2208
Epoch 11, Train Acc: 0.2375, Val Acc: 0.2208
Epoch 12, Train Acc: 0.2625, Val Acc: 0.2208
Epoch 13, Train Acc: 0.2214, Val Acc: 0.2542
Epoch 14, Train Acc: 0.2571, Val Acc: 0.2208
Epoch 15, Train Acc: 0.2250, Val Acc: 0.2208


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Classification Report:
              precision    recall  f1-score   support

       apple       0.00      0.00      0.00        19
      banana       0.00      0.00      0.00        18
       mixed       0.00      0.00      0.00         5
      orange       0.30      1.00      0.46        18

    accuracy                           0.30        60
   macro avg       0.07      0.25      0.12        60
weighted avg       0.09      0.30      0.14        60


=== Training with SGD, lr=0.1, lr_decay=0.7 ===
Epoch 1, Train Acc: 0.2911, Val Acc: 0.2750
Epoch 2, Train Acc: 0.2839, Val Acc: 0.2500
Epoch 3, Train Acc: 0.3411, Val Acc: 0.3958
Epoch 4, Train Acc: 0.4268, Val Acc: 0.4125
Epoch 5, Train Acc: 0.3446, Val Acc: 0.2500
Epoch 6, Train Acc: 0.2875, Val Acc: 0.3333
Epoch 7, Train Acc: 0.2482, Val Acc: 0.2667
Epoch 8, Train Acc: 0.3679, Val Acc: 0.3292
Epoch 9, Train Acc: 0.3536, Val Acc: 0.2708
Epoch 10, Train Acc: 0.2929, Val Acc: 0.3250
Epoch 11, Train Acc: 0.3893, Val Acc: 0.4625
Epoch 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Classification Report:
              precision    recall  f1-score   support

       apple       0.32      1.00      0.48        19
      banana       0.00      0.00      0.00        18
       mixed       0.00      0.00      0.00         5
      orange       0.00      0.00      0.00        18

    accuracy                           0.32        60
   macro avg       0.08      0.25      0.12        60
weighted avg       0.10      0.32      0.15        60


=== Training with SGD, lr=0.05, lr_decay=0.9 ===
Epoch 1, Train Acc: 0.3357, Val Acc: 0.3875
Epoch 2, Train Acc: 0.4250, Val Acc: 0.2500
Epoch 3, Train Acc: 0.3304, Val Acc: 0.3417
Epoch 4, Train Acc: 0.3179, Val Acc: 0.3583
Epoch 5, Train Acc: 0.3232, Val Acc: 0.2208
Epoch 6, Train Acc: 0.2875, Val Acc: 0.3625
Epoch 7, Train Acc: 0.3643, Val Acc: 0.3917
Epoch 8, Train Acc: 0.4554, Val Acc: 0.6250
Epoch 9, Train Acc: 0.3804, Val Acc: 0.2583
Epoch 10, Train Acc: 0.4232, Val Acc: 0.4583
Epoch 11, Train Acc: 0.5089, Val Acc: 0.5750
Epoch

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Classification Report:
              precision    recall  f1-score   support

       apple       0.94      0.89      0.92        19
      banana       0.00      0.00      0.00        18
       mixed       0.14      1.00      0.24         5
      orange       0.80      0.22      0.35        18

    accuracy                           0.43        60
   macro avg       0.47      0.53      0.38        60
weighted avg       0.55      0.43      0.42        60


=== Training with SGD, lr=0.05, lr_decay=0.7 ===
Epoch 1, Train Acc: 0.3089, Val Acc: 0.4042
Epoch 2, Train Acc: 0.4054, Val Acc: 0.5292
Epoch 3, Train Acc: 0.4321, Val Acc: 0.3417
Epoch 4, Train Acc: 0.4411, Val Acc: 0.5792
Epoch 5, Train Acc: 0.3000, Val Acc: 0.3542
Epoch 6, Train Acc: 0.4036, Val Acc: 0.2208
Epoch 7, Train Acc: 0.3768, Val Acc: 0.5000
Epoch 8, Train Acc: 0.4571, Val Acc: 0.6083
Epoch 9, Train Acc: 0.4857, Val Acc: 0.5083
Epoch 10, Train Acc: 0.5821, Val Acc: 0.5833
Epoch 11, Train Acc: 0.6304, Val Acc: 0.5583
Epoch

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Classification Report:
              precision    recall  f1-score   support

       apple       0.00      0.00      0.00        19
      banana       0.00      0.00      0.00        18
       mixed       0.10      1.00      0.19         5
      orange       0.36      0.22      0.28        18

    accuracy                           0.15        60
   macro avg       0.12      0.31      0.12        60
weighted avg       0.12      0.15      0.10        60


=== Training with SGD, lr=0.0001, lr_decay=0.7 ===
Epoch 1, Train Acc: 0.2714, Val Acc: 0.2208
Epoch 2, Train Acc: 0.2786, Val Acc: 0.2208
Epoch 3, Train Acc: 0.2714, Val Acc: 0.2208
Epoch 4, Train Acc: 0.2625, Val Acc: 0.2208
Epoch 5, Train Acc: 0.2679, Val Acc: 0.2208
Epoch 6, Train Acc: 0.2607, Val Acc: 0.2208
Epoch 7, Train Acc: 0.2661, Val Acc: 0.2208
Epoch 8, Train Acc: 0.2714, Val Acc: 0.2208
Epoch 9, Train Acc: 0.2696, Val Acc: 0.2208
Epoch 10, Train Acc: 0.2679, Val Acc: 0.2208
Epoch 11, Train Acc: 0.2750, Val Acc: 0.2208
Epo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Classification Report:
              precision    recall  f1-score   support

       apple       0.00      0.00      0.00        19
      banana       0.00      0.00      0.00        18
       mixed       0.00      0.00      0.00         5
      orange       0.30      1.00      0.46        18

    accuracy                           0.30        60
   macro avg       0.07      0.25      0.12        60
weighted avg       0.09      0.30      0.14        60


=== Training with SGD, lr=0.0001, lr_decay=0.5 ===
Epoch 1, Train Acc: 0.2375, Val Acc: 0.2750
Epoch 2, Train Acc: 0.2446, Val Acc: 0.2750
Epoch 3, Train Acc: 0.2446, Val Acc: 0.2750
Epoch 4, Train Acc: 0.2625, Val Acc: 0.2750
Epoch 5, Train Acc: 0.3054, Val Acc: 0.2917
Epoch 6, Train Acc: 0.3107, Val Acc: 0.3083
Epoch 7, Train Acc: 0.2768, Val Acc: 0.3167
Epoch 8, Train Acc: 0.3054, Val Acc: 0.3208
Epoch 9, Train Acc: 0.3089, Val Acc: 0.3333
Epoch 10, Train Acc: 0.3429, Val Acc: 0.3167
Epoch 11, Train Acc: 0.3357, Val Acc: 0.3417
Epo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Classification Report:
              precision    recall  f1-score   support

       apple       1.00      0.11      0.19        19
      banana       0.32      0.61      0.42        18
       mixed       0.00      0.00      0.00         5
      orange       0.46      0.61      0.52        18

    accuracy                           0.40        60
   macro avg       0.45      0.33      0.28        60
weighted avg       0.55      0.40      0.34        60


=== Training with Adam, lr=0.01, lr_decay=0.9 ===
Epoch 1, Train Acc: 0.2589, Val Acc: 0.2375
Epoch 2, Train Acc: 0.2446, Val Acc: 0.2208
Epoch 3, Train Acc: 0.2214, Val Acc: 0.2125
Epoch 4, Train Acc: 0.2911, Val Acc: 0.2208
Epoch 5, Train Acc: 0.2571, Val Acc: 0.2208
Epoch 6, Train Acc: 0.2696, Val Acc: 0.2208
Epoch 7, Train Acc: 0.2661, Val Acc: 0.2208
Epoch 8, Train Acc: 0.2625, Val Acc: 0.2208
Epoch 9, Train Acc: 0.2625, Val Acc: 0.2208
Epoch 10, Train Acc: 0.2625, Val Acc: 0.2208
Epoch 11, Train Acc: 0.2661, Val Acc: 0.2208
Epoc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Classification Report:
              precision    recall  f1-score   support

       apple       0.00      0.00      0.00        19
      banana       0.00      0.00      0.00        18
       mixed       0.00      0.00      0.00         5
      orange       0.30      1.00      0.46        18

    accuracy                           0.30        60
   macro avg       0.07      0.25      0.12        60
weighted avg       0.09      0.30      0.14        60


=== Training with Adam, lr=0.01, lr_decay=0.7 ===
Epoch 1, Train Acc: 0.2732, Val Acc: 0.2542
Epoch 2, Train Acc: 0.2482, Val Acc: 0.2542
Epoch 3, Train Acc: 0.2482, Val Acc: 0.2542
Epoch 4, Train Acc: 0.2482, Val Acc: 0.2542
Epoch 5, Train Acc: 0.2446, Val Acc: 0.2208
Epoch 6, Train Acc: 0.2625, Val Acc: 0.2208
Epoch 7, Train Acc: 0.2625, Val Acc: 0.2208
Epoch 8, Train Acc: 0.2625, Val Acc: 0.2208
Epoch 9, Train Acc: 0.2625, Val Acc: 0.2208
Epoch 10, Train Acc: 0.2625, Val Acc: 0.2208
Epoch 11, Train Acc: 0.2625, Val Acc: 0.2208
Epoc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Classification Report:
              precision    recall  f1-score   support

       apple       0.00      0.00      0.00        19
      banana       0.00      0.00      0.00        18
       mixed       0.00      0.00      0.00         5
      orange       0.30      1.00      0.46        18

    accuracy                           0.30        60
   macro avg       0.07      0.25      0.12        60
weighted avg       0.09      0.30      0.14        60


=== Training with Adam, lr=0.01, lr_decay=0.5 ===
Epoch 1, Train Acc: 0.3304, Val Acc: 0.4333
Epoch 2, Train Acc: 0.5161, Val Acc: 0.4958
Epoch 3, Train Acc: 0.5732, Val Acc: 0.7333
Epoch 4, Train Acc: 0.6857, Val Acc: 0.7542
Epoch 5, Train Acc: 0.7286, Val Acc: 0.7917
Epoch 6, Train Acc: 0.7768, Val Acc: 0.8042
Epoch 7, Train Acc: 0.8161, Val Acc: 0.8000
Epoch 8, Train Acc: 0.8268, Val Acc: 0.7792
Epoch 9, Train Acc: 0.7821, Val Acc: 0.7500
Epoch 10, Train Acc: 0.8375, Val Acc: 0.7792
Epoch 11, Train Acc: 0.8304, Val Acc: 0.8167
Epoc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Classification Report:
              precision    recall  f1-score   support

       apple       0.00      0.00      0.00        19
      banana       0.00      0.00      0.00        18
       mixed       0.00      0.00      0.00         5
      orange       0.30      1.00      0.46        18

    accuracy                           0.30        60
   macro avg       0.07      0.25      0.12        60
weighted avg       0.09      0.30      0.14        60


=== Training with Adam, lr=0.005, lr_decay=0.7 ===
Epoch 1, Train Acc: 0.2393, Val Acc: 0.2625
Epoch 2, Train Acc: 0.2946, Val Acc: 0.2542
Epoch 3, Train Acc: 0.3071, Val Acc: 0.2917
Epoch 4, Train Acc: 0.3500, Val Acc: 0.2708
Epoch 5, Train Acc: 0.4589, Val Acc: 0.5250
Epoch 6, Train Acc: 0.5321, Val Acc: 0.6167
Epoch 7, Train Acc: 0.5893, Val Acc: 0.5917
Epoch 8, Train Acc: 0.6250, Val Acc: 0.5458
Epoch 9, Train Acc: 0.5964, Val Acc: 0.6167
Epoch 10, Train Acc: 0.6696, Val Acc: 0.6583
Epoch 11, Train Acc: 0.7089, Val Acc: 0.6458
Epo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Classification Report:
              precision    recall  f1-score   support

       apple       0.00      0.00      0.00        19
      banana       0.00      0.00      0.00        18
       mixed       0.00      0.00      0.00         5
      orange       0.30      1.00      0.46        18

    accuracy                           0.30        60
   macro avg       0.07      0.25      0.12        60
weighted avg       0.09      0.30      0.14        60


=== Training with AdamW, lr=0.005, lr_decay=0.9 ===
Epoch 1, Train Acc: 0.2839, Val Acc: 0.2292
Epoch 2, Train Acc: 0.3857, Val Acc: 0.4750
Epoch 3, Train Acc: 0.4964, Val Acc: 0.5458
Epoch 4, Train Acc: 0.5893, Val Acc: 0.6667
Epoch 5, Train Acc: 0.6304, Val Acc: 0.6792
Epoch 6, Train Acc: 0.6821, Val Acc: 0.7375
Epoch 7, Train Acc: 0.7268, Val Acc: 0.7875
Epoch 8, Train Acc: 0.7679, Val Acc: 0.8000
Epoch 9, Train Acc: 0.7554, Val Acc: 0.7750
Epoch 10, Train Acc: 0.8000, Val Acc: 0.8167
Epoch 11, Train Acc: 0.8054, Val Acc: 0.8208
Ep

# Code Rationale

| Component       | Purpose                                   |
| --------------- | ----------------------------------------- |
| 15 Epochs       | Enough for a small dataset; limits overfitting |
| Resize(100x100) | Standardizes input size                   |
| Flip + Rotate   | Augments data for better generalization   |
| Normalize       | Speeds up and stabilizes learning         |
| CNN Layers      | Extract low-to-high level visual features |
| Dropout         | Regularization to reduce overfitting      |
| Fully Connected | Decision making for classification        |


# Model Training & Evaluation Summary

---

## Model Training Observations

- **Epochs**: 15  
- **Initial Accuracy**: 45.83%  
- **Final Training Accuracy**: 97.92%  
- **Training Loss**: Decreased from 9.78 to approximately 0.45

### Trend:
- The model shows steady learning and convergence.
- Accuracy and loss improvements indicate effective training and model fit.

---

## Test Set Performance

| Class   | Precision | Recall | F1-score | Support |
|---------|-----------|--------|----------|---------|
| Apple   | 0.90      | 1.00   | 0.95     | 19      |
| Banana  | 0.80      | 0.89   | 0.84     | 18      |
| Mixed   | 0.00      | 0.00   | 0.00     | 5       |
| Orange  | 0.89      | 0.94   | 0.92     | 18      |

- Apple and Orange were classified very well.
- Mixed class was completely misclassified — the model made no correct predictions.
- Overall test accuracy was 87%.

---

## Warnings and Issues

### PIL Warning:
`Palette images with Transparency expressed in bytes should be converted to RGBA images`  
Some images (e.g., `.png` or `.gif`) contain transparency and should be explicitly converted to RGBA to ensure proper processing.

### UndefinedMetricWarning from sklearn:
`Precision is ill-defined and being set to 0.0 in labels with no predicted samples.`  
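This occurs because the model never predicted the Mixed class, so precision for that class is undefined (0 predicted samples) and is reported as 0.0. Recall is still well-defined: 0 of the 5 Mixed images were recovered, giving 0.00.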

---

## Overall Performance Summary

- **Final Test Accuracy**: 87%
- **Macro Average F1-score**: 0.68 (lower due to poor performance on Mixed)
- **Weighted Average F1-score**: 0.83 (dominated by Apple, Banana, and Orange, which together make up 55 of the 60 test images)

---

## Recommendations

1. Check class distribution in the training set to ensure the Mixed class is not underrepresented.
2. Add more training examples for the Mixed class or apply data augmentation.
3. Consider using class weighting in the loss function to compensate for class imbalance.
4. Plot a confusion matrix to understand where the model is confusing Mixed with other classes.
5. Ensure all images are correctly formatted and converted to RGB or RGBA where necessary.

---


# Why a 3-Layer CNN Architecture Was Chosen

---

## 1. Progressive Feature Extraction

- **Layer 1** learns basic features such as edges and textures.
- **Layer 2** identifies more complex patterns like shapes and contours.
- **Layer 3** extracts high-level, abstract features (e.g., outlines or combinations of shapes).
- This hierarchy allows the model to understand images from simple to complex representations.

---

## 2. Suitable for Simple Visual Categories

- The dataset involves fruits, which have **distinct colors, textures, and shapes**.
- The resized image dimension is **100×100**, which is relatively low.
- A deeper architecture would be overkill and may introduce unnecessary complexity.

---

## 3. Balanced Depth to Prevent Overfitting

- **Too shallow (1–2 layers)**: May underfit and miss important patterns.
- **Too deep (5+ layers)**: May overfit or require more data and compute.
- **3 layers** is a balanced choice, offering enough capacity to learn without overfitting.

---

## 4. Efficient Feature Map Reduction

- Input size: **100×100**
- After 3 `MaxPool2d(2)` layers:
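  - Output size reduces as follows: `100 → 50 → 25 → 12` (the last pooling step rounds 25 / 2 down to 12, which is why the first fully connected layer expects `128 × 12 × 12` inputs)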
- The final feature maps are small and efficient to flatten for fully connected layers.

---

## 5. Proven Practical Effectiveness

- 3-layer CNNs perform well on small to medium image datasets (e.g., MNIST, CIFAR-10).
- Ideal for classification tasks with a **limited number of classes**.
- Fast to train, interpretable, and good for prototyping or educational use.

---

## Summary

| Reason                            | Explanation                                                                 |
|-----------------------------------|-----------------------------------------------------------------------------|
| Hierarchical feature learning     | Captures visual patterns from edges to object shapes                        |
| Appropriate model depth           | Deep enough to learn, but avoids unnecessary complexity                     |
| Reduces overfitting risk          | Suitable depth for datasets with limited samples per class                  |
| Efficient for 100×100 images      | Spatial dimensions reduce nicely through pooling                            |
| Fast and effective                | Trains quickly, works well for fruit classification tasks                   |

---


# Recommended Number of Training Images and Rationale

---

## Recommended Number of Images per Class

| Class     | Minimum Recommended | Ideal Target | Rationale |
|-----------|---------------------|--------------|-----------|
| Apple     | ≥ 100               | 200–500      | Performs well; more data helps improve generalization. |
| Banana    | ≥ 100               | 200–500      | Decent performance; more examples improve robustness. |
| Mixed     | ≥ 200               | 300–600+     | Currently underperforms; needs significantly more data. |
| Orange    | ≥ 100               | 200–500      | Strong baseline; should maintain class balance. |

---

## Justifications and Rationale

### 1. Preventing Class Imbalance

- The `Mixed` class most likely fails because it is underrepresented in the training data.
- Adding more examples ensures balanced training and fairer model attention.
- Balanced datasets reduce bias and improve classification accuracy across all classes.

### 2. Enhancing Generalization

- CNNs require visual variety (angle, lighting, background) to generalize.
- Small datasets (<100/class) often cause overfitting — the model memorizes instead of learning patterns.
- 300–500 images per class offer enough variability for a simple CNN to generalize well.

### 3. Data vs Model Complexity

- Your model is a **3-layer CNN**, which is relatively simple and data-efficient.
- Such models typically perform well with 200–500 images per class, especially when combined with data augmentation.

### 4. Empirical Evidence

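- Standard benchmarks give a sense of scale: CIFAR-100 provides 500 training images per class and CIFAR-10 provides 5,000, whereas Flowers102 has only 40–258 images per class and is usually tackled with transfer learning rather than a small CNN trained from scratch.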
- Deeper models like ResNet often need more data, but shallower models benefit greatly from just 300–600/class.

---

## Summary Recommendation

| Class Type           | Minimum (per class) | Ideal (per class) | Priority     |
|----------------------|----------------------|-------------------|--------------|
| Well-performing      | 100–150              | 300–500           | Medium       |
| Mid-performing       | 100–200              | 300–500           | Medium       |
| Underperforming      | 200–300              | 400–600+          | High (focus) |

> Aim for **~1500–2000 total images**, with **additional focus on the 'Mixed' class**.

---

## Next Step

Consider using data augmentation or collecting more labeled images. This will enhance the model’s ability to generalize and improve its accuracy across all classes.

---
