In [None]:
import os
import shutil

# 1. Auto-detect the dataset path in Drive.
# Candidates are checked in order; the first one that exists is used.
possible_paths = [
    "/content/drive/MyDrive/data/fgvc-aircraft-2013b",  # Path from Notebook 1
    "/content/drive/MyDrive/fgvc-aircraft-2013b"        # Path from Notebook 2
]

source_root = None
for p in possible_paths:
    if os.path.exists(p):
        source_root = p
        print(f"✅ Found dataset source at: {source_root}")
        break

if source_root is None:
    print("❌ CRITICAL ERROR: Could not find 'fgvc-aircraft-2013b' in Google Drive.")
    print("Please check if the folder is in 'MyDrive/data/' or just 'MyDrive/'.")
else:
    # 2. Remove any previous local copies so the fresh copy cannot nest inside
    #    (or merge with) a stale directory tree.
    print("🧹 Wiping broken local directories...")
    for split in ("train", "test"):
        local_dir = os.path.join("/content", split)
        if os.path.exists(local_dir):
            shutil.rmtree(local_dir)

    # 3. Copy fresh data.
    # shutil.copytree is used instead of shelling out to `cp -r`: failures are
    # raised as exceptions instead of being a silently ignored exit status, and
    # paths containing quotes or spaces cannot break a shell command line.
    for split in ("train", "test"):
        print(f"📂 Copying {split.upper()} data from {source_root}...")
        src_dir = os.path.join(source_root, split)
        dst_dir = os.path.join("/content", split)
        try:
            shutil.copytree(src_dir, dst_dir)
        except OSError as err:  # shutil.Error is a subclass of OSError
            print(f"❌ ERROR: Could not copy {src_dir} to {dst_dir}: {err}")

    # 4. Remove Jupyter checkpoint folders from the copies (the original note
    #    flags this as crucial for ImageFolder).
    print("🧹 Cleaning hidden files...")
    for split in ("train", "test"):
        # os.walk yields nothing when the directory does not exist.
        for dirpath, dirnames, _ in os.walk(os.path.join("/content", split)):
            if ".ipynb_checkpoints" in dirnames:
                shutil.rmtree(os.path.join(dirpath, ".ipynb_checkpoints"))
                # Prune in place so os.walk does not descend into the removed dir.
                dirnames.remove(".ipynb_checkpoints")

    # 5. Final verification: every sub-directory of /content/train counts as a class.
    if os.path.exists("/content/train"):
        classes = [d for d in os.listdir("/content/train") if os.path.isdir(f"/content/train/{d}")]
        print(f"\n🎉 SUCCESS: Found {len(classes)} classes in /content/train")
        if len(classes) > 0:
            print(f"Example class: {classes[0]}")
            print("You can now run the DataLoader cell!")
        else:
            print("⚠️ WARNING: Folder exists but contains 0 classes. Check Drive path.")
    else:
        print("❌ ERROR: Copy failed completely.")

✅ Found dataset source at: /content/drive/MyDrive/fgvc-aircraft-2013b
🧹 Wiping broken local directories...
📂 Copying TRAIN data from /content/drive/MyDrive/fgvc-aircraft-2013b...
📂 Copying TEST data from /content/drive/MyDrive/fgvc-aircraft-2013b...
🧹 Cleaning hidden files...

🎉 SUCCESS: Found 100 classes in /content/train
Example class: MD_80
You can now run the DataLoader cell!


In [None]:
# Setup Device
# torch is imported here so this cell does not depend on a later cell having
# been executed first (Restart & Run All safe).
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

Device: cuda


In [None]:
import os
from PIL import Image
from pathlib import Path

def verify_and_remove_images(directory):
    """Delete JPEG files under ``directory`` that Pillow fails to verify.

    Recursively finds regular files whose extension is ``.jpg`` or ``.jpeg``
    (any letter case), opens each with ``Image.open`` and calls
    ``Image.verify()``. Any file that raises ``OSError`` or ``SyntaxError``
    during that check is removed from disk. Progress and a summary are printed.

    Args:
        directory: Root folder to scan (string or path-like).

    Returns:
        int: Number of files that were removed.
    """
    print(f"Scanning {directory} for corrupted images...")
    root = Path(directory)
    jpeg_suffixes = {".jpg", ".jpeg"}

    # Single traversal; is_file() skips directories that merely have a
    # JPEG-looking name (they cannot be opened or os.remove()'d).
    # Sorted so the scan/log order is deterministic.
    files = sorted(
        path for path in root.rglob("*")
        if path.suffix.lower() in jpeg_suffixes and path.is_file()
    )

    corrupted_count = 0
    for file_path in files:
        try:
            with Image.open(file_path) as img:
                img.verify()  # Verify integrity
        # Pillow's UnidentifiedImageError is a subclass of OSError, so it is
        # covered here without needing to be named explicitly.
        except (OSError, SyntaxError):
            print(f"Bad file found: {file_path} - Removing...")
            os.remove(file_path)
            corrupted_count += 1

    print(f"Scan complete. Removed {corrupted_count} corrupted images from {directory}.\n")
    return corrupted_count

# Sweep the local train split first, then the test split
for split_dir in ("/content/train", "/content/test"):
    verify_and_remove_images(split_dir)

Scanning /content/train for corrupted images...
Scan complete. Removed 0 corrupted images from /content/train.

Scanning /content/test for corrupted images...
Bad file found: /content/test/Gulfstream_V/1546282.jpg - Removing...
Scan complete. Removed 1 corrupted images from /content/test.



In [None]:
import torchvision.transforms as transforms
import setup_dataholders
import importlib
# Re-import the helper module so edits made to it after the first import take effect
importlib.reload(setup_dataholders)

# Preprocessing pipeline: fixed 224x224 resize, tensor conversion, then
# per-channel normalization with the standard ImageNet mean / std
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
manual_transforms = transforms.Compose(preprocessing_steps)

# Build the train/test DataLoaders and the class-name list.
# workers=0 is kept from the original note: it was chosen to avoid a
# DataLoader deadlock encountered earlier.
# NOTE(review): local_train_dir / local_test_dir are not defined in the cells
# visible here - confirm they are set before this cell runs.
loaders_and_classes = setup_dataholders.create_dataloaders(
    train_directory=local_train_dir,
    test_directory=local_test_dir,
    data_transforms=manual_transforms,
    batch_size=32,
    workers=0,
)
train_dataloader, test_dataloader, class_names = loaders_and_classes

# Summary: class count plus the first ten class names
print(f"Number of classes: {len(class_names)}")
print(f"Classes: {class_names[:10]}...")

Number of classes: 100
Classes: ['707_320', '727_200', '737_200', '737_300', '737_400', '737_500', '737_600', '737_700', '737_800', '737_900']...


In [None]:
import torch
import torchvision
from torch import nn
from timeit import default_timer as timer

# 1. Re-create the model (EfficientNet-B0 with the default pretrained weights)
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
model = torchvision.models.efficientnet_b0(weights=weights)

# 2. Freeze the whole feature extractor first
for param in model.features.parameters():
    param.requires_grad = False

# 3. Replace the classifier head (with Dropout for regularization).
# The input width is read from the pretrained head instead of being hard-coded.
torch.manual_seed(42)  # reproducible initialisation of the new Linear layer
head_in_features = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(p=0.3),  # dropout to limit overfitting
    nn.Linear(in_features=head_in_features, out_features=100)  # 100 classes
)

# 4. Unfreeze the last three modules of model.features for fine-tuning
for param in model.features[-3:].parameters():
    param.requires_grad = True

# 5. Setup for training
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)  # label smoothing for hard classes
# Only parameters that require gradients are handed to the optimizer; frozen
# ones never receive a gradient, so tracking them is wasted bookkeeping.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=0.0001)  # low LR for fine-tuning

# 6. Train
# NOTE(review): model.train() also switches BatchNorm layers inside the frozen
# blocks to training mode, so their running statistics keep updating - confirm
# that this is intended.
print("Starting Fine-Tuning...")
epochs = 15
for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}\n-------------------------------")
    model.train()
    running_loss, correct, seen = 0.0, 0, 0

    # Training loop (expects train_dataloader from the DataLoader cell)
    for X, y in train_dataloader:
        X, y = X.to(device), y.to(device)
        logits = model(X)
        loss = loss_fn(logits, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate per-sample totals so a smaller final batch is not
        # over-weighted in the epoch averages.
        batch_size = y.size(0)
        running_loss += loss.item() * batch_size
        # argmax over logits equals argmax over softmax(logits)
        correct += (logits.argmax(dim=1) == y).sum().item()
        seen += batch_size

    train_loss = running_loss / seen
    train_acc = correct / seen
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")

# 7. Save the fine-tuned weights
print("Saving improved model...")
torch.save(model.state_dict(), "aircraft_model.pth")

# Offer a browser download when running in Colab; elsewhere the file simply
# stays on disk instead of the cell failing after training has finished.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available - model saved locally as aircraft_model.pth")
else:
    files.download("aircraft_model.pth")

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


100%|██████████| 20.5M/20.5M [00:00<00:00, 141MB/s]


Starting Fine-Tuning...
Epoch 1/15
-------------------------------
Train Loss: 4.5499 | Train Acc: 0.0315
Epoch 2/15
-------------------------------
Train Loss: 4.1651 | Train Acc: 0.1824
Epoch 3/15
-------------------------------
Train Loss: 3.6168 | Train Acc: 0.3235
Epoch 4/15
-------------------------------
Train Loss: 3.1441 | Train Acc: 0.4165
Epoch 5/15
-------------------------------
Train Loss: 2.7271 | Train Acc: 0.5395
Epoch 6/15
-------------------------------
Train Loss: 2.3981 | Train Acc: 0.6199
Epoch 7/15
-------------------------------
Train Loss: 2.1161 | Train Acc: 0.7010
Epoch 8/15
-------------------------------
Train Loss: 1.8979 | Train Acc: 0.7728
Epoch 9/15
-------------------------------
Train Loss: 1.6964 | Train Acc: 0.8270
Epoch 10/15
-------------------------------
Train Loss: 1.5321 | Train Acc: 0.8655
Epoch 11/15
-------------------------------
Train Loss: 1.4248 | Train Acc: 0.8947
Epoch 12/15
-------------------------------
Train Loss: 1.3210 | Train A

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>