<a href="https://colab.research.google.com/github/ericodle/J_PlanktoNET/blob/main/VGG16_finetune_2023July19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import os

from google.colab import drive
drive.mount('/content/drive')

# Set the path to your dataset
train_dir = "/content/drive/MyDrive/20230613_Tanchaexamples"
model_path = "/content/drive/MyDrive/pua_path.pth"
unsorted_data_dir = "/content/drive/MyDrive/D20230613T060932"
output_dir = "/content/drive/MyDrive/puafolder"

# Set the batch size for training
batch_size = 32

# Set the image size for resizing and normalization
image_size = 224

# Define the transformations for the training set
data_transforms = transforms.Compose([
    transforms.RandomResizedCrop(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x[:, :-12, :]),  # Crop off bottom 12 pixels
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load the dataset with ImageFolder
train_dataset = datasets.ImageFolder(
    root=train_dir,
    transform=data_transforms
)

# Create a dataloader for the training set
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=4
)

# Specify the number of classes
num_classes = len(train_dataset.classes)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




In [3]:
# Load the pre-trained VGG16 model
model = models.vgg16(pretrained=True)

# Modify the output layer
model.classifier[6] = nn.Linear(4096, num_classes)

# Freeze initial layers
for param in model.features.parameters():
    param.requires_grad = False

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Move the model to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()

    running_loss = 0.0
    running_corrects = 0

    for inputs, labels in train_dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        with torch.set_grad_enabled(True):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects.double() / len(train_dataset)

    print('Epoch {}: Loss: {:.4f} Acc: {:.4f}'.format(epoch+1, epoch_loss, epoch_acc))

torch.save(model.state_dict(), model_path)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:05<00:00, 104MB/s]


Epoch 1: Loss: 1.6639 Acc: 0.1969
Epoch 2: Loss: 1.2434 Acc: 0.5748
Epoch 3: Loss: 1.0286 Acc: 0.6299
Epoch 4: Loss: 0.9817 Acc: 0.6378
Epoch 5: Loss: 0.8132 Acc: 0.7008
Epoch 6: Loss: 0.7309 Acc: 0.7402
Epoch 7: Loss: 0.6431 Acc: 0.7953
Epoch 8: Loss: 0.6076 Acc: 0.7953
Epoch 9: Loss: 0.7166 Acc: 0.7244
Epoch 10: Loss: 0.6082 Acc: 0.8268
Epoch 11: Loss: 0.4970 Acc: 0.8504
Epoch 12: Loss: 0.4865 Acc: 0.8819
Epoch 13: Loss: 0.5194 Acc: 0.8661
Epoch 14: Loss: 0.4656 Acc: 0.8661
Epoch 15: Loss: 0.4943 Acc: 0.8504
Epoch 16: Loss: 0.4704 Acc: 0.8268
Epoch 17: Loss: 0.4642 Acc: 0.8661
Epoch 18: Loss: 0.3614 Acc: 0.8898
Epoch 19: Loss: 0.3963 Acc: 0.8819
Epoch 20: Loss: 0.4638 Acc: 0.8661


In [12]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import os


# Set the image size for resizing and normalization
image_size = 224

# Define the transformation for the unsorted images
unsorted_data_transforms = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x[:, :-15, :]),  # Crop off bottom 15 pixels
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load the saved model
model = models.vgg16(pretrained=False)
model.classifier[6] = nn.Linear(4096, num_classes)  # Replace with the appropriate number of classes
model.load_state_dict(torch.load(model_path))
model.eval()

# Move the model to the appropriate device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Load the training dataset to retrieve the class names
train_dataset = datasets.ImageFolder(root=train_dir, transform=unsorted_data_transforms)

# Retrieve the class names from the training dataset
class_names = train_dataset.classes

os.makedirs(output_dir, exist_ok=True)

# Iterate over the unsorted images
for filename in os.listdir(unsorted_data_dir):
    image_path = os.path.join(unsorted_data_dir, filename)
    img = Image.open(image_path)
    img = unsorted_data_transforms(img).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(img)
        _, preds = torch.max(outputs, 1)
        predicted_class = preds.item()

    # Get the class name from the fine-tuning dataset
    predicted_class_name = class_names[predicted_class]

    # Create the target class directory if it doesn't exist
    target_class_dir = os.path.join(output_dir, predicted_class_name)
    os.makedirs(target_class_dir, exist_ok=True)

    # Move the image to the corresponding class directory
    target_image_path = os.path.join(target_class_dir, filename)
    os.rename(image_path, target_image_path)

    print(f"Image {filename} moved to class {predicted_class_name}")




Image D20230613T060932_IFCB108_00459.png moved to class Junk
Image D20230613T060932_IFCB108_00584.png moved to class Junk
Image D20230613T060932_IFCB108_00488.png moved to class Junk
Image D20230613T060932_IFCB108_00556.png moved to class Ciliophora
Image D20230613T060932_IFCB108_00356.png moved to class Junk
Image D20230613T060932_IFCB108_00413.png moved to class Junk
Image D20230613T060932_IFCB108_00478.png moved to class Junk
Image D20230613T060932_IFCB108_00546.png moved to class Dinophyceae
Image D20230613T060932_IFCB108_00463.png moved to class Junk
Image D20230613T060932_IFCB108_00426.png moved to class Bacillariophyta
Image D20230613T060932_IFCB108_00454.png moved to class Junk
Image D20230613T060932_IFCB108_00613.png moved to class Junk
Image D20230613T060932_IFCB108_00607.png moved to class Junk
Image D20230613T060932_IFCB108_00450.png moved to class Junk
Image D20230613T060932_IFCB108_00276.png moved to class Dinophyceae
Image D20230613T060932_IFCB108_00260.png moved to clas