In [2]:
import deeplake
import torch
import numpy as np
from torch.utils.data import Subset, DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt

# Open the hosted PlantVillage (with augmentation) dataset via deeplake.
DATASET_URI = "hub://activeloop/plantvillage-with-augmentation"
dataset = deeplake.load(DATASET_URI)

# Create a custom PyTorch dataset
class PlantVillageDataset(torch.utils.data.Dataset):
    """PyTorch ``Dataset`` adapter around an indexable deeplake-style dataset.

    Each underlying sample is expected to expose ``'images'`` and ``'labels'``
    entries whose ``.numpy()`` method returns a NumPy array.

    Args:
        deeplake_dataset: Indexable, sized collection of samples (for example a
            deeplake dataset or a ``Subset`` wrapping one).
        transform: Optional callable applied to the float image tensor.
    """

    def __init__(self, deeplake_dataset, transform=None):
        self.dataset = deeplake_dataset
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_tensor)`` for the sample at ``idx``.

        The image is moved from axis order (0, 1, 2) to (2, 0, 1), cast to
        float and divided by 255; the label is returned as an int64 tensor
        with the same shape as the stored label array.
        """
        # Look the sample up once and read both fields from it, instead of
        # indexing the underlying dataset separately for image and label.
        sample = self.dataset[idx]
        image = sample['images'].numpy()
        label = sample['labels'].numpy()

        # Convert image to tensor and scale to [0, 1].
        # assumes the stored image is (H, W, C) with 0-255 values — TODO confirm
        image_tensor = torch.from_numpy(image).permute(2, 0, 1).float() / 255.0

        # Cast to int64 in NumPy before handing the label to torch.
        # NOTE(review): label arrays may use an unsigned dtype (e.g. uint32) that
        # some torch versions cannot convert directly — confirm against the dataset.
        label_tensor = torch.from_numpy(np.asarray(label).astype(np.int64))

        # Apply transforms if specified
        if self.transform:
            image_tensor = self.transform(image_tensor)

        return image_tensor, label_tensor
    
# Half-open [start, stop) sample-index ranges kept for this first iteration.
# NOTE(review): these ranges are presumed to cover the peach samples — verify
# against the dataset's labels.
KEEP_INDEX_RANGES = [(0, 1000), (7852, 10149)]
keep_indices = [i for start, stop in KEEP_INDEX_RANGES for i in range(start, stop)]

# Define transforms: resize every image tensor to 224x224, then normalize each
# channel with the given mean/std (the values commonly used with
# ImageNet-pretrained torchvision models).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Restrict the full dataset to the kept indices (this is the "peaches only" step).
new_dataset = Subset(dataset, keep_indices)

# Wrap the subset so each item comes back as an (image_tensor, label_tensor) pair.
full_dataset = PlantVillageDataset(new_dataset, transform=transform)

TRAIN_FRACTION = 0.8
SPLIT_SEED = 42
BATCH_SIZE = 32

train_size = int(TRAIN_FRACTION * len(full_dataset))
val_size = len(full_dataset) - train_size

# Seed the split so the same samples land in train/val on every
# Restart & Run All; without a generator the split changes between runs.
train_dataset, val_dataset = torch.utils.data.random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

|

Opening dataset in read-only mode as you don't have write permissions.


-

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/plantvillage-with-augmentation



-

hub://activeloop/plantvillage-with-augmentation loaded successfully.



 

Okay, we have our data. For this first turn-in we wanted to iterate faster on tuning the model, so we're focusing on classifying just one of the many plants the dataset covers: peaches!

In [22]:
# Batch counts of the two loaders, followed by sample counts of the two splits.
print(*(len(part) for part in (train_loader, val_loader, train_dataset, val_dataset)))

83 21 2637 660


In [None]:
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torchsummary import summary

class PlantDiseaseClassifier(nn.Module):
    """ResNet-18 backbone with a replacement classification head.

    Every parameter of the loaded ResNet-18 has ``requires_grad`` set to False.
    Its ``fc`` layer is then replaced by a Linear -> ReLU -> Dropout(0.5) ->
    Linear stack that ends in ``num_classes`` outputs; the new head is created
    after the freeze loop, so the loop does not touch it.

    Args:
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, num_classes):
        super().__init__()
        backbone = models.resnet18(pretrained=True)

        # Freeze everything that came with the loaded network.
        for weight in backbone.parameters():
            weight.requires_grad = False

        # Swap the stock fully-connected layer for the new head.
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Sequential(
            nn.Linear(head_inputs, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )
        self.base_model = backbone

        # Loss histories; both start empty.
        self.train_losses = []
        self.val_losses = []

    def forward(self, x):
        """Run ``x`` through the backbone (including the new head)."""
        logits = self.base_model(x)
        return logits

def inspect_data(train_loader):
    """Print the shapes of the first batch and its first label, then return the batch.

    Args:
        train_loader: Iterable whose items unpack into ``(images, labels)``.

    Returns:
        The ``(images, labels)`` pair taken from the first item of ``train_loader``.
    """
    batch_images, batch_labels = next(iter(train_loader))

    print("Images shape:", batch_images.shape)
    print("Labels shape:", batch_labels.shape)

    first_label = batch_labels[0]
    print("Sample label:", first_label)

    return batch_images, batch_labels

def _prepare_batch(inputs, labels, device):
    """Move one batch to ``device`` and coerce it to the shapes the model and loss expect."""
    inputs = inputs.float().to(device)
    labels = labels.long().to(device)

    # Ensure correct shape (B, C, H, W)
    if inputs.dim() == 3:
        inputs = inputs.unsqueeze(0)  # Add batch dimension
    elif inputs.dim() == 4 and inputs.shape[1] != 3:
        inputs = inputs.permute(0, 3, 1, 2)  # Adjust channel dimension

    # Flatten labels to one class index per sample. Unlike ``squeeze()``, this
    # keeps the batch dimension when the batch holds a single sample.
    return inputs, labels.reshape(-1)


def _mean_or_nan(total, count):
    """Return ``total / count``, or NaN when ``count`` is zero (e.g. an empty loader)."""
    return total / count if count else float('nan')


def train_model(model, train_loader, val_loader, num_epochs=10):
    """Train ``model`` with Adam + cross-entropy and validate after every epoch.

    The model is moved to CUDA when available, otherwise CPU. After each epoch
    the mean per-batch training and validation losses are appended to
    ``model.train_losses`` and ``model.val_losses``, and the losses plus the
    validation accuracy are printed.

    Args:
        model: Module returning class logits; must expose ``train_losses`` and
            ``val_losses`` lists.
        train_loader: Sized iterable of ``(inputs, labels)`` training batches.
        val_loader: Sized iterable of ``(inputs, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. Results are recorded on ``model`` and printed.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        # Training pass
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = _prepare_batch(inputs, labels, device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        train_loss = _mean_or_nan(running_loss, len(train_loader))
        model.train_losses.append(train_loss)

        # Validation
        model.eval()
        val_loss_sum = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = _prepare_batch(inputs, labels, device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss_sum += loss.item()
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_loss = _mean_or_nan(val_loss_sum, len(val_loader))
        model.val_losses.append(val_loss)
        val_accuracy = _mean_or_nan(100 * correct, total)

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Training Loss: {train_loss:.4f}')
        print(f'Validation Loss: {val_loss:.4f}')
        print(f'Validation Accuracy: {val_accuracy:.2f}%')

# Let's inspect the data first.
# This is a best-effort sanity check: a failure is reported but does not stop
# the rest of the cell from running.
try:
    images, labels = inspect_data(train_loader)
    print("\nData inspection successful!")
except Exception as e:
    print(f"Error during data inspection: {str(e)}")

# Initialize the model and print a layer summary (training happens in a later cell).
# num_classes counts every class name in the full dataset's label metadata,
# not only the classes present in the kept subset.
num_classes = len(dataset['labels'].info["class_names"])
model = PlantDiseaseClassifier(num_classes)
# The freshly built model has not been moved to a GPU yet, while torchsummary
# defaults to device="cuda"; request CPU explicitly so the summary's dummy
# input lives on the same device as the model on CUDA machines too.
summary(model, (3, 224, 224), device="cpu")




Images shape: torch.Size([32, 3, 224, 224])
Labels shape: torch.Size([32, 1])
Sample label: tensor([0])

Data inspection successful!




----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

Now we have our model in all of its glory, so let's train it!

In [5]:
train_model(model, train_loader, val_loader)

# Plot the per-epoch loss histories that train_model recorded on the model.
# The legend call is required for the `label=` text to actually be displayed.
plt.plot(model.train_losses, label='Training loss')
plt.plot(model.val_losses, label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

KeyboardInterrupt: 