In [1]:
##IMPORTS
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F

import torchvision
from torchvision import transforms
from torchvision import models
from torch.utils.data import random_split 

from PIL import Image
import pandas as pd
from tqdm import tqdm

In [2]:
#TEST VALID TRAIN SPLIT
# Load the benchmark index and drop the listed metadata columns.
df = pd.read_csv('/kaggle/input/packed-fruits-and-vegetables-recognition-benchmark/variety_classification.csv')
df = df.drop(columns=['layout_id', 'for_cropping', 'packed', 'amount', 'uniform_background', 'spoiled', 'weight', 'cam', 'city', 'crowd', 'date', 'simp_amount', 'shop'])


def _select_subset(frame, subset):
    """Return the rows of ``frame`` belonging to ``subset`` with relative image paths.

    Args:
        frame: DataFrame with 'subset' and 'variety_image_path' columns.
        subset: Value of the 'subset' column to keep (e.g. 'train' or 'test').

    Returns:
        An independent copy of the matching rows in which the leading
        'varieties_classification_dataset/<subset>/' prefix has been removed
        from 'variety_image_path'.
    """
    # Work on an explicit copy so the path rewrite never targets a slice of `frame`.
    part = frame[frame['subset'] == subset].copy()
    part['variety_image_path'] = part['variety_image_path'].str.replace(
        f'varieties_classification_dataset/{subset}/', '', regex=False)
    return part


train_df = _select_subset(df, 'train')
test_df = _select_subset(df, 'test')

# Class names (unique 'species' values across the whole index) and their integer ids.
labels = df['species'].unique()
label_to_int = {label: idx for idx, label in enumerate(labels)}
print(labels)

['apple' 'apricot' 'avocado' 'banana' 'beet' 'cabbage' 'carrot' 'corn'
 'cucumber' 'daikon' 'garlic' 'grape' 'grapefruit' 'kiwi' 'lemon' 'lime'
 'mango' 'melon' 'onion' 'orange' 'nectarine' 'pomelo' 'pear' 'pepper'
 'plum' 'pomegranate' 'potato' 'pumpkin' 'raddish' 'salad' 'tangerine'
 'tomato' 'watermelon' 'zucchini']


  df = pd.read_csv('/kaggle/input/packed-fruits-and-vegetables-recognition-benchmark/variety_classification.csv')


In [3]:
#IMAGE DATASET
class ImageDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a dataframe.

    Every row of ``dataframe`` must provide a 'variety_image_path' column
    (image path relative to ``root_dir``) and a 'species' column (class name).

    Args:
        dataframe: Table with one row per image.
        root_dir: Directory that the relative image paths are joined onto.
        transform: Optional callable applied to the loaded RGB PIL image.
        label_map: Optional ``{species name: integer id}`` mapping. When
            omitted, the notebook-level ``label_to_int`` mapping is used.
    """

    def __init__(self, dataframe, root_dir, transform=None, label_map=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform
        if label_map is None:
            # Use the ready-made notebook mapping instead of rebuilding it from
            # the global `labels`, so a later rebinding of that name cannot
            # corrupt the class ids.
            label_map = label_to_int
        # Private copy: later edits to the caller's dict do not affect this dataset.
        self.label_to_int = dict(label_map)

    def __len__(self):
        """Return the number of rows (images) in the dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the image at positional index ``idx``.

        Returns:
            Tuple ``(image, label)`` where ``image`` is the RGB image (after
            ``transform`` if one was given) and ``label`` is the integer id of
            the row's species.

        Raises:
            KeyError: If the row's species is missing from the label mapping.
        """
        row = self.dataframe.iloc[idx]
        img_name = os.path.join(self.root_dir, row['variety_image_path'])
        label = self.label_to_int[row['species']]

        # convert() returns a new image object, so the source file can be
        # closed as soon as the conversion is done.
        with Image.open(img_name) as source:
            image = source.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [4]:
#VGG16 - TRANSFORM
# Preprocessing pipeline for the VGG16 model: resize to 224x224, convert to a
# tensor, then normalize each channel with the ImageNet mean / std values.
_vgg_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
vgg_transform = transforms.Compose(_vgg_steps)

In [5]:
###MODEL - VGG16
# One output unit per species name collected in `labels`.
num_classes = len(labels)

# NOTE(review): no weights argument is passed to vgg16(), so the network
# presumably starts from random initialization -- confirm this is intended.
model = models.vgg16()

# Replace the last classifier layer with a small two-layer head that ends in
# `num_classes` outputs.
_head_layers = [
    nn.Dropout(0.5),
    nn.Linear(4096, 1024),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(1024, num_classes),
]
model.classifier[6] = nn.Sequential(*_head_layers)

# Train every parameter of the network (nothing is frozen).
for weight in model.parameters():
    weight.requires_grad = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_gpus = torch.cuda.device_count()
print(f"Available GPUs: {num_gpus}")

# Wrap in DataParallel only when more than one GPU is visible; the model is
# moved to `device` in either case.
if num_gpus > 1:
    model = nn.DataParallel(model)
model = model.to(device)

Available GPUs: 2


In [6]:
"""###CNN - TRANSFORM

cnn_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    #transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet normalization
])
"""

'###CNN - TRANSFORM\n\ncnn_transform = transforms.Compose([\n    transforms.Resize((64, 64)),\n    transforms.ToTensor(),\n    #transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet normalization\n])\n'

In [7]:
"""###CUSTOM CNN###

class CustomCNN(nn.Module):
    def __init__(self, num_classes=len(labels)):  # Number of classes for classification
        super(CustomCNN, self).__init__()
        
        # Define the layers of the model
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)  # Input is RGB, 3 channels
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        
        # Define the fully connected layers
        self.fc1 = nn.Linear(256 * 8 * 8, 1024) 
        self.fc2 = nn.Linear(1024, num_classes)
        
    def forward(self, x):
        # Apply the layers
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)  # Pool after each conv layer

        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)

        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2)

        x = torch.flatten(x, 1)  # Flatten the tensor for the fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        return x

model = CustomCNN()
for param in model.parameters():
    param.requires_grad = True 


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_gpus = torch.cuda.device_count()
print(f"Available GPUs: {num_gpus}")

if num_gpus > 1:
    model = nn.DataParallel(model)
    model = model.to(device)
else:
    model = model.to(device)"""

'###CUSTOM CNN###\n\nclass CustomCNN(nn.Module):\n    def __init__(self, num_classes=len(labels)):  # Number of classes for classification\n        super(CustomCNN, self).__init__()\n        \n        # Define the layers of the model\n        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)  # Input is RGB, 3 channels\n        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)\n        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)\n        \n        # Define the fully connected layers\n        self.fc1 = nn.Linear(256 * 8 * 8, 1024) \n        self.fc2 = nn.Linear(1024, num_classes)\n        \n    def forward(self, x):\n        # Apply the layers\n        x = F.relu(self.conv1(x))\n        x = F.max_pool2d(x, 2)  # Pool after each conv layer\n\n        x = F.relu(self.conv2(x))\n        x = F.max_pool2d(x, 2)\n\n        x = F.relu(self.conv3(x))\n        x = F.max_pool2d(x, 2)\n\n        x = torch.flatten(x, 1)  # Flatten the tensor for the fully connected lay

In [8]:
##DATASET AND LOADER
tf = vgg_transform

# Fixed seed so the train/validation partition is identical on every run.
SPLIT_SEED = 42

# 80% of the labelled training images are used for training, the rest for validation.
train_valid_dataset = ImageDataset(dataframe=train_df, root_dir='/kaggle/input/packed-fruits-and-vegetables-recognition-benchmark/train/train', transform = tf)
train_size = int(0.8 * len(train_valid_dataset))
val_size = len(train_valid_dataset) - train_size
train_dataset, val_dataset = random_split(
    train_valid_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
test_dataset = ImageDataset(dataframe=test_df, root_dir='/kaggle/input/packed-fruits-and-vegetables-recognition-benchmark/test/test', transform = tf)


# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=4)

In [9]:
###PARAMS
# Number of passes over the training data.
num_epochs = 10

# Cross-entropy loss over the class logits produced by the model.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters, with a StepLR schedule (step_size=5,
# gamma=0.1) attached to it.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)
scheduler = StepLR(optimizer=optimizer, step_size=5, gamma=0.1)

In [10]:
###TRAIN
# One training pass and one validation pass per epoch. Batches are unpacked as
# (inputs, targets) so the notebook-level `labels` array of class names is not
# overwritten by a tensor.
for epoch in range(num_epochs):
    # Training phase
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0

    with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Training)", unit="batch") as tepoch:
        # Count batches explicitly: tqdm only refreshes its own `n` counter
        # when it redraws the bar, so it can lag behind the real batch count.
        for batch_idx, (inputs, targets) in enumerate(tepoch, start=1):
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

            # Running mean of the per-batch losses and running accuracy in percent.
            tepoch.set_postfix(loss=train_loss / batch_idx, accuracy=100 * correct / total)

    train_accuracy = 100 * correct / total

    # Validation phase
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        with tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Validation)", unit="batch") as vepoch:
            for batch_idx, (inputs, targets) in enumerate(vepoch, start=1):
                inputs, targets = inputs.to(device), targets.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, targets)

                val_loss += loss.item()
                predicted = outputs.argmax(dim=1)
                val_total += targets.size(0)
                val_correct += (predicted == targets).sum().item()

                vepoch.set_postfix(loss=val_loss / batch_idx, accuracy=100 * val_correct / val_total)

    val_accuracy = 100 * val_correct / val_total

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Train Loss: {train_loss/len(train_loader):.4f}, Train Accuracy: {train_accuracy:.2f}%, "
          f"Val Loss: {val_loss/len(val_loader):.4f}, Val Accuracy: {val_accuracy:.2f}%")

    # Step the learning rate scheduler once per epoch
    scheduler.step()


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Epoch 1/10 (Training): 100%|██████████| 510/510 [10:17<00:00,  1.21s/batch, accuracy=16.6, loss=2.94]
Epoch 1/10 (Validation): 100%|██████████| 510/510 [02:40<00:00,  3.18batch/s, accuracy=24.2, loss=2.47]


Epoch 1/10, Train Loss: 2.9407, Train Accuracy: 16.63%, Val Loss: 2.4719, Val Accuracy: 24.22%


Epoch 2/10 (Training): 100%|██████████| 510/510 [10:35<00:00,  1.25s/batch, accuracy=33.7, loss=2.11]
Epoch 2/10 (Validation): 100%|██████████| 510/510 [02:17<00:00,  3.70batch/s, accuracy=49, loss=1.58]


Epoch 2/10, Train Loss: 2.1091, Train Accuracy: 33.70%, Val Loss: 1.5815, Val Accuracy: 48.98%


Epoch 3/10 (Training): 100%|██████████| 510/510 [09:59<00:00,  1.18s/batch, accuracy=53.5, loss=1.44]
Epoch 3/10 (Validation): 100%|██████████| 510/510 [02:26<00:00,  3.49batch/s, accuracy=67.2, loss=1.03]


Epoch 3/10, Train Loss: 1.4392, Train Accuracy: 53.53%, Val Loss: 1.0311, Val Accuracy: 67.17%


Epoch 4/10 (Training): 100%|██████████| 510/510 [10:33<00:00,  1.24s/batch, accuracy=69.7, loss=0.945]
Epoch 4/10 (Validation): 100%|██████████| 510/510 [02:15<00:00,  3.76batch/s, accuracy=79.4, loss=0.638]


Epoch 4/10, Train Loss: 0.9450, Train Accuracy: 69.66%, Val Loss: 0.6381, Val Accuracy: 79.43%


Epoch 5/10 (Training): 100%|██████████| 510/510 [10:07<00:00,  1.19s/batch, accuracy=79.9, loss=0.632]
Epoch 5/10 (Validation): 100%|██████████| 510/510 [02:14<00:00,  3.79batch/s, accuracy=84.2, loss=0.503]


Epoch 5/10, Train Loss: 0.6318, Train Accuracy: 79.86%, Val Loss: 0.5029, Val Accuracy: 84.18%


Epoch 6/10 (Training): 100%|██████████| 510/510 [10:06<00:00,  1.19s/batch, accuracy=90.4, loss=0.298]
Epoch 6/10 (Validation): 100%|██████████| 510/510 [02:18<00:00,  3.69batch/s, accuracy=90.1, loss=0.316]


Epoch 6/10, Train Loss: 0.2978, Train Accuracy: 90.39%, Val Loss: 0.3164, Val Accuracy: 90.05%


Epoch 7/10 (Training): 100%|██████████| 510/510 [10:23<00:00,  1.22s/batch, accuracy=92.3, loss=0.238]
Epoch 7/10 (Validation): 100%|██████████| 510/510 [02:18<00:00,  3.69batch/s, accuracy=90.9, loss=0.294]


Epoch 7/10, Train Loss: 0.2382, Train Accuracy: 92.29%, Val Loss: 0.2937, Val Accuracy: 90.90%


Epoch 8/10 (Training): 100%|██████████| 510/510 [10:17<00:00,  1.21s/batch, accuracy=93.4, loss=0.205]
Epoch 8/10 (Validation): 100%|██████████| 510/510 [02:32<00:00,  3.35batch/s, accuracy=91.5, loss=0.276]


Epoch 8/10, Train Loss: 0.2046, Train Accuracy: 93.43%, Val Loss: 0.2762, Val Accuracy: 91.51%


Epoch 9/10 (Training): 100%|██████████| 510/510 [10:12<00:00,  1.20s/batch, accuracy=94.3, loss=0.177]
Epoch 9/10 (Validation): 100%|██████████| 510/510 [02:23<00:00,  3.55batch/s, accuracy=92, loss=0.261]


Epoch 9/10, Train Loss: 0.1768, Train Accuracy: 94.30%, Val Loss: 0.2615, Val Accuracy: 92.01%


Epoch 10/10 (Training): 100%|██████████| 510/510 [10:09<00:00,  1.19s/batch, accuracy=94.9, loss=0.157]
Epoch 10/10 (Validation): 100%|██████████| 510/510 [02:16<00:00,  3.73batch/s, accuracy=92.6, loss=0.245]

Epoch 10/10, Train Loss: 0.1573, Train Accuracy: 94.93%, Val Loss: 0.2446, Val Accuracy: 92.62%





In [11]:
#TEST
# Evaluate the trained model on the held-out test loader. Batches are unpacked
# as (inputs, targets) so the notebook-level `labels` array of class names is
# not overwritten by a tensor.
model.eval()  # Set the model to evaluation mode
test_loss = 0.0
correct = 0
total = 0

with torch.no_grad():  # No need to compute gradients during evaluation
    with tqdm(test_loader, desc="Testing", unit="batch") as tepoch:
        # Count batches explicitly: tqdm only refreshes its own `n` counter
        # when it redraws the bar, so it can lag behind the real batch count.
        for batch_idx, (inputs, targets) in enumerate(tepoch, start=1):
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass: Get the predictions from the model
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()

            # Calculate accuracy
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

            # Update progress bar with running loss and accuracy
            tepoch.set_postfix(loss=test_loss / batch_idx, accuracy=100 * correct / total)

# Calculate final test accuracy (percent) and mean per-batch loss
test_accuracy = 100 * correct / total
test_loss /= len(test_loader)

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

Testing: 100%|██████████| 172/172 [03:25<00:00,  1.20s/batch, accuracy=91.7, loss=0.288]

Test Loss: 0.2879, Test Accuracy: 91.69%



