In [1]:
import os
from collections import defaultdict
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image, ImageOps, ImageFile
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.utils.data import DataLoader


In [2]:
# Training split root; the names of its immediate sub-directories are
# collected below as the class list.
base_dir = 'Archive/Partitioned_Dataset/train'
path = Path(base_dir)

# iterdir() yields entries in arbitrary, filesystem-dependent order, so sort
# explicitly. torchvision's ImageFolder also assigns label indices from the
# sorted folder names, so this list lines up with those indices.
classes = sorted(entry.name for entry in path.iterdir() if entry.is_dir())
classes

['Agaricus',
 'Amanita',
 'Boletus',
 'Cortinarius',
 'Entoloma',
 'Hygrocybe',
 'Lactarius',
 'Russula',
 'Suillus']

In [None]:
# Preprocessing pipelines, one per split. Both finish with ToTensor followed
# by the same per-channel Normalize; only the training pipeline is prefixed
# with a random horizontal flip.
data_transforms = {
    split: transforms.Compose([
        *augmentations,
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    for split, augmentations in (
        ('train', [transforms.RandomHorizontalFlip()]),
        ('val', []),
    )
}

# Dataset locations: <base_dir>/train and <base_dir>/validate
base_dir = 'Archive/Partitioned_Dataset'
train_dir, val_dir = (
    os.path.join(base_dir, folder) for folder in ('train', 'validate')
)

# One ImageFolder per split, each wired to its own pipeline
train_dataset = datasets.ImageFolder(train_dir, transform=data_transforms['train'])
val_dataset = datasets.ImageFolder(val_dir, transform=data_transforms['val'])

# Batched loaders; only the training loader shuffles
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
    persistent_workers=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    persistent_workers=True,
)

In [21]:
import torch
import torch.nn as nn
import torchvision.models as models
import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms

class ResNet18FeatureExtractor(pl.LightningModule):
    """ResNet-18 used as a fixed feature extractor with a new linear head.

    All parameters of the pretrained network have ``requires_grad`` turned
    off; the final ``fc`` layer is then replaced by a fresh ``nn.Linear`` and
    only that layer's parameters are passed to the optimizer.

    Args:
        num_classes: Number of outputs of the replacement ``fc`` layer.
        lr: Learning rate for SGD.
        momentum: Momentum for SGD.
    """

    def __init__(self, num_classes, lr=0.001, momentum=0.9):
        super().__init__()
        self.lr = lr
        self.momentum = momentum
        # `pretrained=True` is deprecated in torchvision >= 0.13; name the
        # ImageNet checkpoint explicitly through the weights enum instead.
        self.model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        # Freeze all the layers
        for param in self.model.parameters():
            param.requires_grad = False
        # Replace the final fully connected layer (new layers are trainable)
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, num_classes)
        self.criterion = nn.CrossEntropyLoss()
        # Per-batch validation statistics, reset at the end of every
        # validation epoch.
        self.val_losses = []
        self.val_accs = []
        self.val_batch_sizes = []

    def forward(self, x):
        """Return the wrapped network's output for ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Return the cross-entropy loss of one ``(inputs, labels)`` batch."""
        inputs, labels = batch
        outputs = self(inputs)
        return self.criterion(outputs, labels)

    def validation_step(self, batch, batch_idx):
        """Record loss, accuracy and size of one validation batch."""
        inputs, labels = batch
        outputs = self(inputs)
        loss = self.criterion(outputs, labels)
        preds = torch.argmax(outputs, dim=1)
        acc = torch.sum(preds == labels).item() / len(labels)
        self.val_losses.append(loss.detach())
        self.val_accs.append(acc)
        # Needed so the epoch average can weight each batch by its size.
        self.val_batch_sizes.append(len(labels))

    def on_validation_epoch_end(self):
        """Log ``val_loss`` / ``val_acc`` as sample-weighted epoch averages.

        A plain mean over per-batch values over-weights a smaller final
        batch; weighting by batch size gives the per-sample average.
        """
        if not self.val_losses:
            # Nothing was validated; torch.stack would fail on an empty list.
            return
        losses = torch.stack(self.val_losses).float()
        weights = torch.tensor(
            self.val_batch_sizes, dtype=losses.dtype, device=losses.device
        )
        accs = torch.tensor(self.val_accs, dtype=losses.dtype, device=losses.device)
        total = weights.sum()
        self.log('val_loss', (losses * weights).sum() / total)
        self.log('val_acc', (accs * weights).sum() / total)
        self.val_losses.clear()
        self.val_accs.clear()
        self.val_batch_sizes.clear()

    def configure_optimizers(self):
        """Return SGD over the parameters of the replacement ``fc`` layer only."""
        return torch.optim.SGD(
            self.model.fc.parameters(), lr=self.lr, momentum=self.momentum
        )

# Data preparation
# NOTE(review): `transform` is not referenced by any other visible cell; the
# datasets are built from `data_transforms`. It is kept only so code outside
# this view that may use it keeps working.
transform = transforms.Compose([
    #transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Keep only the single checkpoint with the highest logged `val_acc`.
checkpoint_callback = ModelCheckpoint(
    monitor='val_acc',
    mode='max',
    save_top_k=1,
    verbose=True,
    dirpath='checkpoints',
    filename='mushroom-classifier-{epoch:02d}-{val_acc:.2f}'
)

# Training
# Size the head from the dataset instead of hard-coding the class count.
model = ResNet18FeatureExtractor(num_classes=len(train_dataset.classes))
trainer = pl.Trainer(max_epochs=10, callbacks=[checkpoint_callback])
trainer.fit(model, train_loader, val_loader)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | ResNet           | 11.2 M | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
4.6 K     Trainable params
11.2 M    Non-trainable params
11.2 M    Total params
44.725    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Fine-Tuning

In [None]:
class ResNet18FineTuner(pl.LightningModule):
    """ResNet-18 in which only ``layer4`` and a new ``fc`` head are trained.

    The pretrained network is frozen first, then ``layer4`` is unfrozen and
    ``fc`` is replaced by a fresh ``nn.Linear``. The optimizer receives only
    the parameters that still require gradients.

    Args:
        num_classes: Number of outputs of the replacement ``fc`` layer.
        lr: Learning rate for SGD.
        momentum: Momentum for SGD.
    """

    def __init__(self, num_classes, lr=0.0001, momentum=0.9):
        super().__init__()
        self.lr = lr
        self.momentum = momentum
        # `pretrained=True` is deprecated in torchvision >= 0.13; name the
        # ImageNet checkpoint explicitly through the weights enum instead.
        self.model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        # Freeze everything first. Without this step every parameter already
        # requires gradients, so "unfreezing" below would change nothing and
        # the whole network would be trained.
        for param in self.model.parameters():
            param.requires_grad = False
        # Unfreeze the last residual stage
        for param in self.model.layer4.parameters():
            param.requires_grad = True
        # Replace the classifier head; a new nn.Linear is trainable by
        # default, so no explicit unfreezing of `fc` is needed.
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, num_classes)
        self.criterion = nn.CrossEntropyLoss()
        # Per-batch validation statistics, reset at the end of every
        # validation epoch.
        self.val_losses = []
        self.val_accs = []
        self.val_batch_sizes = []

    def forward(self, x):
        """Return the wrapped network's output for ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Return the cross-entropy loss of one ``(inputs, labels)`` batch."""
        inputs, labels = batch
        outputs = self(inputs)
        return self.criterion(outputs, labels)

    def validation_step(self, batch, batch_idx):
        """Record loss, accuracy and size of one validation batch."""
        inputs, labels = batch
        outputs = self(inputs)
        loss = self.criterion(outputs, labels)
        preds = torch.argmax(outputs, dim=1)
        acc = torch.sum(preds == labels).item() / len(labels)
        self.val_losses.append(loss.detach())
        self.val_accs.append(acc)
        # Needed so the epoch average can weight each batch by its size.
        self.val_batch_sizes.append(len(labels))

    def on_validation_epoch_end(self):
        """Log ``val_loss`` / ``val_acc`` as sample-weighted epoch averages.

        A plain mean over per-batch values over-weights a smaller final
        batch; weighting by batch size gives the per-sample average.
        """
        if not self.val_losses:
            # Nothing was validated; torch.stack would fail on an empty list.
            return
        losses = torch.stack(self.val_losses).float()
        weights = torch.tensor(
            self.val_batch_sizes, dtype=losses.dtype, device=losses.device
        )
        accs = torch.tensor(self.val_accs, dtype=losses.dtype, device=losses.device)
        total = weights.sum()
        self.log('val_loss', (losses * weights).sum() / total)
        self.log('val_acc', (accs * weights).sum() / total)
        self.val_losses.clear()
        self.val_accs.clear()
        self.val_batch_sizes.clear()

    def configure_optimizers(self):
        """Return SGD over the parameters that still require gradients."""
        trainable = [p for p in self.model.parameters() if p.requires_grad]
        return torch.optim.SGD(trainable, lr=self.lr, momentum=self.momentum)

# Training
# Size the head from the dataset: the hard-coded 10 did not match the nine
# class folders of the training split.
# NOTE(review): this builds a new model from the ImageNet weights; it does not
# continue from the head trained in the feature-extraction run.
model = ResNet18FineTuner(num_classes=len(train_dataset.classes))
trainer = pl.Trainer(max_epochs=5)  # Additional epochs for fine-tuning
trainer.fit(model, train_loader, val_loader)


In [9]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.utils.data import DataLoader

In [13]:
# Preprocessing per split: training adds a random horizontal flip in front of
# the tensor conversion + normalisation that validation uses on its own.
data_transforms = dict(
    train=transforms.Compose(
        [
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    ),
    val=transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    ),
)

# Where the partitioned images live
base_dir = 'Archive/Partitioned_Dataset'
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'validate')

# Training split: folder-labelled dataset plus a shuffling loader
train_dataset = datasets.ImageFolder(train_dir, transform=data_transforms['train'])
train_loader = DataLoader(
    train_dataset, batch_size=32, shuffle=True, num_workers=4, persistent_workers=True
)

# Validation split: same construction, fixed order
val_dataset = datasets.ImageFolder(val_dir, transform=data_transforms['val'])
val_loader = DataLoader(
    val_dataset, batch_size=32, shuffle=False, num_workers=4, persistent_workers=True
)

# Define the model
class MushroomClassifier(pl.LightningModule):
    """ResNet-18 with a new linear head, trained end to end with Adam.

    No parameter is frozen: the optimizer receives ``self.parameters()``.

    Args:
        num_classes: Number of outputs of the replacement ``fc`` layer.
        lr: Learning rate for Adam.
    """

    def __init__(self, num_classes=9, lr=1e-4):
        super().__init__()
        self.lr = lr
        # `weights` expects a weights enum (or None); a bare boolean only
        # works through torchvision's deprecated legacy handling. Name the
        # ImageNet checkpoint explicitly instead.
        self.model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, num_classes)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return the wrapped network's output for ``x``."""
        return self.model(x)

    def _loss_and_accuracy(self, batch):
        """Return ``(loss, accuracy)`` for one ``(inputs, labels)`` batch."""
        inputs, labels = batch
        outputs = self(inputs)
        loss = self.criterion(outputs, labels)
        acc = (outputs.argmax(dim=1) == labels).float().mean()
        return loss, acc

    def training_step(self, batch, batch_idx):
        """Log ``train_loss`` / ``train_acc`` and return the loss."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` / ``val_acc`` for one validation batch."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)

    def configure_optimizers(self):
        """Return Adam over all parameters of the module."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

# Seed the random number generators before the model is built, so the new
# head's initialisation, batch shuffling and random flips are repeatable
# between runs. `workers=True` extends the seeding to DataLoader workers.
pl.seed_everything(42, workers=True)

# Initialize the model, sized from the class folders found by ImageFolder
model = MushroomClassifier(num_classes=len(train_dataset.classes))

# Keep only the single checkpoint with the highest logged `val_acc`.
checkpoint_callback = ModelCheckpoint(
    monitor='val_acc',
    mode='max',
    save_top_k=1,
    verbose=True,
    dirpath='checkpoints',
    filename='mushroom-classifier-{epoch:02d}-{val_acc:.2f}'
)

# Initialize the Trainer
trainer = Trainer(
    max_epochs=25,
    accelerator='cpu',  # Explicitly specify CPU usage
    devices=1,          # Use one CPU device
    callbacks=[checkpoint_callback]
)
# Train the model
trainer.fit(model, train_loader, val_loader)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | ResNet           | 11.2 M | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.725    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 0, global step 284: 'val_acc' reached 0.81189 (best 0.81189), saving model to 'C:\\Users\\Gintare\\Desktop\\Study\\Module4\\Sprint1\\Mushrooms_classification\\checkpoints\\mushroom-classifier-epoch=00-val_acc=0.81.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 1, global step 568: 'val_acc' reached 0.88165 (best 0.88165), saving model to 'C:\\Users\\Gintare\\Desktop\\Study\\Module4\\Sprint1\\Mushrooms_classification\\checkpoints\\mushroom-classifier-epoch=01-val_acc=0.88.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 2, global step 852: 'val_acc' reached 0.88992 (best 0.88992), saving model to 'C:\\Users\\Gintare\\Desktop\\Study\\Module4\\Sprint1\\Mushrooms_classification\\checkpoints\\mushroom-classifier-epoch=02-val_acc=0.89.ckpt' as top 1
