# Implement MobileNet V1 using PyTorch Lightning Module


In [None]:
# Standard libraries
import os
import numpy as np
import random
import requests

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

# Torchvision
import torchvision
from torchvision import transforms

# PyTorch Lightning
try:
    import pytorch_lightning as pl
except ModuleNotFoundError:  # Install PyTorch Lightning if not installed
    !pip install pytorch-lightning
    import pytorch_lightning as pl
from pytorch_lightning import Trainer

# Other
from pathlib import Path
import tarfile

File Structure:
```
root
├── Data
└── Checkpoint
```


In [None]:
# Reproducibility: fix the global seed through Lightning.
pl.seed_everything(42)

# Training hyper-parameters.
# NOTE(review): INPUT_SIZE and HIDDEN_SIZE are not referenced by any of the
# visible cells -- confirm whether they are still needed.
INPUT_SIZE = 28 * 28  # 784
HIDDEN_SIZE = 500
NUM_CLASSES = 102
NUM_EPOCHS = 5
BATCH_SIZE = 100
LEARNING_RATE = 1e-3

# Directory layout (see "File Structure" above).
ROOT_DIR = Path("/root/jupyter_projects")  # Your working directory.
DATA_DIR = ROOT_DIR.joinpath("Data")  # Where the data are/should be downloaded.
CHECKPOINT_DIR = ROOT_DIR.joinpath("Checkpoint")  # Where the pretrained models are saved.
DATASET_DIR = DATA_DIR.joinpath("oxford-102-flowers")  # Root of the extracted dataset.

# Label files, one per split.
TRAIN_LABEL_PATH = DATASET_DIR.joinpath("train.txt")
VAL_LABEL_PATH = DATASET_DIR.joinpath("valid.txt")
TEST_LABEL_PATH = DATASET_DIR.joinpath("test.txt")

# Dataset archive: remote location and local file name.
URL = "https://www.dropbox.com/s/hqiryv0g62lp878/oxford-102-flowers.tgz?raw=1"
FILE_NAME = "Flowers102.tgz"

In [None]:
class CreateDataset:
    """Download, extract and index a dataset archive.

    Parameters
    ----------
    url : str
        Dataset download link.
    file_name : str
        The name given to the downloaded archive file.
    data_dir : str or pathlib.Path
        Directory the archive is downloaded to and extracted into.
    """

    def __init__(self, url, file_name, data_dir):
        self.url = url  # Dataset download link.
        self.file_name = file_name  # The name of the downloaded file.
        self.data_dir = Path(data_dir)  # Accept plain strings as well as Path objects.
        self.file_path = self.data_dir / file_name  # Path to download file.

    def download_dataset(self):
        """
        Download the archive at ``self.url`` to ``self.file_path``.

        Raises
        ------
        Exception
            Any error raised while creating the directory, requesting the URL
            (including a non-2xx HTTP status) or writing the file is re-raised
            after an error message is printed.
        """
        try:
            # The target directory may not exist yet on a fresh machine.
            self.data_dir.mkdir(parents=True, exist_ok=True)
            # Stream to disk so the whole archive is never held in memory.
            with requests.get(self.url, allow_redirects=True, stream=True) as response:
                # Fail early instead of saving an HTML error page as the archive.
                response.raise_for_status()
                with open(self.file_path, 'wb') as archive:
                    for chunk in response.iter_content(chunk_size=1 << 20):
                        archive.write(chunk)
            print('[Success] Dataset downloaded successfully')
        except Exception:
            print('[Error] Dataset downloaded failed')
            raise

    def unzip_dataset(self):
        """
        Extract the downloaded archive into ``self.data_dir`` and delete it.

        Raises
        ------
        Exception
            Any error raised while extracting or deleting the archive is
            re-raised after an error message is printed.
        """
        try:
            with tarfile.open(self.file_path, "r") as tar_ref:
                # NOTE(review): extractall() trusts the member paths stored in
                # the archive; consider an extraction filter if the download
                # source is not fully trusted.
                tar_ref.extractall(self.data_dir)
            print('[Success] Dataset extracted successfully')
            self.file_path.unlink()  # Remove the archive file.
        except Exception:
            print('[Error] Dataset extracted failed')
            raise

    def create_dataset(self):
        """
        Download and extract the dataset unless it is already present.

        The presence check uses the module-level ``DATASET_DIR`` constant.
        """
        if not DATASET_DIR.is_dir():  # Dataset is not downloaded yet.
            self.download_dataset()
            self.unzip_dataset()
        else:
            print('[Alert] Dataset already exist')

    def dataset_label(self, label_path):
        """
        This function reads the label file and generates the path and class of the data in the dataset

        Each non-empty line is expected to hold a relative data path followed
        by an integer class, separated by whitespace. Blank lines are skipped.

        Parameters
        ----------
        label_path : str or pathlib.Path
            path of the label file(train label, val label, test label)

        Returns
        -------
        path_list : list
            path of all the data in the dataset
        class_list : list
            class of all the data in the dataset

        Raises
        ------
        ValueError
            If a non-empty line does not contain both a path and an integer
            class.
        """
        path_list = []  # Store the path of images.
        class_list = []  # Store the class of images.
        with open(label_path, encoding="utf-8") as file:
            for line in file:
                line = line.strip()
                if not line:  # Tolerate blank / trailing empty lines.
                    continue
                # Split on the last whitespace run so repeated separators
                # (or spaces inside the path) do not break unpacking.
                data_path, data_class = line.rsplit(maxsplit=1)
                # NOTE(review): paths are resolved against self.data_dir, not
                # the directory containing the label file -- confirm this
                # matches the layout of the extracted archive.
                path_list.append(str(self.data_dir / data_path))
                class_list.append(int(data_class))
        return path_list, class_list

In [1]:
Flowers102 = CreateDataset(URL, FILE_NAME, DATA_DIR)

# Make sure the dataset is on disk before its label files are read;
# create_dataset() does nothing but print a notice when it already exists.
Flowers102.create_dataset()

# Image paths and integer classes for each split.
train_path_list, train_class_list = Flowers102.dataset_label(TRAIN_LABEL_PATH)
val_path_list, val_class_list = Flowers102.dataset_label(VAL_LABEL_PATH)
test_path_list, test_class_list = Flowers102.dataset_label(TEST_LABEL_PATH)

NameError: name 'CreateDataset' is not defined

In [None]:
class Flowers102Dataset(data.Dataset):
    """Map-style dataset that loads images from file paths.

    Parameters
    ----------
    data_path : list
        path of every image in the dataset
    data_class : list
        class of every image, in the same order as ``data_path``
    transform : callable, optional
        applied to the float image tensor before it is returned
    """

    def __init__(self, data_path, data_class, transform=None):
        self.data_path = data_path
        self.data_class = data_class
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.data_path)

    def __getitem__(self, idx):
        """
        Load one sample.

        Parameters
        ----------
        idx : int
            index of the sample

        Returns
        -------
        image : torch.Tensor
            float image with values scaled to [0, 1], after ``transform``
            (if any) has been applied
        label
            the entry of ``data_class`` at ``idx``
        """
        image_filepath = self.data_path[idx]

        image = torchvision.io.read_image(image_filepath)  # uint8 type, values in [0, 255]
        # Scale to [0, 1] while converting to float: mean/std normalization
        # constants below 1 (as used for this dataset) assume that range.
        image = image.to(torch.float) / 255.0

        label = self.data_class[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [None]:
class MobileNetV1(pl.LightningModule):
    """MobileNet V1 classifier implemented as a PyTorch Lightning module.

    The network is one standard 3x3 convolution followed by 13 depth-wise
    separable convolution blocks, global average pooling and a linear
    classification layer. The module also provides its own training and
    validation dataloaders, built from the module-level path/class lists.

    Parameters
    ----------
    in_channels : int
        number of channels of the input images
    num_classes : int
        number of output classes
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()

        def conv_bn(ch_in, ch_out, stride):  # convolution batch normalization
            return nn.Sequential(
                nn.Conv2d(ch_in, ch_out, 3, stride, 1, bias=False),
                nn.BatchNorm2d(ch_out),
                nn.ReLU(inplace=True)
            )

        def conv_dw(ch_in, ch_out, stride):  # depth-wise separable convolution
            return nn.Sequential(
                # Depth-wise: one 3x3 filter per input channel (groups=ch_in).
                nn.Conv2d(ch_in, ch_in, 3, stride, 1, groups=ch_in, bias=False),
                nn.BatchNorm2d(ch_in),
                nn.ReLU(inplace=True),
                # Point-wise: 1x1 convolution that mixes the channels.
                nn.Conv2d(ch_in, ch_out, 1, 1, 0, bias=False),
                nn.BatchNorm2d(ch_out),
                nn.ReLU(inplace=True),
            )

        # Feature extractor; ends with a (N, 1024, 1, 1) tensor.
        self.model = nn.Sequential(
            conv_bn(in_channels, 32, 2),
            conv_dw(32, 64, 1),
            conv_dw(64, 128, 2),
            conv_dw(128, 128, 1),
            conv_dw(128, 256, 2),
            conv_dw(256, 256, 1),
            conv_dw(256, 512, 2),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 1024, 2),
            conv_dw(1024, 1024, 1),
            nn.AdaptiveAvgPool2d(1)
        )

        # Classification head used by forward().
        self.fc = nn.Linear(1024, num_classes)

        # Training-time preprocessing. The mean/std constants are below 1, so
        # Normalize expects float images scaled to [0, 1].
        self.img_transforms = transforms.Compose([
            transforms.CenterCrop(227),
            transforms.RandomHorizontalFlip(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ])

        # Validation-time preprocessing: same as training minus the random
        # flip, so validation metrics are deterministic.
        self.val_transforms = transforms.Compose([
            transforms.CenterCrop(227),
            transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ])

    def forward(self, x):
        """
        Compute class logits for a batch of images.

        Parameters
        ----------
        x : torch.Tensor
            batch of images with ``in_channels`` channels

        Returns
        -------
        torch.Tensor
            logits of shape (batch, num_classes)
        """
        x = self.model(x)
        x = torch.flatten(x, 1)  # (N, 1024, 1, 1) -> (N, 1024)
        x = self.fc(x)
        return x

    def training_step(self, batch, batch_idx):
        """
        Run one training batch and log its cross-entropy loss as ``train_loss``.

        Returns
        -------
        dict
            ``{"loss": loss}``
        """
        images, labels = batch

        # Forward pass through the full network (features + classifier).
        outputs = self(images)
        loss = F.cross_entropy(outputs, labels)

        self.log('train_loss', loss)
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        """
        Run one validation batch and log its cross-entropy loss as ``val_loss``.

        Returns
        -------
        dict
            ``{"val_loss": loss}``
        """
        images, labels = batch

        # Forward pass through the full network (features + classifier).
        outputs = self(images)
        loss = F.cross_entropy(outputs, labels)

        self.log('val_loss', loss)
        return {"val_loss": loss}

    def train_dataloader(self):
        """Return the shuffled training loader built from the module-level train lists."""
        train_dataset = Flowers102Dataset(
            data_path=train_path_list,
            data_class=train_class_list,
            transform=self.img_transforms
        )
        train_loader = data.DataLoader(
            dataset=train_dataset,
            batch_size=BATCH_SIZE,
            shuffle=True,
        )
        return train_loader

    def val_dataloader(self):
        """Return the validation loader built from the module-level validation lists."""
        val_dataset = Flowers102Dataset(
            data_path=val_path_list,
            data_class=val_class_list,
            transform=self.val_transforms
        )
        val_loader = data.DataLoader(
            dataset=val_dataset,
            batch_size=BATCH_SIZE,
            shuffle=False,  # Order is irrelevant for evaluation.
        )
        return val_loader

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with ``LEARNING_RATE``."""
        return torch.optim.Adam(self.parameters(), lr=LEARNING_RATE)

NameError: name 'pl' is not defined

In [None]:
def main():
    """Build a 3-channel MobileNetV1 for NUM_CLASSES classes and fit it for NUM_EPOCHS epochs."""
    net = MobileNetV1(in_channels=3, num_classes=NUM_CLASSES)
    # No dataloaders are passed in, so fit() receives only the module.
    Trainer(max_epochs=NUM_EPOCHS).fit(net)

In [None]:
# Start training only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()