In [None]:
from google.colab import drive
# Flush pending writes and detach any existing Google Drive mount.
# NOTE(review): presumably run to clear a stale mount before mounting again in the next cell — confirm.
drive.flush_and_unmount()

Drive not mounted, so nothing to flush and unmount.


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the checkpoint and dataset paths used
# later in this notebook all live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

# Root directory on Google Drive under which every run's checkpoints are stored.
checkpoints = '/content/drive/MyDrive/Colab Notebooks/MobileNetV2/checkpoints/'

# exist_ok=True makes this cell idempotent (safe to re-run) and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(checkpoints, exist_ok=True)

In [None]:
!mkdir "/content/drive/MyDrive/Colab Notebooks/MobileNetV2/checkpoints/MobileNetV2_9/"

In [None]:
pip install torchinfo

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
import numpy as np
from torchvision import models
from torchinfo import summary

In [None]:
pip install wandb

In [None]:
# Log this runtime in to Weights & Biases through the wandb command-line tool.
!wandb login

In [None]:
# Run the wandb login again with --relogin.
# NOTE(review): presumably only needed to replace previously cached credentials — confirm, otherwise skip this cell.
!wandb login --relogin

In [None]:
import wandb

# Seed PyTorch and NumPy with the same value before any data split or model
# construction so runs started from a fresh kernel are repeatable.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Identification of this experiment in Weights & Biases.
wandb_run_settings = dict(
    project="MobileNetV2",
    group="MobileNetV2",
    name="9",
    notes="MobileNetV2_9",
)

# Start the tracking run; later cells log metrics to it.
wandb.init(**wandb_run_settings)

In [None]:
# Per-channel normalisation statistics shared by the training and evaluation pipelines.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)
DATASET_ROOT = "/content/drive/MyDrive/Colab Notebooks/VGGNet/dataset/"

# Training pipeline: random flip + random 32x32 crop with 4 px padding, then normalise.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# Evaluation pipeline (validation and test): no augmentation, only normalisation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# Two views of the SAME training images: one augmented (for training) and one
# clean (for validation). random_split() only yields index subsets of a single
# dataset, so splitting the augmented dataset directly would also augment the
# validation images and make validation loss/accuracy noisy and pessimistic.
full_train_dataset = datasets.CIFAR10(root=DATASET_ROOT,
                                      train=True,
                                      download=True,
                                      transform=transform)

# The files are already on disk after the call above, so no second download.
# This keeps a second in-memory copy of the training split.
full_train_eval_view = datasets.CIFAR10(root=DATASET_ROOT,
                                        train=True,
                                        download=False,
                                        transform=transform_test)

test_dataset = datasets.CIFAR10(root=DATASET_ROOT,
                                train=False,
                                download=True,
                                transform=transform_test)

num_classes = len(full_train_dataset.classes)

# 80 % / 20 % train / validation split.
train_size = int(0.8 * len(full_train_dataset))
validation_size = len(full_train_dataset) - train_size

# A single random_split call, exactly as before, so the split indices for a
# given seed are unchanged; only the transform applied to the validation
# indices differs.
train_dataset, validation_split = random_split(full_train_dataset, [train_size, validation_size])
validation_dataset = torch.utils.data.Subset(full_train_eval_view, validation_split.indices)

In [None]:
# Mini-batch size for the training and validation loaders.
BATCH_SIZE = 128

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Validation and test data are read in a fixed order.
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False)

# The test loader uses its own, smaller batch size of 64.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
# Pull one batch from the training loader to record the per-sample shape
# (batch dimension stripped) and show the batch shapes and tensor types.
# X_train_size stays 0 if the loader yields nothing.
X_train_size = 0
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    X_train, Y_train = first_batch
    X_train_size = X_train.shape[1:]
    print(X_train_size)
    print(f"X_train: {X_train.shape} type: {X_train.type()}")
    print(f"Y_train: {Y_train.shape} type: {Y_train.type()}")

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# The runtime message below is Korean for "Device in use".
print("사용하는 Device :", DEVICE)

In [None]:
class Block(nn.Module):
    """Expand (1x1) -> depthwise (3x3) -> project (1x1) block with optional residual.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        expansion: Multiplier applied to ``in_planes`` to get the width of the
            expanded (hidden) representation.
        stride: Stride of the 3x3 depthwise convolution.

    The residual sum is applied only when ``stride == 1``. In that case the
    shortcut is the identity when ``in_planes == out_planes`` and a 1x1
    convolution + batch norm otherwise.
    """

    def __init__(self, in_planes, out_planes, expansion, stride):
        super().__init__()
        self.stride = stride
        hidden = expansion * in_planes

        # 1x1 pointwise convolution widening the channels to `hidden`.
        expand = [
            nn.Conv2d(in_planes, hidden, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(),
        ]
        # 3x3 depthwise convolution (groups == channels); carries the stride.
        depthwise = [
            nn.Conv2d(hidden, hidden, kernel_size=3, stride=stride, padding=1, groups=hidden, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(),
        ]
        # 1x1 projection to `out_planes`; no activation follows it.
        project = [
            nn.Conv2d(hidden, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(out_planes),
        ]
        self.features = nn.Sequential(*expand, *depthwise, *project)

        # A learned shortcut is only needed when the residual is used
        # (stride 1) and the channel counts differ; otherwise it is empty.
        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Run the block; add the shortcut branch only for stride-1 blocks."""
        out = self.features(x)
        if self.stride != 1:
            return out
        return out + self.shortcut(x)


class MobileNetV2(nn.Module):
    """Classifier built from a 3x3 conv stem, a stack of ``Block`` stages and a 1x1 conv head.

    Args:
        num_classes: Size of the final linear layer's output. Defaults to the
            notebook-level ``num_classes`` as it was when this class was defined.

    ``forward`` applies a 4x4 average pool before flattening into the
    512 -> ``num_classes`` linear layer, so the final feature map must pool
    down to 1x1.
    """

    # (expansion, out_planes, num_blocks, stride)
    cfg = [(1,  32, 1, 1),
           (4,  48, 2, 1),
           (4,  64, 3, 2),
           (4, 128, 4, 2),
           (4, 256, 3, 2)]

    def __init__(self, num_classes=num_classes):
        super().__init__()

        # Stem: 3 -> 64 channels, spatial size preserved (stride 1, padding 1).
        stem = [
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        ]
        # All Block stages described by `cfg`, fed by the 64-channel stem.
        stages = self._make_layers(in_planes=64)
        # Head: 1x1 convolution widening 256 -> 512 channels.
        head = [
            nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(),
        ]

        self.features = nn.Sequential(*stem, stages, *head)
        self.linear = nn.Linear(512, num_classes)

        self._init_weights()

    def _init_weights(self):
        """Re-initialise every conv, batch-norm and linear layer of the network."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, std=1e-3)
                if module.bias is not None:
                    module.bias.data.zero_()

    def _make_layers(self, in_planes):
        """Build the Block stack from ``cfg``.

        Within each stage only the first block uses the configured stride;
        the remaining blocks use stride 1.
        """
        blocks = []
        for expansion, out_planes, num_blocks, first_stride in self.cfg:
            block_strides = [first_stride] + [1] * (num_blocks - 1)
            for block_stride in block_strides:
                blocks.append(Block(in_planes, out_planes, expansion, block_stride))
                # Every following block consumes the previous block's output width.
                in_planes = out_planes
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return class logits for a batch of images."""
        feature_map = self.features(x)
        pooled = F.avg_pool2d(feature_map, 4)
        flat = torch.flatten(pooled, 1)
        return self.linear(flat)

In [None]:
# Instantiate the network on the selected device and show its layer structure.
model = MobileNetV2().to(DEVICE)
print(model)

# Count every parameter element of the model.
# The runtime label below is Korean for "total number of parameters".
total_params = sum(param.numel() for param in model.parameters())
print(f"총 파라미터 개수: {total_params}")

MobileNetV2(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Sequential(
      (0): Block(
        (features): Sequential(
          (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6()
          (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
          (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (5): ReLU6()
          (6): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (shortcut): Sequential(
          (0): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bia

In [None]:
# Build a fresh model instance; this is the instance the later training cell uses.
model = MobileNetV2().to(DEVICE)

# Summarise the model for a single sample: batch of 1 plus the per-sample shape
# recorded from the training loader.
summary_input_size = (1,) + tuple(X_train_size)
summary(model, summary_input_size)

Layer (type:depth-idx)                        Output Shape              Param #
MobileNetV2                                   [1, 10]                   --
├─Sequential: 1-1                             [1, 512, 4, 4]            --
│    └─Conv2d: 2-1                            [1, 64, 32, 32]           1,728
│    └─BatchNorm2d: 2-2                       [1, 64, 32, 32]           128
│    └─ReLU: 2-3                              [1, 64, 32, 32]           --
│    └─Sequential: 2-4                        [1, 256, 4, 4]            --
│    │    └─Block: 3-1                        [1, 32, 32, 32]           9,152
│    │    └─Block: 3-2                        [1, 48, 32, 32]           13,632
│    │    └─Block: 3-3                        [1, 48, 32, 32]           21,024
│    │    └─Block: 3-4                        [1, 64, 16, 16]           24,128
│    │    └─Block: 3-5                        [1, 64, 16, 16]           36,224
│    │    └─Block: 3-6                        [1, 64, 16, 16]           

In [None]:
from tqdm.auto import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Loss used for training, validation and the final test pass.
criterion = torch.nn.CrossEntropyLoss().to(DEVICE)
total_epochs = 100

# Folder (under `checkpoints`) holding this run's 'last_epoch' and 'best_epoch' files.
run_dir = checkpoints + 'MobileNetV2_9/'

# ---------------------------------------------------------------------------
# Resume state
# ---------------------------------------------------------------------------
try:
    # map_location lets a checkpoint written on a GPU runtime be resumed on a
    # CPU-only runtime (and vice versa).
    checkpoint = torch.load(run_dir + 'last_epoch', map_location=DEVICE)
except FileNotFoundError:
    # Only a missing file means "fresh run". Any other failure (corrupt file,
    # interrupt, ...) is allowed to propagate instead of silently restarting
    # from epoch 0 and overwriting the existing checkpoints.
    checkpoint = None
    print("No checkpoint found; starting training from scratch.")

if checkpoint is None:
    last_epoch = -1
    best_val_loss = float('inf')
    train_losses = []
    val_losses = []
else:
    old_model_state_dict = checkpoint['model_state_dict']
    new_model_state_dict = model.state_dict()

    # Best-effort weight transfer: copy every tensor whose name exists in the
    # current model, and report (rather than abort on) tensors that cannot be
    # copied, e.g. because of a shape mismatch.
    for name, param in old_model_state_dict.items():
        if name in new_model_state_dict:
            try:
                new_model_state_dict[name].copy_(param)
            except Exception as e:
                print(f"Failed to copy param: {name}, due to {e}")

    model.load_state_dict(new_model_state_dict, strict=False)

    last_epoch = checkpoint['epoch']
    best_val_loss = checkpoint["best_val_loss"]
    train_losses = checkpoint["train_losses"]
    val_losses = checkpoint["val_losses"]

# ---------------------------------------------------------------------------
# Optimiser and learning-rate schedule
# ---------------------------------------------------------------------------
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_epochs)
if checkpoint:
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

# ---------------------------------------------------------------------------
# Training / validation loop (continues after the last completed epoch)
# ---------------------------------------------------------------------------
for epoch in tqdm(range(last_epoch + 1, total_epochs), desc='Epoch Progress'):
    # ---- train for one epoch ----
    model.train()
    train_loss = 0.0
    correct_train = 0
    total_train = 0

    with tqdm(total=len(train_loader), desc='Batch Progress') as batch_bar:
        for X, Y in train_loader:
            X = X.to(DEVICE)
            Y = Y.to(DEVICE)

            optimizer.zero_grad()
            hypothesis = model(X)
            loss = criterion(hypothesis, Y)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

            _, predicted_train = torch.max(hypothesis.detach(), 1)
            total_train += Y.size(0)
            correct_train += (predicted_train == Y).sum().item()

            batch_bar.update()

    # Mean loss per batch; accuracy is stored as a percentage.
    train_losses.append(train_loss / len(train_loader))
    train_accuracy = (100 * correct_train) / total_train

    # ---- validate ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for X, Y in validation_loader:
            X = X.to(DEVICE)
            Y = Y.to(DEVICE)

            output = model(X)
            _, predicted = torch.max(output, 1)

            val_loss += criterion(output, Y).item()

            total += Y.size(0)
            correct += (predicted == Y).sum().item()

    # Mean loss per batch; val_accuracy is a fraction (scaled by 100 when reported).
    val_losses.append(val_loss / len(validation_loader))
    val_accuracy = correct / total
    scheduler.step()

    # Decide whether this epoch is the best one BEFORE building the checkpoint
    # dict, so the saved 'best_val_loss' is the up-to-date value. (Storing the
    # stale value meant a resumed run could overwrite 'best_epoch' with a worse
    # model.) The comparison uses the summed batch loss, the same quantity that
    # earlier checkpoints of this run stored.
    is_best = val_loss < best_val_loss
    if is_best:
        best_val_loss = val_loss

    train_desc = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'train_accuracy': train_accuracy,
        'val_accuracy': val_accuracy * 100,
        'train_losses': train_losses,
        'val_losses': val_losses,
        'best_val_loss': best_val_loss
    }

    if is_best:
        torch.save(train_desc, run_dir + 'best_epoch')

    # Always refresh the resume point.
    torch.save(train_desc, run_dir + 'last_epoch')

    wandb.log({"train_accuracy": train_accuracy, "val_accuracy": val_accuracy*100,
               "train_losses": train_losses[-1], "val_losses": val_losses[-1],
               }, step=epoch)

    print('Epoch [{}/{}], Train Loss: {:.4f}, Train Accuracy: {:.4f}%, Val Loss: {:.4f}, Val Accuracy: {:.2f}%'
          .format(epoch, total_epochs, train_losses[-1], train_accuracy, val_losses[-1], val_accuracy*100))

# ---------------------------------------------------------------------------
# Final test pass. This evaluates the weights currently in memory (the last
# epoch), not the 'best_epoch' checkpoint.
# ---------------------------------------------------------------------------
model.eval()
test_loss = 0
correct = 0
total = 0

with torch.no_grad():
    for X, Y in test_loader:
        X = X.to(DEVICE)
        Y = Y.to(DEVICE)

        output = model(X)
        loss = criterion(output, Y)
        test_loss += loss.item()

        _, predicted = torch.max(output, 1)
        total += Y.size(0)
        correct += (predicted == Y).sum().item()

test_loss /= len(test_loader)
test_accuracy = correct / total

wandb.log({"test_loss": test_loss, "test_accuracy": test_accuracy*100})

wandb.alert("[MobileNetV2_9]Training Task Finished", f"test accuracy: {test_accuracy*100}")

wandb.finish()