<a href="https://colab.research.google.com/github/Coreight98/Paran_ajou_Project_Anything/blob/main/alexnet_test1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
pip install tensorboardX

Collecting tensorboardX
[?25l  Downloading https://files.pythonhosted.org/packages/07/84/46421bd3e0e89a92682b1a38b40efc22dafb6d8e3d947e4ceefd4a5fabc7/tensorboardX-2.2-py2.py3-none-any.whl (120kB)
[K     |██▊                             | 10kB 18.9MB/s eta 0:00:01[K     |█████▍                          | 20kB 21.6MB/s eta 0:00:01[K     |████████▏                       | 30kB 11.7MB/s eta 0:00:01[K     |██████████▉                     | 40kB 9.4MB/s eta 0:00:01[K     |█████████████▋                  | 51kB 8.2MB/s eta 0:00:01[K     |████████████████▎               | 61kB 8.5MB/s eta 0:00:01[K     |███████████████████             | 71kB 8.7MB/s eta 0:00:01[K     |█████████████████████▊          | 81kB 8.6MB/s eta 0:00:01[K     |████████████████████████▌       | 92kB 8.9MB/s eta 0:00:01[K     |███████████████████████████▏    | 102kB 7.8MB/s eta 0:00:01[K     |██████████████████████████████  | 112kB 7.8MB/s eta 0:00:01[K     |████████████████████████████████| 122kB 

In [10]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter

In [11]:
# define model parameters
NUM_EPOCHS = 10  # original paper
BATCH_SIZE = 4
MOMENTUM = 0.9
LR_DECAY = 0.0005
LR_INIT = 0.01
IMAGE_DIM = 227  # pixels
NUM_CLASSES = 38  # 38 classes 
DEVICE_IDS = [0, 1, 2, 3]  # 멀티 GPU

# modify this to point to your data directory
#test_location = './drive/MyDrive/data/DTD_test/'
TRAIN_IMG_DIR = './drive/MyDrive/data/Alexnet_test/DTD/'
OUTPUT_DIR = './drive/MyDrive/data/Alexnet_test/Output/'
LOG_DIR = OUTPUT_DIR + '/tblogs'  # tensorboard logs
CHECKPOINT_DIR = OUTPUT_DIR + '/DTD_models_cp'  # model checkpoints

# make checkpoint path directory
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [12]:
class AlexNet(nn.Module):
    """
    Neural network model consisting of layers propsed by AlexNet paper.
    """
    def __init__(self, num_classes=38):
        """
        Define and allocate layers for this neural net.
        Args:
            num_classes (int): number of classes to predict with this model
        """
        super().__init__()
        # input size should be : (b x 3 x 227 x 227)
        # The image in the original paper states that width and height are 224 pixels, but
        # the dimensions after first convolution layer do not lead to 55 x 55.
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),  # (b x 96 x 55 x 55)
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),  # section 3.3
            nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 96 x 27 x 27)
            nn.Conv2d(96, 256, 5, padding=2),  # (b x 256 x 27 x 27)
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 256 x 13 x 13)
            nn.Conv2d(256, 384, 3, padding=1),  # (b x 384 x 13 x 13)
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, padding=1),  # (b x 384 x 13 x 13)
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, padding=1),  # (b x 256 x 13 x 13)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 256 x 6 x 6)
        )
        # classifier is just a name for linear layers
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5, inplace=True),
            nn.Linear(in_features=(256 * 6 * 6), out_features=4096),
            nn.ReLU(),
            nn.Dropout(p=0.5, inplace=True),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=num_classes),
        )
        self.init_bias()  # initialize bias

    def init_bias(self):
        for layer in self.net:
            if isinstance(layer, nn.Conv2d):
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                nn.init.constant_(layer.bias, 0)
        # original paper = 1 for Conv2d layers 2nd, 4th, and 5th conv layers
        nn.init.constant_(self.net[4].bias, 1)
        nn.init.constant_(self.net[10].bias, 1)
        nn.init.constant_(self.net[12].bias, 1)

    def forward(self, x):
        """
        Pass the input through the net.
        Args:
            x (Tensor): input tensor
        Returns:
            output (Tensor): output tensor
        """
        x = self.net(x)
        x = x.view(-1, 256 * 6 * 6)  # reduce the dimensions for linear layer input
        return self.classifier(x)


In [None]:
if __name__ == '__main__':
    # print the seed value
    seed = torch.initial_seed()
    print('Used seed : {}'.format(seed))

    tbwriter = SummaryWriter(log_dir=LOG_DIR)
    print('TensorboardX summary writer created')

    # create model
    alexnet = AlexNet(num_classes=NUM_CLASSES).to(device)
    print('AlexNet created')

    # create dataset and data loader
    dataset = datasets.ImageFolder(TRAIN_IMG_DIR, transforms.Compose([
        # transforms.RandomResizedCrop(IMAGE_DIM, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
        transforms.CenterCrop(IMAGE_DIM),
        # transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]))
    print('Dataset created')
    dataloader = data.DataLoader(
        dataset,
        shuffle=False,
        pin_memory=True,
        num_workers=8,
        drop_last=True,
        batch_size=BATCH_SIZE)
    print('Dataloader created')

    # create optimizer
    # the one that WORKS
    optimizer = optim.Adam(params=alexnet.parameters(), lr=0.0001)
    ### BELOW is the setting proposed by the original paper - which doesn't train....
    # optimizer = optim.SGD(
    #     params=alexnet.parameters(),
    #     lr=LR_INIT,
    #     momentum=MOMENTUM,
    #     weight_decay=LR_DECAY)
    print('Optimizer created')

    # multiply LR by 1 / 10 after every 30 epochs
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
    print('LR Scheduler created')

    # start training!!
    print('Starting training...')
    total_steps = 1
    for epoch in range(NUM_EPOCHS):
        lr_scheduler.step()
        Acc_temp = 0
        for imgs, classes in dataloader:
            imgs, classes = imgs.to(device), classes.to(device)

            # calculate the loss
            output = alexnet(imgs)
            loss = F.cross_entropy(output, classes)

            # update the parameters
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # log the information and add to tensorboard
            if total_steps % 10 == 0:
                with torch.no_grad():
                    _, preds = torch.max(output, 1)
                    accuracy = torch.sum(preds == classes)

                    print('Epoch: {} \tStep: {} \tLoss: {:.4f} \tAcc: {}'
                        .format(epoch + 1, total_steps, loss.item(), accuracy.item()))
                    Acc_temp += accuracy.item()
                    tbwriter.add_scalar('loss', loss.item(), total_steps)
                    tbwriter.add_scalar('accuracy', accuracy.item(), total_steps)

            # print out gradient values and parameter average values
            if total_steps % 100 == 0:
                with torch.no_grad():
                    # print and save the grad of the parameters
                    # also print and save parameter values
                    print('*' * 10)
                    for name, parameter in alexnet.named_parameters():
                        if parameter.grad is not None:
                            avg_grad = torch.mean(parameter.grad)
                            #print('\t{} - grad_avg: {}'.format(name, avg_grad))
                            tbwriter.add_scalar('grad_avg/{}'.format(name), avg_grad.item(), total_steps)
                            tbwriter.add_histogram('grad/{}'.format(name),
                                    parameter.grad.cpu().numpy(), total_steps)
                        if parameter.data is not None:
                            avg_weight = torch.mean(parameter.data)
                            #print('\t{} - param_avg: {}'.format(name, avg_weight))
                            tbwriter.add_histogram('weight/{}'.format(name),
                                    parameter.data.cpu().numpy(), total_steps)
                            tbwriter.add_scalar('weight_avg/{}'.format(name), avg_weight.item(), total_steps)

            total_steps += 1
        print('Accuracy:', Acc_temp,'/1045' )

        # save checkpoints
        checkpoint_path = os.path.join(CHECKPOINT_DIR, 'alexnet_states_e{}.pkl'.format(epoch + 1))
        state = {
            'epoch': epoch,
            'total_steps': total_steps,
            'optimizer': optimizer.state_dict(),
            'model': alexnet.state_dict(),
            'seed': seed,
        }
        torch.save(state, checkpoint_path)

Used seed : 16904014131702502576
TensorboardX summary writer created
AlexNet created
Dataset created
Dataloader created
Optimizer created
LR Scheduler created
Starting training...


  cpuset_checked))


Epoch: 1 	Step: 10 	Loss: 0.0000 	Acc: 4
Epoch: 1 	Step: 20 	Loss: 0.0000 	Acc: 4
Epoch: 1 	Step: 30 	Loss: 135.4017 	Acc: 0
Epoch: 1 	Step: 40 	Loss: 0.1569 	Acc: 4
Epoch: 1 	Step: 50 	Loss: 0.0003 	Acc: 4
Epoch: 1 	Step: 60 	Loss: 3.4781 	Acc: 0
Epoch: 1 	Step: 70 	Loss: 3.5063 	Acc: 0
Epoch: 1 	Step: 80 	Loss: 2.3155 	Acc: 4
Epoch: 1 	Step: 90 	Loss: 3.1518 	Acc: 0
Epoch: 1 	Step: 100 	Loss: 1.3614 	Acc: 4
**********
Epoch: 1 	Step: 110 	Loss: 0.0074 	Acc: 4
Epoch: 1 	Step: 120 	Loss: 3.4430 	Acc: 0
Epoch: 1 	Step: 130 	Loss: 2.3126 	Acc: 4
Epoch: 1 	Step: 140 	Loss: 10.3736 	Acc: 0
Epoch: 1 	Step: 150 	Loss: 2.9925 	Acc: 0
Epoch: 1 	Step: 160 	Loss: 2.0725 	Acc: 3
Epoch: 1 	Step: 170 	Loss: 4.8652 	Acc: 0
Epoch: 1 	Step: 180 	Loss: 3.1562 	Acc: 0
Epoch: 1 	Step: 190 	Loss: 2.5877 	Acc: 0
Epoch: 1 	Step: 200 	Loss: 3.8907 	Acc: 0
**********
Epoch: 1 	Step: 210 	Loss: 3.0984 	Acc: 0
Epoch: 1 	Step: 220 	Loss: 1.8105 	Acc: 0
Epoch: 1 	Step: 230 	Loss: 3.5760 	Acc: 0
Epoch: 1 	Step: 24