In [None]:
# Mount Google Drive inside the Colab VM; later cells build paths under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Hyperparameters / configuration constants.
NUM_EPOCHS = 90       # NOTE(review): not referenced in the visible cells; the final train(...) call passes 100 — confirm which is intended
BATCH_SIZE = 128      # batch size handed to both DataLoaders
MOMENTUM = 0.9        # only referenced by the commented-out SGD optimizer
LR_DECAY = 0.0005     # only referenced by the commented-out SGD optimizer (as weight_decay)
LR_INIT = 0.01        # only referenced by the commented-out SGD optimizer (as lr)
IMAGE_DIM = 227       # side length of the images produced by the train/val transforms
NUM_CLASSES = 100     # NOTE(review): not referenced in the visible cells; the model is built with AlexNet(10) — confirm

In [None]:
import os
from glob import glob
# Location on the mounted Drive where the CIFAR-10 files are stored.
base_path = '/content/gdrive/MyDrive'
folder = 'CiFAR10_datasets'
path_list = [base_path, folder]
path = os.path.join(base_path, folder)

# Switch the working directory to the Drive root, then make sure the
# dataset folder exists there (the check below is relative to that root).
os.chdir(base_path)
if not os.path.exists(folder):
    os.makedirs(folder)
    print("folder create complete!")
else:
    print("folder already exists!")

In [None]:
# Plain tensor conversion used for the first (statistics-only) load below.
# The statistics are computed from the raw `.data` arrays, which bypass it.
transform = transforms.Compose([
    transforms.ToTensor(),
])

# First pass: load (and download if needed) the datasets just to compute
# per-channel statistics.
train_dataset = torchvision.datasets.CIFAR10(root=path,
                                             train=True,
                                             download=True,
                                             transform=transform)
val_dataset = torchvision.datasets.CIFAR10(root=path,
                                           train=False,
                                           download=True,
                                           transform=transform)

# When reducing over axes, think of it as "removing" those axes: we want one
# value per colour channel, so from (batch, h, w, channel) we reduce away
# axes 0, 1 and 2.
train_mean = train_dataset.data.mean(axis=(0, 1, 2))
train_std = train_dataset.data.std(axis=(0, 1, 2))
val_mean = val_dataset.data.mean(axis=(0, 1, 2))
val_std = val_dataset.data.std(axis=(0, 1, 2))

# Raw pixels are in [0, 255]; ToTensor() rescales images to [0, 1], so the
# statistics are rescaled the same way.
train_mean = train_mean / 255
train_std = train_std / 255

# Validation statistics are still computed for inspection, but they are NOT
# used for normalisation (see below).
val_mean = val_mean / 255
val_std = val_std / 255

# Resize((IMAGE_DIM, IMAGE_DIM)) already yields exactly IMAGE_DIM x IMAGE_DIM
# images, so a following CenterCrop(IMAGE_DIM) would be a no-op and is omitted.
transform_train = transforms.Compose([
    transforms.Resize((IMAGE_DIM, IMAGE_DIM)),
    transforms.ToTensor(),
    transforms.Normalize(train_mean, train_std),
])
# Validation data is normalised with the *training* statistics so that both
# splits go through the same input scaling the model was fitted on.
transform_val = transforms.Compose([
    transforms.Resize((IMAGE_DIM, IMAGE_DIM)),
    transforms.ToTensor(),
    transforms.Normalize(train_mean, train_std),
])

# Second pass: the datasets actually used for training / validation.
train_dataset = torchvision.datasets.CIFAR10(root=path,
                                             train=True,
                                             download=True,
                                             transform=transform_train)
val_dataset = torchvision.datasets.CIFAR10(root=path,
                                           train=False,
                                           download=True,
                                           transform=transform_val)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)


In [None]:

class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutional layers followed by three linear layers.

    The first linear layer expects 256*6*6 flattened features, which is what
    the convolutional stack produces for 3 x 227 x 227 inputs
    (227 -> 55 -> 27 -> 27 -> 13 -> 13 -> 13 -> 13 -> 6 spatially).

    Args:
        num_classes: number of output logits produced by the last linear layer.
    """

    # Positions inside ``self.net`` of the conv layers (2nd, 4th and 5th
    # convolution) whose bias is initialised to 1 instead of 0.
    _BIAS_ONE_LAYERS = (4, 10, 12)

    def __init__(self, num_classes=100):
        super().__init__()
        self.num_classes = num_classes

        # Feature extractor.
        self.net = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Classification head operating on the flattened feature map.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=(256 * 6 * 6), out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=4096, out_features=num_classes),
        )

        self.init_bias()

    def init_bias(self):
        """Initialise the conv layers of ``self.net``.

        Every conv weight is drawn from N(0, 0.01) and every conv bias is set
        to 0; afterwards the biases of the layers listed in
        ``_BIAS_ONE_LAYERS`` are set to 1. The linear layers keep PyTorch's
        default initialisation.
        """
        for layer in self.net:
            if isinstance(layer, nn.Conv2d):
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                nn.init.constant_(layer.bias, 0)
        # Done once, after the loop: previously these calls were indented
        # inside the loop and re-ran for every module in the stack.
        for idx in self._BIAS_ONE_LAYERS:
            nn.init.constant_(self.net[idx].bias, 1)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for an image batch ``x``."""
        x = self.net(x)
        # Keep the batch dimension, flatten everything else for the linear layers.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [None]:
# Seed the RNGs before the model is built so its initial weights are repeatable.
SEED = 7777
torch.manual_seed(SEED)
if device == "cuda":
    torch.cuda.manual_seed_all(SEED)

# Ten output logits, moved onto the selected device.
model = AlexNet(10)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
# Alternative optimizer, currently disabled:
#optimizer = optim.SGD(params=model.parameters(), lr=LR_INIT, momentum=MOMENTUM, weight_decay=LR_DECAY)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
val_every = 1  # validate after every epoch

In [None]:
def train(num_epochs, model, data_loader, criterion, optimizer, saved_dir, val_every, device,
          val_data_loader=None):
    """Train ``model`` and checkpoint it whenever the validation loss improves.

    Relies on the notebook-level helpers ``validation``, ``save_model`` and
    ``get_lr``.

    Args:
        num_epochs: number of passes over ``data_loader``.
        model: the network to optimise (already on ``device``).
        data_loader: iterable of ``(imgs, labels)`` training batches.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer updating ``model``'s parameters.
        saved_dir: directory passed to ``save_model`` for the best checkpoint.
        val_every: validate every ``val_every`` epochs; a falsy value
            (``0`` / ``None``) disables validation instead of failing on
            the modulo.
        device: device the batches are moved to.
        val_data_loader: loader used for validation. When ``None`` the
            notebook-level ``val_loader`` is used, as before.
    """
    print('start training..')
    best_loss = float('inf')
    for epoch in range(num_epochs):
        # Re-enter training mode every epoch so the loop does not depend on
        # validation() restoring it.
        model.train()
        for i, (imgs, labels) in enumerate(data_loader):
            imgs, labels = imgs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Batch accuracy is only needed for the progress line, so it is
            # computed only on the steps that print.
            if (i + 1) % 10 == 0:
                accuracy = (labels == outputs.argmax(dim=1)).float().mean()
                print("Epoch [{}/{}], Step[{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%, learning rate : {}".format(epoch+1, num_epochs, i+1, len(data_loader), loss.item(), accuracy.item()*100, get_lr(optimizer)))

        if val_every and (epoch + 1) % val_every == 0:
            # Resolve the fallback lazily so the global is only required when
            # validation actually runs.
            if val_data_loader is None:
                val_data_loader = val_loader
            # float() accepts both a Python number and a 0-dim tensor.
            avg_loss = float(validation(epoch + 1, model, val_data_loader, criterion, device))
            if avg_loss < best_loss:
                print("Best performance at epoch:{}".format(epoch + 1))
                print("Save model in", saved_dir)
                best_loss = avg_loss
                save_model(model, saved_dir)

In [None]:
def validation(epoch, model, data_loader, criterion, device):
    """Evaluate ``model`` on ``data_loader`` and return the average batch loss.

    Args:
        epoch: epoch number, used only in the printed messages.
        model: network to evaluate.
        data_loader: iterable of ``(images, labels)`` batches.
        criterion: loss function called as ``criterion(outputs, labels)``.
        device: device the batches are moved to.

    Returns:
        float: mean of the per-batch losses (each batch counts equally), or
        ``float('inf')`` when ``data_loader`` yields no batches.

    The model is switched to eval mode for the duration of the call and then
    put back into whatever mode (train/eval) it was in on entry.
    """
    print("Start validation #{}".format(epoch))
    was_training = model.training
    model.eval()

    total = 0          # number of samples seen
    correct = 0        # number of correct top-1 predictions
    total_loss = 0.0   # running sum of per-batch losses, as a Python float
    cnt = 0            # number of batches seen
    try:
        with torch.no_grad():
            for images, labels in data_loader:
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                # .item() keeps the accumulator a plain float rather than a
                # tensor living on the device.
                total_loss += loss.item()
                total += images.size(0)

                predict = outputs.argmax(dim=1)
                correct += (labels == predict).sum().item()
                cnt += 1
    finally:
        # Restore the caller's mode even if evaluation fails part-way.
        model.train(was_training)

    if cnt == 0 or total == 0:
        # Nothing to average; report "no improvement" instead of dividing by zero.
        print("Validation #{}: data_loader is empty, nothing evaluated".format(epoch))
        return float('inf')

    avg_loss = total_loss / cnt
    print("Validation #{} Accuracy: {:.2f}% Average Loss: {:.4f}".format(epoch, correct/total*100, avg_loss))
    return avg_loss


In [None]:
def get_lr(opt):
    """Return the ``'lr'`` of the optimizer's first parameter group.

    Returns ``None`` when ``opt.param_groups`` is empty.
    """
    first_group = next(iter(opt.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

In [None]:
def save_model(model, saved_dir, file_name='best_model.pt'):
    """Save ``model``'s state_dict to ``saved_dir/file_name``.

    The checkpoint is a dict holding the state_dict under the key ``'net'``.
    ``saved_dir`` is created when it does not exist yet.
    """
    # exist_ok=True: no error is raised if the directory already exists.
    os.makedirs(saved_dir, exist_ok=True)
    torch.save({'net': model.state_dict()}, os.path.join(saved_dir, file_name))

In [None]:
# Kick off training for 100 epochs; the best checkpoint is written into `path`.
train(
    num_epochs=100,
    model=model,
    data_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    saved_dir=path,
    val_every=val_every,
    device=device,
)