In [19]:
import copy
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as Datasets
import time

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [20]:
class VGG(nn.Module):
    """VGG-style image classifier: convolutional features plus a 3-layer dense head.

    Args:
        features: module applied first to the input batch. Its output must
            have 512 channels, because the head's first linear layer expects
            ``512 * 7 * 7`` inputs after the 7x7 adaptive pooling.
        output_dim: number of values produced by the final linear layer.
    """

    def __init__(self, features, output_dim):
        super().__init__()
        self.features = features
        # Pool to a fixed 7x7 grid so the head does not depend on the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # nn.Dropout() is disabled in this head (no dropout layer follows either ReLU).
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Linear(4096, output_dim),
        )

    def forward(self, x):
        """Return ``(logits, h)`` where ``h`` is the flattened pooled feature map.

        ``h`` has shape ``(batch, -1)`` and is exactly the tensor fed to the
        classifier head.
        """
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten once and reuse it. torch.flatten also copes with
        # non-contiguous activations, where Tensor.view would raise.
        h = torch.flatten(x, 1)
        return self.classifier(h), h
        

In [21]:
# VGG-11 layout consumed by get_vgg_layers: an int is the output-channel count
# of a 3x3 convolution, 'M' inserts a 2x2 max-pooling layer.
vgg11_config = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']

In [22]:
def get_vgg_layers(config, batch_norm, in_channels=3):
    """Build the convolutional part of a VGG network from a layer specification.

    Args:
        config: sequence whose items are either an int (output channels of a
            3x3 convolution with padding 1) or the string ``'M'`` (2x2 max
            pooling).
        batch_norm: when true, insert ``nn.BatchNorm2d`` between every
            convolution and its ReLU.
        in_channels: number of channels of the network input. Defaults to 3,
            so existing two-argument calls behave as before.

    Returns:
        ``nn.Sequential`` holding the layers in ``config`` order.

    Raises:
        AssertionError: if an item is neither ``'M'`` nor an int.
    """
    layers = []

    for c in config:
        assert c == 'M' or isinstance(c, int), (
            f"config items must be 'M' or an int, got {c!r}"
        )
        if c == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2))
            continue

        layers.append(nn.Conv2d(in_channels, c, kernel_size=3, padding=1))
        if batch_norm:
            layers.append(nn.BatchNorm2d(c))
        layers.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        in_channels = c

    return nn.Sequential(*layers)

In [23]:
# Build the VGG-11 feature extractor with batch normalisation after every convolution.
vgg11_layers = get_vgg_layers(vgg11_config, batch_norm=True)

In [24]:
# Size of the final classifier layer (number of logits); presumably the two
# classes of the cat/dog dataset loaded below.
OUTPUT_DIM = 2
# Assemble the full network and move its parameters to the selected device.
model = VGG(vgg11_layers, OUTPUT_DIM).to(device)
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [25]:
# Training pipeline: resize to 224x224, apply light augmentation (horizontal
# flip with probability 0.5, rotation of up to 5 degrees), convert to a tensor
# and normalise each channel.
# NOTE(review): the mean/std values match the widely used ImageNet statistics.
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Evaluation pipeline: same resize and normalisation, but no random augmentation.
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [26]:
# Dataset roots, relative to the notebook's working directory.
train_path = './catanddog/train'
test_path = './catanddog/test'

# ImageFolder reads the images under each root and applies the given transform on access.
train_dataset = Datasets.ImageFolder(train_path, transform=train_transforms)
test_dataset = Datasets.ImageFolder(test_path, transform=test_transforms)

print('Train dataset size:', len(train_dataset))
print('Test dataset size:', len(test_dataset))

Train dataset size: 529
Test dataset size: 12


In [27]:
# NOTE: despite its name, VALID_RATIO is the fraction of samples kept for
# *training*; the remainder (1 - VALID_RATIO) becomes the validation set.
VALID_RATIO = 0.7
n_train_examples = int(len(train_dataset) * VALID_RATIO)
n_valid_examples = len(train_dataset) - n_train_examples

# Random split into two subsets. No seed is set in the visible cells, so the
# split presumably differs from run to run.
train_data, valid_data = data.random_split(train_dataset, [n_train_examples, n_valid_examples])

In [28]:
# Both subsets returned by random_split wrap the same underlying dataset object,
# so deep-copy the validation subset before swapping its transform; otherwise
# the training subset would lose its augmentation as well.
valid_data = copy.deepcopy(valid_data)
valid_data.dataset.transform = test_transforms

In [29]:
# Sanity check: report the size of each split.
print(f"Number of training examples: {len(train_data)}")
print(f"Number of validation examples: {len(valid_data)}")
print(f"Number of testing examples: {len(test_dataset)}")

Number of training examples: 370
Number of validation examples: 159
Number of testing examples: 12


In [30]:
BATCH_SIZE = 64
# Only the training loader shuffles; validation and test keep dataset order.
train_iterator = data.DataLoader(train_data, shuffle=True, batch_size=BATCH_SIZE)
valid_iterator = data.DataLoader(valid_data, batch_size=BATCH_SIZE)
test_iterator = data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [31]:
# Plain SGD with learning rate 0.01. The train() function in this notebook only
# reads the learning rate from this optimizer and never calls optimizer.step().
optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

# The model was already moved to `device` when it was created; this repeats that move.
model = model.to(device)
criterion = criterion.to(device)

In [32]:
def calculate_accuracy(y_pred, y):
    """Return the fraction of rows whose arg-max prediction equals the label.

    Args:
        y_pred: tensor of per-class scores; the arg-max is taken over dim 1.
        y: tensor of target class indices with one entry per row of ``y_pred``.

    Returns:
        Zero-dimensional float tensor: correct predictions / ``y.shape[0]``.
    """
    predicted = y_pred.argmax(dim=1, keepdim=True)
    # Reshape the labels to the (N, 1) layout of `predicted` before comparing.
    n_correct = (predicted == y.view_as(predicted)).sum()
    return n_correct.float() / y.shape[0]

In [None]:
def train(model, iterator, optimizer, criterion, device, k):
    """Run one training epoch, updating only the top-k gradient entries per tensor.

    For every batch the loss is back-propagated as usual, but instead of
    calling ``optimizer.step()`` each parameter tensor is updated by hand:
    only the entries whose absolute gradient is among the ``k`` largest of
    that tensor receive a plain SGD step; every other entry is left unchanged.
    Entries tied with the k-th largest magnitude are updated too.

    Args:
        model: module whose forward pass returns ``(predictions, features)``.
        iterator: sized iterable of ``(x, y)`` batches; ``len(iterator)`` is
            used to average the metrics.
        optimizer: only read for its learning rate
            (``param_groups[0]['lr']``); ``step()`` is never called.
        criterion: loss callable taking ``(predictions, targets)``.
        device: device the batches are moved to.
        k: number of gradient entries to keep per parameter tensor (>= 1).
            If a tensor has fewer than ``k`` entries, all of them are updated.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)`` averaged over the batches.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    epoch_loss = 0
    epoch_acc = 0

    model.train()

    lr = optimizer.param_groups[0]['lr']

    for (x, y) in iterator:
        x = x.to(device)
        y = y.to(device)

        # Clear the gradients left over from the previous batch.
        model.zero_grad()

        y_pred, _ = model(x)

        loss = criterion(y_pred, y)
        acc = calculate_accuracy(y_pred, y)

        loss.backward()

        # Manual sparse SGD step; no_grad keeps the update out of the autograd graph.
        with torch.no_grad():
            for param in model.parameters():
                grad = param.grad
                if grad is None or grad.numel() == 0:
                    continue

                grad_abs = grad.abs()
                # Small tensors (e.g. biases) may hold fewer than k entries.
                k_eff = min(k, grad_abs.numel())
                # Smallest magnitude among the top-k entries acts as the cut-off.
                threshold = grad_abs.reshape(-1).topk(k_eff, largest=True).values[-1]

                # Entries below the cut-off get a zero update, i.e. they keep their value.
                mask = grad_abs >= threshold
                param.sub_(lr * torch.where(mask, grad, torch.zeros_like(grad)))

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [34]:
def evaluate(model, iterator, criterion, device):
    """Compute the mean loss and mean accuracy of ``model`` over ``iterator``.

    The model is switched to evaluation mode and no gradients are recorded.

    Args:
        model: module whose forward pass returns ``(predictions, features)``.
        iterator: sized iterable of ``(x, y)`` batches.
        criterion: loss callable taking ``(predictions, targets)``.
        device: device the batches are moved to.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)``, each averaged over the batches.
    """
    model.eval()

    loss_sum, acc_sum = 0, 0
    with torch.no_grad():
        for inputs, targets in iterator:
            inputs, targets = inputs.to(device), targets.to(device)

            predictions, _ = model(inputs)

            loss_sum += criterion(predictions, targets).item()
            acc_sum += calculate_accuracy(predictions, targets).item()

    n_batches = len(iterator)
    return loss_sum / n_batches, acc_sum / n_batches

In [35]:
def epoch_time(start_time, end_time):
    """Split the interval between two timestamps into whole minutes and seconds.

    Args:
        start_time: start timestamp in seconds.
        end_time: end timestamp in seconds.

    Returns:
        Tuple ``(minutes, seconds)`` of ints, both truncated toward zero.
    """
    duration = end_time - start_time
    minutes = int(duration / 60)
    # Whatever is left after removing the whole minutes, with the fraction dropped.
    seconds = int(duration - 60 * minutes)
    return minutes, seconds

In [36]:
# Main training loop: one train pass and one validation pass per epoch, with
# per-epoch and cumulative wall-clock timing.
EPOCHS = 10
best_valid_loss = float('inf')  # only referenced by the disabled checkpointing below
total_time = 0  # accumulated seconds over all epochs
for epoch in range(EPOCHS):
    start_time = time.monotonic()
    # The last argument is k: per batch, only the single largest-magnitude
    # gradient entry of each parameter tensor is applied.
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device, 1)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)
    
    # Checkpointing on best validation loss is currently disabled.
    # NOTE(review): the file name says vgg19 although this notebook builds VGG-11.
    #if valid_loss < best_valid_loss:
        #best_valid_loss = valid_loss
        #torch.save(model.state_dict(), 'vgg19-model.pt')
    
    end_time = time.monotonic()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    total_time += end_time - start_time
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 1m 13s
	Train Loss: 0.694 | Train Acc: 49.11%
	 Val. Loss: 0.693 |  Val. Acc: 48.94%
Epoch: 02 | Epoch Time: 1m 16s
	Train Loss: 0.696 | Train Acc: 48.22%
	 Val. Loss: 0.693 |  Val. Acc: 48.94%
Epoch: 03 | Epoch Time: 1m 14s
	Train Loss: 0.696 | Train Acc: 47.21%
	 Val. Loss: 0.693 |  Val. Acc: 49.46%
Epoch: 04 | Epoch Time: 1m 14s
	Train Loss: 0.694 | Train Acc: 47.81%
	 Val. Loss: 0.693 |  Val. Acc: 45.75%
Epoch: 05 | Epoch Time: 1m 14s
	Train Loss: 0.692 | Train Acc: 52.08%
	 Val. Loss: 0.693 |  Val. Acc: 45.26%
Epoch: 06 | Epoch Time: 1m 17s
	Train Loss: 0.695 | Train Acc: 45.46%
	 Val. Loss: 0.693 |  Val. Acc: 45.78%
Epoch: 07 | Epoch Time: 1m 17s
	Train Loss: 0.693 | Train Acc: 51.56%
	 Val. Loss: 0.691 |  Val. Acc: 51.09%
Epoch: 08 | Epoch Time: 1m 15s
	Train Loss: 0.696 | Train Acc: 47.03%
	 Val. Loss: 0.691 |  Val. Acc: 51.61%
Epoch: 09 | Epoch Time: 1m 15s
	Train Loss: 0.693 | Train Acc: 50.68%
	 Val. Loss: 0.690 |  Val. Acc: 52.10%
Epoch: 10 | Epoch T

In [37]:
# Report the run label and the total training time accumulated by the loop above.
# NOTE(review): "Droback" is presumably a typo for "DropBack" — confirm before renaming.
print('Vgg11 with Droback')
print(f'Total training time: {int(total_time/60)}m {int(total_time%60)}s')

Vgg11 with Droback
Total training time: 12m 34s
