In [1]:
from torchvision.models import list_models,get_model
from torchvision.datasets import CIFAR100
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset, random_split
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import torch.nn.functional as F

from model import GoogLeNet,ResNet

In [2]:
googlenet = get_model("googlenet", weights=None)
resnet = get_model("resnet50", weights=None)
vgg = get_model("vgg11", weights=None)



In [3]:
class VGG(nn.Module):
    def __init__(self):
        super().__init__()
        self.vgg = get_model("vgg11")
        self.linear = nn.Sequential(
            nn.Linear(1000,256),
            nn.ReLU(),
            nn.Linear(256,256),
            nn.ReLU(),
            nn.Linear(256,100)
        )
    
    def forward(self,x):
        out = self.vgg(x)
        out = self.linear(out)
        return out

In [4]:
train_tfm = transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


test_tfm = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
])
train_set = CIFAR100(root='./', train = True, download= True,transform=train_tfm)
test_set = CIFAR100(root='./', train = False, download= True,transform=test_tfm)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
res_config = {
    "lr":1e-4,
    "weight_decay":1e-4,
}
google_config = {
    "lr":1e-4,
    "weight_decay":1e-4,
}
vgg_config = {
    "model":ResNet(num_classes=100),
    "lr":0.0003,
    "weight_decay":0,
}

In [6]:
batch_size = 16
num_epoch = 40
device = "cuda" if torch.cuda.is_available() else "cpu"
config = vgg_config

In [7]:
def collate_fn(batch):
    images,target = list(zip(*batch))
    target = nn.functional.one_hot(torch.tensor(target), num_classes=100).to(torch.float32)
    images = torch.stack(images)
    return images,target

In [8]:
validation_split = 0.1
val_size = int(validation_split * len(train_set))
train_size = len(train_set) - val_size
train_set, valid_set = random_split(train_set, [train_size, val_size], generator=torch.Generator().manual_seed(0))

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

model = config['model']
model.to(device)
model.train()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=config['model'].parameters(),lr=config['lr'],weight_decay=config['weight_decay'])

In [9]:
print(len(valid_set))

5000


In [11]:
def valid(valid_loader, model, loss_fn):
    show_bar = tqdm(valid_loader, leave=False)
    show_bar.set_description(f'[Valid]')
    model.eval()
    acc_recoder = []
    loss_recoder = []
    for idx, batch in enumerate(show_bar):
        images, target = batch
        images = images.to('cuda')
        target = target.to('cuda')

        output = model(images)
        print(images.shape)
        loss = loss_fn(output, target).item()

        acc = (output.argmax(dim=-1) == target).float().mean().item()

        if (idx + 1) % 5 == 0:
            show_bar.set_postfix({'loss': f'{loss:.5f}', 'acc': f'{acc:.4f}'})
        loss_recoder.append(loss)
        acc_recoder.append(acc)
    logger.info(
        f'Valid loss={(sum(loss_recoder) / len(loss_recoder)):.5f} ,acc={(sum(acc_recoder) / len(acc_recoder)):.5f}')

valid(valid_loader,model,loss_fn)

  0%|          | 0/313 [00:00<?, ?it/s]

torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size

torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([16, 3, 224, 224])
torch.Size([8, 3, 224, 224])


NameError: name 'logger' is not defined

In [10]:
    for epoch in range(num_epoch):
        # loss_recoder, acc_recoder = train_epoch(train_loader, model, loss_fn, optimizer, epoch)
        # logger.info(
        #     f"Epoch {epoch + 1}: loss={(sum(loss_recoder) / len(loss_recoder)):.5f} ,acc={(sum(acc_recoder) / len(acc_recoder)):.5f}")
        # total_loss += loss_recoder
        # total_acc += acc_recoder

        valid(valid_loader,model,loss_fn)

  0%|          | 0/10000 [00:00<?, ?it/s]

torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([1,

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 10.00 GiB total capacity; 9.09 GiB already allocated; 0 bytes free; 9.28 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [12]:
torch.save(model.state_dict(),'google_3.1loss.pth')