<a href="https://colab.research.google.com/github/UOS-COMP6252/public/blob/main/lecture7/data-augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1 style="text-align: center;">COMP6252 Deep Learning Technologies</h1>
<h2 style="text-align: center;"> Data Augmentation</h2>

In [None]:
## We will be using comet_ml to log our experiments
import getpass
import os
try:
  import comet_ml
except ModuleNotFoundError:
  %pip install comet_ml
  import comet_ml
# Prefer a key supplied through the COMET_API_KEY environment variable;
# otherwise ask for it interactively with getpass instead of hard-coding it.
# Only a missing variable triggers the prompt: a variable that is set but
# empty is used as-is.
comet_api_key=os.environ.get("COMET_API_KEY")
if comet_api_key is None:
  comet_api_key=getpass.getpass("Enter key")

In [None]:
%%sh
# Fetch the dataset archive with the kaggle command-line tool, but only when
# the zip file is not already present in the working directory.
if [ ! -f flowers-recognition.zip ] 
then
   kaggle datasets download -d alxmamaev/flowers-recognition
fi
# Extract the archive only when no "flowers" directory exists yet; unzip's
# file listing is discarded. Assumes the archive unpacks into flowers/ --
# TODO confirm against the archive layout.
if [ ! -d flowers ] 
then
   unzip flowers-recognition.zip >/dev/null
fi


In [None]:
import torch
import torchvision
from torchvision.transforms import TrivialAugmentWide,AugMix,AutoAugment,RandAugment,Resize,ToTensor,Compose
from torch.utils.data import DataLoader,Dataset, random_split
# Index the images under "flowers". No transform is passed here; the
# per-split transforms are attached later through MyDataset.
dataset=torchvision.datasets.ImageFolder("flowers")

In [None]:
# Seed torch's random number generators so that runs are repeatable.
seed = 97531
torch.manual_seed(seed)
if torch.cuda.is_available():
    # With a GPU present, request deterministic cuDNN algorithms and seed
    # every CUDA device.
    torch.backends.cudnn.deterministic = True
    torch.cuda.manual_seed_all(seed)

In [None]:
class MyDataset(Dataset):
    """Dataset wrapper that applies a transform to the samples of another dataset.

    It lets several views of the same underlying data (for example the
    subsets returned by ``random_split``) use different transform pipelines.

    Args:
        subset: any indexable object with a length whose items are
            ``(sample, label)`` pairs.
        transform: optional callable applied to the sample (never to the
            label) each time an item is fetched. ``None`` disables it.
    """
    def __init__(self,subset,transform=None):
        self.subset=subset
        self.transform=transform
    def __getitem__(self,idx):
        """Return the ``(sample, label)`` pair at ``idx``, with the sample transformed."""
        x,y=self.subset[idx]
        # Compare with None explicitly: a truthiness test would silently skip
        # a callable transform that happens to evaluate as falsy (e.g. one
        # that defines __len__ or __bool__).
        if self.transform is not None:
            x=self.transform(x)
        return x,y
    def __len__(self):
        """Return the number of items in the wrapped subset."""
        return len(self.subset)

In [None]:
# Display the total number of images indexed in `dataset`.
len(dataset)

In [None]:
# Disabled alternative: a three-way train/validation/test split.
#train_d,valid_d,test_d=random_split(dataset,lengths=[0.7,0.2,0.1])
# Random 80%/20% train/validation split. No generator is passed, so the
# split is drawn from torch's default random number generator.
train_d,valid_d=random_split(dataset,lengths=[0.8,0.2])


In [None]:
# The automatic augmentation policies that can be compared, keyed by a short name.
augment_choices = {
    'trivial': TrivialAugmentWide(),
    'random': RandAugment(),
    'mix': AugMix(),
    'auto': AutoAugment(),
}
# Policy applied to the training images; the string 'None' disables augmentation.
augmentation = 'trivial'
# Validation images are only resized to 180x180 and converted to tensors.
valid_t = Compose([Resize((180, 180)), ToTensor()])
# Without augmentation the training pipeline is the validation pipeline itself;
# otherwise the selected policy runs first, followed by the same resize and
# tensor conversion.
train_t = (
    valid_t
    if augmentation == 'None'
    else Compose([augment_choices[augmentation], Resize((180, 180)), ToTensor()])
)

In [None]:
# Attach the matching transform pipeline to each split: the training subset
# gets train_t, the validation subset gets valid_t.
train_dt=MyDataset(train_d,train_t)
valid_dt=MyDataset(valid_d,valid_t)
# Disabled: wrapper for the optional test split (test_d is not created above).
#test_dt=MyDataset(test_d,test_t)

In [None]:
# Batches of 32 images loaded by 2 worker processes each. Only the training
# loader shuffles; the validation loader keeps a fixed order.
train_loader=DataLoader(train_dt,batch_size=32,shuffle=True,num_workers=2)
valid_loader=DataLoader(valid_dt,batch_size=32,shuffle=False,num_workers=2)

In [None]:
# Pull one batch from the training loader (presumably as a quick check that
# the data pipeline works). The names imgs and labels are reused by the
# training loop further down.
itr=iter(train_loader)
imgs,labels=next(itr)

In [None]:
import torch.nn as nn
class Model(nn.Module):
    """Small convolutional classifier for 3x180x180 images with 5 output classes.

    Three stages of (3x3 'same'-padded convolution, ReLU, 2x2 max-pool) take
    the input from (3,180,180) to (64,22,22). The result is flattened and
    passed through two fully connected layers. The output is the raw class
    scores; no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: (3,180,180) -> (16,90,90)
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding='same')
        self.pool1 = nn.MaxPool2d(2)
        # Stage 2: (16,90,90) -> (32,45,45)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding='same')
        self.pool2 = nn.MaxPool2d(2)
        # Stage 3: (32,45,45) -> (64,22,22)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding='same')
        self.pool3 = nn.MaxPool2d(2)
        # One shared activation module, reused after every conv and after fc1.
        self.relu = nn.ReLU()
        # Classifier head on the flattened (64,22,22) feature map.
        self.fc1 = nn.Linear(64 * 22 * 22, 128)
        self.fc2 = nn.Linear(128, 5)

    def forward(self, x):
        """Return the class scores, shape (batch, 5), for a batch of images."""
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        for conv, pool in stages:
            x = pool(self.relu(conv(x)))
        # Flatten everything except the batch dimension.
        flat = x.view(x.size(0), -1)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)



In [None]:
# Instantiate the network; it is moved to the GPU later, in the training cell.
model=Model()

In [None]:
def accuracy(model,batch,loss_fn):
    """Run the model on one batch and return its loss and number of correct predictions.

    Args:
        model: network that maps a batch of inputs to per-class scores.
        batch: an ``(inputs, labels)`` pair of tensors.
        loss_fn: callable ``loss_fn(outputs, labels)`` returning the batch loss.

    Returns:
        A tuple ``(loss, correct, size)``: the value returned by ``loss_fn``,
        the number of correct predictions as a 0-dim tensor, and the number
        of samples in the batch as an int.
    """
    imgs,labels=batch
    # Follow the model to whatever device it lives on instead of assuming
    # CUDA, so this also works on CPU-only machines. A model without
    # parameters leaves the batch where it is.
    param=next(model.parameters(),None)
    if param is not None:
        imgs=imgs.to(param.device)
        labels=labels.to(param.device)
    outputs=model(imgs)
    # The predicted class is the index of the highest score.
    _,pred=torch.max(outputs,dim=1)
    acc=torch.sum(pred==labels).item()
    loss=loss_fn(outputs,labels)
    return loss,torch.tensor(acc),len(labels)

@torch.no_grad() 
def evaluate(model,loader,loss_fn):
    """Compute the per-sample loss and the accuracy of ``model`` over ``loader``.

    The model is switched to eval mode and gradients are disabled.

    Args:
        model: network to evaluate.
        loader: iterable of ``(inputs, labels)`` batches.
        loss_fn: callable returning a scalar loss per batch. Its ``reduction``
            attribute, when present, tells whether the scalar is a per-batch
            mean (the assumed default) or a per-batch sum.

    Returns:
        ``(loss, accuracy)`` as 0-dim tensors: the average loss per sample
        and the fraction of correctly classified samples.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.eval()
    sum_reduced=getattr(loss_fn,"reduction","mean")=="sum"
    total_loss=0.0
    total_correct=0
    total_seen=0
    for batch in loader:
        loss,correct,size=accuracy(model,batch,loss_fn)
        loss=float(loss)
        # A mean-reduced loss is already averaged over its batch, so it is
        # weighted by the batch size to get an exact per-sample average even
        # when the last batch is smaller. Dividing it by the batch size again
        # would under-report the loss by a factor of the batch size.
        total_loss+=loss if sum_reduced else loss*size
        total_correct+=int(correct)
        total_seen+=size
    if total_seen==0:
        raise ValueError("evaluate() needs a loader that yields at least one sample")
    return torch.tensor(total_loss/total_seen),torch.tensor(total_correct/total_seen)

In [None]:
# Open a Comet experiment in the "data-augmentation" project of the "COMP6252"
# workspace. Automatic metric and output logging are switched off; metrics
# are logged explicitly from the training loop.
experiment = comet_ml.Experiment(api_key=comet_api_key,workspace="COMP6252",project_name="data-augmentation",auto_metric_logging=False, auto_output_logging=False)

In [None]:
from torch.optim import Adam
loss_fn=nn.CrossEntropyLoss()
# Choose the device once: the GPU when one is available, the CPU otherwise.
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Move the model to its device BEFORE building the optimizer, as the
# torch.optim documentation recommends, so the optimizer is constructed over
# the parameters that are actually trained.
model=model.to(device)
optimizer=Adam(model.parameters())
epochs=100
for epoch in range(epochs):
    model.train()
    # Exponentially smoothed training loss of the current epoch; it is
    # neither logged nor printed below.
    epoch_loss=0.
    for imgs,labels in train_loader:
        imgs=imgs.to(device)
        labels=labels.to(device)
        outputs=model(imgs)
        loss=loss_fn(outputs,labels)
        loss.backward()
        optimizer.step()
        # Clear the gradients right after the update so that the next batch
        # starts from zero.
        optimizer.zero_grad()
        epoch_loss=0.9*epoch_loss+0.1*loss.item()
    # Measure both splits with the model in eval mode; evaluate() switches
    # the mode and model.train() restores it at the top of the next epoch.
    vl,va=evaluate(model,valid_loader,loss_fn)
    tl,ta=evaluate(model,train_loader,loss_fn)
    experiment.log_metrics({'loss':tl,'valid_loss':vl,'train_acc':ta,'val_accuracy':va}, epoch=epoch)
    # Print a progress report every tenth epoch.
    if epoch%10==0:
        print("Epoch[{}]".format(epoch))
        print("Train: loss={:.4f}, accuracy={:.4f}".format(tl,ta))
        print("Valid: loss={:.4f}, accuracy={:.4f}".format(vl,va))
        print("----------------------")


In [None]:
# Close the Comet experiment now that training has finished.
experiment.end()