# Import

In [None]:
import os
import sys
import time
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

sys.version

%pwd

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch import optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

from torchvision import models,transforms,datasets
from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid

torch.__version__

Check if GPU is present:

In [None]:
# Select the first CUDA device when one is visible to PyTorch, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

print('Using gpu: %s ' % torch.cuda.is_available())

# Get dataset

## Download dataset

In [None]:
# !wget --no-check-certificate https://xxxxx/dogs-vs-cats.zip -O /tmp/dogs_and_cats.zip

# Path of the dataset archive when it is already available on a mounted volume.
# NOTE(review): the "Get from Google Drive" section reassigns data_zip, so this value is
# only used when those cells are skipped.
data_zip = "/mnt/datasets/ml/dogs-vs-cats.zip"

## Get from Google Drive

In [None]:
# Mount Google Drive
# NOTE(review): google.colab is only importable inside a Colab runtime; skip this cell elsewhere.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# List the mounted Drive to confirm the mount succeeded.
%ls /content/drive/MyDrive/

# Root of the mounted Drive; the dataset archive is expected under <drive_root>/dataset.
drive_root = '/content/drive/MyDrive/'

%ls $drive_root/dataset

# Overrides any earlier data_zip so later cells read the archive from Drive.
data_zip = os.path.join(drive_root, 'dataset/dogs-vs-cats.zip')

# Prepare dataset

In [None]:
# Create the working directory (-p: no error if it already exists).
%mkdir -p /tmp/dogs-vs-cats

# Directory into which the archives are extracted and the train/valid split is built.
data_root = '/tmp/dogs-vs-cats'

# Changes the kernel's working directory for all following cells.
%cd $data_root

# Confirm the archive referenced by data_zip exists before extracting it.
%ls $data_zip

In [None]:
import zipfile

# Unpack the downloaded archive into data_root. The context manager closes the
# archive handle even when extraction raises (e.g. a corrupt zip or a full disk).
with zipfile.ZipFile(data_zip, 'r') as zip_ref:
    zip_ref.extractall(data_root)

In [None]:
%ls /tmp/dogs-vs-cats

In [None]:
# The outer archive contains train.zip; extract it next to it so the images end up
# below data_root. The context manager closes the handle even if extraction fails.
train_zip = os.path.join(data_root, 'train.zip')
with zipfile.ZipFile(train_zip, 'r') as zip_ref:
    zip_ref.extractall(data_root)

In [None]:
%ls /tmp/dogs-vs-cats

In [None]:
%ls /tmp/dogs-vs-cats/train/ | more

# Create validation data set

In [None]:
from glob import glob

# Collect every JPEG exactly one directory below data_root. glob returns paths in
# arbitrary (filesystem-dependent) order, so sort them to make index-based selection
# over `files` repeatable between runs and machines.
files = sorted(glob(os.path.join(data_root, '*/*.jpg')))

print(data_root)

no_of_images = len(files)
print(f'Total no of images {no_of_images}')

In [None]:
os.mkdir(os.path.join(data_root,'valid'))

%ls $data_root

In [None]:
# One sub-directory per class in each split (the <split>/<class>/ layout that
# datasets.ImageFolder reads later). exist_ok=True keeps the cell re-runnable.
for t in ['train', 'valid']:
    for folder in ['dog', 'cat']:
        os.makedirs(os.path.join(data_root, t, folder), exist_ok=True)

In [None]:
# Check the contents of the train split directory.
%ls $data_root/train

In [None]:
# Check the contents of the valid split directory.
%ls $data_root/valid

In [None]:
import numpy as np

# Number of images held out for validation; everything else becomes training data.
VALID_SIZE = 2000
# Fixed seed so the split is reproducible for a given ordering of `files`.
SPLIT_SEED = 42

shuffle = np.random.RandomState(SPLIT_SEED).permutation(no_of_images)

# Move each image into <data_root>/<split>/<class>/.
# The class is taken from the file-name prefix before the first dot
# (presumably names look like "cat.0.jpg" -- verify against the extracted archive).
# NOTE: this cell moves files, so it can only be run once per extraction.
for split, indices in (('valid', shuffle[:VALID_SIZE]), ('train', shuffle[VALID_SIZE:])):
    for i in indices:
        image = os.path.basename(files[i])
        folder = image.split('.')[0]
        os.rename(files[i], os.path.join(data_root, split, folder, image))

In [None]:
%ls /tmp/dogs-vs-cats/train/

In [None]:
%ls /tmp/dogs-vs-cats/valid/

# Load data into PyTorch tensors

In [None]:

# Resize every image to 224x224, convert it to a tensor and normalise each channel.
# The mean/std values are the per-channel statistics torchvision documents for its
# ImageNet-pretrained models.
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Build the datasets from data_root instead of repeating the hard-coded path.
train = datasets.ImageFolder(os.path.join(data_root, 'train'), simple_transform)
valid = datasets.ImageFolder(os.path.join(data_root, 'valid'), simple_transform)

In [None]:
print(train.classes)  # Class names, taken from the sub-folder names
print(train.class_to_idx)  # Mapping from class name to integer label
# train.imgs holds one (path, class_index) pair per image; show a few instead of
# dumping the whole list into the cell output.
print(train.imgs[:5])
print(f'{len(train.imgs)} training images in total')

In [None]:
def imshow(inp):
    """Plot a normalised image tensor with matplotlib.

    The tensor's first axis is moved to the end (channels-first to channels-last),
    the per-channel normalisation is reversed using the mean/std defined below,
    values are clipped to [0, 1], and the result is passed to ``plt.imshow``.

    Args:
        inp: 3-D tensor exposing ``.numpy()``, with channels on the first axis.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    channels_last = inp.numpy().transpose((1, 2, 0))
    # Invert (x - mean) / std, then clip so matplotlib accepts the float image.
    restored = np.clip(channel_std * channels_last + channel_mean, 0, 1)
    plt.imshow(restored)

In [None]:
# train[50] is an (image_tensor, label) pair; display only the image.
imshow(train[50][0])

# Sample 1

## Create data generators

In [None]:
# Batches of 64 images loaded by 2 worker processes; only the training set is shuffled.
train_data_gen = DataLoader(train, batch_size=64, shuffle=True, num_workers=2)
valid_data_gen = DataLoader(valid, batch_size=64, num_workers=2)

In [None]:
# Number of samples per split, used to turn running totals into per-sample averages.
dataset_sizes = {
    split: len(loader.dataset)
    for split, loader in (('train', train_data_gen), ('valid', valid_data_gen))
}

In [None]:
# Lookup table from phase name to its loader, read by train_model.
dataloaders = dict(train=train_data_gen, valid=valid_data_gen)

## Create a network

In [None]:
# ResNet-18 with pretrained weights; its final fully connected layer is replaced by a
# fresh 2-output layer (one output per class: cat, dog).
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=2)

if torch.cuda.is_available():
    model_ft = model_ft.cuda()
    print('cuda true')

In [None]:
# Display the layer structure of the network, including the replaced fc layer.
model_ft

In [None]:
# Loss and Optimizer
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
# Use the learning_rate constant so changing it above actually affects the optimizer.
optimizer_ft = optim.SGD(model_ft.parameters(), lr=learning_rate, momentum=0.9)
# Multiply the learning rate by 0.1 every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=5, loaders=None, sizes=None):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' pass (with parameter updates) followed by a 'valid'
    pass (gradients disabled). The weights of the epoch with the highest validation
    accuracy are snapshotted and restored before returning.

    Args:
        model: ``nn.Module`` to train. Batches are moved to the device of its first
            parameter.
        criterion: Callable ``criterion(outputs, labels)`` returning a single-element
            loss tensor. The reported epoch loss assumes it is the batch mean (the
            default of ``nn.CrossEntropyLoss``).
        optimizer: Optimizer that updates ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training pass.
        num_epochs: Number of epochs to run.
        loaders: Optional dict with 'train' and 'valid' iterables yielding
            ``(inputs, labels)`` batches. Defaults to the notebook-level
            ``dataloaders``.
        sizes: Optional dict with the sample count of each phase. Defaults to the
            notebook-level ``dataset_sizes``.

    Returns:
        The same ``model`` object, with the best validation weights loaded.
    """
    import copy  # local import: not provided by the notebook's import cells

    if loaders is None:
        loaders = dataloaders
    if sizes is None:
        sizes = dataset_sizes

    # Follow the model's device rather than CUDA availability, so a model that was
    # left on the CPU still trains on a machine that has a GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    since = time.time()

    # state_dict() returns references to the live tensors; without a deep copy the
    # "best" weights would silently track the most recent weights.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            is_train = phase == 'train'
            model.train(is_train)  # training mode for 'train', eval mode for 'valid'

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in loaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Build the autograd graph only when it is needed for backward().
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    preds = outputs.argmax(dim=1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean: weight it by the batch size so that dividing
                # by the dataset size below yields the per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            if is_train:
                # Since PyTorch 1.1 the scheduler must be stepped after the optimizer;
                # stepping it first skips the first value of the schedule.
                scheduler.step()

            epoch_loss = running_loss / sizes[phase]
            epoch_acc = running_corrects / sizes[phase]  # plain float division

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Snapshot (deep copy) the weights whenever validation accuracy improves.
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Fine-tune the ResNet for 25 epochs; train_model returns the model with its best weights loaded.
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=25)

# Sample 2

## Create data generators

In [None]:
# Batches of 32 images loaded by 3 worker processes.
train_data_loader = torch.utils.data.DataLoader(train, batch_size=32, num_workers=3, shuffle=True)
# Validation metrics are aggregated over the whole set, so shuffling only costs time
# and makes batch order non-deterministic; keep the natural order.
valid_data_loader = torch.utils.data.DataLoader(valid, batch_size=32, num_workers=3, shuffle=False)

## Create a network

In [None]:
class Net(nn.Module):
    """Small CNN with two conv layers and three fully connected layers, 2 outputs.

    ``fc1`` expects 56180 flattened features, i.e. 20 channels of 53x53. That is
    what the conv/pool stack produces for 224x224 inputs
    (224 -> 220 -> 110 -> 106 -> 53), so other input sizes will not fit.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in a fixed order so random initialisation is reproducible.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(20 * 53 * 53, 500)
        self.fc2 = nn.Linear(500, 50)
        self.fc3 = nn.Linear(50, 2)

    def forward(self, x):
        """Return per-class log-probabilities, shape (batch, 2)."""
        # Block 1: conv -> 2x2 max-pool -> ReLU
        features = self.conv1(x)
        features = F.relu(F.max_pool2d(features, 2))

        # Block 2: conv -> channel dropout -> 2x2 max-pool -> ReLU
        features = self.conv2_drop(self.conv2(features))
        features = F.relu(F.max_pool2d(features, 2))

        # Flatten everything except the batch dimension.
        batch = features.size(0)
        flat = features.view(batch, -1)

        # Classifier head; dropout is only active while the module is in training mode.
        hidden = F.dropout(F.relu(self.fc1(flat)), training=self.training)
        hidden = F.dropout(F.relu(self.fc2(hidden)), training=self.training)
        logits = self.fc3(hidden)
        return F.log_softmax(logits, dim=1)

In [None]:
model = Net()

# Remember whether CUDA is available; fit() reads this flag to decide whether to move batches.
is_cuda = torch.cuda.is_available()
if is_cuda:
    model.cuda()

In [None]:
# Notebook-level optimizer; fit() reads it as a global rather than taking it as an argument.
optimizer = optim.SGD(model.parameters(),lr=0.01,momentum=0.5)

In [None]:
def fit(epoch,model,data_loader,phase='training',volatile=False):
    """Run one pass over ``data_loader`` and return ``(mean_loss, accuracy_percent)``.

    In the 'training' phase the model is put in train mode and updated with the
    notebook-level ``optimizer``. In the 'validation' phase the model is put in eval
    mode. For any phase other than 'training' no gradients are tracked and no
    parameters are updated.

    Args:
        epoch: Epoch number supplied by the caller; not used inside the function.
        model: ``nn.Module`` whose output is per-class log-probabilities (the loss
            is ``F.nll_loss``). Batches are moved to the device of its first parameter.
        data_loader: Iterable of ``(data, target)`` batches exposing ``.dataset``
            with a length.
        phase: 'training' or 'validation'.
        volatile: Ignored; kept so existing calls keep working. The ``volatile``
            flag no longer exists in PyTorch, and passing it positionally to
            ``Variable`` set ``requires_grad`` on the inputs instead.

    Returns:
        Tuple of Python floats: summed loss divided by the dataset size, and the
        accuracy in percent.
    """
    is_training = phase == 'training'
    if is_training:
        model.train()
    if phase == 'validation':
        model.eval()

    # Follow the model's device so batches always end up where the parameters are.
    first_param = next(model.parameters(), None)

    running_loss = 0.0
    running_correct = 0
    for data, target in data_loader:
        if first_param is not None:
            data, target = data.to(first_param.device), target.to(first_param.device)
        if is_training:
            optimizer.zero_grad()

        # Only build the autograd graph when backward() will actually be called.
        with torch.set_grad_enabled(is_training):
            output = model(data)
            loss = F.nll_loss(output,target)

        # Accumulate the summed (not averaged) loss so that dividing by the dataset
        # size is exact even when the last batch is smaller.
        running_loss += F.nll_loss(output.detach(),target,reduction='sum').item()
        preds = output.detach().argmax(dim=1)
        running_correct += (preds == target).sum().item()

        if is_training:
            loss.backward()
            optimizer.step()

    loss = running_loss/len(data_loader.dataset)
    accuracy = 100. * running_correct/len(data_loader.dataset)

    print(f'{phase} loss is {loss:{5}.{2}} and {phase} accuracy is {running_correct}/{len(data_loader.dataset)}{accuracy:{10}.{4}}')
    return loss,accuracy

In [None]:
# Per-epoch history, plotted in the following cells.
train_losses, train_accuracy = [], []
val_losses, val_accuracy = [], []

for epoch in range(1, 20):
    # Training pass first, so validation measures the weights just updated.
    epoch_loss, epoch_accuracy = fit(epoch, model, train_data_loader, phase='training')
    train_losses.append(epoch_loss)
    train_accuracy.append(epoch_accuracy)

    val_epoch_loss, val_epoch_accuracy = fit(epoch, model, valid_data_loader, phase='validation')
    val_losses.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)

In [None]:
# Loss curves: training as blue dots, validation as a red line.
plt.plot(range(1,len(train_losses)+1),train_losses,'bo',label = 'training loss')
plt.plot(range(1,len(val_losses)+1),val_losses,'r',label = 'validation loss')
# Label the figure so it can be read on its own.
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Training vs. validation loss')
plt.legend()
# show() renders the figure and keeps the Legend repr out of the cell output.
plt.show()

In [None]:
# Accuracy curves: training as blue dots, validation as a red line.
plt.plot(range(1,len(train_accuracy)+1),train_accuracy,'bo',label = 'train accuracy')
plt.plot(range(1,len(val_accuracy)+1),val_accuracy,'r',label = 'val accuracy')
# Label the figure so it can be read on its own.
plt.xlabel('epoch')
plt.ylabel('accuracy (%)')
plt.title('Training vs. validation accuracy')
plt.legend()
# show() renders the figure and keeps the Legend repr out of the cell output.
plt.show()