In [1]:
import numpy as np
import time
import os
import matplotlib.pyplot as plt
from fastai.vision import *
import torch
from torch.utils.data import DataLoader,Dataset
from torchvision import datasets,models as torch_models
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

In [2]:
%load_ext tensorboard

In [3]:
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip \
    -O /tmp/cats_and_dogs_filtered.zip

--2021-08-27 05:41:17--  https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.5.128, 74.125.206.128, 64.233.184.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.251.5.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68606236 (65M) [application/zip]
Saving to: ‘/tmp/cats_and_dogs_filtered.zip’


2021-08-27 05:41:19 (43.8 MB/s) - ‘/tmp/cats_and_dogs_filtered.zip’ saved [68606236/68606236]



In [4]:
# ### Using tensorboard with google colab live ###
# !wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
# !unzip ngrok-stable-linux-amd64.zip

In [5]:
# from IPython import get_ipython
# import os
# LOG_DIR = 'runs'
# os.makedirs(LOG_DIR, exist_ok=True)
# get_ipython().system_raw(
#     'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
#     .format(LOG_DIR))

In [6]:
# get_ipython().system_raw('./ngrok http 6006 &')

In [7]:
# ! curl -s http://localhost:4040/api/tunnels | python3 -c \
#     "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

In [8]:
import os
import zipfile

with zipfile.ZipFile('/tmp/cats_and_dogs_filtered.zip','r') as f:
  f.extractall('/tmp')

In [9]:
# !ls /tmp/cats_and_dogs_filtered/validation

In [10]:
train_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor()
])

test_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor()
])

In [11]:
train_dataset = datasets.ImageFolder(root='/tmp/cats_and_dogs_filtered/train',transform=train_transforms)
test_dataset = datasets.ImageFolder(root='/tmp/cats_and_dogs_filtered/validation',transform=test_transforms)

In [12]:
BATCH_SIZE = 32
EPOCHS = 10

DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
train_dataloader = DataLoader(train_dataset,batch_size=BATCH_SIZE,shuffle = True)
test_dataloader = DataLoader(test_dataset,batch_size = BATCH_SIZE,shuffle = True)

In [13]:
image,label = next(iter(train_dataloader))
print(image.shape,label.shape)

torch.Size([32, 3, 224, 224]) torch.Size([32])


In [14]:
net = torch_models.resnet34(pretrained = True)
for param in net.parameters():
  param.requires_grad = True
num_feats = net.fc.in_features
net.fc = torch.nn.Linear(num_feats,1)

net.to(DEVICE)
# print(net)

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth


  0%|          | 0.00/83.3M [00:00<?, ?B/s]

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [15]:
optimizer = torch.optim.Adam(net.parameters(),lr = 0.0001)
criterion = torch.nn.BCEWithLogitsLoss()

In [16]:
def model_acc(out,labels):
  preds = (out.cpu().numpy() > 0.5)
  # print(preds,labels.cpu().numpy())
  preds = preds.reshape(-1,1)
  label_numpy = labels.cpu().numpy()
  res = np.mean(preds == label_numpy)
  # print(res)
  # print(preds.shape,labels.shape)
  # print(preds.cpu().numpy(),labels.cpu().numpy())
  return res

In [17]:
def train_loop(model,optimizer,criterion,train_dl,valid_dl,writer = None):
  
  train_loss = []
  test_loss = []
  train_acc = []
  test_acc = []
  for epoch in range(EPOCHS):
    t0 = time.time()
    train_batch_loss = []
    train_batch_acc = []
    for inputs,targets in train_dl:
      targets = targets.view(-1,1).float()
      # print(inputs.size(),targets.size())
      inputs,targets = inputs.to(DEVICE),targets.to(DEVICE)
      optimizer.zero_grad()
      outputs = model(inputs)
      # print(outputs.size(),targets.size())
      loss = criterion(outputs,targets)
      # for param in model.parameters():
      #   print(param.requires_grad)
      loss.backward()
      optimizer.step()
      train_batch_loss.append(loss.item())
      # print(outputs.clone().detach().cpu().numpy(),targets.cpu().numpy())
      train_batch_acc.append(model_acc(outputs.clone().detach(),targets))
      writer.add_scalar('Loss/train',loss.item(),epoch)
      writer.add_scalar('Accuracy/train',train_batch_acc[-1],epoch)
    epoch_loss = np.mean(train_batch_loss)
    train_loss.append(epoch_loss)
    epoch_acc = np.mean(train_batch_acc)
    train_acc.append(epoch_acc)
    # for param in model.parameters():
    #   print(param.requires_grad)

    test_batch_loss = []
    test_batch_acc = []
    for inputs,targets in valid_dl:
      targets = targets.view(-1,1).float()
      inputs,targets = inputs.to(DEVICE),targets.to(DEVICE)
      outputs = model(inputs)
      loss = criterion(outputs,targets)
      test_batch_loss.append(loss.item())
      test_batch_acc.append(model_acc(outputs.clone().detach(),targets))
      writer.add_scalar('Loss/test',loss.item(),epoch)
      writer.add_scalar('Accuracy/test',test_batch_acc[-1],epoch)
    epoch_test_loss = np.mean(test_batch_loss)
    test_loss.append(epoch_test_loss)
    epoch_test_acc = np.mean(test_batch_acc)
    test_acc.append(epoch_test_acc)
    
    time_taken = time.time() - t0
    print('-----------------------------------------------------------------------------')
    print(f'Epoch : {epoch},Time : {time_taken:.4f},train loss : {epoch_loss:.4f},test loss : {epoch_test_loss:.4f}')
    print(f'train acc : {epoch_acc:.4f} , test acc : {epoch_test_acc:.4f}')
    # print('train acc list : ',train_acc)
    # print('test acc list : ',test_acc)
    print('-----------------------------------------------------------------------------')
  writer.flush()
  return train_loss,test_loss,train_acc,test_acc

In [18]:
writer = SummaryWriter()

In [21]:
%tensorboard --logdir=runs/ --port=9000

Reusing TensorBoard on port 9000 (pid 132), started 0:07:06 ago. (Use '!kill 132' to kill it.)

<IPython.core.display.Javascript object>

In [20]:
train_loss,test_loss,train_acc,test_acc = train_loop(net,optimizer,criterion,train_dataloader,test_dataloader,writer = writer)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


-----------------------------------------------------------------------------
Epoch : 0,Time : 42.3746,train loss : 0.1112,test loss : 0.0773
train acc : 0.9588 , test acc : 0.9707
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
Epoch : 1,Time : 42.3122,train loss : 0.0275,test loss : 0.0537
train acc : 0.9916 , test acc : 0.9795
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
Epoch : 2,Time : 42.0916,train loss : 0.0059,test loss : 0.0393
train acc : 0.9995 , test acc : 0.9854
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
Epoch : 3,Time : 42.2806,train loss : 0.0022,test loss : 0.0523
train acc : 1.0000 , test acc : 0.9785
------------------------------------------

In [None]:
# writer.close()