In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os


In [None]:
pip install split-folders

[31mERROR: Operation cancelled by user[0m[31m
[0m

In [None]:
import os

import splitfolders

# Folder holding the original images and the folder that receives the split copy.
SOURCE_DIR = '/content/drive/MyDrive/Dogs'
SPLIT_DIR = "/content/drive/MyDrive/Newdogs"

# Fail fast with an actionable message instead of silently copying 0 files:
# the source lives under /content/drive, so Drive has to be mounted first.
if not os.path.isdir(SOURCE_DIR):
    raise FileNotFoundError(
        f"{SOURCE_DIR} does not exist - mount Google Drive at /content/drive before running this cell"
    )

# 80% / 10% / 10% split with a fixed seed.
splitfolders.ratio(SOURCE_DIR, output=SPLIT_DIR, seed=1337, ratio=(.8, 0.1, 0.1))

Copying files: 0 files [00:00, ? files/s]

KeyboardInterrupt: 

In [None]:
import torch
import torchvision
from torchvision import datasets
from torchvision import transforms as T # for simplifying the transforms
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split
from torchvision import models

In [None]:
# Kaggle doesn't have 'timm' installed by default
!pip install timm
import timm
from timm.loss import LabelSmoothingCrossEntropy

Collecting timm
  Downloading timm-0.9.16-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: timm
Successfully installed timm-0.9.16


In [None]:
# Install a global "ignore" filter: no Python warnings are shown from here on.
import warnings
warnings.filterwarnings("ignore")

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import sys
from tqdm import tqdm
import time
import copy

In [None]:
def get_classes(data_dir):
    """Return the ``classes`` attribute of an ``ImageFolder`` built on ``data_dir``.

    Args:
        data_dir: Path handed unchanged to ``datasets.ImageFolder``.

    Returns:
        Whatever ``ImageFolder(data_dir).classes`` holds (the class names).
    """
    return datasets.ImageFolder(data_dir).classes

In [None]:
def get_data_loaders(data_dir, batch_size, train=False):
    """Build the DataLoader(s) for the training split or for the val/test splits.

    Args:
        data_dir: Root folder expected to contain ``train/``, ``val/`` and
            ``test/`` sub-folders. If a split folder is not found under
            ``data_dir``, the historical hard-coded Drive location is used
            instead, so existing calls keep working.
        batch_size: Batch size passed to every DataLoader created here.
        train: When True build the (augmented) training loader, otherwise the
            validation and test loaders.

    Returns:
        ``(train_loader, n_train)`` when ``train`` is True, otherwise
        ``(val_loader, test_loader, n_val, n_test)``.
    """
    # The original code joined data_dir with an absolute path, which makes
    # os.path.join drop data_dir entirely. Keep that location as a fallback only.
    legacy_root = "/content/drive/MyDrive/Newdogs"

    def split_dir(split):
        """Resolve the folder of one split, preferring data_dir over the legacy root."""
        candidate = os.path.join(data_dir, split)
        if os.path.isdir(candidate):
            return candidate
        return os.path.join(legacy_root, split)

    if train:
        # Training: random flips / colour jitter / erasing on top of the
        # resize + centre-crop + ImageNet normalisation used for evaluation.
        transform = T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomVerticalFlip(),
            T.RandomApply(torch.nn.ModuleList([T.ColorJitter()]), p=0.25),
            T.Resize(256),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize(timm.data.IMAGENET_DEFAULT_MEAN, timm.data.IMAGENET_DEFAULT_STD), # imagenet means
            T.RandomErasing(p=0.1, value='random')
        ])
        train_data = datasets.ImageFolder(split_dir("train"), transform=transform)
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
        return train_loader, len(train_data)

    # Validation / test: deterministic preprocessing only, no augmentation.
    transform = T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(timm.data.IMAGENET_DEFAULT_MEAN, timm.data.IMAGENET_DEFAULT_STD), # imagenet means
    ])
    val_data = datasets.ImageFolder(split_dir("val"), transform=transform)
    test_data = datasets.ImageFolder(split_dir("test"), transform=transform)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=4)
    return val_loader, test_loader, len(val_data), len(test_data)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Root of the split dataset on Drive (the folder holding train/, val/ and test/).
# An absolute path keeps this value meaningful however get_data_loaders joins it.
dataset_path = "/content/drive/MyDrive/Newdogs"

In [None]:
# Mount Google Drive at /content/drive; every dataset path used in this
# notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Training loader with batches of 128, validation/test loaders with batches of 32.
(train_loader, train_data_len) = get_data_loaders(dataset_path, 128, train=True)
(val_loader, test_loader, valid_data_len, test_data_len) = get_data_loaders(dataset_path, 32, train=False)


In [None]:
# Class names as reported by ImageFolder for the training split, plus how many there are.
classes = get_classes("/content/drive/MyDrive/Newdogs/train/")
print(classes, len(classes))

['Bacterial_dermatosis', 'Fungal_infections', 'Healthy', 'Hypersensitivity_allergic_dermatosis'] 4


In [None]:
# Per-phase lookup tables ("train" / "val") read by train_model():
# the loader to iterate and the number of samples behind it.
dataloaders = dict(train=train_loader, val=val_loader)
dataset_sizes = dict(train=train_data_len, val=valid_data_len)

In [None]:
# Length of each loader, i.e. batches per pass (train, val, test).
print(len(train_loader), len(val_loader), len(test_loader))

3 2 2


In [None]:
# Number of samples in each split (train, val, test).
print(train_data_len, valid_data_len, test_data_len)

351 42 47


In [None]:
# Device used for the model and every batch: CUDA when torch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cpu')

In [None]:
# GitHub repo ("owner/name:branch") and entry-point name passed to torch.hub.
HUB_URL = "SharanSMenon/swin-transformer-hub:main"
MODEL_NAME = "swin_tiny_patch4_window7_224"
# check hubconf for more models.
# NOTE(review): this fetches code from the repo's "main" branch, which is not pinned
# to a commit - consider pinning if reproducibility matters.
model = torch.hub.load(HUB_URL, MODEL_NAME, pretrained=True) # load from torch hub

Downloading: "https://github.com/SharanSMenon/swin-transformer-hub/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth" to /root/.cache/torch/hub/checkpoints/swin_tiny_patch4_window7_224.pth
100%|██████████| 109M/109M [00:01<00:00, 94.5MB/s] 


In [None]:
# Freeze every parameter of the loaded model; the head created below is new,
# so its parameters are the only ones left with requires_grad=True.
model.requires_grad_(False)

# Replace the classifier head: in_features -> 512 -> one logit per class.
n_inputs = model.head.in_features
head_layers = [
    nn.Linear(n_inputs, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512, len(classes)),
]
model.head = nn.Sequential(*head_layers)
model = model.to(device)
print(model.head)

Sequential(
  (0): Linear(in_features=768, out_features=512, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.3, inplace=False)
  (3): Linear(in_features=512, out_features=4, bias=True)
)


In [None]:
# Loss: timm's label-smoothing cross-entropy, moved to the same device as the model.
criterion = LabelSmoothingCrossEntropy()
criterion = criterion.to(device)
# Only the parameters of model.head are handed to the optimizer.
optimizer = optim.AdamW(model.head.parameters(), lr=0.001)

In [None]:
# lr scheduler: StepLR with step_size=3 and gamma=0.97, i.e. the learning rate is
# multiplied by 0.97 every 3 scheduler steps (train_model steps it once per epoch).
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.97)

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=40):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a 'train' phase followed by a 'val' phase. Batches come
    from the notebook-level ``dataloaders`` dict, per-phase sample counts from
    ``dataset_sizes``, and tensors are moved to the notebook-level ``device``.

    Args:
        model: Network to optimise; modified in place.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after the train phase.
        num_epochs: Number of train+val epochs to run.

    Returns:
        The same ``model`` object, with the state dict of the epoch that
        reached the highest validation accuracy loaded back in.
    """
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print("-"*10)

        for phase in ['train', 'val']: # We do training and validation phase per epoch
            is_train = phase == 'train'
            if is_train:
                model.train() # model to training mode
            else:
                model.eval() # model to evaluate

            running_loss = 0.0
            # Plain Python int (not a tensor) so the accuracy below is an
            # ordinary float even when the loader yields no batches.
            running_corrects = 0

            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train): # no autograd makes validation go faster
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1) # used for accuracy
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # criterion returns a batch mean, so weight it by the batch size
                # to get a correct per-sample average at the end of the phase.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            if is_train:
                scheduler.step() # step at end of epoch

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print("{} Loss: {:.4f} Acc: {:.4f}".format(phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict()) # keep the best validation accuracy model
        print()

    # Wall-clock time of the whole run (training and validation phases together).
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print("Best Val Acc: {:.4f}".format(best_acc))

    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Run the full 40-epoch schedule; model_ft is the model returned by train_model
# (best-validation weights loaded). The logged run below took about 100 minutes on CPU.
model_ft = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=40)

Epoch 0/39
----------


100%|██████████| 3/3 [02:52<00:00, 57.46s/it]


train Loss: 1.3323 Acc: 0.3504


100%|██████████| 2/2 [00:23<00:00, 11.51s/it]


val Loss: 1.2337 Acc: 0.4524

Epoch 1/39
----------


100%|██████████| 3/3 [02:26<00:00, 48.75s/it]


train Loss: 1.1293 Acc: 0.5641


100%|██████████| 2/2 [00:14<00:00,  7.41s/it]


val Loss: 1.1288 Acc: 0.6429

Epoch 2/39
----------


100%|██████████| 3/3 [02:23<00:00, 47.94s/it]


train Loss: 0.9804 Acc: 0.7094


100%|██████████| 2/2 [00:14<00:00,  7.40s/it]


val Loss: 1.0575 Acc: 0.5952

Epoch 3/39
----------


100%|██████████| 3/3 [02:17<00:00, 45.75s/it]


train Loss: 0.8933 Acc: 0.7436


100%|██████████| 2/2 [00:15<00:00,  7.60s/it]


val Loss: 0.9836 Acc: 0.6667

Epoch 4/39
----------


100%|██████████| 3/3 [02:19<00:00, 46.40s/it]


train Loss: 0.8143 Acc: 0.7749


100%|██████████| 2/2 [00:15<00:00,  7.53s/it]


val Loss: 0.9281 Acc: 0.7143

Epoch 5/39
----------


100%|██████████| 3/3 [02:16<00:00, 45.51s/it]


train Loss: 0.7350 Acc: 0.8319


100%|██████████| 2/2 [00:14<00:00,  7.48s/it]


val Loss: 0.8965 Acc: 0.7143

Epoch 6/39
----------


100%|██████████| 3/3 [02:23<00:00, 47.82s/it]


train Loss: 0.7094 Acc: 0.8433


100%|██████████| 2/2 [00:14<00:00,  7.46s/it]


val Loss: 0.8979 Acc: 0.6667

Epoch 7/39
----------


100%|██████████| 3/3 [02:13<00:00, 44.42s/it]


train Loss: 0.6479 Acc: 0.8832


100%|██████████| 2/2 [00:14<00:00,  7.45s/it]


val Loss: 0.8646 Acc: 0.7381

Epoch 8/39
----------


100%|██████████| 3/3 [02:12<00:00, 44.21s/it]


train Loss: 0.6163 Acc: 0.8974


100%|██████████| 2/2 [00:15<00:00,  7.57s/it]


val Loss: 0.8576 Acc: 0.7381

Epoch 9/39
----------


100%|██████████| 3/3 [02:12<00:00, 44.16s/it]


train Loss: 0.5795 Acc: 0.9259


100%|██████████| 2/2 [00:15<00:00,  7.80s/it]


val Loss: 0.8615 Acc: 0.7381

Epoch 10/39
----------


100%|██████████| 3/3 [02:19<00:00, 46.34s/it]


train Loss: 0.5745 Acc: 0.9259


100%|██████████| 2/2 [00:15<00:00,  7.53s/it]


val Loss: 0.8680 Acc: 0.6905

Epoch 11/39
----------


100%|██████████| 3/3 [02:10<00:00, 43.39s/it]


train Loss: 0.5317 Acc: 0.9487


100%|██████████| 2/2 [00:14<00:00,  7.29s/it]


val Loss: 0.8677 Acc: 0.6905

Epoch 12/39
----------


100%|██████████| 3/3 [02:12<00:00, 44.17s/it]


train Loss: 0.5141 Acc: 0.9544


100%|██████████| 2/2 [00:14<00:00,  7.34s/it]


val Loss: 0.8358 Acc: 0.7619

Epoch 13/39
----------


100%|██████████| 3/3 [02:07<00:00, 42.64s/it]


train Loss: 0.5087 Acc: 0.9544


100%|██████████| 2/2 [00:14<00:00,  7.46s/it]


val Loss: 0.8282 Acc: 0.7381

Epoch 14/39
----------


100%|██████████| 3/3 [02:19<00:00, 46.39s/it]


train Loss: 0.4857 Acc: 0.9601


100%|██████████| 2/2 [00:14<00:00,  7.49s/it]


val Loss: 0.8351 Acc: 0.7619

Epoch 15/39
----------


100%|██████████| 3/3 [02:09<00:00, 43.00s/it]


train Loss: 0.4880 Acc: 0.9687


100%|██████████| 2/2 [00:14<00:00,  7.29s/it]


val Loss: 0.8272 Acc: 0.7619

Epoch 16/39
----------


100%|██████████| 3/3 [02:11<00:00, 43.83s/it]


train Loss: 0.4894 Acc: 0.9601


100%|██████████| 2/2 [00:14<00:00,  7.35s/it]


val Loss: 0.8120 Acc: 0.7619

Epoch 17/39
----------


100%|██████████| 3/3 [02:08<00:00, 42.96s/it]


train Loss: 0.4711 Acc: 0.9630


100%|██████████| 2/2 [00:14<00:00,  7.21s/it]


val Loss: 0.8279 Acc: 0.7619

Epoch 18/39
----------


100%|██████████| 3/3 [02:23<00:00, 47.88s/it]


train Loss: 0.4781 Acc: 0.9687


100%|██████████| 2/2 [00:14<00:00,  7.49s/it]


val Loss: 0.8374 Acc: 0.6905

Epoch 19/39
----------


100%|██████████| 3/3 [02:10<00:00, 43.58s/it]


train Loss: 0.4449 Acc: 0.9915


100%|██████████| 2/2 [00:14<00:00,  7.32s/it]


val Loss: 0.8102 Acc: 0.7381

Epoch 20/39
----------


100%|██████████| 3/3 [02:11<00:00, 43.97s/it]


train Loss: 0.4564 Acc: 0.9744


100%|██████████| 2/2 [00:14<00:00,  7.36s/it]


val Loss: 0.8031 Acc: 0.7619

Epoch 21/39
----------


100%|██████████| 3/3 [02:11<00:00, 43.79s/it]


train Loss: 0.4412 Acc: 0.9886


100%|██████████| 2/2 [00:14<00:00,  7.27s/it]


val Loss: 0.8014 Acc: 0.7857

Epoch 22/39
----------


100%|██████████| 3/3 [02:18<00:00, 46.20s/it]


train Loss: 0.4262 Acc: 0.9943


100%|██████████| 2/2 [00:14<00:00,  7.45s/it]


val Loss: 0.8028 Acc: 0.7857

Epoch 23/39
----------


100%|██████████| 3/3 [02:12<00:00, 44.11s/it]


train Loss: 0.4290 Acc: 0.9915


100%|██████████| 2/2 [00:14<00:00,  7.37s/it]


val Loss: 0.8002 Acc: 0.7619

Epoch 24/39
----------


100%|██████████| 3/3 [02:08<00:00, 42.97s/it]


train Loss: 0.4453 Acc: 0.9772


100%|██████████| 2/2 [00:14<00:00,  7.14s/it]


val Loss: 0.8097 Acc: 0.7619

Epoch 25/39
----------


100%|██████████| 3/3 [02:09<00:00, 43.13s/it]


train Loss: 0.4288 Acc: 0.9858


100%|██████████| 2/2 [00:14<00:00,  7.28s/it]


val Loss: 0.7961 Acc: 0.7619

Epoch 26/39
----------


100%|██████████| 3/3 [02:08<00:00, 42.74s/it]


train Loss: 0.4271 Acc: 0.9858


100%|██████████| 2/2 [00:14<00:00,  7.11s/it]


val Loss: 0.8004 Acc: 0.7381

Epoch 27/39
----------


100%|██████████| 3/3 [02:21<00:00, 47.29s/it]


train Loss: 0.4169 Acc: 0.9915


100%|██████████| 2/2 [00:15<00:00,  7.60s/it]


val Loss: 0.8018 Acc: 0.7143

Epoch 28/39
----------


100%|██████████| 3/3 [02:07<00:00, 42.55s/it]


train Loss: 0.4169 Acc: 0.9915


100%|██████████| 2/2 [00:14<00:00,  7.15s/it]


val Loss: 0.7936 Acc: 0.7381

Epoch 29/39
----------


100%|██████████| 3/3 [02:11<00:00, 43.88s/it]


train Loss: 0.4085 Acc: 0.9915


100%|██████████| 2/2 [00:14<00:00,  7.41s/it]


val Loss: 0.7894 Acc: 0.7381

Epoch 30/39
----------


100%|██████████| 3/3 [02:07<00:00, 42.56s/it]


train Loss: 0.4138 Acc: 0.9943


100%|██████████| 2/2 [00:14<00:00,  7.21s/it]


val Loss: 0.7876 Acc: 0.7381

Epoch 31/39
----------


100%|██████████| 3/3 [02:19<00:00, 46.46s/it]


train Loss: 0.4275 Acc: 0.9829


100%|██████████| 2/2 [00:14<00:00,  7.20s/it]


val Loss: 0.7918 Acc: 0.7619

Epoch 32/39
----------


100%|██████████| 3/3 [02:09<00:00, 43.09s/it]


train Loss: 0.4105 Acc: 0.9972


100%|██████████| 2/2 [00:14<00:00,  7.25s/it]


val Loss: 0.7928 Acc: 0.7381

Epoch 33/39
----------


100%|██████████| 3/3 [02:07<00:00, 42.56s/it]


train Loss: 0.4047 Acc: 0.9943


100%|██████████| 2/2 [00:14<00:00,  7.31s/it]


val Loss: 0.7908 Acc: 0.7619

Epoch 34/39
----------


100%|██████████| 3/3 [02:09<00:00, 43.27s/it]


train Loss: 0.4018 Acc: 1.0000


100%|██████████| 2/2 [00:14<00:00,  7.29s/it]


val Loss: 0.7937 Acc: 0.7619

Epoch 35/39
----------


100%|██████████| 3/3 [02:19<00:00, 46.49s/it]


train Loss: 0.4121 Acc: 0.9915


100%|██████████| 2/2 [00:14<00:00,  7.37s/it]


val Loss: 0.7872 Acc: 0.7619

Epoch 36/39
----------


100%|██████████| 3/3 [02:09<00:00, 43.09s/it]


train Loss: 0.4052 Acc: 0.9943


100%|██████████| 2/2 [00:14<00:00,  7.16s/it]


val Loss: 0.7891 Acc: 0.7381

Epoch 37/39
----------


100%|██████████| 3/3 [02:10<00:00, 43.49s/it]


train Loss: 0.4078 Acc: 0.9943


100%|██████████| 2/2 [00:14<00:00,  7.30s/it]


val Loss: 0.7944 Acc: 0.7143

Epoch 38/39
----------


100%|██████████| 3/3 [02:06<00:00, 42.24s/it]


train Loss: 0.4098 Acc: 0.9943


100%|██████████| 2/2 [00:14<00:00,  7.15s/it]


val Loss: 0.7877 Acc: 0.7143

Epoch 39/39
----------


100%|██████████| 3/3 [02:19<00:00, 46.44s/it]


train Loss: 0.4032 Acc: 1.0000


100%|██████████| 2/2 [00:14<00:00,  7.13s/it]

val Loss: 0.7923 Acc: 0.7619

Training complete in 99m 49s
Best Val Acc: 0.7857





In [None]:
# Evaluate the trained model on the test split: mean loss, per-class accuracy
# and overall accuracy.
test_loss = 0.0
class_correct = [0] * len(classes)
class_total = [0] * len(classes)
model_ft.eval()

for data, target in tqdm(test_loader):
    data, target = data.to(device), target.to(device)
    with torch.no_grad(): # turn off autograd for faster testing
        output = model_ft(data)
        loss = criterion(output, target)
    # Accumulate (not overwrite) the batch loss, weighted by the batch size,
    # so dividing by the dataset size below yields the mean per-sample loss.
    test_loss += loss.item() * data.size(0)
    _, pred = torch.max(output, 1)
    hits = pred.eq(target.view_as(pred)).cpu().tolist()
    labels = target.cpu().tolist()
    # Score every sample of every batch, including the final partial batch
    # (the previous `len(target) == 32` check silently dropped it).
    for label, hit in zip(labels, hits):
        class_correct[label] += int(hit)
        class_total[label] += 1

test_loss = test_loss / test_data_len
print('Test Loss: {:.4f}'.format(test_loss))
for i in range(len(classes)):
    if class_total[i] > 0:
        print("Test Accuracy of %5s: %2d%% (%2d/%2d)" % (
            classes[i], 100*class_correct[i]/class_total[i], class_correct[i], class_total[i]
        ))
    else:
        print("Test accuracy of %5s: NA" % (classes[i]))

total_correct = int(np.sum(class_correct))
total_seen = int(np.sum(class_total))
if total_seen > 0:
    print("Test Accuracy of %2d%% (%2d/%2d)" % (
                100*total_correct/total_seen, total_correct, total_seen
            ))
else:
    # Nothing was evaluated (empty test loader): avoid a division by zero.
    print("Test Accuracy of NA (0/0)")

100%|██████████| 2/2 [00:26<00:00, 13.14s/it]

Test Loss: 0.2908
Test Accuracy of Bacterial_dermatosis: 71% ( 5/ 7)
Test Accuracy of Fungal_infections: 50% ( 6/12)
Test Accuracy of Healthy: 85% ( 6/ 7)
Test Accuracy of Hypersensitivity_allergic_dermatosis: 83% ( 5/ 6)
Test Accuracy of 68% (22/32)





In [None]:
# Export the fine-tuned model as a TorchScript file.
# NOTE: the test run logged in this notebook reports 68% (22/32) accuracy, not the
# 93% an earlier version of this comment claimed - re-check the evaluation output
# before relying on a figure.
example = torch.rand(1, 3, 224, 224)
# model_ft is the object returned by train_model. Move it to the CPU to match the
# example input and force eval mode so dropout is not active while tracing.
export_model = model_ft.cpu().eval()
traced_script_module = torch.jit.trace(export_model, example)
traced_script_module.save("Dogs_disease.pt")