In [None]:
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download -d andreasantoro/split-garbage-dataset #veri setinin buraya yapıştır

! unzip /content/split-garbage-dataset.zip

In [2]:
import os
import numpy as np
import pandas as pd
import torch
import torchvision
from torchvision import datasets
from torchvision import transforms as T # for simplifying the transforms
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split
from torchvision import models

In [3]:
## Now, we import timm, torchvision image models
!pip install timm # kaggle doesnt have it installed by default
import timm
from timm.loss import LabelSmoothingCrossEntropy

Collecting timm
  Downloading timm-0.9.16-py3-none-any.whl (2.2 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/2.2 MB[0m [31m8.0 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.8/2.2 MB[0m [31m12.0 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━[0m [32m1.6/2.2 MB[0m [31m15.2 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.2/2.2 MB[0m [31m17.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->timm)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting 

In [4]:
# remove warnings
import warnings
warnings.filterwarnings("ignore")

In [5]:
import matplotlib.pyplot as plt
%matplotlib inline
import sys
import time
import copy
from tqdm import tqdm

In [6]:
def get_classes(data_dir):
    all_data = datasets.ImageFolder(data_dir)
    return all_data.classes

In [7]:
def get_data_loaders(data_dir, batch_size, train = False):
    if train:
        #train
        transform = T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomVerticalFlip(),
            T.RandomApply(torch.nn.ModuleList([T.ColorJitter()]), p=0.25),
            T.Resize(256),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize(timm.data.IMAGENET_DEFAULT_MEAN, timm.data.IMAGENET_DEFAULT_STD), # imagenet means
            T.RandomErasing(p=0.1, value='random')
        ])
        train_data = datasets.ImageFolder(os.path.join(data_dir, "train/"), transform = transform)
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
        return train_loader, len(train_data)
    else:
        # val/test
        transform = T.Compose([ # We dont need augmentation for test transforms
            T.Resize(256),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize(timm.data.IMAGENET_DEFAULT_MEAN, timm.data.IMAGENET_DEFAULT_STD), # imagenet means
        ])
        val_data = datasets.ImageFolder(os.path.join(data_dir, "valid/"), transform=transform)
        test_data = datasets.ImageFolder(os.path.join(data_dir, "test/"), transform=transform)
        val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True, num_workers=4)
        test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=4)
        return val_loader, test_loader, len(val_data), len(test_data)

In [8]:
dataset_path = "/content/split-garbage-dataset"

In [9]:
(train_loader, train_data_len) = get_data_loaders(dataset_path, 128, train=True)
(val_loader, test_loader, valid_data_len, test_data_len) = get_data_loaders(dataset_path, 32, train=False)

In [10]:
classes = get_classes("/content/split-garbage-dataset/train")
print(classes, len(classes))

['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash'] 6


In [11]:
dataloaders = {
    "train": train_loader,
    "val": val_loader
}
dataset_sizes = {
    "train": train_data_len,
    "val": valid_data_len
}

In [12]:
print(len(train_loader), len(val_loader), len(test_loader))

14 11 14


In [13]:
print(train_data_len, valid_data_len, test_data_len)

1768 328 431


In [14]:
# now, for the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [15]:
# torch.hub.load version
HUB_URL = "SharanSMenon/swin-transformer-hub:main"
MODEL_NAME = "swin_tiny_patch4_window7_224"
model = torch.hub.load(HUB_URL, MODEL_NAME, pretrained=True)

# timm.create_model version also available
# model=timm.create_model('swin_tiny_patch4_window7_224',pretrained=True)

Downloading: "https://github.com/SharanSMenon/swin-transformer-hub/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth" to /root/.cache/torch/hub/checkpoints/swin_tiny_patch4_window7_224.pth
100%|██████████| 109M/109M [00:00<00:00, 321MB/s] 


In [16]:
for param in model.parameters(): #freeze model
    param.requires_grad = False

n_inputs = model.head.in_features
model.head = nn.Sequential(
    nn.Linear(n_inputs, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512, len(classes))
)
model = model.to(device)
print(model.head)

Sequential(
  (0): Linear(in_features=768, out_features=512, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.3, inplace=False)
  (3): Linear(in_features=512, out_features=6, bias=True)
)


In [17]:
criterion = LabelSmoothingCrossEntropy()
criterion = criterion.to(device)
optimizer = optim.AdamW(model.head.parameters(), lr=0.001)

In [18]:
# lr scheduler
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.97)

In [19]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print("-"*10)

        for phase in ['train', 'val']: # We do training and validation phase per epoch
            if phase == 'train':
                model.train() # model to training mode
            else:
                model.eval() # model to evaluate

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'): # no autograd makes validation go faster
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1) # used for accuracy
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step() # step at end of epoch

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc =  running_corrects.double() / dataset_sizes[phase]

            print("{} Loss: {:.4f} Acc: {:.4f}".format(phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict()) # keep the best validation accuracy model
        print()
    time_elapsed = time.time() - since # slight error
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print("Best Val Acc: {:.4f}".format(best_acc))

    model.load_state_dict(best_model_wts)
    return model

In [20]:
model_ft = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=7) # now it is a lot faster
# I will come back after 10 epochs

Epoch 0/6
----------


100%|██████████| 14/14 [00:13<00:00,  1.00it/s]


train Loss: 1.0988 Acc: 0.7274


100%|██████████| 11/11 [00:02<00:00,  4.85it/s]


val Loss: 0.7741 Acc: 0.8537

Epoch 1/6
----------


100%|██████████| 14/14 [00:11<00:00,  1.18it/s]


train Loss: 0.7526 Acc: 0.8727


100%|██████████| 11/11 [00:02<00:00,  4.78it/s]


val Loss: 0.7197 Acc: 0.8872

Epoch 2/6
----------


100%|██████████| 14/14 [00:11<00:00,  1.18it/s]


train Loss: 0.6725 Acc: 0.9072


100%|██████████| 11/11 [00:02<00:00,  4.91it/s]


val Loss: 0.6815 Acc: 0.8933

Epoch 3/6
----------


100%|██████████| 14/14 [00:13<00:00,  1.07it/s]


train Loss: 0.6209 Acc: 0.9282


100%|██████████| 11/11 [00:02<00:00,  3.84it/s]


val Loss: 0.6658 Acc: 0.9085

Epoch 4/6
----------


100%|██████████| 14/14 [00:10<00:00,  1.30it/s]


train Loss: 0.6010 Acc: 0.9434


100%|██████████| 11/11 [00:02<00:00,  3.75it/s]


val Loss: 0.6515 Acc: 0.9207

Epoch 5/6
----------


100%|██████████| 14/14 [00:10<00:00,  1.28it/s]


train Loss: 0.5756 Acc: 0.9559


100%|██████████| 11/11 [00:03<00:00,  3.59it/s]


val Loss: 0.6340 Acc: 0.9268

Epoch 6/6
----------


100%|██████████| 14/14 [00:12<00:00,  1.13it/s]


train Loss: 0.5626 Acc: 0.9610


100%|██████████| 11/11 [00:02<00:00,  4.57it/s]

val Loss: 0.6287 Acc: 0.9146

Training complete in 1m 44s
Best Val Acc: 0.9268





In [21]:
test_loss = 0.0
class_correct = list(0 for i in range(len(classes)))
class_total = list(0 for i in range(len(classes)))
model_ft.eval()

TRUE=np.empty((0), dtype=int) ######
PRED=np.empty((0), dtype=int) ######

for data, target in tqdm(test_loader):
    data, target = data.to(device), target.to(device)
    with torch.no_grad(): # turn off autograd for faster testing
        output = model_ft(data)
        loss = criterion(output, target)
    test_loss = loss.item() * data.size(0)
    _, pred = torch.max(output, 1)

    TRUE=np.concatenate([TRUE,target.data.cpu().numpy()],0) ######
    PRED=np.concatenate([PRED,pred.data.cpu().numpy()],0) ######

    correct_tensor = pred.eq(target.data.view_as(pred))
    correct = np.squeeze(correct_tensor.cpu().numpy())
    if len(target) == 32:
        for i in range(32):
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

test_loss = test_loss / test_data_len
print('Test Loss: {:.4f}'.format(test_loss))
for i in range(len(classes)):
    if class_total[i] > 0:
        print("Test Accuracy of %5s: %2d%% (%2d/%2d)" % (
            classes[i], 100*class_correct[i]/class_total[i], np.sum(class_correct[i]), np.sum(class_total[i])
        ))
    else:
        print("Test accuracy of %5s: NA" % (classes[i]))
print("Test Accuracy of %2d%% (%2d/%2d)" % (
            100*np.sum(class_correct)/np.sum(class_total), np.sum(class_correct), np.sum(class_total)
        ))


100%|██████████| 14/14 [00:02<00:00,  4.96it/s]

Test Loss: 0.0262
Test Accuracy of cardboard: 89% (61/68)
Test Accuracy of glass: 88% (72/81)
Test Accuracy of metal: 93% (59/63)
Test Accuracy of paper: 95% (100/105)
Test Accuracy of plastic: 85% (61/71)
Test Accuracy of trash: 71% (20/28)
Test Accuracy of 89% (373/416)





In [22]:
example = torch.rand(1, 3, 224, 224)
traced_script_module = torch.jit.trace(model.cpu(), example)
traced_script_module.save("swin_transformer.pt")

In [23]:
from sklearn.metrics import classification_report
print(classification_report(TRUE, PRED, target_names=classes, digits=4))

              precision    recall  f1-score   support

   cardboard     0.9688    0.8857    0.9254        70
       glass     0.9012    0.8902    0.8957        82
       metal     0.8630    0.9265    0.8936        68
       paper     0.9115    0.9537    0.9321       108
     plastic     0.8289    0.8514    0.8400        74
       trash     0.8750    0.7241    0.7925        29

    accuracy                         0.8933       431
   macro avg     0.8914    0.8719    0.8799       431
weighted avg     0.8946    0.8933    0.8928       431

