In [1]:
IN_COLAB = True if 'google.colab' in str(get_ipython()) else False
if IN_COLAB:
    %pip install dvc torchview graphviz -q 

In [2]:
from torchvision import models, transforms, io
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch import nn, optim
from dvc.api import DVCFileSystem
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from PIL import Image

In [3]:
fs = DVCFileSystem("https://dagshub.com/izzalDev/acne-classification-v2",remote='origin')
print("Filesystem setup completed")

Filesystem setup completed


In [4]:
class AcneDataset(Dataset):
    def __init__(self, csv_file, root_dir, file_system, transform=False):
        self.df = pd.read_csv(file_system.open(f'{root_dir}/{csv_file}'),dtype={'label':'category'})
        self.root_dir = root_dir
        self.filesystem = file_system
        self.transform = transform
        self.classes = self.df.label.cat.categories.to_list()
        self.targets = self.df['label'].cat.codes
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        filepath = f'{self.root_dir}/{self.df.label[idx]}/{self.df.filename[idx]}'
        label = int(self.targets[idx])
        image = Image.open(self.filesystem.open(filepath))
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [5]:
train_transforms = transforms.Compose([
    transforms.RandomRotation(15),
    transforms.Resize((224,224)),
    transforms.ToTensor()
])

test_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor()
])

train_set = AcneDataset('_annotations.csv','data/processed/train',fs,train_transforms)
test_set = AcneDataset('_annotations.csv','data/processed/test',fs,test_transforms)

In [6]:
class_counts = np.bincount(train_set.targets)
class_weights = 1/class_counts
sampler = WeightedRandomSampler(weights=class_weights, num_samples=100, replacement=True)

bs = 32
train_loader = DataLoader(train_set, batch_size=bs, sampler=sampler)
test_loader = DataLoader(test_set, batch_size=bs)

In [7]:
class CustomEfficientnet(nn.Module):
    def __init__(self, output_size):
        super().__init__()
        self.model = models.efficientnet_b0(weights='IMAGENET1K_V1')
        self.freeze()
        self.model.classifier = nn.Sequential(
            nn.Dropout(p=0.5, inplace=True),
            nn.Linear(1280,output_size),
            nn.Softmax(dim=1)
        ) 
    def forward(self, x):
        return self.model(x)
    def freeze(self):
        for param in self.model.parameters():
            param.requires_grad = False
    def unfreez(self):
        for param in self.model.parameters():
            param.requires_grad = True

In [8]:
from tqdm.auto import tqdm
model = CustomEfficientnet(3).to('mps')
criterion = nn.CrossEntropyLoss().to('mps')
optimizer = optim.AdamW(model.parameters(),lr=0.001)

In [10]:
from tqdm.auto import tqdm

def loop_fn(mode, dataset, dataloader, model, criterion, optimizer, device='mps'):
    if mode == "train":
        model.train()
    elif mode == "test":
        model.eval()
    cost = 0
    for feature, target in tqdm(dataloader, desc=mode.title()):
        feature, target = feature.to(device), target.to(device)
        output = model(feature)
        loss = criterion(output, target)
        
        if mode == "train":
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        
        cost += loss.item() * feature.shape[0]
    cost = cost / len(dataset)
    return cost

while True:
    train_cost = loop_fn("train", train_set, train_loader, model, criterion, optimizer)
    with torch.no_grad():
        test_cost = loop_fn("test", test_set, test_loader, model, criterion, optimizer)
    # break

Train:   0%|          | 0/4 [00:00<?, ?it/s]

Test:   0%|          | 0/62 [00:00<?, ?it/s]

Train:   0%|          | 0/4 [00:00<?, ?it/s]

Test:   0%|          | 0/62 [00:00<?, ?it/s]

KeyboardInterrupt: 