In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import os
import cv2
import numpy as np
from torchvision.transforms import Compose, Resize, ToTensor, Normalize, ToPILImage, v2, CenterCrop

class SkinDataset(Dataset):
    def __init__(self, root='/kaggle/input/huhuhu7/skintone/', train=True, transform=None):
        super().__init__()
        self.image_paths = []
        self.labels = []
        self.categories = ["dark", "light", "mid-dark", "mid-light"]
        self.transform = transform
        
        if train:
            data_path = os.path.join(root, 'train')
        else:
            data_path = os.path.join(root, 'valid')
        
        for i, category in enumerate(self.categories):
            data_files = os.path.join(data_path,category)
            for item in os.listdir(data_files):
                path = os.path.join(data_files,item)
                self.image_paths.append(path)
                self.labels.append(i)
        
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = cv2.imread(image_path)
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label



In [2]:
train_transform = Compose([
        ToPILImage(),
        Resize(256),
        CenterCrop(224),
#         ToTensor(),
        v2.RandomHorizontalFlip(p=0.5),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
test_transform = Compose([
        ToPILImage(),
        Resize(256),
        CenterCrop(224),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]) 
root = '/kaggle/input/huhuhu7/skintone/'
train_dataset = SkinDataset(root=root, train=True, transform=train_transform)
print(train_dataset.__len__())
test_dataset = SkinDataset(root=root, train=False, transform=test_transform)
print(test_dataset.__len__())
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)

# for images, labels in train_loader:
#     print(images.shape, labels.shape)
# for images, labels in test_loader:
#     print(images.shape, labels.shape)

19204
3058


In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
import os
import cv2
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm
from torchvision.transforms import Compose, Resize, ToTensor, Normalize, ToPILImage, RandomResizedCrop



In [6]:
from torchvision.models import resnet50, ResNet50_Weights, efficientnet_v2_m, EfficientNet_V2_M_Weights
import torch
import torch.nn as nn

class MyEffnet(nn.Module):
    def __init__(self, n_classes=4):
        super().__init__()
        self.backbone = efficientnet_v2_m(weights=EfficientNet_V2_M_Weights.DEFAULT)
        self.backbone.classifier[1] = nn.Linear(1280, n_classes)
    
    def forward(self, x):
        x = self.backbone(x)
        return x

In [7]:
x = torch.randn(16, 3, 224, 224)
model = MyEffnet()
# print(model)
print(model(x).shape)

Downloading: "https://download.pytorch.org/models/efficientnet_v2_m-dc08266a.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_v2_m-dc08266a.pth
100%|██████████| 208M/208M [00:00<00:00, 336MB/s] 


torch.Size([16, 4])


In [8]:
import torch.optim as optim
from tqdm import tqdm
epochs = 40
batch_size = 8
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, amsgrad=True)#optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
num_workers = 2
device = 'cuda' if torch.cuda.is_available() else 'cpu'
train_transform = Compose([
        ToPILImage(),
        Resize(256),
        CenterCrop(224),
#         ToTensor(),
        v2.RandomHorizontalFlip(p=0.5),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
test_transform = Compose([
        ToPILImage(),
        Resize(256),
        CenterCrop(224),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]) 

In [9]:
train_dataset = SkinDataset(root= root, train=True, transform=train_transform)
print(train_dataset.__len__())
test_dataset = SkinDataset(root=root, train=False, transform=test_transform)
print(test_dataset.__len__())
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

19204
3058


In [10]:
model = MyEffnet().to(device)
if os.path.exists('last.pt'):
    if torch.cuda.is_available():
        model.load_state_dict(torch.load('last.pt'))
    else:
        model.load_state_dict(torch.load('last.pt', map_location=torch.device('cpu')))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3) 

In [11]:
best_acc = 0
best_model = MyEffnet().to(device)
if os.path.exists('best.pt'):
    best_model.load_state_dict(torch.load('best.pt',map_location=torch.device('cpu')))
    best_model.eval()
    all_predictions_best = []
    all_labels_best = []
    for iter, (images, labels) in enumerate(test_dataloader):
        images = images.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            outputs = best_model(images)
            loss = criterion(outputs, labels)
            predictions = torch.argmax(outputs.cpu(), dim=1)
            all_predictions_best.extend(predictions)
            all_labels_best.extend(labels.cpu())     
    all_labels_best = [label.item() for label in all_labels_best]
    all_predictions_best = [prediction.item() for prediction in all_predictions_best]
    best_acc = accuracy_score(all_labels_best, all_predictions_best)

In [None]:
epochs=20
for epoch in range(epochs):
    model.train()
    progress_bar = tqdm(train_dataloader)
    for iter, (images, labels) in enumerate(progress_bar):
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)
        # writer.add_scalar('Train/Loss', loss, epoch*len(train_dataloader)+iter)
        progress_bar.set_description('Epoch: {}/{} Iter: {} Loss: {:.4f}'.format(epoch+1, epochs, iter+1, loss.item()))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    all_predictions = []
    all_labels = []
    for iter, (images, labels) in enumerate(test_dataloader):
        images = images.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            outputs = model(images)
            loss = criterion(outputs, labels)
            predictions = torch.argmax(outputs.cpu(), dim=1)
            all_predictions.extend(predictions)
            all_labels.extend(labels.cpu())
    all_labels = [label.item() for label in all_labels]
    all_predictions = [prediction.item() for prediction in all_predictions]
    acc = accuracy_score(all_labels, all_predictions)
    print('Epoch: {}/{} Test Loss: {:.4f} Test Acc: {:.4f}'.format(epoch+1, epochs, loss.item(), acc))
    torch.save(model.state_dict(), 'last.pt')
    if acc > best_acc:
        torch.save(model.state_dict(), 'best.pt')
        best_acc=acc
    # writer.add_scalars('Val/Accuracy', acc, epoch)

Epoch: 1/20 Iter: 2401 Loss: 0.4228: 100%|██████████| 2401/2401 [06:42<00:00,  5.96it/s]


Epoch: 1/20 Test Loss: 0.9839 Test Acc: 0.7737


Epoch: 2/20 Iter: 2401 Loss: 0.4437: 100%|██████████| 2401/2401 [06:34<00:00,  6.08it/s]


Epoch: 2/20 Test Loss: 0.8342 Test Acc: 0.7260


Epoch: 3/20 Iter: 2401 Loss: 1.2457: 100%|██████████| 2401/2401 [06:33<00:00,  6.11it/s]


Epoch: 3/20 Test Loss: 1.3230 Test Acc: 0.7858


Epoch: 4/20 Iter: 2401 Loss: 0.2581: 100%|██████████| 2401/2401 [06:32<00:00,  6.11it/s]


Epoch: 4/20 Test Loss: 1.4520 Test Acc: 0.7734


Epoch: 5/20 Iter: 2401 Loss: 0.7783: 100%|██████████| 2401/2401 [06:34<00:00,  6.08it/s]


Epoch: 5/20 Test Loss: 0.6421 Test Acc: 0.7538


Epoch: 6/20 Iter: 2401 Loss: 0.8843: 100%|██████████| 2401/2401 [06:33<00:00,  6.10it/s]


Epoch: 6/20 Test Loss: 0.8139 Test Acc: 0.7861


Epoch: 7/20 Iter: 2401 Loss: 0.3128: 100%|██████████| 2401/2401 [06:34<00:00,  6.08it/s]


Epoch: 7/20 Test Loss: 0.9617 Test Acc: 0.7858


Epoch: 8/20 Iter: 2401 Loss: 0.3019: 100%|██████████| 2401/2401 [06:36<00:00,  6.06it/s]


Epoch: 8/20 Test Loss: 0.9713 Test Acc: 0.7901


Epoch: 9/20 Iter: 2401 Loss: 1.0295: 100%|██████████| 2401/2401 [06:37<00:00,  6.04it/s]


Epoch: 9/20 Test Loss: 0.6795 Test Acc: 0.7874


Epoch: 10/20 Iter: 2401 Loss: 0.0088: 100%|██████████| 2401/2401 [06:37<00:00,  6.04it/s]


Epoch: 10/20 Test Loss: 0.9304 Test Acc: 0.7848


Epoch: 11/20 Iter: 2401 Loss: 0.1339: 100%|██████████| 2401/2401 [06:37<00:00,  6.03it/s]


Epoch: 11/20 Test Loss: 0.6627 Test Acc: 0.7829


Epoch: 12/20 Iter: 2401 Loss: 0.3980: 100%|██████████| 2401/2401 [06:38<00:00,  6.03it/s]


Epoch: 12/20 Test Loss: 0.8689 Test Acc: 0.7727


Epoch: 13/20 Iter: 1088 Loss: 0.1095:  45%|████▌     | 1087/2401 [03:00<03:36,  6.08it/s]

In [None]:
test_model = MyEffnet().to(device)
test_model.load_state_dict(torch.load('best.pt', map_location=torch.device('cpu')))
test_model.eval()

In [None]:
import matplotlib.pyplot as plt
import random
%matplotlib inline
categories = ["dark", "light", "mid-dark", "mid-light"]
indices = random.sample(range(0, test_dataset.__len__()), 8)
print(indices)
images = torch.stack([test_dataset.__getitem__(i)[0] for i in indices])
# images = torch.from_numpy(images)
# print(images)
# # labels = [categories[int(test_dataset.__getitem__(i)[1])] for i in indices]
labels = [test_dataset.__getitem__(i)[1] for i in indices]
predictions = torch.argmax(test_model(images.to(device)).cpu(), dim=1)
print(images.shape)
print(labels)
print(predictions

In [None]:
fig, axes = plt.subplots(2, 4, figsize=(10, 5))
axes = axes.flatten()
for i in range(8):
  img = cv2.imread(test_dataset.image_paths[indices[i]])
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  axes[i].imshow(img)
  title = f"Prediction: {categories[predictions[i]]}"

  # if labels is not None:
  #     title += f"\nTrue Label: {categories[labels[i]]}"

  axes[i].set_title(title)
  axes[i].axis('off')

plt.tight_layout()
plt.show()