# Hair Style Classification

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import os
from tqdm.notebook import tqdm
from PIL import Image

import torch
import torchvision.transforms as T
import torchvision.models as models

from sklearn.metrics import f1_score

In [None]:
# Fix every random seed so that runs are reproducible.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cuDNN autotuning (benchmark=True) may select a different kernel on each run,
# which works against the seeding above; request deterministic kernels instead.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Prefer Apple's MPS backend (the original target), then CUDA, then CPU, so the
# notebook still runs on machines where MPS is unavailable.
if getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# data

In [None]:
from glob import glob
import json

# Absolute dataset root on the author's machine; adjust for your environment.
root_path = '/Users/kimhongseok/cv_79_projects/part1/chapter3/7/data'
# 'annotations' sub-directory of the dataset root.
annot_path = os.path.join(root_path, 'annotations')

In [None]:
# Recursively collect every annotation JSON file under `annot_path`.
# glob() already returns a list, and its ordering depends on the filesystem,
# so sort the result to get the same order on every machine and run.
annotation_path_list = sorted(glob(f'{annot_path}/**/*.json', recursive=True))
len(annotation_path_list)

In [None]:
# Load the first annotation file to inspect its structure. The file is decoded
# as UTF-8 explicitly so the result does not depend on the platform locale.
with open(annotation_path_list[0], 'r', encoding='utf-8') as json_f:
    sample_js = json.load(json_f)

sample_js

## Build the annotation DataFrame

In [None]:
# Show the '/'-separated components of the first annotation path.
annotation_path_list[0].split('/')

In [None]:
# Re-join the components from index 9 up to (not including) the file name,
# i.e. the directory part that follows the first nine path components.
'/'.join(annotation_path_list[0].split('/')[9:-1])

In [None]:
from collections import defaultdict

# Annotation fields that become label columns.
select_categories = ['basestyle', 'length', 'color', 'sex']
new_annotations = pd.DataFrame(columns=['filename']+select_categories)
filename_list = []
# NOTE: the misspelled name is kept on purpose; the following cell reads it.
catefory_list_map = defaultdict(list)

# The loop variable is deliberately not called `annot_path`, so the
# module-level annotation root is no longer overwritten by the last JSON path.
for json_path in annotation_path_list:
    # Directory of this annotation file relative to the annotation root.
    # Computing it with relpath avoids a hard-coded component index that is
    # only valid for one specific absolute path depth.
    sub_dir = os.path.relpath(os.path.dirname(json_path), annot_path)
    if sub_dir == os.curdir:
        # File sits directly in the root: keep the bare file name.
        sub_dir = ''

    with open(json_path, 'r', encoding='utf-8') as f:
        js = json.load(f)

    # The image path mirrors the annotation's sub-directory layout.
    image_path = os.path.join(sub_dir, js['filename'])
    filename_list.append(image_path)
    for cat in select_categories:
        catefory_list_map[cat].append(js[cat])

In [None]:
# Populate the frame column by column: image paths first, then one column per
# collected category.
new_annotations['filename'] = filename_list
for cat in catefory_list_map:
    new_annotations[cat] = catefory_list_map[cat]

In [None]:
# Display the assembled annotation table.
new_annotations

In [None]:
# One-hot encode every column except the first one.
columns = list(new_annotations.columns)
new_annotations = pd.get_dummies(new_annotations, columns=columns[1:], dtype=int)
new_annotations

In [None]:
# Print the column names, dtypes and non-null counts of the encoded table.
new_annotations.info()

In [None]:
# Write the encoded annotation table to CSV, omitting the index column.
new_annotations.to_csv('/Users/kimhongseok/cv_79_projects/part1/chapter3/7/data/annotations/annotations.csv', index=False)

# annotation

In [None]:
# Load the annotation table from annotations.csv and display it.
data_df = pd.read_csv('/Users/kimhongseok/cv_79_projects/part1/chapter3/7/data/annotations/annotations.csv')
data_df

In [None]:
# Shuffle all rows and renumber the index. Passing the seed explicitly keeps
# the permutation identical even when this cell is re-run in the same kernel,
# instead of depending on how far the global NumPy RNG has advanced.
data_df = data_df.sample(frac=1, random_state=SEED).reset_index(drop=True)
data_df

# CustomDataset

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Multi-label image dataset backed by an annotation DataFrame.

    Column 0 of ``data_df`` holds image paths relative to ``root_path``; all
    remaining columns form the label vector of each sample.

    Args:
        root_path: Directory that the relative image paths are joined onto.
        data_df: Annotation table (first column path, remaining columns labels).
        classes: Label names. Stored unchanged as ``self.classes``; it is not
            used to select columns.
        transforms: Callable applied to every loaded RGB PIL image.
    """

    def __init__(self, root_path, data_df, classes, transforms):
        super().__init__()
        self.transforms = transforms
        # Previously this argument was overwritten by a loop local and lost.
        self.classes = classes

        # Convert the whole label block once instead of slicing row by row
        # with .iloc, which is slow for large tables.
        label_matrix = torch.tensor(data_df.iloc[:, 1:].to_numpy(dtype='float32'))
        self.data = [
            [os.path.join(root_path, filename), label]
            for filename, label in zip(data_df.iloc[:, 0], label_matrix)
        ]

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label_tensor)`` for sample ``idx``."""
        img_path, label = self.data[idx]
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied by convert().
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')

        return self.transforms(img), label

In [None]:
# Keep only the first 1000 rows as the working set.
data_df = data_df.head(1000)
data_df

In [None]:
# Directory that the relative image paths are resolved against.
root_path = '/Users/kimhongseok/cv_79_projects/part1/chapter3/7/data/images'

# Resize every image to 224x224, then convert it to a tensor.
transforms = T.Compose([T.Resize((224, 224)), T.ToTensor()])

# Every column after the first one is a label column.
classes = data_df.columns[1:]

total_dataset = CustomDataset(root_path, data_df, classes, transforms)

In [None]:
# Preview the first sample; permute moves the tensor's first axis to the end
# before handing it to imshow.
first_image = total_dataset[0][0]
plt.figure(figsize=(3, 3))
plt.imshow(first_image.permute(1, 2, 0))
plt.show()

In [None]:
# 80/10/10 split. The test share takes whatever is left after truncating the
# other two, so the three sizes always sum to the dataset length; random_split
# rejects lengths that do not add up (e.g. three truncated shares of 1001).
total_num = len(total_dataset)
train_num, valid_num = int(total_num*0.8), int(total_num*0.1)
test_num = total_num - train_num - valid_num

train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(total_dataset, [train_num, valid_num, test_num])

In [None]:
# One loader per split with batches of 100; only the training split is shuffled.
train_dataloader, valid_dataloader, test_dataloader = (
    torch.utils.data.DataLoader(split, batch_size=100, shuffle=reshuffle)
    for split, reshuffle in (
        (train_dataset, True),
        (valid_dataset, False),
        (test_dataset, False),
    )
)

# training, evaluation

In [None]:
def training(model, train_dataloader, criterion, optimizer, threshold, epoch, num_epochs):
    """Run one training pass over ``train_dataloader``.

    Each batch is moved to the module-level ``device``. Predictions are the
    sigmoid of the model outputs compared against ``threshold``. ``epoch`` and
    ``num_epochs`` are only used for the progress-bar text.

    Returns:
        ``(model, train_loss, train_f1)``: the model, the mean of the
        per-batch losses, and the macro-averaged F1 over all batches.
    """
    model.train()
    running_loss = 0.0
    seen_labels, seen_preds = [], []

    progress = tqdm(train_dataloader)
    for images, labels in progress:
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step before the new pass.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        preds = (torch.sigmoid(outputs) > threshold).float()

        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        seen_labels.extend(labels.cpu().numpy())
        seen_preds.extend(preds.cpu().numpy())

        progress.set_description(f'Epoch/Epochs [{epoch+1}/{num_epochs}], Train Loss: {batch_loss:.4f}')

    train_loss = running_loss / len(train_dataloader)
    train_f1 = f1_score(seen_labels, seen_preds, average='macro')

    return model, train_loss, train_f1

def evaluation(model, valid_dataloader, criterion, threshold, epoch, num_epochs):
    """Evaluate ``model`` on ``valid_dataloader`` without computing gradients.

    Each batch is moved to the module-level ``device``. Predictions are the
    sigmoid of the model outputs compared against ``threshold``. ``epoch`` and
    ``num_epochs`` are only used for the progress-bar text.

    Returns:
        ``(model, valid_loss, valid_f1)``: the model, the mean of the
        per-batch losses, and the macro-averaged F1 over all batches.
    """
    model.eval()
    running_loss = 0.0
    seen_labels, seen_preds = [], []

    progress = tqdm(valid_dataloader)
    with torch.no_grad():
        for images, labels in progress:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            preds = (torch.sigmoid(outputs) > threshold).float()

            batch_loss = loss.item()
            running_loss += batch_loss
            seen_labels.extend(labels.cpu().numpy())
            seen_preds.extend(preds.cpu().numpy())

            progress.set_description(f'Epoch/Epochs [{epoch+1}/{num_epochs}], Valid Loss: {batch_loss:.4f}')

    valid_loss = running_loss / len(valid_dataloader)
    valid_f1 = f1_score(seen_labels, seen_preds, average='macro')

    return model, valid_loss, valid_f1

def training_loop(model, train_dataloader, valid_dataloader, criterion, optimizer, threshold, num_epochs):
    """Train and validate ``model`` for ``num_epochs`` epochs.

    The model is moved to the module-level ``device`` first. Every epoch runs
    one ``training`` pass followed by one ``evaluation`` pass and prints the
    resulting losses and F1 scores.

    Returns:
        The model after the last epoch.
    """
    model.to(device)

    for epoch in range(num_epochs):
        model, train_loss, train_f1 = training(
            model, train_dataloader, criterion, optimizer,
            threshold, epoch, num_epochs,
        )
        model, valid_loss, valid_f1 = evaluation(
            model, valid_dataloader, criterion,
            threshold, epoch, num_epochs,
        )

        print(f'Train Loss: {train_loss}, Train F1: {train_f1}, Valid Loss: {valid_loss}, Valid F1: {valid_f1}')

    return model

# model

In [None]:
# Load an ImageNet-pretrained ResNet-34. `pretrained=True` is deprecated in
# torchvision; the explicit `weights` argument requests the same weights.
model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
model

In [None]:
# Fine-tune the whole network: every parameter stays trainable.
for param in model.parameters():
    param.requires_grad = True

# Replace the classification head with one output per label. The input width is
# read from the existing head instead of being hard-coded: the literal 4096 did
# not match the width of ResNet-34's pooled features, so the first forward pass
# failed with a shape mismatch.
model.fc = torch.nn.Linear(in_features=model.fc.in_features, out_features=len(classes), bias=True)
# Assigning `.requires_grad` on a Module only creates a plain attribute;
# requires_grad_() actually flags the head's parameters as trainable.
model.fc.requires_grad_(True)

In [None]:
# Display the model structure after the head replacement.
model

In [None]:
# Training configuration: 10 epochs, decision threshold 0.5 on the sigmoid
# outputs, BCE-with-logits loss, and Adam over all model parameters.
num_epochs = 10
threshold = 0.5
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

model = training_loop(
    model=model,
    train_dataloader=train_dataloader,
    valid_dataloader=valid_dataloader,
    criterion=criterion,
    optimizer=optimizer,
    threshold=threshold,
    num_epochs=num_epochs,
)