# Compare dataset
- `Model` ResNet34
- `Image size` 224x224
- `Epoch` 50
- `Batch size` 64

|dataset|best val f1-score|best val accuracy|
|--|--|--|
|taco|
|kaggle|
|taco+kaggle|

In [15]:
import torch
import torch.nn as nn
import torchvision.models
from sklearn.metrics import f1_score

import sys
import os.path as p
import yaml
import glob
import time
from tqdm import tqdm

In [2]:
# Filesystem layout used throughout the notebook.
ROOT_PATH = "/opt/ml/"
BASE_PATH = p.join(ROOT_PATH, "p4-opt-5-vibrhanium-/baseline")

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Make the project's `src` package importable. The membership check keeps the
# cell idempotent: re-running it no longer appends a duplicate sys.path entry.
if BASE_PATH not in sys.path:
    sys.path.append(BASE_PATH)

from src.utils.common import read_yaml
from src.utils.macs import calc_macs
from src.utils.common import get_label_counts
from src.dataloader import create_dataloader
from src.trainer import TorchTrainer
from src.loss import CustomCriterion

In [3]:
import os.path as p
import time
from tqdm import tqdm
import torch
import torchvision
from sklearn.metrics import f1_score
from src.utils.common import read_yaml
from src.utils.macs import calc_macs
# Locations of the project checkout and of the default (kaggle) data config.
ROOT_PATH = "/opt/ml/"
BASE_PATH = p.join(ROOT_PATH, "p4-opt-5-vibrhanium-/baseline")
DATA_CONFIG_PATH = p.join(BASE_PATH, "configs/data/kaggle.yaml")
data_config = read_yaml(cfg=DATA_CONFIG_PATH)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# A GradScaler is created only when the config asks for FP16 and the device is
# not the CPU; evaluate() uses its presence as the switch for autocast.
if data_config["FP16"] and device != torch.device("cpu"):
    scaler = torch.cuda.amp.GradScaler()
else:
    scaler = None
# scaler = None
# -- Helper that reports a model's MACs and parameter count
def check_spec(model):
    """Return ``(macs, num_parameters)`` for ``model``.

    ``macs`` is whatever ``calc_macs`` returns for the input shape
    ``(3, 224, 224)``; ``num_parameters`` is the total element count over
    ``model.parameters()``.
    """
    input_shape = (3, 224, 224)
    # MACs are measured first, exactly as before, then parameters are counted.
    macs = calc_macs(model, input_shape)
    num_parameters = 0
    for param in model.parameters():
        num_parameters += param.numel()
    return macs, num_parameters
def _get_len_label_from_dataset(dataset) -> int:
    """Get the number of labels of a dataset.

    Args:
        dataset: torch dataset. ``torch.utils.data.Subset`` wrappers are
            unwrapped until a torchvision dataset is reached.
    Returns:
        ``len(dataset.classes)`` of the underlying torchvision dataset.
    Raises:
        NotImplementedError: if the (unwrapped) dataset is neither a
            torchvision ``ImageFolder``/``VisionDataset`` nor a ``Subset``.
    """
    vision_types = (
        torchvision.datasets.ImageFolder,
        torchvision.datasets.vision.VisionDataset,
    )
    current = dataset
    while True:
        if isinstance(current, vision_types):
            return len(current.classes)
        if not isinstance(current, torch.utils.data.Subset):
            raise NotImplementedError
        # Peel off one Subset layer and look at what it wraps.
        current = current.dataset
# -- Runs evaluation over a dataloader.
@torch.no_grad()
def evaluate(model, dataloader):
    """Evaluate ``model`` on ``dataloader``.

    Reads two notebook-level globals: ``device`` (every batch is moved there)
    and ``scaler`` (when truthy, the forward pass runs inside
    ``torch.cuda.amp.autocast()``).

    Args:
        model: network to evaluate. It is switched to eval mode and left there.
        dataloader: yields ``(data, labels)`` batches; ``dataloader.dataset``
            must be supported by ``_get_len_label_from_dataset``.
            Assumes the model output is ``(batch, num_classes)``, optionally
            followed by singleton dimensions.

    Returns:
        ``(f1, accuracy, consumed_time)``: macro F1 over all class indices,
        fraction of correct predictions, and elapsed wall-clock seconds.
        Both scores are ``0.0`` when the dataloader yields no samples.
    """
    model.eval()
    start_time = time.time()
    preds = []
    gt = []
    correct = 0
    total = 0
    num_classes = _get_len_label_from_dataset(dataloader.dataset)
    label_list = list(range(num_classes))
    # tqdm advances its counter while it is iterated, so the loop body must not
    # call pbar.update() on top of that (it would count every batch twice).
    pbar = tqdm(dataloader, total=len(dataloader))
    for data, labels in pbar:
        data, labels = data.to(device), labels.to(device)
        if scaler:
            with torch.cuda.amp.autocast():
                outputs = model(data)
        else:
            outputs = model(data)
        # Collapse everything after the batch dimension but always keep the
        # batch dimension itself: a bare squeeze() also removes it for a
        # one-sample batch, which made the reduction over dim 1 fail.
        outputs = torch.flatten(outputs, start_dim=1)
        _, pred = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (pred == labels).sum().item()
        preds += pred.tolist()
        gt += labels.tolist()
        pbar.set_description(
            f"Acc: {(correct / total) * 100:.2f}% "
            f"F1(macro): {f1_score(y_true=gt, y_pred=preds, labels=label_list, average='macro', zero_division=0):.2f}"
        )
    consumed_time = time.time() - start_time
    if total == 0:
        # Nothing was evaluated: avoid dividing by zero and scoring empty lists.
        return 0.0, 0.0, consumed_time
    accuracy = correct / total
    f1 = f1_score(
        y_true=gt, y_pred=preds, labels=label_list, average="macro", zero_division=0
    )
    return f1, accuracy, consumed_time
def print_spec(consumed_time, macs, num_parameters, f1, accuracy):
    """Print the cost and quality figures of one evaluation run.

    Args:
        consumed_time: inference time in seconds, shown with three decimals.
        macs: MAC count, truncated to an integer for display.
        num_parameters: parameter count, truncated to an integer for display.
        f1: F1 score, shown with three decimals.
        accuracy: accuracy, shown with three decimals.
    """
    report_lines = (
        f"Inference time: {consumed_time:.3f}s",
        f"MAC score: {int(macs)}",
        f"Parameter num: {int(num_parameters)}",
        "",  # blank line separating cost figures from quality figures
        f"F1 score: {f1:.3f} | Accuracy: {accuracy:.3f}",
    )
    print(*report_lines, sep="\n")

### 1-1-1. TACO - All Data, Battery 제외

**1) Model**

`epoch` 50

In [41]:
# Checkpoint evaluated in this section and the size of its classifier head.
MODEL_PATH = p.join(ROOT_PATH, "input/exp/Resnet34/Resnet34_kaggle_50_0619_1301.pt")
num_classes = 4

# The checkpoint loaded below overwrites every weight (load_state_dict is
# strict by default), so the ImageNet weights are not requested: this avoids a
# needless download.
model = torchvision.models.resnet34(pretrained=False)
# Replace the final layer with a `num_classes`-way head.
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)
# Load the saved weights; map_location lets a checkpoint that was saved on a
# GPU load on a CPU-only machine as well.
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
print("Done.")

Done.


**2) Dataset**

In [42]:
# Data config that drives the dataloaders, and the validation directory that
# the per-class count cell reads.
DATA_CONFIG_PATH = p.join(BASE_PATH, "configs/data/kaggle.yaml")
DATA_PATH = p.join(ROOT_PATH, "input/data_kaggle/val")

data_config = read_yaml(cfg=DATA_CONFIG_PATH)
# AMP is switched off for this run: with scaler = None, evaluate() skips
# torch.cuda.amp.autocast(). The FP16 variant would build a GradScaler when
# data_config["FP16"] is set and the device is not the CPU.
scaler = None
# create_dataloader returns three values; the third one is discarded here.
train_dl, val_dl, _ = create_dataloader(config=data_config)

In [43]:
# Report how many files each class directory under DATA_PATH contains.
print(f"Validation data from {DATA_PATH}")
print(f"{'Category':10} | num")
print("----------------------")

total = 0
categories = ["Glass", "Metal", "Paper", "Plastic"]
for category in categories:
    src_dir = p.join(DATA_PATH, category)
    # Every entry of the class directory is counted, whatever its type.
    imgs = glob.glob(p.join(src_dir, "*"))
    total += len(imgs)

    print(f"{category:10} | {len(imgs):5}")
print(f"{'TOTAL':11}| {total:5}")

Validation datat from /opt/ml/input/data_kaggle/val
Category   | num
----------------------
Glass      |    50
Metal      |    41
Paper      |    40
Plastic    |    48
TOTAL      |   179


In [44]:
# Measure the model's cost, score it on the validation loader, then report.
bf_macs, bf_num_parameters = check_spec(model)
bf_f1, bf_accuracy, bf_consumed_time = evaluate(model, val_dl)
print(f"model: {MODEL_PATH} validation data: {DATA_CONFIG_PATH}")
print_spec(
    consumed_time=bf_consumed_time,
    macs=bf_macs,
    num_parameters=bf_num_parameters,
    f1=bf_f1,
    accuracy=bf_accuracy,
)

Acc: 59.78% F1(macro): 0.26: 100%|██████████| 3/3 [00:02<00:00,  1.21it/s]

model: /opt/ml/input/exp/Resnet34/Resnet34_kaggle_50_0619_1301.pt validation data: /opt/ml/p4-opt-5-vibrhanium-/baseline/configs/data/kaggle.yaml
Inference 시간: 3.680s
MAC score: 3671555588
Parameter 수: 21286724

F1 score: 0.259 | Accuracy: 0.598





### 1-2-1. TACO - Sample Data

**1) Model**

`epoch` 50

In [30]:
# Checkpoint evaluated in this section and the size of its classifier head.
MODEL_PATH = p.join(ROOT_PATH, "input/exp/Resnet34/Resnet34_taco_sample.pt")
num_classes = 9

# The checkpoint loaded below overwrites every weight (load_state_dict is
# strict by default), so the ImageNet weights are not requested: this avoids a
# needless download.
model = torchvision.models.resnet34(pretrained=False)
# Replace the final layer with a `num_classes`-way head.
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)
# map_location lets a checkpoint that was saved on a GPU load on a CPU-only
# machine as well.
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
print("Done.")

Done.


In [35]:
# Report how many files each class directory under DATA_PATH contains.
# NOTE(review): this cell sits above the section's dataset cell, so DATA_PATH
# is whatever an earlier cell left behind (the recorded output shows the kaggle
# validation directory, with no files for five of the nine classes). Confirm
# the cell is still wanted here.
print(f"Validation data from {DATA_PATH}")
print(f"{'Category':10} | num")
print("----------------------")

total = 0
categories = ["Battery", "Clothing", "Glass", "Metal", "Paper", "Paperpack", "Plastic", "Plasticbag", "Styrofoam"]
for category in categories:
    src_dir = p.join(DATA_PATH, category)
    # Every entry of the class directory is counted, whatever its type.
    imgs = glob.glob(p.join(src_dir, "*"))
    total += len(imgs)

    print(f"{category:10} | {len(imgs):5}")
print(f"{'TOTAL':11}| {total:5}")

Validation datat from /opt/ml/input/data_kaggle/val
Category   | num
----------------------
Battery    |     0
Clothing   |     0
Glass      |    50
Metal      |    41
Paper      |    40
Paperpack  |     0
Plastic    |    48
Plasticbag |     0
Styrofoam  |     0
TOTAL      |   179


**2) Dataset**

In [20]:
# Data config that drives the dataloaders, and the validation directory that
# the per-class count cell reads.
DATA_CONFIG_PATH = p.join(BASE_PATH, "configs/data/taco_sample.yaml")
DATA_PATH = p.join(ROOT_PATH, "input/data_sample/val")

data_config = read_yaml(cfg=DATA_CONFIG_PATH)
# AMP is switched off for this run: with scaler = None, evaluate() skips
# torch.cuda.amp.autocast(). The FP16 variant would build a GradScaler when
# data_config["FP16"] is set and the device is not the CPU.
scaler = None
# create_dataloader returns three values; the third one is discarded here.
train_dl, val_dl, _ = create_dataloader(config=data_config)

In [21]:
# Report how many files each class directory under DATA_PATH contains.
print(f"Validation data from {DATA_PATH}")
print(f"{'Category':10} | num")
print("----------------------")

total = 0
categories = ["Battery", "Clothing", "Glass", "Metal", "Paper", "Paperpack", "Plastic", "Plasticbag", "Styrofoam"]
for category in categories:
    src_dir = p.join(DATA_PATH, category)
    # Every entry of the class directory is counted, whatever its type.
    imgs = glob.glob(p.join(src_dir, "*"))
    total += len(imgs)

    print(f"{category:10} | {len(imgs):5}")
print(f"{'TOTAL':11}| {total:5}")

Validation datat from /opt/ml/input/sample_data/val
Category   | num
----------------------
Battery    |    78
Clothing   |   202
Glass      |   300
Metal      |   300
Paper      |   300
Paperpack  |   300
Plastic    |   300
Plasticbag |   300
Styrofoam  |   300
TOTAL      |  2380


**3) Check spec**

In [18]:
# Measure the model's cost, score it on the validation loader, then report.
bf_macs, bf_num_parameters = check_spec(model)
bf_f1, bf_accuracy, bf_consumed_time = evaluate(model, val_dl)
print(f"model: {MODEL_PATH} validation data: {DATA_CONFIG_PATH}")
print_spec(
    consumed_time=bf_consumed_time,
    macs=bf_macs,
    num_parameters=bf_num_parameters,
    f1=bf_f1,
    accuracy=bf_accuracy,
)



Acc: 43.87% F1(macro): 0.41: 100%|██████████| 38/38 [00:19<00:00,  1.94it/s]

/opt/ml/input/exp/Resnet34/Resnet34_taco_sample.pt
Inference 시간: 19.841s
MAC score: 3671558153
Parameter 수: 21289289

F1 score: 0.412 | Accuracy: 0.439





In [31]:
# Switch to the taco data config; DATA_PATH points at its validation directory.
DATA_CONFIG_PATH = p.join(BASE_PATH, "configs/data/taco.yaml")
DATA_PATH = p.join(ROOT_PATH, "input/data/val")

data_config = read_yaml(cfg=DATA_CONFIG_PATH)
# AMP is switched off for this run: with scaler = None, evaluate() skips
# torch.cuda.amp.autocast(). The FP16 variant would build a GradScaler when
# data_config["FP16"] is set and the device is not the CPU.
scaler = None
# create_dataloader returns three values; the third one is discarded here.
train_dl, val_dl, _ = create_dataloader(config=data_config)

In [32]:
# Measure the model's cost, score it on the validation loader, then report.
bf_macs, bf_num_parameters = check_spec(model)
bf_f1, bf_accuracy, bf_consumed_time = evaluate(model, val_dl)
print(f"model: {MODEL_PATH} validation data: {DATA_CONFIG_PATH}")
print_spec(
    consumed_time=bf_consumed_time,
    macs=bf_macs,
    num_parameters=bf_num_parameters,
    f1=bf_f1,
    accuracy=bf_accuracy,
)

Acc: 40.84% F1(macro): 0.35: 100%|██████████| 128/128 [00:52<00:00,  2.46it/s]

model: /opt/ml/input/exp/Resnet34/Resnet34_taco_sample.pt validation data: /opt/ml/p4-opt-5-vibrhanium-/baseline/configs/data/taco.yaml
Inference 시간: 52.482s
MAC score: 3671558153
Parameter 수: 21289289

F1 score: 0.347 | Accuracy: 0.408





### TACO - Sample data

**1) Model**

`epoch` 100

In [19]:
# Checkpoint evaluated in this section and the size of its classifier head.
MODEL_PATH = p.join(ROOT_PATH, "input/exp/Resnet34/Resnet34_taco_sample_100.pt")
num_classes = 9

# The checkpoint loaded below overwrites every weight (load_state_dict is
# strict by default), so the ImageNet weights are not requested: this avoids a
# needless download.
model = torchvision.models.resnet34(pretrained=False)
# Replace the final layer with a `num_classes`-way head.
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)
# map_location lets a checkpoint that was saved on a GPU load on a CPU-only
# machine as well.
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
print("Done.")

Done.


**2) Dataset**

In [22]:
# Data config that drives the dataloaders for this run.
DATA_CONFIG_PATH = p.join(BASE_PATH, "configs/data/taco_sample.yaml")

data_config = read_yaml(cfg=DATA_CONFIG_PATH)
# A GradScaler is created when the config sets FP16 and the device is not the
# CPU; evaluate() then runs the forward pass under torch.cuda.amp.autocast().
# NOTE(review): every other evaluation in this notebook sets scaler = None, so
# this is the only run that may use AMP. Confirm that is intended before
# comparing its inference time and scores with the others.
scaler = torch.cuda.amp.GradScaler() if data_config["FP16"] and device != torch.device("cpu") else None
# create_dataloader returns three values; the third one is discarded here.
train_dl, val_dl, _ = create_dataloader(config=data_config)

In [23]:
# Measure the model's cost, score it on the validation loader, then report.
bf_macs, bf_num_parameters = check_spec(model)
bf_f1, bf_accuracy, bf_consumed_time = evaluate(model=model, dataloader=val_dl)
# Log the data config next to the checkpoint, as the other evaluation cells
# do; the same checkpoint is scored on two datasets in this section and the
# runs are otherwise indistinguishable in the output.
print(f"model: {MODEL_PATH} validation data: {DATA_CONFIG_PATH}")
print_spec(bf_consumed_time, bf_macs, bf_num_parameters, bf_f1, bf_accuracy)

Acc: 45.80% F1(macro): 0.44: 100%|██████████| 38/38 [00:17<00:00,  2.21it/s]


/opt/ml/input/exp/Resnet34/Resnet34_taco_sample_100.pt
Inference 시간: 17.606s
MAC score: 3671558153
Parameter 수: 21289289

F1 score: 0.439 | Accuracy: 0.458


In [26]:
# Switch to the taco data config for the second evaluation of this checkpoint.
DATA_CONFIG_PATH = p.join(BASE_PATH, "configs/data/taco.yaml")

data_config = read_yaml(cfg=DATA_CONFIG_PATH)
# AMP is switched off for this run: with scaler = None, evaluate() skips
# torch.cuda.amp.autocast(). The FP16 variant would build a GradScaler when
# data_config["FP16"] is set and the device is not the CPU.
scaler = None
# create_dataloader returns three values; the third one is discarded here.
train_dl, val_dl, _ = create_dataloader(config=data_config)

In [27]:
# Measure the model's cost, score it on the validation loader, then report.
bf_macs, bf_num_parameters = check_spec(model)
bf_f1, bf_accuracy, bf_consumed_time = evaluate(model=model, dataloader=val_dl)
# Log the data config next to the checkpoint, as the other evaluation cells
# do; the same checkpoint is scored on two datasets in this section and the
# runs are otherwise indistinguishable in the output.
print(f"model: {MODEL_PATH} validation data: {DATA_CONFIG_PATH}")
print_spec(bf_consumed_time, bf_macs, bf_num_parameters, bf_f1, bf_accuracy)



Acc: 44.69% F1(macro): 0.37: 100%|██████████| 128/128 [00:57<00:00,  2.23it/s]

/opt/ml/input/exp/Resnet34/Resnet34_taco_sample_100.pt
Inference 시간: 57.799s
MAC score: 3671558153
Parameter 수: 21289289

F1 score: 0.373 | Accuracy: 0.447



