In [1]:
import os
import time
import json
import numpy as np
import pandas as pd
import wandb
import math
import random
from typing import Tuple, Sequence, Callable
from PIL import Image

import torch, torchvision
import torch.optim as optim
from torch import nn, Tensor
from torch.utils.data import Dataset, DataLoader
from torchinfo import summary
from torchvision import transforms
from torchvision.models import resnet50
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder

from material import MyDataset

In [2]:
# Pick the compute device once for the whole notebook: the first CUDA GPU
# when PyTorch reports one as available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
device

'cuda:0'

In [18]:
def get_dataloader(object, batch_size, is_train, num_workers=0, shuffle=None,
                   root='/home/compu/Documents/exports/'):
    """Build a DataLoader over the train or test split of one exported dataset.

    Args:
        object: Name of the dataset sub-directory under `root` (e.g. 'material').
            The name shadows the builtin but is kept for caller compatibility.
        batch_size: Number of samples per batch.
        is_train: True selects the 'train' split and enables random flip
            augmentation; False selects the 'test' split with no augmentation.
        num_workers: Worker process count forwarded to DataLoader.
        shuffle: Whether to reshuffle every epoch. None (default) shuffles the
            training split and keeps the test split in order.
        root: Directory holding the per-dataset folders and 'newfile.json'.

    Returns:
        A torch DataLoader wrapping a MyDataset instance.
    """
    split = 'train' if is_train else 'test'
    if shuffle is None:
        # Training batches should not arrive in on-disk order every epoch.
        shuffle = bool(is_train)

    # Both splits share resize -> tensor -> normalize; only training adds flips.
    steps = [transforms.Resize((128, 128))]
    if is_train:
        steps.append(transforms.RandomHorizontalFlip(p=0.5))
        steps.append(transforms.RandomVerticalFlip(p=0.5))
    steps.append(transforms.ToTensor())
    # Per-channel mean / std (the values commonly used for ImageNet-style inputs).
    steps.append(transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    ))
    pipeline = transforms.Compose(steps)

    # The trailing '' keeps the trailing slash on the directory.
    # NOTE(review): MyDataset may concatenate file names onto `dir` - confirm.
    dataset = MyDataset(
        dir=os.path.join(root, object, split, ''),
        image_ids=os.path.join(root, 'newfile.json'),
        transforms=pipeline,
    )

    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                            num_workers=num_workers)

    print('데이터셋 크기:', len(dataset))
    return dataloader

In [4]:
def log_image_table(images, predicted, labels, probs):
    """Log a wandb.Table with one row per sample: (image, pred, target, scores...).

    Args:
        images: Batch of image tensors; only channel 0 of each image is logged.
        predicted: Batch of predictions, one entry per image.
        labels: Batch of targets, one entry per image.
        probs: Batch of per-class scores; every value of a sample becomes its
            own `score_<i>` column.

    The number of score columns is taken from `probs` instead of being fixed,
    so the table width always matches the rows that are added.
    """
    images = images.to("cpu")
    predicted = predicted.to("cpu")
    labels = labels.to("cpu")
    probs = probs.to("cpu")

    # Scores per sample = product of all non-batch dims (1 when probs is 1-D).
    num_scores = int(np.prod(probs.shape[1:]))
    score_columns = [f"score_{i}" for i in range(num_scores)]
    table = wandb.Table(columns=["image", "pred", "target"] + score_columns)

    for img, pred, targ, prob in zip(images, predicted, labels, probs):
        # NOTE(review): img[0] is the first channel of the (possibly normalized)
        # tensor scaled by 255; confirm this is the intended visualisation.
        table.add_data(wandb.Image(img[0].numpy()*255), pred, targ,
                       *prob.reshape(-1).numpy())

    # commit=False: attach the table to the current step without advancing it.
    wandb.log({"predictions_table":table}, commit=False)

In [5]:
def validate_model(model, dataset, loss_func, log_images=False, batch_idx=0):
    """Evaluate `model` on a DataLoader and return (mean loss, mean accuracy).

    Args:
        model: Network producing one logit per label for each input.
        dataset: DataLoader yielding (images, targets) batches.
        loss_func: Callable `loss_func(outputs, targets)`. Its result is
            treated as a batch mean when the per-sample average is computed
            (assumes a mean-reduced loss - confirm for custom losses).
        log_images: When True, log the batch at `batch_idx` as a wandb table.
        batch_idx: Index of the batch to log.

    Returns:
        Tuple `(val_loss, accuracy)`: the per-sample mean loss and the mean
        fraction of labels predicted correctly per sample. Both are NaN when
        the loader yields no samples.
    """
    was_training = model.training
    model.eval()

    # Run on whatever device the model lives on; leave inputs where they are
    # for a parameter-free model.
    first_param = next(model.parameters(), None)

    loss_sum = 0.0
    correct_sum = 0.0
    num_samples = 0

    try:
        with torch.inference_mode():
            for i, (images, targets) in enumerate(dataset):
                if first_param is not None:
                    images = images.to(first_param.device)
                targets = targets.to(images.device)

                outputs = model(images)
                loss = loss_func(outputs, targets)

                # Outputs are logits: convert to probabilities before
                # applying the 0.5 decision threshold.
                probs = torch.sigmoid(outputs)
                preds = (probs > 0.5).float()

                batch_size = images.size(0)
                loss_sum += loss.item() * batch_size
                # Per-sample fraction of matching labels, summed over the batch.
                per_sample = (preds == targets).float().reshape(batch_size, -1).mean(dim=1)
                correct_sum += per_sample.sum().item()
                num_samples += batch_size

                # Log one batch of images to the dashboard, always same batch_idx.
                if i==batch_idx and log_images:
                    log_image_table(images, preds, targets, probs)
    finally:
        # Do not leave the caller's model stuck in eval mode.
        model.train(was_training)

    if num_samples == 0:
        return float("nan"), float("nan")
    return loss_sum / num_samples, correct_sum / num_samples

In [6]:
class EarlyStopping:
    """Early stops the training if the monitored loss doesn't improve after a given patience.

    Call the instance once per epoch with the loss and the model. Each
    improvement saves `model.state_dict()` to `path`; after `patience`
    consecutive calls without improvement, `early_stop` becomes True.
    """
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time the loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf (lower-case) exists on every NumPy version; np.Inf was removed in 2.0.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        """Record one epoch's loss; checkpoint on improvement, else count towards stopping."""
        # Every comparison with NaN is False, so without this guard a NaN loss
        # would be treated as an improvement and overwrite the best checkpoint.
        if math.isnan(float(val_loss)):
            self._no_improvement()
            return

        # Higher score is better, so the loss is negated.
        score = -val_loss

        if self.best_score is None or not (score < self.best_score + self.delta):
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
        else:
            self._no_improvement()

    def _no_improvement(self):
        """Advance the patience counter and raise the stop flag once it is exhausted."""
        self.counter += 1
        self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [19]:
def train_model(dataset, object, num_classes, epochs, lr=1e-3, patience=5,
                checkpoint_path="material1.pt", log_every=64):
    """Train a ResNet-50 multi-label classifier with wandb logging and early stopping.

    Args:
        dataset: DataLoader yielding (images, targets) batches; its
            `batch_size` is recorded in the wandb config.
        object: Dataset name. Only used by the disabled validation code
            below; kept so existing callers keep working.
        num_classes: Size of the replacement final linear layer.
        epochs: Maximum number of epochs.
        lr: Adam learning rate.
        patience: Epochs without improvement before training stops.
        checkpoint_path: Where EarlyStopping saves the best state_dict.
        log_every: Log batch metrics to wandb every this many batches.

    Raises:
        ValueError: If the loader yields no batches in an epoch.

    The wandb run is always closed, even if training fails part-way.
    """
    wandb.init(
        project="kristin",
        config={
            "epochs": epochs,
            "batch_size": dataset.batch_size,
            "lr": lr,
        })

    try:
        config = wandb.config

        # ResNet-50 created with pretrained=False; the head is resized to num_classes.
        model = resnet50(pretrained=False)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        model = model.to(device)

        optimizer = optim.Adam(model.parameters(), lr=config.lr)
        loss_func = nn.MultiLabelSoftMarginLoss()
        early_stopping = EarlyStopping(patience=patience, verbose=True, path=checkpoint_path)

        wandb.watch(model, loss_func, log='all', log_freq=10)
        model.train()

        for epoch in range(config.epochs):
            loss_sum = 0.0
            samples_seen = 0

            for i, (images, targets) in enumerate(dataset):
                optimizer.zero_grad()

                images = images.to(device)
                targets = targets.to(device)

                outputs = model(images)
                train_loss = loss_func(outputs, targets)

                train_loss.backward()
                optimizer.step()

                # Weight by batch size so a short final batch does not skew the mean.
                batch_size = images.size(0)
                loss_sum = loss_sum + train_loss.detach() * batch_size
                samples_seen += batch_size

                if (i + 1) % log_every == 0:
                    # Outputs are logits: threshold the sigmoid, not the raw value.
                    preds = (torch.sigmoid(outputs.detach()) > 0.5).float()
                    acc = (preds == targets).float().mean()
                    metrics = {"train_loss": train_loss.item(),
                               "train_accuracy": acc.item()}
                    wandb.log(metrics)

            if samples_seen == 0:
                raise ValueError("dataset yielded no batches; nothing to train on")

            # Early stopping follows the epoch-mean training loss; a single
            # mini-batch loss is too noisy to compare between epochs.
            early_stopping(float(loss_sum / samples_seen), model)

            if early_stopping.early_stop:
                break

            # Disabled per-epoch validation, kept for reference:
            #     testset = get_dataloader(object, batch_size=config.batch_size, is_train=False)
            #     val_loss, accuracy = validate_model(model, testset, loss_func, log_images=(epoch==(config.epochs-1)))
            #     val_metrics = {"val/val_loss": val_loss,
            #                    "val/val_accuracy": accuracy}
            #     wandb.log({**metrics, **val_metrics})
            #     print(f"Train Loss: {train_loss:.3f}, Valid Loss: {val_loss:3f}, Accuracy: {accuracy:.2f}")

        # wandb.summary['test_accuracy'] = 0.8
    finally:
        wandb.finish()

In [20]:
# Build the augmented training loader for the 'material' dataset, then launch
# a run of up to 100 epochs for the 7-label classifier.
trainset = get_dataloader(object='material', batch_size=64, is_train=True)
train_model(dataset=trainset, object='material', num_classes=7, epochs=100)

데이터셋 크기: 17042
Problem at: /tmp/ipykernel_6516/3929884293.py 7 train_model


Traceback (most recent call last):
  File "/home/compu/anaconda3/envs/kristin/lib/python3.7/site-packages/wandb/sdk/wandb_init.py", line 1078, in init
    run = wi.init()
  File "/home/compu/anaconda3/envs/kristin/lib/python3.7/site-packages/wandb/sdk/wandb_init.py", line 574, in init
    manager._inform_init(settings=self.settings, run_id=self.settings.run_id)
  File "/home/compu/anaconda3/envs/kristin/lib/python3.7/site-packages/wandb/sdk/wandb_manager.py", line 173, in _inform_init
    svc_iface._svc_inform_init(settings=settings, run_id=run_id)
  File "/home/compu/anaconda3/envs/kristin/lib/python3.7/site-packages/wandb/sdk/service/service_sock.py", line 38, in _svc_inform_init
    self._sock_client.send(inform_init=inform_init)
  File "/home/compu/anaconda3/envs/kristin/lib/python3.7/site-packages/wandb/sdk/lib/sock_client.py", line 211, in send
    self.send_server_request(server_req)
  File "/home/compu/anaconda3/envs/kristin/lib/python3.7/site-packages/wandb/sdk/lib/sock_client

Exception: problem

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# NOTE(review): `y_list` and `preds_list` are not assigned at notebook scope by
# any cell visible here (validate_model only creates locals with these names);
# they must be populated before this cell can run on a fresh kernel.

# Convert to arrays first: comparing two Python lists with `==` yields a single
# bool, which would make the "accuracy" either 0.0 or 1.0.
y_true = np.asarray(y_list)
y_pred = np.asarray(preds_list)

# Accuracy: fraction of matching entries
accuracy = np.mean(y_true == y_pred)
print(f'Accuracy: {accuracy:.3f}')

# Precision (micro-averaged)
precision = precision_score(y_true, y_pred, average='micro')
print(f'Precision: {precision:.3f}')

# Recall (micro-averaged)
recall = recall_score(y_true, y_pred, average='micro')
print(f'Recall: {recall:.3f}')

# F1 score (micro-averaged)
f1 = f1_score(y_true, y_pred, average='micro')
print(f'F1: {f1:.3f}')

In [None]:
import sklearn.metrics as skm

# Print scikit-learn's text classification report for the collected targets
# (`y_list`) and predictions (`preds_list`).
# NOTE(review): both names must already exist at notebook scope; no visible cell assigns them.
print(skm.classification_report(y_true=y_list, y_pred=preds_list))

In [None]:
# Save the model weights (state_dict only) to disk.
# NOTE(review): `model` is local to train_model and is never assigned at
# notebook scope by a visible cell - confirm where it comes from.
torch.save(obj=model.state_dict(), f='material_end.pt')