In [1]:
from PIL import Image
import cv2
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import torch
import torch.nn as nn
import torch.optim as optim
import math
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor, Lambda
import pandas as pd
from sklearn.model_selection import train_test_split
from einops.layers.torch import Rearrange
from torchinfo import summary
from tqdm import tqdm
import time
import pickle

from ECGDataset import ECGDataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch.manual_seed(0)

<torch._C.Generator at 0x1973c197f10>

In [55]:
folder = 'train_1'
SAVE_PATH = r'C:\Users\trust\bhf-data-science-centre-ecg-challenge'

n_batch = 256
n_workers = 0

# ImageTransformer, ECGDataset constants
#IMG_NOISE = 10 # add noise in image transformer / deprecated due to time issue, take too much time... 
fft_overlap = 4 # 현재 칸에서 좌우 총 몇 칸을 fft 할지
fft_zoom = 5 # fft 결과 얼마나 확대할지 / 앞에서 얼만큼을 취할지
fft_res = 16 # = n_patch, probably / 가로를 몇 등분해서 fft 할지
height = 480 # target resize in image transform = model input size
width = 864 # target resize in image transform = model input size

# Model constants
in_channel = 3 # gray image 1 + fft mag 1 + fft phase 1
n_class = 5 # out class / 질환군 5개에 대한 예측
embd_size = 324 # conv2d out channel = multihead attn in/out dim
n_patch = 16 # how many patchs per height, width in ViT
n_layer = 6 # Transformer encoder block number
n_head = 6 # n_head of multihead attn / embd_size must be divisible by n_heads
mha_drop = 0.2 # drop out p of multi head attn
ff_expansion = 2 # expansion = hidden dim / in/out dim ratio of feedforward in transformer encoder
ff_drop = 0.2 # drop out p of of feedforward in transformer encoder

In [5]:
# Data Check

In [13]:
# Valid Image file path list save
content_dir = os.path.join(SAVE_PATH, folder)
txt_dir = os.path.join(SAVE_PATH,'preprocess',folder+'.txt')

if os.path.isfile(txt_dir):
    print('Already exist')
else:
    len_corr = 0
    len_tot = 0
    with open(txt_dir,"w") as file:
        for n in os.listdir(content_dir):
            img_path = os.path.join(content_dir,n)
            len_tot +=1
            try:
                Image.open(img_path).verify()
                file.write(img_path + "\n")
                len_corr += 1
                """
                if cv2.imread(img_path) is not None:
                    file.write(img_path + "\n")
                    len_corr += 1
                else:
                    print(img_path)
                """
            except:
                print(img_path)
                continue
        file.close()
    print(len_corr,'/',len_tot)

Already exist


In [15]:
# Data Load

In [17]:
class ImageTransform():
    def __init__(self, target_height, target_width, fft_overlap, fft_zoom, fft_res):
        self.height = target_height
        self.width = target_width
        self.fft_overlap = fft_overlap
        self.fft_zoom = fft_zoom
        self.fft_res = fft_res
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((self.height,self.width))
        ])

    def __call__(self, img):
        #gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # already read in gray
        height, width = img.shape
        
        """ Add Noise / deprecated due to time issue, take too much time... 
        for i in range(height):
            for j in range(width):
                img[i][j] += np.random.normal() * IMG_NOISE # add random noise
        """
        
        gray = cv2.adaptiveThreshold(img, 255,  cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 159, 0) # cv2.adaptive( img / gray )
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(4,4))
        gray = cv2.dilate(gray, kernel)
        gray = cv2.bitwise_not(gray)

        crop_h = height//self.fft_res
        crop_w = width//self.fft_res
        fft = np.zeros((height,width,2))

        k=1 # fft -k ~ +(fft_overlap-k) patch
        gray_extend = np.zeros((height, (width-4)*2))
        gray_extend[:,0:k*crop_w] = gray[:,-2-k*crop_w:-2]
        gray_extend[:,k*crop_w:(k+fft_res)*crop_w] = gray[:,2:-2]
        gray_extend[:,(k+fft_res)*crop_w:] = gray[:,2:-2-k*crop_w]
                               
        for j in range(self.fft_res):
            """
            gray_slide = np.zeros((height,width))
            i = (self.fft_res + j - 1) % self.fft_res
            gray_slide[:,2:-2-i*crop_w] = gray[:,2+i*crop_w:-2]
            gray_slide[:,-2-i*crop_w:-2] = gray[:,2:2+i*crop_w]
            fft_img = np.fft.fft(gray_slide[:,2:2+crop_w*self.fft_overlap])/crop_w/self.fft_overlap
            """
            fft_img = np.fft.fft(gray_extend[:,crop_w*j:crop_w*(j+self.fft_overlap)])/crop_w/self.fft_overlap
            fft_mag = np.abs(fft_img[:,:crop_w//self.fft_zoom]) * 30
            fft_phase = np.angle(fft_img[:,:crop_w//self.fft_zoom]) + np.pi
            fft[:,2+crop_w*j:2+crop_w*(j+1),0] = cv2.resize(fft_mag, dsize=(crop_w,height))
            fft[:,2+crop_w*j:2+crop_w*(j+1),1] = cv2.resize(fft_phase, dsize=(crop_w,height))
        return self.transform(np.dstack([img,fft]))

In [15]:
# data load
txt_dir = os.path.join(SAVE_PATH,'preprocess',folder+'.txt')
corr_img_path = np.array(open(txt_dir,"r").read().splitlines())

train_path, test_path = train_test_split(corr_img_path, test_size = 0.4, random_state=17, shuffle=True)
val_path, test_path = train_test_split(test_path, test_size = 0.5, random_state=17, shuffle=True)

csv_path = os.path.join(SAVE_PATH,'train_final.csv')
label_data = pd.read_csv(csv_path).set_index('ID')

train_dataset = ECGDataset(train_path, label_data, ImageTransform(height, width, fft_overlap, fft_zoom, fft_res))
val_dataset = ECGDataset(val_path, label_data, ImageTransform(height, width, fft_overlap, fft_zoom, fft_res))
test_dataset = ECGDataset(test_path, label_data, ImageTransform(height, width, fft_overlap, fft_zoom, fft_res))

init start
init ongoing... 50 img done
init ongoing... 100 img done
init ongoing... 150 img done
init ongoing... 200 img done
init ongoing... 250 img done
init ongoing... 300 img done
init ongoing... 350 img done
init ongoing... 400 img done
init ongoing... 450 img done
init ongoing... 500 img done
init ongoing... 550 img done
init done with 599 img
init start
init ongoing... 50 img done
init ongoing... 100 img done
init ongoing... 150 img done
init ongoing... 200 img done
init done with 200 img
init start
init ongoing... 50 img done
init ongoing... 100 img done
init ongoing... 150 img done
init ongoing... 200 img done
init done with 200 img


In [17]:
# dataset pickle save
train_dataset_save_path = os.path.join(SAVE_PATH,'preprocess',folder+'_train_dataset.pickle')
val_dataset_save_path = os.path.join(SAVE_PATH,'preprocess',folder+'_val_dataset.pickle')
test_dataset_save_path = os.path.join(SAVE_PATH,'preprocess',folder+'_test_dataset.pickle')

with open(train_dataset_save_path,"wb") as f:
    pickle.dump(train_dataset, f)
with open(val_dataset_save_path,"wb") as f:
    pickle.dump(val_dataset, f)
with open(test_dataset_save_path,"wb") as f:
    pickle.dump(test_dataset, f)

In [7]:
# dataset pickle load
train_dataset_save_path = os.path.join(SAVE_PATH,'preprocess',folder+'_train_dataset.pickle')
val_dataset_save_path = os.path.join(SAVE_PATH,'preprocess',folder+'_val_dataset.pickle')
test_dataset_save_path = os.path.join(SAVE_PATH,'preprocess',folder+'_test_dataset.pickle')

with open(train_dataset_save_path,"rb") as f:
    train_dataset = pickle.load(f)
with open(val_dataset_save_path,"rb") as f:
    val_dataset = pickle.load(f)
with open(test_dataset_save_path,"rb") as f:
    test_dataset = pickle.load(f)

In [8]:
# dataset to dataloader
train_dataloader = DataLoader(train_dataset, batch_size=n_batch, shuffle=True, num_workers=n_workers, pin_memory=True)
val_dataloader = DataLoader(val_dataset, batch_size=n_batch, shuffle=True, num_workers=n_workers, pin_memory=True)
test_dataloader = DataLoader(test_dataset, batch_size=n_batch, shuffle=True, num_workers=n_workers, pin_memory=True)

In [13]:
# dataload 중간 결과 확인
item = train_dataset.__getitem__(8)
print(item[0].size())
print(item[1])

batch_iter = iter(train_dataloader)
inputs, label = next(batch_iter)
print(inputs.size())
print(label.size())

torch.Size([3, 480, 864])
tensor([0., 0., 1., 0., 0.])
torch.Size([256, 3, 480, 864])
torch.Size([256, 5])


In [9]:
# ViT Model

In [13]:
class EmbeddingLayer(nn.Module):
    def __init__(self, in_channel, embd_size, height, width, n_patch):
        super().__init__()
        patch_size = (height//n_patch, width//n_patch)
        self.project = nn.Sequential(
            nn.Conv2d(in_channel, embd_size, kernel_size=patch_size, stride=patch_size),
            # n_batch, in_channel, height, width -> n_batch, embd_size, n_patch, n_patch
            Rearrange('b c (h) (w) -> b (h w) c')
            # n_batch, embd_size, n_patch, n_patch -> n_batch, n_patch**2, embd_size
        )
        self.cls_token = nn.Parameter(torch.randn(1,1,embd_size)) # 1, 1, embd_size
        self.positions = nn.Parameter(torch.randn(n_patch**2 + 1,embd_size)) # n_batch, 1 + n_patch**2, embd_size

    def forward(self, x):
        x = self.project(x)
        # n_batch, in_channel, height, width -> n_batch, embd_size, n_patch, n_patch -> n_batch, n_patch**2, embd_size
        cls_tokens = self.cls_token.repeat(x.shape[0], 1,1) # 1, 1, embd_size -> n_batch, 1, embd_size
        x = torch.cat((cls_tokens, x), dim=1) # n_batch, 1 + n_patch**2, embd_size
        x += self.positions # n_batch, 1 + n_patch**2, embd_size
        return x # n_batch, 1 + n_patch**2, embd_size

In [15]:
class MultiheadAttn(nn.Module):
    def __init__(self, embd_size, n_head, mha_drop):
        super().__init__()
        self.multiheadattn = nn.MultiheadAttention(embd_size, n_head, dropout=mha_drop, batch_first=True)
        self.query = nn.Linear(embd_size, embd_size)
        self.key = nn.Linear(embd_size, embd_size)
        self.value = nn.Linear(embd_size, embd_size)

    def forward(self, x):
        query = self.query(x)
        key = self.key(x)
        value = self.value(x)
        return self.multiheadattn(query, key, value, need_weights=False)

In [17]:
class FeedForwardBlock(nn.Sequential):
    def __init__(self, embd_size, ff_expansion, ff_drop):
        super().__init__(
            nn.Linear(embd_size, ff_expansion*embd_size),
            nn.GELU(),
            nn.Dropout(ff_drop),
            nn.Linear(ff_expansion*embd_size,embd_size)
        )

In [19]:
class TransformerEncoderBlock(nn.Module):
    def __init__(self, embd_size, n_head, mha_drop, ff_expansion, ff_drop):
        super().__init__()
        self.mha = MultiheadAttn(embd_size, n_head, mha_drop)
        self.ffb = FeedForwardBlock(embd_size, ff_expansion, ff_drop)
        self.lnorm = nn.LayerNorm(embd_size)

    def forward(self, x):
        norm_x = self.lnorm(x)
        multihead_out, attn = self.mha(norm_x)

        out1 = multihead_out + x

        norm_out = self.lnorm(out1)
        ffb_out = self.ffb(norm_out)

        out2 = ffb_out + out1
        return out2

class TransformerEncoder(nn.Sequential):
    def __init__(self, n_layer, *args):
        super().__init__(*[TransformerEncoderBlock(*args) for _ in range(n_layer)])

In [21]:
class ViT(nn.Module):
    def __init__(
            self,
            n_class,
            in_channel,
            embd_size,
            height,
            width,
            n_patch,
            n_layer,
            n_head,
            mha_drop,
            ff_expansion,
            ff_drop
        ):
        super().__init__()
        self.embd = EmbeddingLayer(in_channel, embd_size, height, width, n_patch)
        self.encoder = TransformerEncoder(n_layer, embd_size, n_head, mha_drop, ff_expansion, ff_drop)
        self.lnorm = nn.LayerNorm(embd_size)
        self.head = nn.Linear(embd_size, n_class)

    def forward(self, x):
        x = self.embd(x)
        x = self.encoder(x)
        x = self.lnorm(x) # n_batch, n_patch**2 + 1, embd_size
        cls_final = x[:,0] # n_batch, embd_size
        return self.head(cls_final) # n_batch, n_class

In [23]:
# Training

In [25]:
# ASL Loss
#https://github.com/Alibaba-MIIL/ASL/tree/main
#Asymmetric Loss For Multi-Label Classification
class AsymmetricLossOptimized(nn.Module):
    ''' Notice - optimized version, minimizes memory allocation and gpu uploading,
    favors inplace operations'''

    def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False):
        super(AsymmetricLossOptimized, self).__init__()

        self.gamma_neg = gamma_neg
        self.gamma_pos = gamma_pos
        self.clip = clip
        self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss
        self.eps = eps

        # prevent memory allocation and gpu uploading every iteration, and encourages inplace operations
        self.targets = self.anti_targets = self.xs_pos = self.xs_neg = self.asymmetric_w = self.loss = None

    def forward(self, x, y):
        """"
        Parameters
        ----------
        x: input logits
        y: targets (multi-label binarized vector)
        """

        self.targets = y
        self.anti_targets = 1 - y

        # Calculating Probabilities
        self.xs_pos = torch.sigmoid(x)
        self.xs_neg = 1.0 - self.xs_pos

        # Asymmetric Clipping
        if self.clip is not None and self.clip > 0:
            self.xs_neg.add_(self.clip).clamp_(max=1)

        # Basic CE calculation
        self.loss = self.targets * torch.log(self.xs_pos.clamp(min=self.eps))
        self.loss.add_(self.anti_targets * torch.log(self.xs_neg.clamp(min=self.eps)))

        # Asymmetric Focusing
        if self.gamma_neg > 0 or self.gamma_pos > 0:
            if self.disable_torch_grad_focal_loss:
                torch.set_grad_enabled(False)
            self.xs_pos = self.xs_pos * self.targets
            self.xs_neg = self.xs_neg * self.anti_targets
            self.asymmetric_w = torch.pow(1 - self.xs_pos - self.xs_neg,
                                          self.gamma_pos * self.targets + self.gamma_neg * self.anti_targets)
            if self.disable_torch_grad_focal_loss:
                torch.set_grad_enabled(True)
            self.loss *= self.asymmetric_w

        return -self.loss.sum()

In [27]:
# Cosine Annealing Warmup Restarts
#https://gaussian37.github.io/dl-pytorch-lr_scheduler/
#https://github.com/katsura-jp/pytorch-cosine-annealing-with-warmup/tree/master
class CosineAnnealingWarmupRestarts(_LRScheduler):
    """
        optimizer (Optimizer): Wrapped optimizer.
        first_cycle_steps (int): First cycle step size.
        cycle_mult(float): Cycle steps magnification. Default: -1.
        max_lr(float): First cycle's max learning rate. Default: 0.1.
        min_lr(float): Min learning rate. Default: 0.001.
        warmup_steps(int): Linear warmup step size. Default: 0.
        gamma(float): Decrease rate of max learning rate by cycle. Default: 1.
        last_epoch (int): The index of last epoch. Default: -1.
    """

    def __init__(self,
                 optimizer : torch.optim.Optimizer,
                 first_cycle_steps : int,
                 cycle_mult : float = 1.,
                 max_lr : float = 0.1,
                 min_lr : float = 0.001,
                 warmup_steps : int = 0,
                 gamma : float = 1.,
                 last_epoch : int = -1
        ):
        assert warmup_steps < first_cycle_steps

        self.first_cycle_steps = first_cycle_steps # first cycle step size
        self.cycle_mult = cycle_mult # cycle steps magnification
        self.base_max_lr = max_lr # first max learning rate
        self.max_lr = max_lr # max learning rate in the current cycle
        self.min_lr = min_lr # min learning rate
        self.warmup_steps = warmup_steps # warmup step size
        self.gamma = gamma # decrease rate of max learning rate by cycle

        self.cur_cycle_steps = first_cycle_steps # first cycle step size
        self.cycle = 0 # cycle count
        self.step_in_cycle = last_epoch # step size of the current cycle

        super(CosineAnnealingWarmupRestarts, self).__init__(optimizer, last_epoch)

        # set learning rate min_lr
        self.init_lr()

    def init_lr(self):
        self.base_lrs = []
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.min_lr
            self.base_lrs.append(self.min_lr)

    def get_lr(self):
        if self.step_in_cycle == -1:
            return self.base_lrs
        elif self.step_in_cycle < self.warmup_steps:
            return [(self.max_lr - base_lr)*self.step_in_cycle / self.warmup_steps + base_lr for base_lr in self.base_lrs]
        else:
            return [base_lr + (self.max_lr - base_lr) \
                    * (1 + math.cos(math.pi * (self.step_in_cycle-self.warmup_steps) \
                                    / (self.cur_cycle_steps - self.warmup_steps))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        if epoch is None:
            epoch = self.last_epoch + 1
            self.step_in_cycle = self.step_in_cycle + 1
            if self.step_in_cycle >= self.cur_cycle_steps:
                self.cycle += 1
                self.step_in_cycle = self.step_in_cycle - self.cur_cycle_steps
                self.cur_cycle_steps = int((self.cur_cycle_steps - self.warmup_steps) * self.cycle_mult) + self.warmup_steps
        else:
            if epoch >= self.first_cycle_steps:
                if self.cycle_mult == 1.:
                    self.step_in_cycle = epoch % self.first_cycle_steps
                    self.cycle = epoch // self.first_cycle_steps
                else:
                    n = int(math.log((epoch / self.first_cycle_steps * (self.cycle_mult - 1) + 1), self.cycle_mult))
                    self.cycle = n
                    self.step_in_cycle = epoch - int(self.first_cycle_steps * (self.cycle_mult ** n - 1) / (self.cycle_mult - 1))
                    self.cur_cycle_steps = self.first_cycle_steps * self.cycle_mult ** (n)
            else:
                self.cur_cycle_steps = self.first_cycle_steps
                self.step_in_cycle = epoch

        self.max_lr = self.base_max_lr * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

In [29]:
model = ViT(n_class,in_channel,embd_size,height,width,n_patch,n_layer,n_head,mha_drop,ff_expansion,ff_drop).to(device)

In [139]:
# ASL criterion constants
gamma_neg = 0.2
gamma_pos = 0
clip = 0. # 0.05
eps = 1e-8

# cosine annealing warm restart scheduler constants
first_cycle_steps = 50      #first_cycle_steps (int): First cycle step size
cycle_mult = 1.0            #cycle_mult(float): Cycle steps magnification. Default: -1.0
max_lr = 0.1                #max_lr(float): First cycle's max learning rate. Default: 0.1
min_lr = 3e-4               #min_lr(float): Min learning rate. Default: 0.001
warmup_steps = 15           #warmup_steps(int): Linear warmup step size. Default: 0
gamma = 1.0                 #gamma(float): Decrease rate of max learning rate by cycle. Default: 1.0
last_epoch = -1             #last_epoch (int): The index of last epoch. Default: -1

n_epochs = 300
MAX_GRADIENT = 100
val_train_ratio = 5
ACT = 3

criterion = AsymmetricLossOptimized(gamma_neg, gamma_pos, clip, eps)
optimizer = optim.Adam(model.parameters(), lr=min_lr)
scheduler = CosineAnnealingWarmupRestarts(optimizer,first_cycle_steps,cycle_mult,max_lr,min_lr,warmup_steps,gamma)

In [33]:
def training(model, dataloader, dataset, criterion, optimizer):
    model.train()
    running_loss = 0.0

    for inputs, target in dataloader:
        inputs = inputs.float().to(device)
        target = target.float().to(device)
        outputs = model(inputs)
        loss = criterion(outputs, target)
        running_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        #torch.nn.utils.clip_grad_norm_(model.parameters(), MAX_GRADIENT)
        optimizer.step()

    return running_loss

In [35]:
def validating(model, dataloader, dataset, criterion):
    model.eval()
    running_loss = 0.0
    
    with torch.no_grad():
        for inputs, target in dataloader:
            inputs = inputs.float().to(device)
            target = target.float().to(device)
            outputs = model(inputs)
            loss = criterion(outputs, target)
            running_loss += loss.item()
    
    return running_loss

In [37]:
def save_loss(loss_data, save_path):
    with open(save_path,"w") as file:
        for item in loss_data:
            file.write(str(item) + "\n")
        file.close()

In [39]:
def print_loss(train_loss, val_loss):
    plt.figure(figsize=(10,7))
    plt.plot(train_loss, color='green', label='train loss')
    plt.plot(list(range(val_train_ratio-1,len(train_loss),val_train_ratio)), val_loss, color='red', label='val loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

In [None]:
train_loss = []
val_loss = []
model_save_path = os.path.join(SAVE_PATH,'training',folder+'_ChkPt_'+str(ACT)+'.pt')
train_loss_save_path = os.path.join(SAVE_PATH,'training',folder+'_train_loss_'+str(ACT)+'.txt')
val_loss_save_path = os.path.join(SAVE_PATH,'training',folder+'_val_loss_'+str(ACT)+'.txt')

start = time.time()
min_loss = np.Inf

for epoch in range(n_epochs):
    print(f"Epoch {epoch+1} / {n_epochs}")
    epoch_loss = training(model, train_dataloader, train_dataset, criterion, optimizer)
    train_loss.append(epoch_loss)
    print(f"Train Loss : {epoch_loss:.4f}")
    if epoch % val_train_ratio == val_train_ratio - 1:
        epoch_loss = validating(model, train_dataloader, train_dataset, criterion)
        val_loss.append(epoch_loss)
        print(f"Valid Loss : {epoch_loss:.4f}")
        if epoch_loss < min_loss:
            min_loss = epoch_loss
            print(f'New loss record : {min_loss:.6f}')
            #torch.save(model.state_dict(), model_save_path)
    #scheduler.step()
end = time.time()
print(f"Train Done > Training time : {(end-start)/60:.3f} minutes / {n_epochs} epochs")
#save_loss(train_loss, train_loss_save_path)
#save_loss(val_loss, val_loss_save_path)
print_loss(train_loss, val_loss)

Epoch 1 / 300
Train Loss : 1206.8809
Epoch 2 / 300
Train Loss : 1207.8633
Epoch 3 / 300
Train Loss : 1208.3166
Epoch 4 / 300
Train Loss : 1207.0205
Epoch 5 / 300
Train Loss : 1207.4197
Valid Loss : 1207.4965
New loss record : 1207.496536
Epoch 6 / 300
Train Loss : 1207.3855
Epoch 7 / 300
Train Loss : 1207.7419
Epoch 8 / 300
Train Loss : 1206.6521
Epoch 9 / 300
Train Loss : 1207.9035
Epoch 10 / 300
Train Loss : 1207.3366
Valid Loss : 1207.4945
New loss record : 1207.494507
Epoch 11 / 300
Train Loss : 1208.1018
Epoch 12 / 300
Train Loss : 1207.7406
Epoch 13 / 300
Train Loss : 1208.2281
Epoch 14 / 300
Train Loss : 1207.5710
Epoch 15 / 300
Train Loss : 1207.8055
Valid Loss : 1207.4950
Epoch 16 / 300
Train Loss : 1208.0450
Epoch 17 / 300
Train Loss : 1207.2653
Epoch 18 / 300
Train Loss : 1207.4537
Epoch 19 / 300
Train Loss : 1207.6290
Epoch 20 / 300
Train Loss : 1207.7830
Valid Loss : 1207.4938
New loss record : 1207.493774
Epoch 21 / 300
Train Loss : 1208.1812
Epoch 22 / 300
Train Loss : 1

In [43]:
torch.cuda.empty_cache()

In [131]:
torch.save(model.state_dict(), os.path.join(SAVE_PATH,'training',folder+'_ChkPt_2.pt'))

In [85]:
model.load_state_dict(torch.load(os.path.join(SAVE_PATH,'training',folder+'_ChkPt_4.pt'),weights_only=True))

<All keys matched successfully>

In [135]:
def test(model, dataloader, dataset, criterion):
    model.eval()
    with torch.no_grad():
        for inputs, target in dataloader:
            #inputs, target = data[0].float().to(device), data[1].float().to(device)
            inputs = inputs.float().to(device)
            target = target.float().to(device)
            #print(target)
            outputs = model(inputs)
            outputs = torch.sigmoid(outputs)
            #print(torch.sigmoid(outputs))
            print("loss : ",criterion(outputs, target))
            for i in range(10):
                print(i," target ",target[i])
                print(i," output ",outputs[i])

test(model, test_dataloader, test_dataset, criterion)

loss :  tensor(683.2834, device='cuda:0')
0  target  tensor([1., 1., 1., 0., 0.], device='cuda:0')
0  output  tensor([0.2421, 0.1297, 0.2861, 0.2419, 0.1011], device='cuda:0')
1  target  tensor([0., 0., 0., 0., 0.], device='cuda:0')
1  output  tensor([0.2422, 0.1297, 0.2858, 0.2416, 0.1010], device='cuda:0')
2  target  tensor([1., 0., 0., 0., 0.], device='cuda:0')
2  output  tensor([0.2416, 0.1293, 0.2861, 0.2418, 0.1012], device='cuda:0')
3  target  tensor([0., 0., 0., 0., 1.], device='cuda:0')
3  output  tensor([0.2420, 0.1297, 0.2862, 0.2420, 0.1011], device='cuda:0')
4  target  tensor([0., 0., 0., 0., 0.], device='cuda:0')
4  output  tensor([0.2419, 0.1295, 0.2860, 0.2418, 0.1011], device='cuda:0')
5  target  tensor([0., 0., 0., 0., 0.], device='cuda:0')
5  output  tensor([0.2420, 0.1297, 0.2862, 0.2420, 0.1011], device='cuda:0')
6  target  tensor([0., 0., 0., 0., 0.], device='cuda:0')
6  output  tensor([0.2401, 0.1290, 0.2880, 0.2437, 0.1017], device='cuda:0')
7  target  tensor([0