In [54]:
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import torchvision.models as models
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
import json
import numpy as np
import os
import cv2
from tqdm import tqdm
import random
from matplotlib import pyplot as plt
import pandas as pd
from collections import namedtuple
from sklearn.metrics import accuracy_score
from copy import deepcopy

import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import train_test_split

### SEED

In [55]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

# Dataset

In [56]:
import cv2
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self, imgs_path: list, labels_path: list=None, transform = None, mode='train'):
        self.imgs_path = imgs_path
        self.labels_path = labels_path
        self.transform = transform
        self.mode = mode

    def __len__(self):
        return len(self.imgs_path)
        
    def __getitem__(self, idx):
        img_path = self.imgs_path[idx]
        img = cv2.imread(img_path)
        
        if self.transform:
            img = self.transform(image = img)['image']

        if self.mode == 'train' or self.mode == 'valid':
            label_path = self.labels_path[idx]
            with open(label_path, 'r') as file:
                lines = file.readlines()
            
            labels = [float(x) for x in lines[0].strip().split()]
            lab = labels[0]
            return img, torch.tensor(lab)
            
        else: # test일 때
            return img

# Train

### Data load

In [57]:
BASE = '/mnt/d/Jupyter-Goodyoung/DiabetesSolution-AI'
SAVE_PATH = f"{BASE}/save"
MODEL_SAVE = f'{SAVE_PATH}/EfficientNetB0.pth' #
WORKERS = 4
EPOCHS = 50 # 훈련 epoch 지정
BATCH_SIZE = 16# batch size 지정
# IMAGE_SIZE = (256, 256) # 이미지 크기 지정
NUM_CLASSES = 721
RANDOM_STATE = 42 # seed 고정
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

seed_everything(RANDOM_STATE) # SEED 고정

In [58]:
# 파일에서 문자열을 읽어와 리스트에 저장하는 함수
def read_strings_from_file(filename):
    strings = []
    with open(filename, 'r') as f:
        if filename == "TL_files.txt" or filename == "VL_files.txt" :
            for line in f:
                line = line.strip()
                line = line[:-4] +"txt"
                # line = line[:-4] +"json"
                strings.append(line)  # 줄바꿈 문자 제거            
        else:    
            for line in f:
                strings.append(line.strip())  # 줄바꿈 문자 제거
    return strings
TL_files = read_strings_from_file('TL_files.txt')
TS_files = read_strings_from_file('TS_files.txt')
VL_files = read_strings_from_file('VL_files.txt')
VS_files = read_strings_from_file('VS_files.txt')

# train : val = 8 : 2 나누기
combined_imgs = TS_files + VS_files
combined_label = TL_files + VL_files
divide_num = 1000
x_tr, x_val = train_test_split(combined_imgs[:divide_num], test_size=0.2, random_state=RANDOM_STATE)
y_tr, y_val = train_test_split(combined_label[:divide_num], test_size=0.2, random_state=RANDOM_STATE) # 둘 다 같은 비율로 나뉘어 진다

transform = A.Compose([
        A.Resize(480,480),
        A.Normalize(0.5,0.5),
        ToTensorV2()
])

train_dataset = CustomDataset(x_tr, y_tr, transform = transform, mode = 'train')
valid_dataset = CustomDataset(x_val, y_val, transform = transform, mode = 'valid')

train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=WORKERS,
    pin_memory=True
)
val_dataloader = DataLoader(
    dataset=valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=WORKERS,
    pin_memory=True
        )

### Model train

#### model selection

In [59]:
# torch init cache
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [61]:
import timm
model = timm.create_model('efficientnet_b0', pretrained=True)
model.classifier = nn.Linear(1280,NUM_CLASSES)

In [62]:
optimizer = optim.AdamW(model.parameters(), lr=0.001)#(params=model.parameters(), lr=0.0001)
loss_fn = nn.CrossEntropyLoss().to(DEVICE)
lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=0.000001)

In [63]:
best_val_loss = 1e9
best_model = None
early_stop = 0

model.to(DEVICE)
# model.load_state_dict(torch.load(MODEL_SAVE))

for epoch in range(EPOCHS+1):
    model.train()
    print('Epoch {}/{}'.format(epoch, EPOCHS))
    print('-'*20)
    train_loss = 0

    #### train ####
    for imgs, label in tqdm(train_dataloader):
        imgs = imgs.to(DEVICE)
        label = label.to(DEVICE)
        # 초기화
        optimizer.zero_grad() 
        # 예측
        output = model(imgs) 
        # 순전파
        one_hot_label = torch.zeros(label.size(0), NUM_CLASSES, device=label.device)
        one_hot_label.scatter_(1, label.long().unsqueeze(1), 1)
        loss = loss_fn(output, one_hot_label).to(DEVICE) 
        
        # 역전파
        loss.backward() 
        # 학습
        optimizer.step() 
        
        train_loss += loss.item()
        
    #### valid ####
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for imgs, label in tqdm(val_dataloader):
            imgs = imgs.to(DEVICE)
            label = label.to(DEVICE)
            # 예측

            output = model(imgs)
            # 순전파
            one_hot_label = torch.zeros(label.size(0), NUM_CLASSES, device=label.device)
            one_hot_label.scatter_(1, label.long().unsqueeze(1), 1)
            loss = loss_fn(output, one_hot_label).to(DEVICE)
            
            val_loss+=loss.item()
            
    
    print(f"EPOCH: {epoch}, TRAIN LOSS: {train_loss:.6f},  VAL LOSS: {val_loss:.6f}")
    
    if lr_scheduler is not None:
        lr_scheduler.step()
        
    if best_val_loss > val_loss:
        print("Model Save")
        
        best_val_loss = val_loss
        torch.save(model.state_dict(), MODEL_SAVE)
        early_stop = 0
        file_path = f'{SAVE_PATH}/result_all.txt'
        with open(file_path, 'a') as file:
            file.write(f"[BEST]: EPOCH: {epoch}, TRAIN LOSS: {train_loss:.6f},  VAL LOSS: {val_loss:.6f}\n")
    else:
        early_stop += 1

    # early stop
    if early_stop > 22:
        print("Early Stop")
        break

Epoch 0/50
--------------------


100%|███████████████████████████████████████████████████████████████████████████████████| 50/50 [02:24<00:00,  2.88s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 13/13 [00:35<00:00,  2.70s/it]


EPOCH: 0, TRAIN LOSS: 107.040427,  VAL LOSS: 8.205411
Epoch 1/50
--------------------


100%|███████████████████████████████████████████████████████████████████████████████████| 50/50 [02:22<00:00,  2.86s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 13/13 [00:35<00:00,  2.73s/it]


EPOCH: 1, TRAIN LOSS: 9.288014,  VAL LOSS: 1.489130
Epoch 2/50
--------------------


100%|███████████████████████████████████████████████████████████████████████████████████| 50/50 [02:22<00:00,  2.84s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 13/13 [00:35<00:00,  2.73s/it]


EPOCH: 2, TRAIN LOSS: 2.536738,  VAL LOSS: 1.241166
Epoch 3/50
--------------------


100%|███████████████████████████████████████████████████████████████████████████████████| 50/50 [02:24<00:00,  2.89s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 13/13 [00:35<00:00,  2.70s/it]


EPOCH: 3, TRAIN LOSS: 1.995905,  VAL LOSS: 1.932149
Epoch 4/50
--------------------


100%|███████████████████████████████████████████████████████████████████████████████████| 50/50 [02:23<00:00,  2.87s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 13/13 [00:35<00:00,  2.72s/it]


EPOCH: 4, TRAIN LOSS: 2.914657,  VAL LOSS: 3.233294
Epoch 5/50
--------------------


 18%|███████████████                                                                     | 9/50 [00:27<02:04,  3.04s/it]


KeyboardInterrupt: 

# Inference

In [116]:
# label data load
import json
with open(f"{BASE}/DiabetesSolution-AI/label_mapping_data.json") as f:
    data = json.load(f)
label_data = list(data.keys())
# transform
transform = A.Compose([
        A.Resize(480,480),
        A.Normalize(0.5,0.5),
        ToTensorV2()
])

# load test path
num= 9898
test_dataset = CustomDataset([combined_imgs[num]], transform = transform, mode = 'test')
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False
)

model.load_state_dict(torch.load(MODEL_SAVE))
model.to(DEVICE)
model.eval()  # 모델을 평가 모드로 설정

with torch.no_grad():
    for imgs in test_dataloader:  # 테스트 데이터로더 사용
        imgs = imgs.to(DEVICE)
        # 예측
        output = model(imgs)
        
        # 확률값으로 변환
        probabilities = nn.functional.softmax(output, dim=1) # 확률로 변환 -> softmax
        # # 가장 높은 확률을 가진 클래스 선택
        predicted_classes = torch.argmax(probabilities, dim=1)
        # # 예측 결과 저장
        # predictions.extend(predicted_classes.cpu().numpy())

# 예측 결과 출력
print(combined_imgs[num])
print(predicted_classes.item())
print(label_data[predicted_classes.item()])

/mnt/d/Jupyter-Goodyoung/diabetes/data/Train/TS/TS1/C/02/C02084/02/측면/C_02_C02084_얼그레이마카롱_02_05.jpg
12
전골
