# Import

In [None]:
import torch 
import random
import numpy as np
import os
from torchvision.transforms import v2

import torch

from torch.utils.data import DataLoader, random_split

from torchvision import datasets

import numpy as np

import os
import random
from tqdm.notebook import tqdm    

from datetime import datetime

from sklearn.metrics import accuracy_score # 정확도 계산 함수
from sklearn.metrics import recall_score   # 재현율 계산 함수
from sklearn.metrics import f1_score       # F1 점수 계산 함수
from datetime import datetime

In [None]:
%pip install efficientnet_pytorch

from efficientnet_pytorch import EfficientNet

# Setting

In [None]:
# Capture the launch time once; its yymmdd_HHMM rendering is used to tag
# everything this run produces.
now = datetime.now()
current_datetime = f"{now:%y%m%d_%H%M}"

In [None]:
# Location of the training images and the identifier of this run
# (fixed prefix + launch timestamp).
train_dataset_path = "/kaggle/input/image-classification2023-2nd/train"
test_name = "cp2_" + current_datetime

In [None]:
# --- reproducibility / data split ---
seed = 42
train_ratio = 0.9          # fraction of the samples used for training
batch_size = 64

# --- optimisation ---
epoch_num = 10
learning_rate = 5e-4
betas = (0.9, 0.999)       # Adam moment coefficients; library default is (0.9, 0.999)
weight_decay = 0           # library default is 0
amsgrad = False            # library default is False

# --- model ---
pretrained = True
advprop = True             # library default is False; only relevant when pretrained
dropout_rate = 0

In [None]:
def _lower_contrast(img):
    """Return ``img`` with its contrast adjusted by a factor of 0.3."""
    return v2.functional.adjust_contrast(img, 0.3)


def _lower_saturation(img):
    """Return ``img`` with its saturation adjusted by a factor of 0.3."""
    return v2.functional.adjust_saturation(img, 0.3)


# Preprocessing for training samples, applied in this order:
# 240x240 centre crop -> random rotation in [0, 30] degrees ->
# contrast adjustment -> saturation adjustment -> tensor conversion.
train_transforms = v2.Compose([
    v2.CenterCrop((240, 240)),
    # Disabled experiment:
    # v2.Lambda(lambda img: v2.functional.adjust_brightness(img, 1.2)),
    v2.RandomRotation(degrees=(0, 30)),
    v2.Lambda(_lower_contrast),
    v2.Lambda(_lower_saturation),
    v2.ToTensor(),
])

In [None]:
# Preprocessing for validation samples; the steps mirror the training
# pipeline one to one.
# NOTE(review): the random rotation is applied to validation images as well.
# The original author marked it to be re-checked once the competition is
# over -- confirm whether it should stay.
_valid_steps = [
    v2.CenterCrop((240, 240)),
    # Disabled experiment:
    # v2.Lambda(lambda img: v2.functional.adjust_brightness(img, 1.2)),
    v2.RandomRotation(degrees=(0, 30)),
    v2.Lambda(lambda picture: v2.functional.adjust_contrast(picture, 0.3)),
    v2.Lambda(lambda picture: v2.functional.adjust_saturation(picture, 0.3)),
    v2.ToTensor(),
]
valid_transforms = v2.Compose(_valid_steps)

# Train

In [None]:
# Output folders under the Kaggle working directory. 'save_state_dict'
# receives the best checkpoint and 'save_dict' the per-epoch checkpoints
# written by train(); 'csv' is presumably for exported result tables.
directories = [
    f'/kaggle/working/{folder}'
    for folder in ('csv', 'save_state_dict', 'save_dict')
]

# exist_ok=True makes re-running this cell harmless.
for dir_path in directories:
    os.makedirs(dir_path, exist_ok=True)

In [None]:
def seed_everything(seed):
    """Seed every random number source this notebook relies on.

    Sets ``PYTHONHASHSEED`` and seeds Python's ``random``, NumPy and PyTorch
    with ``seed``. When CUDA is available the CUDA generator is seeded too
    and cuDNN is switched to deterministic mode with benchmarking disabled.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    for apply_seed in (random.seed, np.random.seed, torch.manual_seed):
        apply_seed(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Apply the global seed before any dataset split, model or loader is created.
seed_everything(seed)

In [None]:
def seed_worker(worker_id):
    """Re-seed NumPy and Python's ``random`` from PyTorch's initial seed.

    Passed to the DataLoaders as ``worker_init_fn``. The PyTorch seed is
    reduced to 32 bits before being handed to the other two libraries.
    ``worker_id`` is accepted to match the hook signature but is not used.
    """
    derived_seed = torch.initial_seed() & 0xFFFFFFFF
    for reseed in (np.random.seed, random.seed):
        reseed(derived_seed)

# Dedicated random generator with a fixed seed; it is handed to the
# DataLoaders through their `generator` argument.
g = torch.Generator()
g.manual_seed(seed)

In [None]:
# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# Index the training images with ImageFolder. No transform is attached here;
# the transform pipelines are assigned in a later cell, after the split.
dataset = datasets.ImageFolder(root=train_dataset_path)

In [None]:
# Split sizes: `train_ratio` of the samples (rounded down) go to training,
# whatever is left goes to validation.
dataset_size = len(dataset)
train_size = int(train_ratio * dataset_size)
val_size = dataset_size - train_size

# No generator is passed, so the split draws from PyTorch's global RNG.
trainset, valset = random_split(dataset, lengths=[train_size, val_size])

In [None]:
import copy

# random_split returns Subset objects that all wrap the SAME underlying
# dataset instance. Writing `.dataset.transform` on both subsets would
# therefore make the second assignment win for both splits, and the training
# pipeline would never be used. Give the validation subset its own shallow
# copy of the dataset (the sample list is shared, only the attributes are
# separate) so that each split keeps its own transform.
valset.dataset = copy.copy(valset.dataset)

trainset.dataset.transform = train_transforms
valset.dataset.transform = valid_transforms

In [None]:
# Settings shared by both loaders: same batch size, same seeded generator and
# worker seeding hook, and loading in the main process (num_workers=0).
_loader_options = dict(
    batch_size=batch_size,
    worker_init_fn=seed_worker,
    generator=g,
    num_workers=0,
)

# Only the training batches are shuffled; validation keeps a fixed order.
train_loader = DataLoader(dataset=trainset, shuffle=True, **_loader_options)
val_loader = DataLoader(dataset=valset, shuffle=False, **_loader_options)

In [None]:
# Constructor arguments common to the pretrained and the from-scratch model:
# a two-class head and the configured dropout rate.
_efficientnet_options = {'num_classes': 2, 'dropout_rate': dropout_rate}

if not pretrained:
    # Randomly initialised EfficientNet-B1.
    model = EfficientNet.from_name('efficientnet-b1', **_efficientnet_options)
else:
    # EfficientNet-B1 with downloaded weights; `advprop` selects the weight set.
    # NOTE(review): efficientnet_pytorch documents a different input
    # normalisation for advprop weights -- confirm the transforms match.
    model = EfficientNet.from_pretrained('efficientnet-b1',
                                         advprop=advprop,
                                         **_efficientnet_options)

# Move the model to the selected device.
model = model.to(device)

In [None]:
import torch.nn as nn

# Loss function passed to train() for both the training and validation passes.
criterion = nn.CrossEntropyLoss()

In [None]:
# Adam optimizer configured from the hyperparameters in the settings cell.
# weight_decay has to be forwarded explicitly; otherwise the value chosen in
# the settings cell is silently ignored and Adam falls back to its default.
optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             betas=betas,
                             weight_decay=weight_decay,
                             amsgrad=amsgrad)

In [None]:
# wandb.init(project="AI_TermProject", name=test_name)

In [None]:
def train(model, loader_train, loader_valid, criterion, optimizer,
          scheduler=None, epochs=10, save_file=f'/kaggle/working/save_state_dict/{test_name}_state.pth'):
    """Train ``model`` and return the weights of its best validation epoch.

    Each epoch runs one optimisation pass over ``loader_train`` followed by a
    gradient-free evaluation pass over ``loader_valid``. Loss and metrics of
    both passes are printed. Batches are moved to the module-level ``device``.

    Args:
        model: Network that is optimised in place.
        loader_train: Sized iterable of ``(images, labels)`` training batches.
        loader_valid: Sized iterable of ``(images, labels)`` validation batches.
        criterion: Callable returning the loss for ``(outputs, labels)``.
        optimizer: Optimizer updating the model parameters.
        scheduler: Optional learning-rate scheduler. When given, it is stepped
            after every training batch (not once per epoch).
        epochs: Number of epochs to run.
        save_file: Path that receives the state dict whenever the mean
            validation loss is less than or equal to the best value seen so
            far. The default is built from the module-level ``test_name`` at
            the time this function is defined.

    Returns:
        Whatever ``torch.load(save_file)`` yields, i.e. the state dict of the
        best epoch written during this call.

    Notes:
        * A checkpoint of every epoch is additionally written to
          ``/kaggle/working/save_dict/{test_name}_{epoch}.pth``.
        * Recall and F1 are computed with scikit-learn's default arguments.
        * If no epoch writes ``save_file`` (for example ``epochs=0``), the
          final ``torch.load`` reads a pre-existing file or raises.
    """
    valid_loss_min = np.inf  # best (lowest) mean validation loss so far

    for epoch in range(epochs):
        print(f'에폭 [{epoch+1}/{epochs}] \n-----------------------------')

        # == [ training ] ==========================================
        model.train()
        epoch_train_loss = 0   # sum of the batch losses of this epoch
        train_preds_list = []  # predicted classes over the whole epoch
        train_true_list = []   # ground-truth classes over the whole epoch

        for images, labels in tqdm(loader_train):
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            epoch_train_loss += loss.item()
            loss.backward()
            optimizer.step()
            if scheduler is not None:
                scheduler.step()

            # Reduce to class indices first so that only the small index
            # tensor has to be copied back to the host.
            train_preds_list.extend(outputs.argmax(dim=1).cpu().numpy())
            train_true_list.extend(labels.cpu().numpy())

        train_loss = epoch_train_loss / len(loader_train)
        train_accuracy = accuracy_score(train_true_list, train_preds_list)

        print(f'\t훈련 데이터 손실값 : {train_loss:.4f}')
        print(f'\t훈련 데이터 정확도 : {train_accuracy:.4f}')

        # == [ validation ] ========================================
        model.eval()
        epoch_valid_loss = 0  # sum of the batch losses of this epoch
        preds_list = []       # predicted classes
        true_list = []        # ground-truth classes

        with torch.no_grad():
            for images, labels in loader_valid:
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)
                epoch_valid_loss += loss.item()

                preds_list.extend(outputs.argmax(dim=1).cpu().numpy())
                true_list.extend(labels.cpu().numpy())

        valid_loss = epoch_valid_loss / len(loader_valid)
        val_accuracy = accuracy_score(true_list, preds_list)
        val_recall = recall_score(true_list, preds_list)
        val_f1_score = f1_score(true_list, preds_list)

        # Optional experiment tracking (disabled):
        # wandb.log({"train_loss": train_loss,
        #            "train_accuracy": train_accuracy,
        #            "val_loss": valid_loss,
        #            "val_accuracy": val_accuracy,
        #            "val_recall": val_recall,
        #            "val_f1_score": val_f1_score})

        print(f'\t검증 데이터 손실값 : {valid_loss:.4f}')
        print(f'\t정확도 : {val_accuracy:.4f} / 재현율 : {val_recall:.4f} / F1 점수 : {val_f1_score:.4f}')

        # == [ keep the best weights ] =============================
        # The comparison uses the mean loss, i.e. the same quantity that is
        # printed above, so the logged "old --> new" values are comparable
        # with the per-epoch validation loss line.
        if valid_loss <= valid_loss_min:
            print(f'\t### 검증 데이터 손실값 감소 ({valid_loss_min:.4f} --> {valid_loss:.4f}). 모델 저장')
            torch.save(model.state_dict(), save_file)
            valid_loss_min = valid_loss

        # Per-epoch checkpoint, written regardless of the validation result.
        torch.save(model.state_dict(), f"/kaggle/working/save_dict/{test_name}_{epoch}.pth")

    # Hand back the best weights that were written to disk.
    return torch.load(save_file)

In [None]:
# Run the training loop; the value returned is the state dict of the epoch
# with the best validation loss.
model_state_dict = train(model, train_loader, val_loader, criterion, optimizer,
                         epochs=epoch_num)

# Hand cached GPU memory back once training has finished.
torch.cuda.empty_cache()
# wandb.finish()