In [1]:
# Colab-only setup: mount Google Drive so its files are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [19]:
import warnings
warnings.filterwarnings('ignore')

from glob import glob
import pandas as pd
import numpy as np 
from tqdm import tqdm
import cv2

import os
import timm
import random

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, accuracy_score
import time
import matplotlib.pyplot as plt 


# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so that
# every later `.to(device)` call still works on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

from utils.dataset import Custom_dataset
from model.base_model import Network

# Load the image paths and the label data.
def load_data(path):
    """Collect train/test image paths and integer-encode the training labels.

    Images and labels are paired purely by position: the i-th training path
    (in name-sorted order) is used together with the i-th row of
    ``train_df.csv``.
    NOTE(review): confirm that the CSV rows follow the sorted file-name order.

    Args:
        path: Dataset root. It is expected to contain ``train/*.png``,
            ``test/*.png`` and ``train_df.csv`` with a ``label`` column.

    Returns:
        Tuple ``(train_png, test_png, label_unique, train_labels)`` where
        ``train_png`` / ``test_png`` are NumPy arrays of name-sorted file
        paths, ``label_unique`` maps each distinct label to an integer code
        (codes follow the sorted label order) and ``train_labels`` is a NumPy
        array holding the code of every CSV row.

    Raises:
        ValueError: If the number of training images differs from the number
            of label rows; positional pairing would silently misalign them.
    """
    # Image paths, sorted so the order is deterministic across runs.
    train_png = np.array(sorted(glob(f'{path}/train/*.png')))
    test_png = np.array(sorted(glob(f'{path}/test/*.png')))

    raw_labels = pd.read_csv(f"{path}/train_df.csv")['label']
    if len(raw_labels) != len(train_png):
        raise ValueError(
            f"found {len(train_png)} training images under '{path}/train' but "
            f"{len(raw_labels)} label rows in '{path}/train_df.csv'"
        )

    # Label encoder: distinct labels in sorted order -> 0..n_classes-1.
    label_unique = {
        label: code for code, label in enumerate(sorted(np.unique(raw_labels)))
    }

    # Encoded label of every training row.
    train_labels = np.array([label_unique[label] for label in raw_labels])

    return train_png, test_png, label_unique, train_labels

# Turn one fold's index arrays directly into train/validation DataLoaders.
def make_loader(train_index,valid_index,batch_size):
    """Build the training and validation loaders for a single fold.

    Reads the module-level ``train_png`` and ``train_labels`` arrays and
    selects the rows given by each index array.

    Args:
        train_index: Positions of the training samples.
        valid_index: Positions of the validation samples.
        batch_size: Batch size used by both loaders.

    Returns:
        ``(train_loader, valid_loader)``; only the training loader shuffles.
    """
    # The same positions pick both the image paths and their labels.
    fold_train, fold_valid = (
        Custom_dataset(train_png[index], train_labels[index])
        for index in (train_index, valid_index)
    )
    return (
        DataLoader(fold_train, batch_size, shuffle=True),
        DataLoader(fold_valid, batch_size, shuffle=False),
    )

# Macro-averaged F1 between the true and the predicted labels.
def score_function(real,pred):
    """Return the macro-averaged F1 score of ``pred`` measured against ``real``."""
    return f1_score(real, pred, average='macro')

# Save a model checkpoint during training.
def model_save(model,fold):
    """Write the model's ``state_dict`` to ``./saved_model/best_model_{fold}.pt``.

    Args:
        model: Module whose parameters are saved.
        fold: Fold identifier embedded in the checkpoint file name.
    """
    save_dir = './saved_model'
    # torch.save does not create missing parent directories, so make sure the
    # target folder exists before the first checkpoint is written.
    os.makedirs(save_dir, exist_ok=True)
    torch.save(model.state_dict(), f'{save_dir}/best_model_{fold}.pt')

# Fresh training state: model, optimizer, loss and AMP gradient scaler.
def init_train():
    """Create everything one training run needs, starting from a new model.

    Returns:
        ``(model, optimizer, criterion, scaler)``: a ``Network`` moved to the
        module-level ``device``, an Adam optimizer over its parameters
        (lr=1e-3), a cross-entropy loss and a ``GradScaler`` for mixed
        precision.
    """
    net = Network().to(device)
    return (
        net,
        torch.optim.Adam(net.parameters(), lr=1e-3),
        nn.CrossEntropyLoss(),
        torch.cuda.amp.GradScaler(),
    )

# Train for one epoch.
def train(dataloader,model,optimizer,criterion,scaler,CFG):
    """Run one mixed-precision training epoch and collect its statistics.

    Args:
        dataloader: Sized iterable of batches; ``batch[0]`` holds the inputs
            and ``batch[1]`` the integer class labels.
        model: Network being trained; it is switched to training mode here.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss taking ``(predictions, targets)``.
        scaler: ``GradScaler`` used to scale the loss and step the optimizer.
        CFG: Unused; kept so existing callers keep working.

    Returns:
        ``(train_loss, train_pred, train_y)``: the mean per-batch loss of the
        epoch, the predicted class indices and the true labels (both as flat
        Python lists in iteration order).
    """
    # valid_test() leaves the model in eval mode after every epoch, so training
    # mode has to be restored explicitly before each training pass.
    model.train()
    num_batches = len(dataloader)

    # Accumulated over the whole epoch; it must not be reset inside the loop,
    # otherwise only the last batch would contribute to the reported loss.
    train_loss = 0.0
    train_pred = []
    train_y = []
    for batch in dataloader:
        optimizer.zero_grad()
        x = torch.tensor(batch[0],dtype=torch.float32,device=device)
        y = torch.tensor(batch[1],dtype=torch.long,device=device)

        # Forward pass and loss both run under autocast, as the AMP recipe
        # recommends, so the loss is evaluated in a numerically safe dtype.
        with torch.cuda.amp.autocast():
            pred = model(x)
            loss = criterion(pred, y)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item() / num_batches
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_y += y.detach().cpu().numpy().tolist()
    return train_loss, train_pred, train_y


# Report the training metrics of the epoch that just finished.
def evaluate(train_y, train_pred,train_loss,epoch,CFG,TIME):
    """Print the epoch progress and training metrics, and return the training F1.

    Args:
        train_y: True labels collected during the epoch.
        train_pred: Predicted labels collected during the epoch.
        train_loss: Training loss to report.
        epoch: Zero-based index of the finished epoch.
        CFG: Config object; only ``CFG.epochs`` is read.
        TIME: Duration of the epoch in seconds.

    Returns:
        The F1 score computed by ``score_function(train_y, train_pred)``.
    """
    train_f1 = score_function(train_y, train_pred)

    # Remaining time is estimated as "this epoch's duration x epochs left".
    epochs_left = CFG.epochs - epoch - 1
    eta = TIME * epochs_left

    print(f'epoch : {epoch+1}/{CFG.epochs}    time : {TIME:.0f}s/{eta:.0f}s')
    print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')
    return train_f1

# Validation evaluation.
def valid_test(dataloader,model,loss_fn):
    """Evaluate the model on a validation loader and report loss and F1.

    The model is switched to eval mode and is left in that mode, so callers
    must call ``model.train()`` before training again.

    Args:
        dataloader: Sized iterable yielding ``(x, y)`` tensor batches.
        model: Network to evaluate.
        loss_fn: Loss taking ``(predictions, targets)``.

    Returns:
        ``(test_loss, test_f1)``: the mean per-batch loss and the F1 score over
        all samples, so callers can use them (e.g. for model selection).
    """
    num_batches = len(dataloader)
    model.eval()
    test_loss = 0.0
    test_pred = []
    test_y = []
    # No gradients are needed while scoring.
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            pred = model(x)
            test_loss += loss_fn(pred, y).item()
            test_pred += pred.argmax(1).detach().cpu().numpy().tolist()
            test_y += y.detach().cpu().numpy().tolist()
    test_f1 = score_function(test_y, test_pred)
    test_loss /= num_batches
    print(f'TEST   loss : {test_loss:.5f}    f1 : {test_f1:.5f}')
    return test_loss, test_f1


            

In [None]:
# Hyperparameters and paths shared by the training and inference cells.
class CFG:
    # Alternative dataset root, currently disabled:
    # path = '/content/open/'
    path = './data/open'  # dataset root handed to load_data()
    batch_size = 8  # batch size of the train, validation and test DataLoaders
    epochs = 25  # number of training epochs per fold
    shuffle = True  # NOTE(review): not referenced anywhere in the visible code
# Resolve the image paths and encoded labels once; every fold indexes into them.
train_png, test_png, label_unique, train_labels = load_data(CFG.path)

# 5-fold cross-validation; each fold starts from a freshly initialised model.
# NOTE(review): KFold is used without shuffling, so folds follow the sorted
# file order -- confirm that this split is intended.
kf = KFold(n_splits=5)
for fold, (train_index, valid_index) in enumerate(kf.split(train_png)):
    train_loader, valid_loader = make_loader(train_index, valid_index, CFG.batch_size)
    model, optimizer, criterion, scaler = init_train()

    best = 0
    loss_history = []

    for epoch in tqdm(range(CFG.epochs)):
        start = time.time()
        train_loss, train_pred, train_y = train(
            train_loader, model, optimizer, criterion, scaler, CFG
        )
        TIME = time.time() - start

        # Record the loss every epoch; the curve is only drawn once there are
        # at least two points (i.e. from the second epoch on).
        loss_history.append(train_loss)
        if epoch != 0:
            plt.plot(np.arange(epoch + 1), np.array(loss_history))
            plt.show()

        # Training metrics for this epoch.
        train_f1 = evaluate(train_y, train_pred, train_loss, epoch, CFG, TIME)
        # Validation metrics for this epoch.
        valid_test(valid_loader, model, criterion)

        # Keep the checkpoint with the best score seen so far in this fold.
        # NOTE(review): "best" is judged on the *training* F1, not on the
        # validation score -- confirm that this is the intended criterion.
        if train_f1 > best:
            best = train_f1
            model_save(model, fold)
      
    
    

# Inference 

In [None]:
# Restore trained weights into the model left over from the training cell.
# NOTE(review): this path ('./model/best_model_29.pt') does not match what
# model_save() writes ('./saved_model/best_model_{fold}.pt') -- confirm the file.
model.load_state_dict(torch.load('./model/best_model_29.pt'))

# Test data: the 'tmp' strings only fill the dataset's label argument.
placeholder_labels = np.array(['tmp']*len(test_png))
test_dataset = Custom_dataset(np.array(test_png), placeholder_labels, mode='test')
test_dataloader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False)

# Predict a class index for every test image, in loader order.
model.eval()
f_pred = []
with torch.no_grad():
    for batch in test_dataloader:
        x = torch.tensor(batch[0], dtype=torch.float32, device=device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        f_pred += pred.argmax(1).detach().cpu().numpy().tolist()

# Invert the label encoder (class index -> original label) and decode.
label_decoder = dict(zip(label_unique.values(), label_unique.keys()))
f_result = [label_decoder[class_index] for class_index in f_pred]

# Fill the sample submission with the decoded labels and write it out.
submission = pd.read_csv('./data/open/sample_submission.csv').assign(label=f_result)
submission.to_csv('submission0906_1.csv', index=False)


<All keys matched successfully>