<a href="https://colab.research.google.com/github/amthreeh/-dacon-papering/blob/main/%5Bljw%5Defficient_b0_epoch_100.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

#이미지 확인
import torchvision
import matplotlib.pyplot as plt
import PIL

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from tqdm.auto import tqdm

import warnings
warnings.filterwarnings(action='ignore') 

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
import zipfile
import os

In [None]:
pwd

'/content/drive/MyDrive/papering'

In [None]:
cd /content/drive/MyDrive/papering

/content/drive/MyDrive/papering


# unzip

In [None]:
# !unzip open.zip -d ./data

# Hyperparameter setting

In [None]:
# Global hyperparameters read by the rest of the notebook.
CFG = dict(
    IMG_SIZE=224,          # input image side length in pixels
    EPOCHS=100,            # number of passes over the training loader
    LEARNING_RATE=3e-4,    # initial learning rate handed to the optimizer
    BATCH_SIZE=128,        # batch size used by every DataLoader
    SEED=41,               # seed for the RNGs and the train/validation split
)

# Fix the random seed

In [None]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's `random`, NumPy's global RNG and PyTorch (CPU and all CUDA
    devices), and configures cuDNN for repeatable algorithm selection.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    # NOTE: this only affects Python processes started after this point; the
    # hash seed of the already-running interpreter cannot be changed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes the convolution algorithm per run and can pick
    # different algorithms each time, so it must be off for reproducibility.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the seed

# 데이터 전처리

In [None]:
# glob returns paths in an arbitrary, filesystem-dependent order. Sort them so
# the seeded train/validation split is the same on every machine and run.
all_img_list = sorted(glob.glob('./data/train/*/*'))

In [None]:
# Show how many files were found plus a short sample, instead of dumping
# every path into the notebook output.
print(len(all_img_list))
all_img_list[:5]

['./data/train/반점/2.png', './data/train/반점/0.png', './data/train/반점/1.png', './data/train/이음부불량/0.png', './data/train/이음부불량/5.png', './data/train/이음부불량/10.png', './data/train/이음부불량/4.png', './data/train/이음부불량/11.png', './data/train/이음부불량/14.png', './data/train/이음부불량/13.png', './data/train/이음부불량/3.png', './data/train/이음부불량/1.png', './data/train/이음부불량/6.png', './data/train/이음부불량/8.png', './data/train/이음부불량/16.png', './data/train/이음부불량/12.png', './data/train/이음부불량/2.png', './data/train/이음부불량/15.png', './data/train/이음부불량/7.png', './data/train/이음부불량/9.png', './data/train/걸레받이수정/0.png', './data/train/걸레받이수정/99.png', './data/train/걸레받이수정/89.png', './data/train/걸레받이수정/171.png', './data/train/걸레받이수정/154.png', './data/train/걸레받이수정/104.png', './data/train/걸레받이수정/175.png', './data/train/걸

In [None]:
# One row per training image. The class label is the name of the directory
# that directly contains the file (./data/train/<label>/<file>), taken from the
# parent directory rather than a fixed position in the path so it keeps working
# if the data root is moved.
df = pd.DataFrame({'img_path': all_img_list})
df['label'] = df['img_path'].apply(lambda p: os.path.basename(os.path.dirname(str(p))))

In [None]:
df.head()

Unnamed: 0,img_path,label
0,./data/train/반점/2.png,반점
1,./data/train/반점/0.png,반점
2,./data/train/반점/1.png,반점
3,./data/train/이음부불량/0.png,이음부불량
4,./data/train/이음부불량/5.png,이음부불량


In [None]:
# Stratified 80/20 hold-out split of the image table, seeded from CFG so the
# same rows land in each split on every run.
train, val = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label'],
    random_state=CFG['SEED'],
)

# label encoding

In [None]:
# Learn the class-name -> integer-id mapping from the training labels only,
# then encode both splits with that same fitted encoder.
le = preprocessing.LabelEncoder()
le.fit(train['label'])
train['label'] = le.transform(train['label'])
val['label'] = le.transform(val['label'])

In [None]:
# Inspect the encoded training labels. Do not call fit/fit_transform here:
# `le` is already fitted on the class names, and refitting it on the integer
# ids would replace le.classes_ with integers, so le.inverse_transform could
# no longer recover the class names.
train['label'].values

array([18, 18, 18, ..., 18, 18,  6])

# Custom_dataset

In [None]:
class CustomDataset(Dataset):
  """Image dataset that reads files from disk with OpenCV.

  Args:
    img_path_list: indexable collection of image file paths.
    label_list: indexable collection of labels aligned with `img_path_list`,
      or None for unlabeled data (items are then images only).
    transforms: optional callable invoked as `transforms(image=img)` whose
      result is indexed with `['image']`.
  """

  def __init__(self, img_path_list, label_list, transforms=None):
    self.img_path_list = img_path_list
    self.label_list = label_list
    self.transforms = transforms

  def __getitem__(self, index):
    """Return `(image, label)` when labels were given, otherwise `image`.

    Raises:
      FileNotFoundError: if the file at the stored path cannot be read.
    """
    img_path = self.img_path_list[index]

    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than deep inside the transforms.
    image = cv2.imread(img_path)
    if image is None:
      raise FileNotFoundError(f'Could not read image: {img_path}')

    # OpenCV decodes to BGR; the ImageNet mean/std used for normalisation and
    # the pretrained backbone expect RGB channel order.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if self.transforms is not None:
      image = self.transforms(image=image)['image']

    if self.label_list is not None:
      label = self.label_list[index]
      return image, label
    else:
      return image

  def __len__(self):
    """Number of image paths in the dataset."""
    return len(self.img_path_list)

# augmentation

In [None]:
# Training pipeline: resize, random geometric and colour augmentation,
# ImageNet normalisation, random cut-outs, then conversion to a tensor.
train_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE']), # resize to the configured input size
    A.Transpose(p=0.5), # swap the height and width axes
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=20, val_shift_limit=20, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    A.ChannelShuffle(),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0), # normalise with ImageNet statistics
    A.CoarseDropout(p=0.5),
    ToTensorV2()
])

# Evaluation pipeline: deterministic resize + normalisation only, no augmentation.
test_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE']),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0),
    ToTensorV2() # normalise first, then convert to a tensor
])


In [None]:
# Training data: reshuffled every epoch so mini-batches differ between epochs.
train_dataset = CustomDataset(train['img_path'].values, train['label'].values, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

# Validation data: fixed order, evaluation-only transforms.
val_dataset = CustomDataset(val['img_path'].values, val['label'].values, test_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

# Model definition

In [None]:
class BaseModel(nn.Module):
    """Pretrained EfficientNet-B0 followed by a linear classification head.

    The torchvision backbone is used as-is; `classifier` maps its
    1000-dimensional output to `num_classes` logits.

    Args:
        num_classes: number of output classes. The default, len(le.classes_),
            is evaluated once, when this class definition is executed.
    """

    def __init__(self, num_classes=len(le.classes_)):
        super().__init__()
        # Load the backbone with pretrained weights.
        self.backbone = models.efficientnet_b0(pretrained=True)
        self.classifier = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Return class logits for the image batch `x`."""
        backbone_out = self.backbone(x)
        return self.classifier(backbone_out)

In [None]:
# self.classifier = nn.Line
# classifier = nn.Sequential(
#         nn.Linear(25088,4096),
#         nn.ReLU(),
#         nn.Dropout(p=0.5),
#         nn.Linear(4096,4096),
#         nn.ReLU(),
#         nn.Dropout(p=0.5),
#         nn.Linear(4096,10))

In [None]:
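# # Newer way: torchvision's `weights=` API (replaces `pretrained=True`)
# model = torchvision.models.efficientnet_b0(pretrained=True)  # legacy form
# weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
# model_loaded = torchvision.models.efficientnet_b0(weights=weights)

# print(torchvision.models.EfficientNet_B0_Weights.DEFAULT)

# # Loading locally saved weights: build the model first, then load the state dict
# model_loaded = torchvision.models.efficientnet_b0(weights=None)
# model_loaded.load_state_dict(torch.load("모델weights.pth"))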

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train `model` for CFG['EPOCHS'] epochs and return it with its best weights.

    After every epoch the model is scored with `validation`. The weights of
    the epoch with the highest validation weighted-F1 are snapshotted and
    restored into `model` before returning.

    Args:
        model: network to train; moved to `device` in place.
        optimizer: optimizer built over `model`'s parameters.
        train_loader: iterable yielding `(imgs, labels)` batches for training.
        val_loader: iterable yielding `(imgs, labels)` batches for validation.
        scheduler: optional LR scheduler; when not None it is stepped once per
            epoch with the validation score.
        device: torch device to run on.

    Returns:
        `model` loaded with the best-scoring weights, or None if the
        validation score never exceeded 0.
    """
    import copy  # local import so this cell does not depend on another cell's imports

    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    best_score = 0
    # Copy of the weights at the best epoch. Keeping only a reference to
    # `model` would silently track the latest weights instead.
    best_state = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for imgs, labels in tqdm(iter(train_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            output = model(imgs)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val Weighted F1 Score : [{_val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(_val_score)

        if best_score < _val_score:
            best_score = _val_score
            best_state = copy.deepcopy(model.state_dict())

    if best_state is None:
        return None

    model.load_state_dict(best_state)
    return model

In [None]:
def validation(model, criterion, val_loader, device):
    """Evaluate `model` on `val_loader`.

    Args:
        model: network to evaluate; switched to eval mode.
        criterion: loss function called as `criterion(logits, labels)`.
        val_loader: iterable yielding `(imgs, labels)` batches.
        device: torch device the batches are moved to.

    Returns:
        Tuple `(mean batch loss, weighted F1 score)` over the whole loader.
    """
    # Eval mode: layers that behave differently during training switch to
    # their inference behaviour.
    model.eval()
    batch_losses = []
    predicted, targets = [], []

    # No gradients are needed for evaluation, so autograd tracking is disabled.
    with torch.no_grad():
        for imgs, labels in tqdm(iter(val_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)

            logits = model(imgs)
            batch_loss = criterion(logits, labels)

            predicted.extend(logits.argmax(1).detach().cpu().numpy().tolist())
            targets.extend(labels.detach().cpu().numpy().tolist())
            batch_losses.append(batch_loss.item())

    mean_loss = np.mean(batch_losses)
    weighted_f1 = f1_score(targets, predicted, average='weighted')
    return mean_loss, weighted_f1

- ReduceLROnPlateau: 더이상 학습이 진행되지 않을 때 learning rate를 감소시키는 scheduler

In [None]:
# Build the model, optimizer and scheduler, then train.
# No model.eval() here: train() switches the model to train mode at the start
# of every epoch, so an eval() call before training has no effect.
model = BaseModel()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# mode='max' because the scheduler is stepped with the validation F1 score:
# the learning rate is halved once the score has stopped improving for more
# than `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, threshold_mode='abs', min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [1], Train Loss : [1.57898] Val Loss : [1.02416] Val Weighted F1 Score : [0.64485]


  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.95097] Val Loss : [0.81097] Val Weighted F1 Score : [0.72293]


  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.77418] Val Loss : [0.75776] Val Weighted F1 Score : [0.74324]


  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.61841] Val Loss : [0.82446] Val Weighted F1 Score : [0.73338]


  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.54144] Val Loss : [0.78512] Val Weighted F1 Score : [0.77263]


  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.43094] Val Loss : [0.78204] Val Weighted F1 Score : [0.76475]


  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.36077] Val Loss : [0.75471] Val Weighted F1 Score : [0.79028]


  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.29759] Val Loss : [0.88569] Val Weighted F1 Score : [0.77030]


  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.28648] Val Loss : [0.95328] Val Weighted F1 Score : [0.74906]


  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.24162] Val Loss : [0.89801] Val Weighted F1 Score : [0.78893]
Epoch 00010: reducing learning rate of group 0 to 1.5000e-04.


  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.17477] Val Loss : [0.83761] Val Weighted F1 Score : [0.79604]


  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.13313] Val Loss : [0.80758] Val Weighted F1 Score : [0.79741]


In [None]:
pwd

'/content/drive/MyDrive/papering'

In [None]:
test = pd.read_csv('./data/test.csv')

In [None]:
test.head()

Unnamed: 0,id,img_path
0,TEST_000,./test/000.png
1,TEST_001,./test/001.png
2,TEST_002,./test/002.png
3,TEST_003,./test/003.png
4,TEST_004,./test/004.png


In [None]:
def inference(model, test_loader, device):
    """Predict a label for every image yielded by `test_loader`.

    Args:
        model: trained network; put into eval mode. It is not moved to
            `device` here, so it is presumably already on it (verify at the
            call site).
        test_loader: iterable yielding image batches only (no labels).
        device: torch device the batches are moved to.

    Returns:
        The result of `le.inverse_transform` applied to the arg-max class ids,
        in loader order.
    """
    model.eval()
    class_ids = []
    with torch.no_grad():
        for batch in tqdm(iter(test_loader)):
            logits = model(batch.float().to(device))
            class_ids.extend(logits.argmax(1).detach().cpu().numpy().tolist())

    # Decode with the module-level label encoder.
    return le.inverse_transform(class_ids)

In [None]:
# Unlabeled test set: with label_list=None the dataset yields images only.
# Order is kept fixed so predictions line up with the rows of `test`.
test_dataset = CustomDataset(
    img_path_list=test['img_path'].values,
    label_list=None,
    transforms=test_transform,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [None]:
preds = inference(infer_model, test_loader, device)

  0%|          | 0/25 [00:00<?, ?it/s]

In [None]:
submit = pd.read_csv('./data/sample_submission.csv')

In [None]:
submit['label'] = preds

In [None]:
# Replace any integer class id in the label column with its class name.
# The list position is the class id (0..18); values that are not one of these
# ids are left untouched.
for class_id, class_name in enumerate([
    '가구수정',
    '걸레받이수정',
    '곰팡이',
    '꼬임',
    '녹오염',
    '들뜸',
    '면불량',
    '몰딩수정',
    '반점',
    '석고수정',
    '오염',
    '오타공',
    '울음',
    '이음부불량',
    '창틀,문틀수정',
    '터짐',
    '틈새과다',
    '피스',
    '훼손',
]):
    submit.loc[submit['label'] == class_id, 'label'] = class_name

In [None]:
submit.head()

Unnamed: 0,id,label
0,TEST_000,터짐
1,TEST_001,오염
2,TEST_002,훼손
3,TEST_003,몰딩수정
4,TEST_004,오염


In [None]:
submit.to_csv('./baseline_submit.csv', index=False)