<a href="https://colab.research.google.com/github/amthreeh/-AI-SPARK-air_compressor/blob/main/VIT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from PIL import Image

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from tqdm.auto import tqdm

import warnings
warnings.filterwarnings(action='ignore') 

In [19]:
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.9.2-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m80.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub (from timm)
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors (from timm)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m85.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: safetensors, huggingface-hub, timm
Successfully installed huggingface-hub-0.14.1 safetensors-0.3.1 timm-0.9.2


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
## Hyperparameter Setting

CFG = {
    'IMG_SIZE':224,
    'EPOCHS': 50,
    'LEARNING_RATE':3e-4,
    'BATCH_SIZE':32,
    'SEED': 41
}

In [5]:
all_img_list = glob.glob("/content/drive/MyDrive/papering/data"+'/train/*/*')
df = pd.DataFrame(columns=['img_path', 'label'])
df['img_path'] = all_img_list
df['label'] = df['img_path'].apply(lambda x : str(x).split('/')[7])

In [6]:
import numpy as np
df['label'].value_counts()

훼손                 1405
오염                  595
걸레받이수정        307
꼬임                  210
터짐                  162
곰팡이               145
오타공                142
몰딩수정            130
면불량               99
석고수정              57
들뜸                  54
피스                    51
창틀,문틀수정      27
울음                  22
이음부불량           17
녹오염                14
가구수정               12
틈새과다                5
반점                   3
Name: label, dtype: int64

In [7]:
for i in range(len(df)):
  if df['label'][i] == '틈새과다' or df['label'][i] == '반점' or df['label'][i] == '가구수정' or df['label'][i] == '녹오염':
    # print(df.drop(labels = i, axis=0))
    df = df.drop(labels = i, axis=0)
print(df['label'].value_counts())

훼손                 1405
오염                  595
걸레받이수정        307
꼬임                  210
터짐                  162
곰팡이               145
오타공                142
몰딩수정            130
면불량               99
석고수정              57
들뜸                  54
피스                    51
창틀,문틀수정      27
울음                  22
이음부불량           17
Name: label, dtype: int64


In [None]:
print(len(df))

3423


In [8]:
# split train val dataset
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, stratify=df['label'], random_state=CFG['SEED'])

In [9]:
## Label-Encoding
le = preprocessing.LabelEncoder()
train['label'] = le.fit_transform(train['label'])
val['label'] = le.transform(val['label'])

In [10]:
## CustomDataset

class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        
    def __getitem__(self, index):
        img_path = self.img_path_list[index]
        
        image = cv2.imread(img_path)
        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
        
        if self.label_list is not None:
            label = self.label_list[index]
            return image, label
        else:
            return image
        
    def __len__(self):
        return len(self.img_path_list)

In [11]:
train_transform = A.Compose([
    A.Resize(224,224), # 이미지 리사이즈
    A.Transpose(p=0.5), # 이미지 반전 
    A.ShiftScaleRotate(p=0.5),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=20, val_shift_limit=20, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    A.ChannelShuffle(),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0), # 이미지넷 데이터셋 통계값으로 Normalize
    A.CoarseDropout(p=0.5),
    ToTensorV2()
])

test_transform = A.Compose([
                            A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

In [12]:
#weighted sampling
a_list = train['label'].value_counts()
class_counts = train['label'].value_counts().to_list()
class_weights = [1 / a_list[i] for i in range(len(class_counts))]
num_samples = sum(class_counts)
labels_lists = train['label'].to_list()
weights = [class_weights[labels_lists[i]] for i in range(int(num_samples))]
sampler = WeightedRandomSampler(torch.DoubleTensor(weights), int(num_samples), replacement=True)

- weighted random sampling            
  클래스 불균형 문제를 해결하기 위한 방법 중 하나                     
  개별 이미지 한장이 뽑힐 확률은 1/전체개수               
  이를 보완하고자 더 적은 이미지를 갖는 클래스의 이미지가 뽑힐 확률을 높이도록 큰 가중치를 곱하고 반대로 많은 이미지를 갖는 클래스의 이미지가 뽑힐 확룰이 낮아지도록 작은 가중치를 곱해 클래스 당 확룰을 동일하게 맞춰줌.   
  이렇게 맞춰진 가중 확률을 기반으로 sampler가 이미지를 확률적으로 골라서 배치를 만듦
  
        

In [13]:
# make dataloader
train_dataset = CustomDataset(train['img_path'].values, train['label'].values, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], sampler=sampler, shuffle=False, num_workers=2)

val_dataset = CustomDataset(val['img_path'].values, val['label'].values, test_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=2)

In [20]:
import timm

class vit(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = timm.create_model('vit_base_patch16_224', pretrained=True, num_classes=19)

    def forward(self, x):
        return self.model(x)

## Model Define
# class BaseModel(nn.Module):
#     def __init__(self, num_classes=len(le.classes_)):
#         super(BaseModel, self).__init__()
#         self.backbone = models.efficientnet_b1(pretrained=True) #densenet161(pretrained=True)
#         self.classifier = nn.Linear(1000, num_classes)
        
#     def forward(self, x):
#         x = self.backbone(x)
#         x = self.classifier(x)
#         return x

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class FocalLoss(nn.Module):
    def __init__(self, gamma=0, alpha=None, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        if isinstance(alpha,(float,int)): self.alpha = torch.Tensor([alpha]*19)
        self.alpha[18] = 1-alpha
        if isinstance(alpha,list): self.alpha = torch.Tensor(alpha)
        self.size_average = size_average

    def forward(self, input, target):
        if input.dim()>2:
            input = input.view(input.size(0),input.size(1),-1)  # N,C,H,W => N,C,H*W
            input = input.transpose(1,2)    # N,C,H*W => N,H*W,C
            input = input.contiguous().view(-1,input.size(2))   # N,H*W,C => N*H*W,C
        target = target.view(-1,1)

        logpt = F.log_softmax(input)
        logpt = logpt.gather(1,target)
        logpt = logpt.view(-1)
        pt = Variable(logpt.data.exp())

        if self.alpha is not None:
            if self.alpha.type()!=input.data.type():
                self.alpha = self.alpha.type_as(input.data)
            at = self.alpha.gather(0,target.data.view(-1))
            logpt = logpt * at

        loss = -1 * (1-pt)**self.gamma * logpt
        if self.size_average: return loss.mean()
        else: return loss.sum()

In [16]:
## Train

def trains(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
#     criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(device)
    criterion = FocalLoss(gamma=2, alpha=0.25)
    
    best_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for imgs, labels in tqdm(iter(train_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            output = model(imgs)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val Weighted F1 Score : [{_val_score:.5f}]')
       
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_score < _val_score:
            best_score = _val_score
            best_model = model
    
    return best_model

In [17]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, true_labels = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(iter(val_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)
            
            pred = model(imgs)
            
            loss = criterion(pred, labels)
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += labels.detach().cpu().numpy().tolist()
            
            val_loss.append(loss.item())
        
        _val_loss = np.mean(val_loss)
        _val_score = f1_score(true_labels, preds, average='weighted')
    
    return _val_loss, _val_score

In [21]:
#init model
model = vit()

Downloading model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

In [22]:
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, threshold_mode='abs', min_lr=1e-8, verbose=True)

infer_model = trains(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.59984] Val Loss : [0.55813] Val Weighted F1 Score : [0.03899]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.51733] Val Loss : [0.62069] Val Weighted F1 Score : [0.06693]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.46797] Val Loss : [0.46172] Val Weighted F1 Score : [0.24828]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.41165] Val Loss : [0.39449] Val Weighted F1 Score : [0.32166]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.36280] Val Loss : [0.42474] Val Weighted F1 Score : [0.19585]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.31290] Val Loss : [0.37921] Val Weighted F1 Score : [0.25802]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.29019] Val Loss : [0.37058] Val Weighted F1 Score : [0.38065]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.25610] Val Loss : [0.36389] Val Weighted F1 Score : [0.26961]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.24187] Val Loss : [0.42020] Val Weighted F1 Score : [0.29849]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.23155] Val Loss : [0.36334] Val Weighted F1 Score : [0.37863]
Epoch 00010: reducing learning rate of group 0 to 1.5000e-04.


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.16714] Val Loss : [0.26294] Val Weighted F1 Score : [0.49268]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.14143] Val Loss : [0.23583] Val Weighted F1 Score : [0.53945]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.12770] Val Loss : [0.22810] Val Weighted F1 Score : [0.48452]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.11852] Val Loss : [0.24393] Val Weighted F1 Score : [0.49277]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.11384] Val Loss : [0.26845] Val Weighted F1 Score : [0.50619]
Epoch 00015: reducing learning rate of group 0 to 7.5000e-05.


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.08359] Val Loss : [0.22038] Val Weighted F1 Score : [0.55363]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.07212] Val Loss : [0.21395] Val Weighted F1 Score : [0.56966]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.06026] Val Loss : [0.21475] Val Weighted F1 Score : [0.56745]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.05749] Val Loss : [0.21679] Val Weighted F1 Score : [0.55340]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.05469] Val Loss : [0.20402] Val Weighted F1 Score : [0.59564]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.05388] Val Loss : [0.20024] Val Weighted F1 Score : [0.55481]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.04947] Val Loss : [0.19442] Val Weighted F1 Score : [0.60147]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.04858] Val Loss : [0.20458] Val Weighted F1 Score : [0.60474]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.04453] Val Loss : [0.19846] Val Weighted F1 Score : [0.59673]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.04089] Val Loss : [0.22906] Val Weighted F1 Score : [0.54521]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.03805] Val Loss : [0.20896] Val Weighted F1 Score : [0.58087]
Epoch 00026: reducing learning rate of group 0 to 3.7500e-05.


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.03316] Val Loss : [0.19358] Val Weighted F1 Score : [0.64022]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.03028] Val Loss : [0.19146] Val Weighted F1 Score : [0.64553]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.02288] Val Loss : [0.21102] Val Weighted F1 Score : [0.58785]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [30], Train Loss : [0.02444] Val Loss : [0.20486] Val Weighted F1 Score : [0.63614]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [31], Train Loss : [0.02515] Val Loss : [0.20040] Val Weighted F1 Score : [0.64279]
Epoch 00031: reducing learning rate of group 0 to 1.8750e-05.


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [32], Train Loss : [0.01846] Val Loss : [0.20140] Val Weighted F1 Score : [0.65205]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [33], Train Loss : [0.01628] Val Loss : [0.20470] Val Weighted F1 Score : [0.65136]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [34], Train Loss : [0.01635] Val Loss : [0.21255] Val Weighted F1 Score : [0.65013]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [35], Train Loss : [0.01502] Val Loss : [0.21515] Val Weighted F1 Score : [0.66121]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [36], Train Loss : [0.01680] Val Loss : [0.20684] Val Weighted F1 Score : [0.66002]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [37], Train Loss : [0.01667] Val Loss : [0.19994] Val Weighted F1 Score : [0.64936]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [38], Train Loss : [0.01497] Val Loss : [0.20268] Val Weighted F1 Score : [0.66046]
Epoch 00038: reducing learning rate of group 0 to 9.3750e-06.


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [39], Train Loss : [0.01493] Val Loss : [0.20660] Val Weighted F1 Score : [0.64021]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [40], Train Loss : [0.01286] Val Loss : [0.21013] Val Weighted F1 Score : [0.65293]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [41], Train Loss : [0.01150] Val Loss : [0.21432] Val Weighted F1 Score : [0.66299]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [42], Train Loss : [0.01110] Val Loss : [0.22038] Val Weighted F1 Score : [0.66129]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [43], Train Loss : [0.01000] Val Loss : [0.22489] Val Weighted F1 Score : [0.65787]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [44], Train Loss : [0.01072] Val Loss : [0.22480] Val Weighted F1 Score : [0.65567]
Epoch 00044: reducing learning rate of group 0 to 4.6875e-06.


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [45], Train Loss : [0.01164] Val Loss : [0.22076] Val Weighted F1 Score : [0.66499]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [46], Train Loss : [0.01171] Val Loss : [0.21821] Val Weighted F1 Score : [0.66912]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [47], Train Loss : [0.01014] Val Loss : [0.22078] Val Weighted F1 Score : [0.66112]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [48], Train Loss : [0.01165] Val Loss : [0.21955] Val Weighted F1 Score : [0.66440]


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [49], Train Loss : [0.01036] Val Loss : [0.22175] Val Weighted F1 Score : [0.66786]
Epoch 00049: reducing learning rate of group 0 to 2.3437e-06.


  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch [50], Train Loss : [0.01119] Val Loss : [0.21840] Val Weighted F1 Score : [0.66702]


In [23]:
#test
test = pd.read_csv('/content/drive/MyDrive/papering/test.csv')
print(test)

           id        img_path
0    TEST_000  ./test/000.png
1    TEST_001  ./test/001.png
2    TEST_002  ./test/002.png
3    TEST_003  ./test/003.png
4    TEST_004  ./test/004.png
..        ...             ...
787  TEST_787  ./test/787.png
788  TEST_788  ./test/788.png
789  TEST_789  ./test/789.png
790  TEST_790  ./test/790.png
791  TEST_791  ./test/791.png

[792 rows x 2 columns]


In [24]:
cd /content/drive/MyDrive/papering

/content/drive/MyDrive/papering


In [25]:
test_dataset = CustomDataset(test['img_path'].values, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=2)

In [26]:
def inference(model, test_loader, device):
    model.eval()
    preds = []
    with torch.no_grad():
        for imgs in tqdm(iter(test_loader)):
            imgs = imgs.float().to(device)
            
            pred = model(imgs)
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
    #preds = le.inverse_transform(preds)
    
    return preds

In [27]:
preds = inference(infer_model, test_loader, device)

  0%|          | 0/25 [00:00<?, ?it/s]

In [28]:
#submission
submit = pd.read_csv('./data/sample_submission.csv')

In [29]:
submit['label'] = preds

In [30]:
submit.head()

Unnamed: 0,id,label
0,TEST_000,14
1,TEST_001,14
2,TEST_002,2
3,TEST_003,5
4,TEST_004,7


In [31]:
submit.loc[submit['label'] == 0, 'label'] = '가구수정'
submit.loc[submit['label'] == 1, 'label'] = '걸레받이수정'
submit.loc[submit['label'] == 2, 'label'] = '곰팡이'
submit.loc[submit['label'] == 3, 'label'] = '꼬임'
submit.loc[submit['label'] == 4, 'label'] = '녹오염'
submit.loc[submit['label'] == 5, 'label'] = '들뜸'
submit.loc[submit['label'] == 6, 'label'] = '면불량'
submit.loc[submit['label'] == 7, 'label'] = '몰딩수정'
submit.loc[submit['label'] == 8, 'label'] = '반점'
submit.loc[submit['label'] == 9, 'label'] = '석고수정'
submit.loc[submit['label'] == 10, 'label'] = '오염'
submit.loc[submit['label'] == 11, 'label'] = '오타공'
submit.loc[submit['label'] == 12, 'label'] = '울음'
submit.loc[submit['label'] == 13, 'label'] = '이음부불량'
submit.loc[submit['label'] == 14, 'label'] = '창틀,문틀수정'
submit.loc[submit['label'] == 15, 'label'] = '터짐'
submit.loc[submit['label'] == 16, 'label'] = '틈새과다'
submit.loc[submit['label'] == 17, 'label'] = '피스'
submit.loc[submit['label'] == 18, 'label'] = '훼손'

In [32]:
submit.to_csv('./baseline_submit.csv', index=False)

In [33]:
df = pd.read_csv('/content/drive/MyDrive/papering/baseline_submit.csv')
df.head()

Unnamed: 0,id,label
0,TEST_000,"창틀,문틀수정"
1,TEST_001,"창틀,문틀수정"
2,TEST_002,곰팡이
3,TEST_003,들뜸
4,TEST_004,몰딩수정
