In [29]:
import os
import glob
from os.path import join as opj

import numpy as np
import pandas as pd 
from tqdm import tqdm
from easydict import EasyDict
from torch.cuda.amp import autocast
from sklearn.preprocessing import LabelEncoder

from dataloader import *
from network import *

import warnings
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
warnings.filterwarnings(action='ignore')

In [7]:
# Submission template; later cells read its 'index' and 'label' columns
# and overwrite 'label' with the ensemble predictions.
sub = pd.read_csv('sample_submission.csv')

In [8]:
# --- Encoder for the auxiliary "bad" training table -------------------------
bad_df = pd.read_csv('../data/train_df_bad.csv')
le_bad = LabelEncoder()
bad_df['label'] = le_bad.fit_transform(bad_df['label'])

# Encoded ids of the classes whose name does / does not contain 'good'.
bad_class_names = list(le_bad.classes_)
good = le_bad.transform([name for name in bad_class_names if 'good' in name])  # original note: 30
ngood = le_bad.transform([name for name in bad_class_names if 'good' not in name])

# --- Encoder for the full training table ------------------------------------
train_df = pd.read_csv('../data/train_df.csv')
le = LabelEncoder()
train_df['label'] = le.fit_transform(train_df['label'])

# Encoded ids (in the full label space) of every '*good*' class.
full_class_names = list(le.classes_)
good2 = le.transform([name for name in full_class_names if 'good' in name])  # original note: 88

In [9]:
def get_preds(li, good=good, ngood=ngood, good2=good2, le=le,
              bad_path='../0405/effb4_bad_5fold.npy', good_threshold=0.999999):
    """Average saved softmax matrices and demote doubtful 'good' predictions.

    The matrices in ``li`` are averaged and the arg-max class is taken as the
    ensemble prediction. A second score matrix (``bad_path``) is then used as
    a veto: a row is *suspect* when that matrix's arg-max is one of ``ngood``,
    or is one of ``good`` but with a top score below ``good_threshold``.
    For suspect rows whose ensemble prediction is one of ``good2``, the
    ensemble's second-highest class is used instead.

    Args:
        li: iterable of paths to ``.npy`` score matrices, all the same shape.
        good: encoded ids (label space of the ``bad_path`` matrix) counted as 'good'.
        ngood: encoded ids (same space) counted as not 'good'.
        good2: encoded ids (label space of ``le``) counted as 'good'.
        le: fitted encoder used to turn ensemble class ids into label strings.
        bad_path: path of the auxiliary score matrix (previously hard-coded).
        good_threshold: top-score cut-off below which a 'good' prediction of
            the auxiliary matrix is not trusted (previously hard-coded).

    Returns:
        1-D object array with one label string per row.

    Raises:
        ValueError: if ``li`` is empty or the auxiliary matrix has a different
            number of rows than the ensemble matrix.

    Note:
        The defaults for ``good``/``ngood``/``good2``/``le`` are bound to the
        notebook globals at definition time.
    """
    li = list(li)
    if not li:
        raise ValueError('li must contain at least one softmax .npy path')

    # Mean over the model axis -> (rows, classes) ensemble scores.
    w = np.array([np.load(path) for path in li]).mean(axis=0)
    w_preds = np.argmax(w, axis=1)

    bad2 = np.load(bad_path)  # original note: 2514 x 30
    if bad2.shape[0] != w.shape[0]:
        raise ValueError(
            f'row mismatch: ensemble has {w.shape[0]} rows, {bad_path} has {bad2.shape[0]}'
        )
    bad2_maxs = np.max(bad2, axis=1)
    bad2_preds = np.argmax(bad2, axis=1)

    # Rows the auxiliary matrix does not confidently call 'good'.
    suspect = (
        (np.isin(bad2_preds, good) & (bad2_maxs < good_threshold))
        | np.isin(bad2_preds, ngood)
    )
    # ...restricted to rows the ensemble itself labelled as a 'good' class.
    override = suspect & np.isin(w_preds, good2)

    # Plain numpy masks replace the chained `df['label'].iloc[...] = ...`
    # assignment, which does not write through under pandas copy-on-write.
    labels = np.array(le.inverse_transform(w_preds), dtype=object)
    if override.any():
        runner_up = np.argsort(w, axis=1)[override, -2]  # second-highest class
        labels[override] = le.inverse_transform(runner_up)

    return labels

In [10]:
# glob returns paths in arbitrary order; sorting pins the order in which the
# score files are stacked so the averaged values are reproducible across runs.
li = sorted(glob.glob('files/softmax_*.npy'))
if not li:
    # Fail here with a clear message instead of deep inside get_preds.
    raise FileNotFoundError("no files match 'files/softmax_*.npy'")
sub['label'] = get_preds(li, good=good, ngood=ngood, good2=good2, le=le)
sub.head()

Unnamed: 0,index,label
0,0,tile-glue_strip
1,1,grid-good
2,2,transistor-good
3,3,tile-gray_stroke
4,4,tile-good


### One-class Classification

In [12]:
# Model configuration; predict() passes this object to Network(args).
args = EasyDict(
    encoder_name='efficientnet_b1',
    drop_path_rate=0.2,
    use_weight_norm=None,
    num_classes=88,
)

In [13]:
# Raw (un-encoded) training table; save_predicts() filters it by its 'class' column.
df_train = pd.read_csv('../data/train_df.csv')
# Test table; save_predicts() reads this as a module-level global, not a parameter.
df_test = pd.read_csv('../data/test_df.csv') 
# Device used by predict() for the model and the input batches.
device = 'cuda'

In [14]:
def predict(args, le, _type, test_loader, model_path):
    """Run one checkpoint over ``test_loader`` and return decoded labels.

    Args:
        args: configuration object passed to ``Network``.
        le: fitted encoder; ``le.inverse_transform`` maps predicted class ids
            back to labels.
        _type: checkpoint prefix; the file loaded is ``{_type}_model.pth``.
        test_loader: iterable yielding image batches.
        model_path: directory that contains the checkpoint file.

    Returns:
        The result of ``le.inverse_transform`` on the arg-max class id of
        every sample, in loader order.

    Note:
        Uses the module-level ``device``.
    """
    model = Network(args).to(device)
    # map_location lets a checkpoint saved from another device load onto `device`.
    checkpoint = torch.load(opj(model_path, f'{_type}_model.pth'), map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    output = []
    with torch.no_grad(), autocast():
        for batch in tqdm(test_loader):
            # as_tensor converts / moves the batch without the copy-construct
            # warning that torch.tensor(<tensor>) emits.
            images = torch.as_tensor(batch, dtype=torch.float32, device=device)
            preds = model(images)
            class_ids = torch.argmax(preds, dim=1).to(torch.int32)
            output.extend(class_ids.cpu().numpy())

    return le.inverse_transform(output)

def save_predicts(cls, sub, df_train, exps):
    """Re-predict the test rows currently labelled as ``cls`` and save the results.

    For every experiment id in ``exps`` the checkpoint ``results/{exp}/best_model.pth``
    is run on the selected test rows and the decoded labels are written to
    ``{cls}_{exp}.npy``.

    Args:
        cls: product class name; rows of ``sub`` whose label contains this
            substring are selected.
        sub: submission frame with 'index' and 'label' columns.
        df_train: training frame with 'class' and 'label' columns; the labels
            of the rows with ``class == cls`` define the label encoding.
        exps: iterable of experiment ids (sub-directories of ``results/``).

    Note:
        Reads the module-level ``df_test`` and ``args``.
    """
    # Vectorised substring match instead of a row-by-row .iloc scan.
    is_cls = sub['label'].str.contains(str(cls), regex=False, na=False)
    idxLst = sub.loc[is_cls, 'index'].tolist()

    # Only the fitted encoder is needed. The original also wrote the encoded
    # labels back into a slice of df_train (SettingWithCopy) and never read them.
    le = LabelEncoder()
    le.fit(df_train.loc[df_train['class'] == cls, 'label'])
    # NOTE(review): args['num_classes'] is 88 while this encoder only knows the
    # labels of one product class; inverse_transform raises on unseen ids.
    # Confirm the checkpoints were trained with this per-class encoding.

    df_test_cls = df_test[df_test['index'].isin(idxLst)]
    # NOTE(review): a *train* augmentation is used for inference. Confirm that
    # ver=1 is deterministic, otherwise the saved predictions are not reproducible.
    transform = get_train_augmentation(img_size=512, ver=1)
    test_dataset = Test_Dataset(df_test_cls, transform)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

    for exp in exps:
        model_path = f'results/{exp}'
        pred = predict(args, le, _type='best', test_loader=test_loader, model_path=model_path)
        np.save(f'{cls}_{exp}.npy', pred)

In [15]:
# Experiment ids of the per-class checkpoints, keyed by product class.
for cls_name, exp_ids in (('toothbrush', [294,295,296]), ('zipper', [297,298,299])):
    save_predicts(cls=cls_name, sub=sub, df_train=df_train, exps=exp_ids)

Test Dataset size:54


100%|██████████| 54/54 [00:02<00:00, 21.79it/s]
100%|██████████| 54/54 [00:01<00:00, 28.66it/s]
100%|██████████| 54/54 [00:01<00:00, 29.22it/s]


Test Dataset size:180


100%|██████████| 180/180 [00:05<00:00, 33.69it/s]
100%|██████████| 180/180 [00:05<00:00, 35.44it/s]
100%|██████████| 180/180 [00:05<00:00, 34.85it/s]


In [None]:
def hardVoting(cls, df_sub, npys):
    """Overwrite the labels of one product class with per-class model predictions.

    Rows of ``df_sub`` whose 'label' contains ``cls`` are selected. Every file
    in ``npys`` must hold one prediction per selected row, in row order.

    * One file: the selected labels are replaced by that file's predictions.
    * Several files: each selected row gets the most frequent value among its
      current label and all file predictions. Ties go to the earliest voter,
      with the current label first and then the files in the order given.

    Args:
        cls: substring identifying the product class.
        df_sub: submission frame with a 'label' column; modified in place.
        npys: non-empty sequence of ``.npy`` paths holding label arrays.

    Returns:
        ``df_sub`` (the same object that was passed in).

    Raises:
        AssertionError: if ``npys`` is empty.
        ValueError: if a file's length differs from the number of selected rows.
    """
    if not npys:
        raise AssertionError('npys must not be empty')

    # Boolean row mask; .loc with this mask writes through to df_sub no matter
    # what its DataFrame index looks like.
    mask = df_sub['label'].str.contains(cls, regex=False, na=False).to_numpy(dtype=bool)
    n_rows = int(mask.sum())
    if n_rows == 0:
        return df_sub

    preds = []
    for path in npys:
        # allow_pickle is needed for object-dtype label arrays; only load
        # files from a trusted source (pickle can execute arbitrary code).
        p = np.load(path, allow_pickle=True)
        if len(p) != n_rows:
            raise ValueError(
                f'{path} holds {len(p)} predictions but {n_rows} rows match {cls!r}'
            )
        preds.append(p)

    # Single-model prediction: replace every existing prediction of this class
    # with the single model's prediction.
    if len(preds) == 1:
        df_sub.loc[mask, 'label'] = preds[0]
    # Hard-voting prediction: vote over the original prediction and every
    # single-model prediction (works for any number of files).
    else:
        current = df_sub.loc[mask, 'label'].tolist()
        voted = []
        for row_votes in zip(current, *preds):
            row_votes = list(row_votes)
            # max() returns the first item reaching the top count -> stable ties.
            voted.append(max(row_votes, key=row_votes.count))
        df_sub.loc[mask, 'label'] = voted

    return df_sub