In [None]:
import os
# Experiment identifier; it names the checkpoint directory and the result CSV.
exp_id = 13

In [None]:
import json, math, random
from glob import glob

import ray
from ray import train

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

import torch
from torch import nn
from torch.nn import functional as F
from torchvision import models
from torchvision import datasets, transforms
from torch.utils.data import Dataset

from transformers import ViTFeatureExtractor, ViTModel, ViTForImageClassification, ViTConfig

In [None]:
# Load one training sample (sensor CSV, image, annotation JSON) for inspection,
# plus the table of training labels.
data_path = "/usr/src/coco/dacon/lg/data"
# glob() order is filesystem-dependent; sorting keeps the inspected sample stable across runs.
sample = sorted(glob(f'{data_path}/train/*'))[5]

sample_csv = pd.read_csv(glob(sample+'/*.csv')[0])
sample_image = cv2.imread(glob(sample+'/*.jpg')[0])
# Use a context manager so the JSON file handle is closed deterministically.
with open(glob(sample+'/*.json')[0], 'r') as sample_json_file:
    sample_json = json.load(sample_json_file)
train_df = pd.read_csv(f'{data_path}/train.csv')

In [None]:
# sample_csv.describe() 

In [None]:
# Sensor columns read from every sample CSV: internal temperature, internal
# humidity and internal dew point, each as mean / max / min.
csv_features = ['내부 온도 1 평균', '내부 온도 1 최고', '내부 온도 1 최저', '내부 습도 1 평균', '내부 습도 1 최고', 
                '내부 습도 1 최저', '내부 이슬점 평균', '내부 이슬점 최고', '내부 이슬점 최저']

# Code -> name lookup tables used to build human-readable label descriptions.
# crop: crop code -> crop name.
crop = {'1':'딸기','2':'토마토','3':'파프리카','4':'오이','5':'고추','6':'시설포도'}
# disease: crop code -> {disease code -> disease name} for that crop.
disease = {'1':{'a1':'딸기잿빛곰팡이병','a2':'딸기흰가루병','b1':'냉해피해','b6':'다량원소결핍 (N)','b7':'다량원소결핍 (P)','b8':'다량원소결핍 (K)'},
           '2':{'a5':'토마토흰가루병','a6':'토마토잿빛곰팡이병','b2':'열과','b3':'칼슘결핍','b6':'다량원소결핍 (N)','b7':'다량원소결핍 (P)','b8':'다량원소결핍 (K)'},
           '3':{'a9':'파프리카흰가루병','a10':'파프리카잘록병','b3':'칼슘결핍','b6':'다량원소결핍 (N)','b7':'다량원소결핍 (P)','b8':'다량원소결핍 (K)'},
           '4':{'a3':'오이노균병','a4':'오이흰가루병','b1':'냉해피해','b6':'다량원소결핍 (N)','b7':'다량원소결핍 (P)','b8':'다량원소결핍 (K)'},
           '5':{'a7':'고추탄저병','a8':'고추흰가루병','b3':'칼슘결핍','b6':'다량원소결핍 (N)','b7':'다량원소결핍 (P)','b8':'다량원소결핍 (K)'},
           '6':{'a11':'시설포도탄저병','a12':'시설포도노균병','b4':'일소피해','b5':'축과병'}}
# risk: risk code -> stage name (early / middle / late).
risk = {'1':'초기','2':'중기','3':'말기'}


In [None]:
# Crop-level descriptions: one "<crop>_00_0" healthy entry per crop, then one
# "<crop>_<disease>" entry per disease of that crop.
label_description_sub = {}
for crop_code, disease_names in disease.items():
    label_description_sub[f'{crop_code}_00_0'] = f'{crop[crop_code]}_정상'
    label_description_sub.update(
        (f'{crop_code}_{disease_code}', f'{crop[crop_code]}_{disease_name}')
        for disease_code, disease_name in disease_names.items()
    )
print(list(label_description_sub.items())[-5:])
print(len(label_description_sub))

# Full descriptions: healthy entry per crop plus every crop x disease x risk combination.
label_description = {}
for crop_code, disease_names in disease.items():
    label_description[f'{crop_code}_00_0'] = f'{crop[crop_code]}_정상'
    for disease_code, disease_name in disease_names.items():
        for risk_code, risk_name in risk.items():
            full_code = f'{crop_code}_{disease_code}_{risk_code}'
            label_description[full_code] = f'{crop[crop_code]}_{disease_name}_{risk_name}'

# Keep only the labels that actually occur in train.csv, in sorted order.
target_label_description = {
    code: label_description[code] for code in sorted(train_df.label.unique())
}

print('n label : ', len(target_label_description))

In [None]:
# Class index <-> label string, indexed in the (sorted) order of target_label_description.
label_decoder = dict(enumerate(target_label_description))
label_encoder = {label: idx for idx, label in label_decoder.items()}

In [None]:
def create_new_feature(df):
    """Append range/mean ratio features and pair each row with its successor.

    Three columns are added to a copy of ``df`` (the input is not modified):
    ``new_temp``, ``new_hum`` and ``new_dew``, each computed as
    ``(max - min) / (mean + 0.001)`` of the matching sensor group.

    The result has one row per pair of consecutive input rows: row ``i``
    concatenated with row ``i + 1``, so it is twice as wide and one row
    shorter. A single-row input yields one row whose second half is zeros.

    Args:
        df: frame holding the nine sensor columns referenced below.

    Returns:
        pd.DataFrame with default integer column labels.
    """
    augmented = df.copy()
    ratio_specs = (
        ("new_temp", "내부 온도 1 최고", "내부 온도 1 최저", "내부 온도 1 평균"),
        ("new_hum", "내부 습도 1 최고", "내부 습도 1 최저", "내부 습도 1 평균"),
        ("new_dew", "내부 이슬점 최고", "내부 이슬점 최저", "내부 이슬점 평균"),
    )
    for new_col, high_col, low_col, mean_col in ratio_specs:
        spread = augmented[high_col] - augmented[low_col]
        # the small constant keeps the division defined when the mean is 0
        augmented[new_col] = spread / (augmented[mean_col] + 0.001)

    rows = augmented.values
    if rows.shape[0] <= 1:
        # no successor row exists: pad the second half with zeros
        paired = [rows.tolist()[0] + [0] * augmented.shape[1]]
    else:
        paired = [cur.tolist() + nxt.tolist() for cur, nxt in zip(rows[:-1], rows[1:])]

    return pd.DataFrame(np.array(paired))
    
class CustomDataset(Dataset):
    """Dataset yielding an image, a padded sensor sequence and (in train mode) a label.

    Each entry of ``files`` is a sample directory that contains ``<name>.csv``,
    ``<name>.jpg`` and, for labelled data, ``<name>.json``, where ``<name>`` is
    the directory's own name.

    Args:
        files: list of sample directory paths.
        max_len: number of time steps kept per sample. Shorter sequences are
            zero-padded at the front; longer ones keep their last ``max_len`` rows.
        labels: unused; kept so existing call sites keep working.
        transform: in ``'train'`` mode, one callable applied to the PIL image.
            In any other mode, a sequence of three callables used as
            test-time-augmentation views.
        csv_cols: names of the CSV columns to read.
        mode: ``'train'`` also returns the label read from the JSON annotation;
            any other value returns the three TTA views and no label.
    """

    def __init__(self, files, max_len, labels=None, transform=None, csv_cols=[], mode='train'):
        self.mode = mode
        self.files = files
        # copy so the (mutable) default / caller's list is never shared with the instance
        self.csv_feature_dict = list(csv_cols)
        # per-sample cache of the processed sensor array (filled lazily in __getitem__)
        self.csv_feature_check = [0]*len(self.files)
        self.csv_features = [None]*len(self.files)
        self.max_len = max_len # 24 * 6 * 2
        self.label_encoder = label_encoder
        self.transform = transform
        self.debug_df = None  # path of the sample whose CSV was processed most recently

    def __len__(self):
        return len(self.files)

    @staticmethod
    def _clean_and_scale(df):
        """Turn raw sensor columns into finite floats scaled by 1/100 and capped at 1.0."""
        # np.nan (not None): DataFrame.replace(value=None) pad-fills on pandas < 1.4
        # instead of producing missing values, so its behaviour depends on the version.
        df = df.replace('-', np.nan)
        cleaned = {}
        for col in df.columns:
            series = pd.to_numeric(df[col]).astype(np.float64)
            if series.isna().any():
                # 'both' also fills leading gaps, which forward-only interpolation leaves
                # as NaN; a column with no valid value at all falls back to 0.
                series = series.interpolate(method='linear', limit_direction='both').fillna(0.0)
            cleaned[col] = (series / 100).clip(upper=1.0)
        return pd.DataFrame(cleaned, index=df.index, columns=df.columns)

    def _load_csv_feature(self, file, file_name):
        """Read one sample CSV and return a (max_len, n_features) front-padded array."""
        df = pd.read_csv(f'{file}/{file_name}.csv')[self.csv_feature_dict]
        df = create_new_feature(self._clean_and_scale(df))
        self.debug_df = file

        csv_feature = np.zeros((self.max_len, len(df.columns)))
        length = min(self.max_len, len(df))
        # right-align the sequence: the padding sits at the start
        csv_feature[-length:] = df.to_numpy()[-length:]
        return csv_feature

    def _load_label(self, file, file_name):
        """Build the '<crop>_<disease>_<risk>' label string from the sample's JSON."""
        with open(f'{file}/{file_name}.json', 'r') as f:
            annotations = json.load(f)['annotations']
        return f"{annotations['crop']}_{annotations['disease']}_{annotations['risk']}"

    def __getitem__(self, i):
        file = self.files[i]
        # normpath drops a trailing separator, so basename is never the empty string
        file_name = os.path.basename(os.path.normpath(file))

        # csv (processed once per sample, then served from the cache)
        if self.csv_feature_check[i] == 0:
            csv_feature = self._load_csv_feature(file, file_name)
            self.csv_features[i] = csv_feature
            self.csv_feature_check[i] = 1
        else:
            csv_feature = self.csv_features[i]
        csv_tensor = torch.tensor(csv_feature, dtype=torch.float32)

        # image
        img = Image.open(f'{file}/{file_name}.jpg').convert("RGB")

        if self.mode == 'train':
            label = self._load_label(file, file_name)
            return {
                'img' : self.transform(img),
                'csv_feature' : csv_tensor,
                'label' : torch.tensor(self.label_encoder[label], dtype=torch.long)
            }

        # any other mode: three TTA views of the same image, no label
        return {
            'img' : self.transform[0](img),
            'img_tta1' : self.transform[1](img),
            'img_tta2' : self.transform[2](img),
            'csv_feature' : csv_tensor
        }
        
class CustomDataset_old(Dataset):
    """Earlier dataset variant: scaled raw sensor columns and a single image transform.

    Unlike ``CustomDataset`` it adds no engineered features and returns one
    image per sample in every mode.

    Args:
        files: list of sample directory paths; each holds ``<name>.csv``,
            ``<name>.jpg`` and (for labelled data) ``<name>.json``.
        max_len: number of time steps kept; sequences are zero-padded at the front.
        labels: unused; kept so existing call sites keep working.
        transform: callable applied to the PIL image.
        csv_cols: names of the CSV columns to read.
        mode: ``'train'`` also returns the label read from the JSON annotation.
    """

    def __init__(self, files, max_len, labels=None, transform=None, csv_cols=[], mode='train'):
        self.mode = mode
        self.files = files
        self.csv_feature_dict = list(csv_cols)
        # per-sample cache of the processed sensor array
        self.csv_feature_check = [0]*len(self.files)
        self.csv_features = [None]*len(self.files)
        self.max_len = max_len # 24 * 6 * 2
        self.label_encoder = label_encoder
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):
        file = self.files[i]
        # normpath drops a trailing separator, so basename is never the empty string
        file_name = os.path.basename(os.path.normpath(file))

        # csv
        if self.csv_feature_check[i] == 0:
            df = pd.read_csv(f'{file}/{file_name}.csv')[self.csv_feature_dict]
            # np.nan (not None): DataFrame.replace(value=None) pad-fills on pandas < 1.4
            df = df.replace('-', np.nan)
            # scale by 1/100 and cap at 1.0
            for col in df.columns:
                series = pd.to_numeric(df[col])
                if series.isna().any():
                    series = series.interpolate(method='linear')
                df[col] = (series / 100).clip(upper=1.0)

            # zero padding at the front; the most recent rows are kept
            csv_feature = np.zeros((self.max_len, len(df.columns)))
            length = min(self.max_len, len(df))
            csv_feature[-length:] = df.to_numpy()[-length:]

            self.csv_features[i] = csv_feature
            self.csv_feature_check[i] = 1
        else:
            csv_feature = self.csv_features[i]

        # image
        img = Image.open(f'{file}/{file_name}.jpg').convert("RGB")
        img = self.transform(img)

        item = {
            'img' : img,
            'csv_feature' : torch.tensor(csv_feature, dtype=torch.float32)
        }
        if self.mode == 'train':
            with open(f'{file}/{file_name}.json', 'r') as f:
                annotations = json.load(f)['annotations']
            label = f"{annotations['crop']}_{annotations['disease']}_{annotations['risk']}"
            item['label'] = torch.tensor(self.label_encoder[label], dtype=torch.long)
        return item

# Dataset preparation

In [None]:
seed = 1000  # global random seed; passed to seed_everything() and StratifiedKFold

# Sample directories, sorted by path.
# NOTE(review): `train` rebinds the name brought in by `from ray import train` in the import cell.
train = sorted(glob(f'{data_path}/train/*'))
test = sorted(glob(f'{data_path}/test/*'))

# Labels in train.csv row order.
# NOTE(review): the CV split pairs labels[i] with train[i]; presumably the sorted
# directory order matches the row order of train.csv — TODO confirm.
labels = pd.read_csv(f'{data_path}/train.csv')['label']
print(labels.value_counts())  

In [None]:
# Model / data parameters
pretrained_architectures = [
    "google/vit-base-patch16-224-in21k",
    'google/vit-base-patch16-224',
    'google/vit-base-patch16-384',
]
pretrained_architecture = pretrained_architectures[0]

# The 4th dash-separated token of the checkpoint name is parsed as the square input size.
w = int(pretrained_architecture.split("-")[3])
img_size = (w, w)
scale_val = 0.95

batch_size = 16
n_classes = len(label_encoder)
# time steps kept per sample by the dataset
max_len = 24 * 6 * 2


In [None]:
# Individual image transforms, named so the train / test pipelines read explicitly.
resize_tf = transforms.Resize(img_size)
jitter_tf = transforms.ColorJitter(brightness=0.5)
hflip_tf = transforms.RandomHorizontalFlip()
random_crop_tf = transforms.RandomResizedCrop(
    img_size, scale=(scale_val, 1), ratio=(scale_val, 1)
)
to_tensor_tf = transforms.ToTensor()
normalize_tf = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))

compose_list = [resize_tf, jitter_tf, hflip_tf, random_crop_tf, to_tensor_tf, normalize_tf]

# Training uses every step; the test pipeline keeps only resize + tensor conversion + normalisation.
train_transform = transforms.Compose(compose_list)
test_transform = transforms.Compose([resize_tf, to_tensor_tf, normalize_tf])

# Network definition

In [None]:
print(f"using pretrained architecture : {pretrained_architecture}")

class ViTEncoder(nn.Module):
    """Image encoder that wraps the pretrained ViT named by ``pretrained_architecture``."""

    def __init__(self):
        super().__init__()
        self.vit = ViTModel.from_pretrained(
            pretrained_architecture,
            output_attentions=False,
            add_pooling_layer=False,
        )

    def forward(self, x):
        """Return the hidden state of the first token for every image in the batch."""
        # element 0 of the model output is the per-token hidden-state tensor
        token_states = self.vit(x)[0]
        return token_states[:, 0, :]

class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding for batch-first input.

    Args:
        d_model: embedding width.
        dropout: dropout probability applied after the encoding is added.
        max_len: longest sequence length supported.
    """

    def __init__(self, d_model, dropout, max_len):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(angles)
        # an odd d_model has one fewer cosine slot than sine slots
        pe[0, :, 1::2] = torch.cos(angles)[:, :d_model // 2]
        self.register_buffer('pe', pe)  # shape (1, max_len, d_model)

    def forward(self, x): # x shape: [bs, seq_len, embedding_dim]
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

class TSTransformer(nn.Module):
    """Transformer encoder over the sensor time series.

    Args:
        input_dim: number of features per time step.
        encoder_dim: width of the returned feature vector.
        d_model: transformer embedding width.
        nhead: number of attention heads.
        nlayers: number of encoder layers.
        dropout: dropout probability of the positional encoding.
        max_len: sequence length; ``forward`` requires inputs of exactly this
            length because the final linear layer has ``max_len`` inputs.
    """

    def __init__(self, input_dim, encoder_dim, d_model=128, nhead=4, nlayers=6, dropout=0.1, max_len=1000):
        super(TSTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        self.pe = PositionalEncoding(d_model=d_model, dropout=dropout, max_len=max_len)
        # batch_first=True: the input is (B, seq, d_model). With the default
        # (seq, B, d_model) layout the layer would attend across the batch axis,
        # mixing different samples and making outputs depend on batch composition.
        layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.transformer = nn.TransformerEncoder(layers, num_layers=nlayers)
        self.fc = nn.Linear(max_len, encoder_dim)

    def forward(self, x):
        x = self.embedding(x)   # (B, max_len, input_dim) -> (B, max_len, d_model)
        x = self.pe(x)          # (B, max_len, d_model)
        x = self.transformer(x) # (B, max_len, d_model)
        # keep channel 0 of every time step: (B, max_len) -> (B, encoder_dim)
        x = self.fc(x[...,0])
        return x
    
class MyNetwork(nn.Module):
    """Classifier that fuses a ViT image feature with a time-series feature.

    Args:
        input_dim: number of sensor features per time step.
        encoder_dim: width of the fused hidden layer; the time-series encoder
            emits ``int(encoder_dim / 2)`` features before pooling.
        d_model: embedding width of the time-series transformer.
        nhead, nlayers: accepted but NOT forwarded to ``TSTransformer``, which
            therefore uses its own defaults. Left as-is so existing
            checkpoints keep loading.
        num_classes: number of output logits.
        max_len: sensor sequence length.

    ``fc2``, ``dropout1`` and ``dropout2`` are not used by ``forward``; they
    are kept so the module's state-dict keys stay unchanged.
    """

    def __init__(self, input_dim, encoder_dim, d_model=128, nhead=4, nlayers=4, num_classes=1, max_len=288):
        super(MyNetwork, self).__init__()
        self.vit_encoder = ViTEncoder()
        ts_dim = int(encoder_dim/2)
        self.ts_transformer_encoder = TSTransformer(input_dim=input_dim, encoder_dim=ts_dim, d_model=d_model, max_len=max_len)
        # Read the image feature width from the loaded ViT instead of hard-coding 768.
        img_dim = self.vit_encoder.vit.config.hidden_size
        # Output length of AvgPool1d(kernel_size=3, stride=2, padding=1) on ts_dim inputs;
        # equals encoder_dim / 4 whenever encoder_dim is divisible by 4.
        pooled_dim = (ts_dim - 1) // 2 + 1
        self.fc1 = nn.Linear(img_dim + pooled_dim, encoder_dim)
        self.fc2 = nn.Linear(encoder_dim, encoder_dim)
        self.fc3 = nn.Linear(encoder_dim, num_classes)
        self.pooling = nn.AvgPool1d(kernel_size=3, stride=2, padding=1)
        self.dropout1 = nn.Dropout(0.7)
        self.dropout2 = nn.Dropout(0.7)

    def forward(self, img_input, seq_input):
        img_feat = self.vit_encoder(img_input)             # (B, img_dim)

        seq_feat = self.ts_transformer_encoder(seq_input)  # (B, encoder_dim / 2)
        # the 2-D input is pooled along its last (feature) axis
        seq_feat = self.pooling(seq_feat)                  # (B, pooled_dim)

        x = torch.cat([img_feat, seq_feat], dim=1)         # (B, img_dim + pooled_dim)
        x = self.fc1(x)                                    # (B, encoder_dim)
        x = self.fc3(x)                                    # (B, num_classes)

        return x


class FocalLoss(nn.Module):
    """Focal loss on top of cross-entropy: ``alpha * (1 - p_t) ** gamma * CE``.

    Args:
        alpha: scalar weight applied to every sample.
        gamma: focusing exponent.
        logits: stored on the instance; not read by ``forward``.
        reduce_size: if True return the batch mean, otherwise the per-sample losses.
    """

    def __init__(self, alpha=1, gamma=2, logits=False, reduce_size=True):
        super().__init__()
        self.alpha, self.gamma = alpha, gamma
        self.logits = logits
        self.reduce_size = reduce_size

    def forward(self, inputs, targets):
        per_sample_ce = F.cross_entropy(inputs, targets, reduction='none')
        # exp(-CE) is the probability the model assigns to the true class
        prob_true = torch.exp(-per_sample_ce)
        focal = self.alpha * (1 - prob_true) ** self.gamma * per_sample_ce
        return focal.mean() if self.reduce_size else focal
        
def seed_everything(seed): # fix every random seed
    """Seed Python, NumPy and PyTorch (CPU and CUDA) and make cuDNN deterministic."""
    random.seed(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # covers the multi-GPU case

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


# Seed every RNG with the notebook-wide seed.
seed_everything(seed)

# Training

In [None]:

ray.init(num_gpus=1, ignore_reinit_error=True)

@ray.remote(num_gpus=1)
class TrainRay(object):
    """Ray actor that trains, validates and checkpoints ``MyNetwork`` for one CV fold.

    Args:
        train_loader, valid_loader: loaders yielding dicts with ``'img'``,
            ``'csv_feature'`` and ``'label'``.
        test_loader: loader yielding dicts with ``'img'`` and ``'csv_feature'``;
            may be None when ``predict`` is not used.
        input_dim, encoder_dim, d_model, max_len, n_classes: forwarded to ``MyNetwork``.
        epochs: number of training epochs.
        lr: Adam learning rate.
        exp_id: experiment id; selects the checkpoint directory.
        fold_no: fold number; selects the checkpoint file name.
    """

    def __init__(self, train_loader, valid_loader, test_loader, input_dim, encoder_dim, d_model, max_len, n_classes, epochs, lr, exp_id, fold_no):
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.train_loader, self.val_loader, self.test_loader = train_loader, valid_loader, test_loader

        self.model = MyNetwork(
            input_dim=input_dim, encoder_dim=encoder_dim, d_model=d_model, num_classes=n_classes,max_len=max_len
        ).to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        # halve the learning rate every 10 epochs
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.5)
        self.criterion = nn.CrossEntropyLoss() # FocalLoss()#
        self.scaler = torch.cuda.amp.GradScaler() 
        self.epochs = epochs
        self.fold_no = fold_no
        self.save_path = f"/usr/src/coco/dacon/lg/data/ckpt/exp_{exp_id}"
        os.makedirs(self.save_path, exist_ok=True)
        # single checkpoint file of this fold (save_path itself is a directory)
        self.ckpt_file = f'{self.save_path}/Fold{self.fold_no}.pth'
        # a checkpoint is only written once the validation F1 reaches this value
        self.pref_cutoff = 0.91
    
    def train_epoch(self, batch_item, is_training):
        """Run one batch; despite the name this is a per-batch step.

        Returns:
            (loss, score): detached loss tensor and the macro-F1 of the batch.
        """
        img = batch_item['img'].to(self.device)
        csv_feature = batch_item['csv_feature'].to(self.device)
        label = batch_item['label'].to(self.device)
        if is_training:
            self.model.train()
            self.optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                output = self.model(img, csv_feature)
                loss = self.criterion(output, label)
            
            self.scaler.scale(loss).backward()
            self.scaler.step(self.optimizer)
            self.scaler.update()
        else:
            self.model.eval()
            with torch.no_grad():
                with torch.cuda.amp.autocast():
                    output = self.model(img, csv_feature)
                loss = self.criterion(output, label)

        score = self.get_f1(label, output)
        # detach so callers that accumulate the loss do not hold on to the autograd graph
        return loss.detach(), score

    def _run_loader(self, loader, is_training):
        """Run every batch of ``loader`` and return (mean loss, mean per-batch F1)."""
        total_loss, total_f1, n_batches = 0.0, 0.0, 0
        for batch_item in loader:
            batch_loss, batch_f1 = self.train_epoch(batch_item, is_training)
            total_loss += batch_loss.item()
            total_f1 += batch_f1
            n_batches += 1
        if n_batches == 0:
            raise ValueError("data loader produced no batches")
        return total_loss / n_batches, total_f1 / n_batches
        
    def train(self):
        """Train for ``self.epochs`` epochs, checkpointing on the best validation F1.

        The reported F1 is the mean of per-batch macro-F1 scores, not the
        macro-F1 of the whole split.

        Returns:
            dict with the per-epoch lists ``'loss'``, ``'f1'``, ``'val_loss'``
            and ``'val_f1'``.
        """
        loss_plot, val_loss_plot = [], []
        metric_plot, val_metric_plot = [], []

        for epoch in range(self.epochs):
            train_loss, train_f1 = self._run_loader(self.train_loader, True)
            self.scheduler.step()
            
            print({
                'Phase': "Train", 
                'Epoch': epoch + 1,
                'Loss' : '{:06f}'.format(train_loss),
                'F-1' : '{:06f}'.format(train_f1)
            })
            loss_plot.append(train_loss)
            metric_plot.append(train_f1)

            val_loss, val_f1 = self._run_loader(self.val_loader, False)

            print({
                'Phase': "Valid", 
                'Epoch': epoch + 1,
                'Loss' : '{:06f}'.format(val_loss),
                'F-1' : '{:06f}'.format(val_f1)
            })
            val_loss_plot.append(val_loss)
            val_metric_plot.append(val_f1)

            # save only when this epoch is the best so far AND clears the cutoff
            if np.max(val_metric_plot) == val_metric_plot[-1] and val_metric_plot[-1]>=self.pref_cutoff:
                torch.save(self.model.state_dict(), self.ckpt_file)
                print(f"ckpt saved! epoch: {epoch} / val-f1: {val_metric_plot[-1]}")
        return {
            'loss': loss_plot,
            'f1': metric_plot,
            'val_loss': val_loss_plot,
            'val_f1': val_metric_plot,
        }
    
    def get_f1(self, real, pred):
        """Macro-F1 between integer labels ``real`` and the argmax of the logits ``pred``."""
        real = real.cpu()
        pred = torch.argmax(pred, dim=1).cpu()
        score = f1_score(real, pred, average='macro')
        return score
    
    def predict(self):
        """Load this fold's checkpoint and return predicted class indices for ``test_loader``."""
        if self.test_loader is None:
            raise ValueError("predict() needs a test_loader, but the actor was created with None")
        self.set_best_weights()
        self.model.eval()
        results = []
        for batch_item in tqdm(self.test_loader):
            img = batch_item['img'].to(self.device)
            seq = batch_item['csv_feature'].to(self.device)
            with torch.no_grad():
                with torch.cuda.amp.autocast():
                    output = self.model(img, seq)
            # .to() instead of torch.tensor(tensor), which copy-constructs and warns
            output = torch.argmax(output, dim=1).to(torch.int32).cpu().numpy()
            results.extend(output)
        return results
    
    def get_weights(self):
        return self.model.state_dict()

    def set_best_weights(self):
        """Load the checkpoint file written by ``train`` for this fold."""
        # load the fold's .pth file; self.save_path is the containing directory
        self.model.load_state_dict(torch.load(self.ckpt_file, map_location=self.device))
        print(f"best model loaded in here: {self.ckpt_file}")

    def save(self):
        torch.save(self.model.state_dict(), "best.pth")
    
    def print_gpu_ids(self):
        return "This actor is allowed to use GPUs {}.".format(ray.get_gpu_ids())


In [None]:
# Training hyper-parameters.
n_fold = 5  # number of stratified CV folds
learning_rate = 3e-5 # previously 1e-4
d_model = 512 # previously 256
dropout_rate = 0.1  # NOTE(review): not referenced by any code visible in this notebook
epochs = 50
# Width of create_new_feature() output: (9 sensor + 3 ratio columns) x 2 paired rows; was 9 before.
input_dim = 24 #9
encoder_dim = 2048

# Single-run (non-CV) actor construction, kept for reference:
# train_ray = TrainRay.remote(
#     train_loader=train_dataloader, 
#     valid_loader=val_dataloader,
#     test_loader=test_dataloader,
#     input_dim=input_dim, encoder_dim=encoder_dim, d_model=d_model, max_len=max_len, 
#     n_classes=n_classes, epochs=epochs, lr=learning_rate, exp_id=exp_id
# )

In [None]:
# Stratified K-fold training: one Ray actor per fold, each writing its own checkpoint.
seed_everything(seed)
skf = StratifiedKFold(n_splits=n_fold, random_state=seed, shuffle=True)

CV_f1, CV_info = [], []
start_fold = 1  # raise this to resume from a later fold
train_files = np.array(train)
for fold_no, (train_index, val_index) in enumerate(skf.split(X=train, y=labels), start=1):
    if fold_no >= start_fold:
        trainset = train_files[train_index].tolist()
        validset = train_files[val_index].tolist()

        train_dataset = CustomDataset(files=trainset, max_len=max_len, transform=train_transform, csv_cols=csv_features)
        # Validate on the deterministic pipeline: random augmentation here would add
        # noise to the F1 that decides which epoch gets checkpointed.
        val_dataset = CustomDataset(files=validset, max_len=max_len, transform=test_transform, csv_cols=csv_features)
        train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False) 

        train_ray = TrainRay.remote(
            train_loader=train_dataloader, 
            valid_loader=val_dataloader,
            test_loader=None,
            input_dim=input_dim, encoder_dim=encoder_dim, d_model=d_model, max_len=max_len, 
            n_classes=n_classes, epochs=epochs, lr=learning_rate, exp_id=exp_id, fold_no=fold_no
        )

        print(f"#==== FOLD {fold_no} ====#")
        # block until this fold has finished training
        ray.get(train_ray.train.remote())
        # drop the actor handle before the next fold creates its own actor
        del train_ray

    print("#===================#")
    


In [None]:
#exp12-1: 0.9156  #d_model 512 -> 256
#exp12-2: 0.918   #d_model 256, pe modify #21epoch
#exp12-3: 0.915   #d_model 256, embedding_dim 1024, pe modify #15epoch
#exp12-4: 0.924   #d_model 512, embedding_dim 2048, pe modify # epoch ##p100- batch16

# Inference

In [None]:
# Submission template; the inference loop adds one prediction column per checkpoint to it.
submission = pd.read_csv(f'{data_path}/sample_submission.csv')

In [None]:
compose_list = [
    transforms.Resize(img_size), 
    transforms.ColorJitter(brightness=0.5), 
    transforms.RandomHorizontalFlip(), 
    transforms.RandomResizedCrop(
        img_size, scale=(scale_val, 1), ratio=(scale_val, 1)
        ),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
]

# Three test-time-augmentation views, each ending with ToTensor + Normalize:
#   tta1: resize only
#   tta2: resize + brightness jitter
#   tta3: resize + horizontal flip
tta1 = transforms.Compose([compose_list[0]]+compose_list[-2:])
tta2 = transforms.Compose(compose_list[:2]+compose_list[-2:])
# p=1.0: with the default p=0.5 this view is identical to tta1 for about half of the
# samples, which turns the 3-way majority vote into "tta1 always wins" for them.
tta3 = transforms.Compose([compose_list[0], transforms.RandomHorizontalFlip(p=1.0)]+compose_list[-2:])
# print(tta1, tta2, tta3)

In [None]:
# In 'test' mode the dataset applies the three TTA pipelines to every image.
tta_transforms = [tta1, tta2, tta3]
test_dataset = CustomDataset(
    files=test,
    max_len=max_len,
    transform=tta_transforms,
    csv_cols=csv_features,
    mode='test',
)
# shuffle=False keeps predictions aligned with the order of `test`
test_dataloader = torch.utils.data.DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [None]:
from scipy import stats

def predict(dataset):
    """Predict class indices for every batch of ``dataset`` with the global ``model``.

    Args:
        dataset: iterable of batches (dicts with ``'img'`` and ``'csv_feature'``
            tensors), e.g. a DataLoader.

    Returns:
        list of int32 class indices, in iteration order.

    Note: relies on a module-level ``model`` that already lives on the GPU.
    """
    model.eval()
    results = []
    # iterate the loader directly so tqdm can show the total number of batches
    for batch_item in tqdm(dataset):
        img = batch_item['img'].cuda()
        seq = batch_item['csv_feature'].cuda()
        with torch.no_grad():
            output = model(img, seq)
        # .to() instead of torch.tensor(tensor), which copy-constructs and warns
        output = torch.argmax(output, dim=1).to(torch.int32).cpu().numpy()
        results.extend(output)
    return results

def predict_tta(dataset):
    model.eval()
    tqdm_dataset = tqdm(enumerate(dataset))
    results = []
    for batch, batch_item in tqdm_dataset:
        tta_res = []
        for im in ["img", "img_tta1", "img_tta2"]:
            img = batch_item[im].cuda()
            seq = batch_item['csv_feature'].cuda()
            with torch.no_grad():
                output = model(img, seq)
                #print("output dim", output.shape)
            unit_output = torch.tensor(torch.argmax(output, dim=1), dtype=torch.int32).cpu().numpy()
            #print("unit_output dim", unit_output.shape)
            tta_res.append(unit_output)
        output = stats.mode(np.array(tta_res))[0][0]
        #print(output)
        results.extend(output)
    return results

In [None]:
ckpt_path = f"{data_path}/ckpt/exp_{exp_id}"
# os.listdir() order is arbitrary; sorting keeps the submission columns in a stable order.
ckpts = sorted(c for c in os.listdir(ckpt_path) if c.endswith(".pth"))
# Training only saves a fold once its validation F1 reaches the cutoff, so the
# directory can be empty; fail here with a clear message instead of later.
if not ckpts:
    raise FileNotFoundError(f"no .pth checkpoint found in {ckpt_path}")
print(ckpts)

In [None]:
# Run TTA inference once per checkpoint and store each result as its own column.
seed_everything(seed)

for ckpt in ckpts:
    state_dict = torch.load(f"{ckpt_path}/{ckpt}")
    model = MyNetwork(input_dim=input_dim, encoder_dim=encoder_dim, d_model=d_model, num_classes=n_classes,max_len=max_len)
    model.load_state_dict(state_dict)
    model = model.cuda()

    # predict_tta reads the global `model` bound above
    raw_preds = predict_tta(test_dataloader)
    preds = np.array([label_decoder[int(class_idx)] for class_idx in raw_preds])
    column_name = "label_" + '_'.join(ckpt.split('_')[:3])
    submission[column_name] = preds

    # move the weights off the GPU and drop the reference before the next checkpoint
    model.cpu()
    del model

In [None]:
# Row-wise mode over every column after the first two.
# NOTE(review): assumes the template has exactly two columns, so that the
# remaining ones are the per-checkpoint predictions — confirm.
bag_pred = submission.iloc[:, 2:].mode(axis=1)

In [None]:
# Column 0 of the mode frame holds the first mode of each row; use it as the final label.
submission['label'] = bag_pred[0].values
submission.head()

In [None]:
# to_csv does not create missing directories, so make sure the target folder exists.
result_dir = f'{data_path}/result'
os.makedirs(result_dir, exist_ok=True)
submission.to_csv(f'{result_dir}/res_{exp_id}.csv', index=False)