# **Imports**

In [None]:
import torch
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
import torchvision
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset, RandomSampler
import pandas as pd
import os
import numpy as np
import random
import matplotlib.pyplot as plt
from skimage import io
from tqdm import tqdm
import copy
from IPython.display import clear_output
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import recall_score
from torchvision.models import *
import gc
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
import warnings
from sklearn.exceptions import UndefinedMetricWarning
# Silence sklearn's UndefinedMetricWarning for the whole session.
# NOTE(review): presumably triggered by the per-batch recall_score calls in train_model
# when a class is absent from a batch — confirm before relying on the reported metric.
warnings.filterwarnings(action='ignore', category=UndefinedMetricWarning)

In [None]:
# Seed every RNG the notebook relies on so that Restart & Run All is repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# manual_seed_all covers every visible GPU, not only the current device;
# it is silently ignored when CUDA is unavailable.
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
# cuDNN autotuning may pick different kernels from run to run; keep it off explicitly.
torch.backends.cudnn.benchmark = False

# **Utils**

In [None]:
def show_progress_plots(all_metrics, metric_names, val=None):
  """Plot the per-epoch history of each tracked metric, one subplot per metric.

  Args:
    all_metrics: sequence of ``[train_history, val_history]`` pairs, one per metric.
    metric_names: display names, aligned with ``all_metrics``.
    val: validation curves are drawn only when this is not ``None`` (train_model
      passes the validation dataloader itself as the flag).
  """
  n_metrics = len(all_metrics)
  if n_metrics == 0:
    return  # nothing to draw; a 1x0 subplot grid is not valid

  # squeeze=False keeps `axes` two-dimensional, so indexing also works for a single
  # metric (by default plt.subplots returns a bare Axes in that case).
  fig, axes = plt.subplots(1, n_metrics, figsize=(5 * n_metrics, 4), squeeze=False)
  for ax, metric_data, name in zip(axes[0], all_metrics, metric_names):
    ax.plot(metric_data[0], label=f'{name}_train')
    if val is not None:
      ax.plot(metric_data[1], label=f'{name}_val')
    ax.set_title(name)
    ax.set_xlabel('epoch')
    ax.legend()
  plt.show()

In [None]:
def _run_phase(model, dataloader, loss_func, optimizer, device, metric, metric_kwargs,
               training, logs):
  """Run one pass over ``dataloader``; return (loss, accuracy, metric) averaged over batches."""
  if training:
    model.train()
  else:
    model.eval()

  loss_sum = 0.
  acc_sum = 0.
  metric_sum = 0.
  for batch, labels in tqdm(dataloader, disable=not logs):
    batch = batch.to(device)
    labels = labels.to(device)
    if training:
      optimizer.zero_grad()

    # Gradients are tracked only while training; validation runs without autograd.
    with torch.set_grad_enabled(training):
      preds = model(batch)
      loss_value = loss_func(preds, labels)
      preds_class = preds.argmax(dim=1)
      if training:
        loss_value.backward()
        optimizer.step()

    loss_sum += loss_value.item()
    acc_sum += (preds_class == labels).float().mean().item()
    if metric:
      metric_sum += metric(labels.cpu(), preds_class.cpu(), **metric_kwargs)

  n_batches = len(dataloader)
  epoch_loss = np.around(loss_sum / n_batches, decimals=3)
  epoch_acc = np.around(acc_sum / n_batches, decimals=3)
  epoch_metric = np.around(float(metric_sum / n_batches), decimals=3) if metric else 0.
  return epoch_loss, epoch_acc, epoch_metric


def train_model(model, loss_func, train_dataloader, optimizer, n_epochs, val_dataloader=None,
                device='cpu', metric=None, metric_params=None, logs=True, plots=True):
  """Train ``model`` and keep a copy of it from the epoch with the lowest loss.

  Every epoch runs a train phase and, when ``val_dataloader`` is given, a validation
  phase. The "best" epoch is chosen by validation loss when a validation loader is
  supplied and by train loss otherwise. Loss, accuracy and the optional metric are
  averaged over batches and rounded to 3 decimals.

  Args:
    model: network to train; moved to ``device``.
    loss_func: callable ``loss_func(preds, labels)`` returning a scalar tensor.
    train_dataloader: iterable of ``(batch, labels)`` with a ``len()``.
    optimizer: optimizer stepping ``model``'s parameters.
    n_epochs: number of epochs to run.
    val_dataloader: optional validation iterable of ``(batch, labels)``.
    device: device the model and batches are moved to.
    metric: optional callable ``metric(y_true, y_pred, **kwargs)`` evaluated per batch.
    metric_params: keyword arguments for ``metric``; ``None`` keeps the historical
      ``average='macro'``.
    logs: print per-epoch progress (and clear the cell output between phases).
    plots: draw the history curves with ``show_progress_plots`` at the end.

  Returns:
    ``(best_model, best_loss, best_metric)``. ``best_model`` is ``None`` if no epoch of
    the selecting phase finished; ``best_metric`` stays at the 9999999 placeholder
    when ``metric`` is ``None``.

  Raises:
    ValueError: if a supplied dataloader yields no batches.
  """
  model.to(device)
  metric_kwargs = {'average': 'macro'} if metric_params is None else dict(metric_params)

  loaders = {'train': train_dataloader}
  if val_dataloader is not None:
    loaders['val'] = val_dataloader
  for phase, loader in loaders.items():
    if len(loader) == 0:
      raise ValueError(f"{phase} dataloader yields no batches")
  # Phase whose loss decides which model copy is returned.
  selected = 'val' if val_dataloader is not None else 'train'

  history = {phase: {'loss': [], 'acc': [], 'metric': []} for phase in ('train', 'val')}
  best = {phase: {'model': None, 'loss': 9999999, 'metric': 9999999}
          for phase in ('train', 'val')}

  all_metrics = [[history['train']['loss'], history['val']['loss']],
                 [history['train']['acc'], history['val']['acc']]]
  metric_names = ['loss', 'accuracy']
  if metric:
    # Only plot the metric when one was supplied (metric=None used to crash here).
    all_metrics.append([history['train']['metric'], history['val']['metric']])
    metric_names.append(getattr(metric, '__name__', 'metric'))

  try:
    for epoch in range(n_epochs):
      for phase, loader in loaders.items():
        if logs:
          print(f"\nStart {phase} phase")

        epoch_loss, epoch_acc, epoch_metric = _run_phase(
            model, loader, loss_func, optimizer, device, metric, metric_kwargs,
            training=(phase == 'train'), logs=logs)

        if logs:
          clear_output(True)
          print(f"Epoch {epoch+1} {phase} loss: {epoch_loss}")
          print(f"Epoch {epoch+1} {phase} acc: {epoch_acc}")
          if metric:
            print(f"Epoch {epoch+1} {phase} metric: {epoch_metric}")

        history[phase]['loss'].append(epoch_loss)
        history[phase]['acc'].append(epoch_acc)
        if metric:
          history[phase]['metric'].append(epoch_metric)

        if epoch_loss < best[phase]['loss']:
          best[phase]['loss'] = epoch_loss
          if metric:
            best[phase]['metric'] = epoch_metric
          # Copy the network only for the phase that is actually returned;
          # a second full copy per epoch would just waste (GPU) memory.
          if phase == selected:
            best[phase]['model'] = copy.deepcopy(model)
          if logs:
            if phase == 'train':
              print(f"Find new best model on train with loss: {epoch_loss}! epoch: {epoch+1}")
            else:
              print(f"Find new best model on val with loss: {epoch_loss}, epoch: {epoch+1} !")
  except KeyboardInterrupt:
    # Stop gracefully and fall through to the summary with what was learnt so far.
    print("Training stopped by user!")

  if plots:
    show_progress_plots(all_metrics, metric_names, val_dataloader)
  print(f"Best train loss: {best['train']['loss']}")
  print(f"Best val loss: {best['val']['loss']}")
  print(f"Best train metric: {best['train']['metric']}")
  print(f"Best val metric: {best['val']['metric']}")
  return best[selected]['model'], best[selected]['loss'], best[selected]['metric']

In [None]:
def predict(model, dataloader, device='cpu'):
  """Predict a class index for every item produced by ``dataloader``.

  Args:
    model: classifier returning one row of class scores per input.
    dataloader: iterable of ``(inputs, names)`` batches.
    device: device the model and inputs are moved to.

  Returns:
    ``(pred_labels, img_names)``: a 1-D NumPy array of predicted class indices and
    the list of names in the same order. Both are empty for an empty dataloader.
  """
  model.eval()
  model.to(device)
  pred_labels = []
  img_names = []

  with torch.no_grad():
    for inputs, paths in tqdm(dataloader):
      preds = model(inputs.to(device))
      # argmax over raw scores: softmax is monotonic, so it cannot change the winner.
      pred_labels.append(preds.argmax(dim=-1).cpu().numpy())
      img_names.extend(paths)

  if not pred_labels:
    # np.concatenate raises on an empty list; return an empty result instead.
    return np.empty(0, dtype=np.int64), img_names
  return np.concatenate(pred_labels), img_names

In [None]:
def show_images(dataloader, n=10):
  """Display up to ``n`` images from the first batch of ``dataloader``.

  Each image tensor is permuted from (C, H, W) to (H, W, C), de-normalized with the
  mean/std below, clipped to [0, 1] and shown with its label as the title.

  Args:
    dataloader: iterable whose first item is ``(img_tensors, labels)``.
    n: maximum number of images to show; capped at the batch size.
  """
  img_tensors, labels = next(iter(dataloader))
  # ImageNet channel statistics.
  # NOTE(review): assumed to match the A.Normalize() defaults used by the transforms — confirm.
  mean = np.array([0.485, 0.456, 0.406])
  std = np.array([0.229, 0.224, 0.225])

  # Never index past the batch: n may be larger than the batch size.
  n_shown = min(n, len(img_tensors))
  for idx in range(n_shown):
      image = img_tensors[idx].permute(1, 2, 0).numpy()
      image = std * image + mean
      plt.imshow(image.clip(0,1))
      plt.title("Класс: " + str(int(labels[idx])))
      plt.show()

In [None]:
def cross_validation(models_list, train_df, n_splits, batch_size, grad, \
                     transforms, train_path, n_epochs,metric, device='cpu'):
  """Compare model factories with K-fold cross-validation.

  For every factory in ``models_list`` a fresh model is trained on each fold with
  Adam (lr=1e-3) and cross-entropy loss; the best loss/metric reported by
  ``train_model`` is averaged over the folds.

  Args:
    models_list: callables ``factory(grad)`` returning a new model.
    train_df: DataFrame accepted by ``ImgDataset`` (file name, label).
    n_splits: number of folds.
    batch_size: batch size for both loaders.
    grad: forwarded to every factory.
    transforms: pair ``[train_transform, eval_transform]``.
    train_path: folder holding the images.
    n_epochs: epochs per fold.
    metric: metric callable forwarded to ``train_model``.
    device: device forwarded to ``train_model``.

  Returns:
    ``(losses, metrics)``: two dicts mapping the 1-based model position to the mean
    best loss / mean best metric over the folds.
  """
  kfold = KFold(n_splits=n_splits, shuffle=True, random_state=SEED)
  losses = {}
  metrics = {}
  for idx, get_model_f in enumerate(models_list):
    model_losses = []
    model_metrics = []

    for fold_idx, (train_indices, test_indices) in enumerate(kfold.split(train_df), start=1):
      print(f"Start train model {idx+1}, fold {fold_idx}")
      model = get_model_f(grad)
      loss_f = torch.nn.CrossEntropyLoss()
      optimizer = torch.optim.Adam(model.parameters(), lr=1.0e-3)

      train_dataset = ImgDataset(train_df.iloc[train_indices], train_path,
                                 transform=transforms[0], with_label=True)
      train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                    shuffle=True, drop_last=True)

      # Score on the complete held-out fold in a fixed order: shuffling with
      # drop_last would evaluate each epoch on a different random subset.
      test_dataset = ImgDataset(train_df.iloc[test_indices], train_path,
                                transform=transforms[1], with_label=True)
      test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                                   shuffle=False, drop_last=False)

      _, best_loss, best_metric = train_model(model, loss_func=loss_f,
                                              train_dataloader=train_dataloader,
                                              val_dataloader=test_dataloader,
                                              optimizer=optimizer, n_epochs=n_epochs,
                                              device=device, metric=metric, logs=False)
      model_losses.append(best_loss)
      model_metrics.append(best_metric)
      print(f"End train model {idx+1}, fold {fold_idx}\n")

    losses[idx+1] = np.mean(model_losses)
    metrics[idx+1] = np.mean(model_metrics)
    print(f"Model {idx+1} mean loss {losses[idx+1]}")
    print(f"Model {idx+1} mean metric {metrics[idx+1]}")

  print(f"Mean losses: {losses}")
  print(f"Mean metrics: {metrics}")
  return losses, metrics
  

In [None]:
def augment_and_save(df,max_count, data_path,save_folder, transforms):
  """Write one augmented copy of every image whose class has at most ``max_count`` rows.

  Args:
    df: DataFrame whose first column is the image file name and second column the
      label; it must also expose the label as ``df['class']``.
    max_count: classes with more rows than this are skipped.
    data_path: folder the source images are read from.
    save_folder: folder the augmented images are written to (created if missing).
    transforms: callable used as ``transforms(image=img)['image']``.

  Returns:
    DataFrame with ``df``'s columns holding ``[augmented_name, label]`` for every
    image that was written successfully.
  """
  os.makedirs(save_folder, exist_ok=True)
  # Class sizes are loop-invariant: count once instead of filtering df for every row.
  class_counts = df['class'].value_counts()
  records = []
  unreadable = 0

  for idx, row in df.iterrows():
    img_name = row.iloc[0]
    img_class = row.iloc[1]

    if class_counts.get(img_class, 0) > max_count:
      continue

    image = cv2.imread(os.path.join(data_path, img_name))
    if image is None:
      # cv2.imread signals a missing/corrupt file with None; skip it rather than crash.
      unreadable += 1
      continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    aug_img = transforms(image=image)['image']

    # The row index keeps names unique when the same source image occurs several times.
    aug_name = f"aug_{idx}_" + img_name
    save_path = os.path.join(save_folder, aug_name)

    if cv2.imwrite(save_path, cv2.cvtColor(aug_img, cv2.COLOR_RGB2BGR)):
      records.append([aug_name, img_class])

  if unreadable:
    print(f"Skipped {unreadable} unreadable images")
  print(f"Written {len(records)} augmented images total. Path: {save_folder}")

  # Build the frame once; appending row by row is quadratic.
  return pd.DataFrame(records, columns=df.columns)

# **Dataset**

In [None]:
class ImgDataset(Dataset):
  """Dataset of images listed in a DataFrame and stored in one folder.

  Column 0 of ``img_df`` is the image file name; column 1 is the label and is read
  only when ``with_label`` is true.

  Args:
    img_df: DataFrame describing the images.
    img_path: folder that contains the image files.
    transform: optional callable used as ``transform(image=img)['image']``.
    with_label: if true items are ``(image, int_label)``, otherwise
      ``(image, file_name)``.
  """

  def __init__(self, img_df, img_path, transform=None, with_label=True):
    self.img_df = img_df
    self.img_path = img_path
    self.transform = transform
    self.with_label = with_label

  def __len__(self):
    return len(self.img_df)

  def __getitem__(self, index):
    """Load, colour-convert and transform the image at positional ``index``.

    Raises:
      FileNotFoundError: if the image file cannot be read.
    """
    img_name = self.img_df.iloc[index, 0]
    path = os.path.join(self.img_path, img_name)

    image = cv2.imread(path, cv2.IMREAD_COLOR)
    if image is None:
      # cv2.imread returns None instead of raising; fail with the offending path.
      raise FileNotFoundError(f"Cannot read image: {path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if self.transform:
      image = self.transform(image=image)['image']

    if self.with_label:
      return image, int(self.img_df.iloc[index, 1])
    return image, img_name
  

In [None]:
# Test-set image folder and its CSV listing.
# NOTE(review): the /content/drive prefix is presumably a Colab Google Drive mount — confirm.
# The original train path/CSV stay commented out: TRAIN_PATH and train_df are defined
# further down from the "upsampled" copy of the data.
# NOTE(review): test_df is not referenced again in the visible cells; submission_df
# re-reads the same CSV later.
# TRAIN_PATH = "/content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/train"
TEST_PATH = "/content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/test"
# train_df = pd.read_csv("/content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/train.csv")
test_df = pd.read_csv("/content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/test.csv")
# print(train_df.shape)
# train_df.head()

In [None]:
# train_df['class'].value_counts()

In [None]:
# # UPSAMPLING
# upsampled_df = []
# max_count = 600
# for i in range(n_classes):
#   class_df = train_df[train_df['class'] == i]
#   if class_df.shape[0] < max_count:
#     sum = class_df.shape[0]
#     while sum < max_count:
#       samples = train_df[train_df['class'] == i].sample(150)
#       upsampled_df.append(samples)
#       sum+=samples.shape[0]
# upsampled_df.append(train_df)
# upsampled_df = pd.concat(upsampled_df, axis=0).reset_index().drop('index', axis=1)
# upsampled_df['class'].value_counts()

1.0    1586
3.0     969
4.0     797
6.0     743
5.0     664
7.0     644
0.0     621
2.0     616
Name: class, dtype: int64

In [None]:
# !mkdir augmented
# transforms = A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.20, rotate_limit=30, p=1)
# aug_df = augment_and_save(upsampled_df,750, TRAIN_PATH, "/content/augmented/",transforms)
# !cd augmented && ls | wc -l
# !cd /content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/ && mkdir upsampled
# !cp /content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/train/* /content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/upsampled
# !cp /content/augmented/* /content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/upsampled
# augmented_df = pd.concat([train_df, aug_df])
# augmented_df.shape

Written 3288 augmented images total. Path: /content/augmented/
3288


(8278, 2)

In [None]:
# augmented_df.to_csv('/content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/upsampled.csv', index=False)

In [None]:
# Folder with the training images.
# NOTE(review): per the commented-out cells above, "upsampled" presumably holds the
# original train images plus the augmented copies — confirm.
TRAIN_PATH = "/content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/upsampled"

In [None]:
# Training listing (file name + class) matching the images under TRAIN_PATH.
train_df = pd.read_csv('/content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/upsampled.csv')

In [None]:
# Number of distinct labels; it sizes the output layer of every model factory below.
# nunique() ignores missing labels, so a NaN in the column is not counted as a class.
n_classes = train_df['class'].nunique()
n_classes

8

In [None]:
# Class balance of the loaded training listing.
train_df['class'].value_counts()

1.0    1586
5.0    1178
6.0    1036
3.0     969
0.0     942
2.0     932
7.0     838
4.0     797
Name: class, dtype: int64

In [None]:
# UNDERSAMPLING: cap every class at max_count rows.
max_count = 1000
undersampled_df = []
# Group by the actual label values instead of assuming they are exactly 0..n_classes-1;
# dropna=False keeps rows with a missing label, as the previous loop did.
for _, class_rows in train_df.groupby('class', sort=False, dropna=False):
  if len(class_rows) > max_count:
    # A fixed random_state makes the kept subset independent of how much of the
    # global NumPy RNG earlier cells have consumed.
    class_rows = class_rows.sample(n=max_count, random_state=SEED)
  undersampled_df.append(class_rows)
train_df = pd.concat(undersampled_df, axis=0)
train_df['class'].value_counts()

1.0    1000
5.0    1000
6.0    1000
3.0     969
0.0     942
2.0     932
7.0     838
4.0     797
Name: class, dtype: int64

In [None]:
# Geometric distortions: the group fires with p=0.5 and applies one member.
geometric_transforms = A.OneOf(
    [
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.30, rotate_limit=15),
        A.GridDistortion(num_steps=5),
        A.Perspective(),
    ],
    p=0.5,
)

# Colour perturbations: the group fires with p=0.33 and applies one member.
color_transforms = A.OneOf(
    [
        A.RandomBrightnessContrast(),
        A.ColorJitter(),
        A.Equalize(),
        A.HueSaturationValue(hue_shift_limit=7),
    ],
    p=0.33,
)

# Noise / blur / resolution degradations: the group fires with p=0.33.
noise_transforms = A.OneOf(
    [
        A.GaussNoise(var_limit=(10.0, 50.0)),
        A.GaussianBlur(blur_limit=(1,5)),
        A.Downscale(0.5,0.5),
        A.ISONoise(),
    ],
    p=0.33,
)

# Always-on final steps: resize to 128x128, normalize, convert to a tensor.
before_input_transforms = A.Compose(
    [
        A.Resize(128,128),
        A.Normalize(),
        ToTensorV2(),
    ],
    p=1,
)

# Full training pipeline: random flip, the three optional groups, then the input prep.
train_transforms = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        geometric_transforms,
        color_transforms,
        noise_transforms,
        before_input_transforms,
    ]
)

In [None]:
# Evaluation-time preprocessing: the same resize / normalize / to-tensor steps as
# before_input_transforms, with no random augmentation.
test_transforms = A.Compose([A.Resize(128,128), A.Normalize(), ToTensorV2()])

In [None]:
# Hold out 15% for validation. Stratifying on the label keeps the class proportions
# of the train and validation parts equal, which a plain random split does not guarantee.
train_df2, val_df2 = train_test_split(
    train_df, test_size=0.15, random_state=SEED, stratify=train_df['class'])

In [None]:
#TRAIN DATA
train_dataset = ImgDataset(train_df2, TRAIN_PATH, transform=train_transforms, with_label=True)
# NOTE(review): drop_last presumably avoids a tiny final batch, which the BatchNorm1d
# heads of the *_mod models cannot handle in train mode — confirm.
train_dataloader = DataLoader(train_dataset, batch_size=70, shuffle=True, drop_last=True)

#VAL DATA
val_dataset = ImgDataset(val_df2, TRAIN_PATH, transform=test_transforms, with_label=True)
# Validate on every sample in a fixed order: shuffle + drop_last silently discarded
# up to 69 validation images and made the reported metrics vary between epochs.
val_dataloader = DataLoader(val_dataset, batch_size=70, shuffle=False, drop_last=False)

# **Models**

In [None]:
def get_resnet18(grad=False):
  """Build a ResNet-18 (torchvision DEFAULT weights) with a new ``n_classes``-way head.

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter. The
      replacement ``fc`` layer is created afterwards and is therefore always trainable.
  """
  net = models.resnet18(weights=ResNet18_Weights.DEFAULT)
  net.requires_grad_(grad)
  head_inputs = net.fc.in_features
  net.fc = nn.Linear(head_inputs, n_classes)
  return net

In [None]:
# def get_resnet50(grad=False):
#   model = models.resnet50(weights=ResNet50_Weights.DEFAULT)
#   for param in model.parameters():
#       param.requires_grad = grad

#   model.fc = nn.Linear(model.fc.in_features, n_classes)

#   return model

In [None]:
def get_resnet152(grad=False):
  """Build a ResNet-152 (torchvision DEFAULT weights) with a new ``n_classes``-way head.

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter; the new
      ``fc`` layer is added afterwards and stays trainable.
  """
  net = models.resnet152(weights=ResNet152_Weights.DEFAULT)
  net.requires_grad_(grad)
  head_inputs = net.fc.in_features
  net.fc = nn.Linear(head_inputs, n_classes)
  return net

In [None]:
def get_wide_resnet101(grad=False):
  """Build a Wide-ResNet-101-2 (torchvision DEFAULT weights) with a new ``n_classes``-way head.

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter; the new
      ``fc`` layer is added afterwards and stays trainable.
  """
  net = models.wide_resnet101_2(weights=Wide_ResNet101_2_Weights.DEFAULT)
  net.requires_grad_(grad)
  head_inputs = net.fc.in_features
  net.fc = nn.Linear(head_inputs, n_classes)
  return net

In [None]:
def get_efficientnet_b7(grad=False):
  """Build an EfficientNet-B7 (torchvision DEFAULT weights) with a new ``n_classes``-way head.

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter; the
      replacement ``classifier[1]`` layer is added afterwards and stays trainable.
  """
  net = models.efficientnet_b7(weights=EfficientNet_B7_Weights.DEFAULT)
  net.requires_grad_(grad)
  head_inputs = net.classifier[1].in_features
  net.classifier[1] = nn.Linear(head_inputs, n_classes)
  return net

In [None]:
def get_efficientnet_b7_mod(grad=False):
  """Build an EfficientNet-B7 (torchvision DEFAULT weights) with a small MLP head.

  ``classifier[1]`` is replaced by Dropout(0.4) -> Linear(in, 64) -> ReLU ->
  BatchNorm1d(64) -> Dropout(0.3) -> Linear(64, n_classes).

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter; the new
      head is added afterwards and stays trainable.
  """
  net = models.efficientnet_b7(weights=EfficientNet_B7_Weights.DEFAULT)
  net.requires_grad_(grad)
  head_inputs = net.classifier[1].in_features
  head_layers = [
      nn.Dropout(0.4),
      nn.Linear(head_inputs, 64),
      nn.ReLU(),
      nn.BatchNorm1d(num_features=64),
      nn.Dropout(0.3),
      nn.Linear(64, n_classes),
  ]
  net.classifier[1] = nn.Sequential(*head_layers)
  return net

In [None]:
# def get_densenet_201(grad=False):
#   model = models.densenet201(weights=DenseNet201_Weights.DEFAULT)
#   for param in model.parameters():
#       param.requires_grad = grad
#   model.classifier = nn.Linear(model.classifier.in_features, n_classes)

#   return model

In [None]:
def get_densenet161(grad=False):
  """Build a DenseNet-161 (torchvision DEFAULT weights) with a new ``n_classes``-way head.

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter; the
      replacement ``classifier`` layer is added afterwards and stays trainable.
  """
  net = models.densenet161(weights=DenseNet161_Weights.DEFAULT)
  net.requires_grad_(grad)
  head_inputs = net.classifier.in_features
  net.classifier = nn.Linear(head_inputs, n_classes)
  return net

In [None]:
# IMG SIZE!!!
# NOTE(review): the marker above presumably warns that this ViT expects a specific input
# resolution, while the transforms in this notebook resize to 128x128 — confirm before use.
def get_vit_b16(grad=False):
  """Build a ViT-B/16 (torchvision DEFAULT weights) with a new ``n_classes``-way head.

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter; the
      replacement ``heads[0]`` layer is added afterwards and stays trainable.
  """
  net = models.vit_b_16(weights=ViT_B_16_Weights.DEFAULT)
  net.requires_grad_(grad)
  head_inputs = net.heads[0].in_features
  net.heads[0] = nn.Linear(head_inputs, n_classes)
  return net

In [None]:
# def get_efficientnet_v2_s(grad=False):
#   model = models.efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.DEFAULT)
#   for param in model.parameters():
#       param.requires_grad = grad
#   model.classifier[1] = nn.Linear(model.classifier[1].in_features, n_classes)

#   return model

In [None]:
def get_efficientnet_v2_m(grad=False):
  """Build an EfficientNetV2-M (torchvision DEFAULT weights) with a new ``n_classes``-way head.

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter; the
      replacement ``classifier[1]`` layer is added afterwards and stays trainable.
  """
  net = models.efficientnet_v2_m(weights=EfficientNet_V2_M_Weights.DEFAULT)
  net.requires_grad_(grad)
  head_inputs = net.classifier[1].in_features
  net.classifier[1] = nn.Linear(head_inputs, n_classes)
  return net

In [None]:
def get_efficientnet_v2_m_mod(grad=False):
  """Build an EfficientNetV2-M (torchvision DEFAULT weights) with a small MLP head.

  ``classifier[1]`` is replaced by Dropout(0.4) -> Linear(in, 64) -> ReLU ->
  BatchNorm1d(64) -> Dropout(0.3) -> Linear(64, n_classes).

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter; the new
      head is added afterwards and stays trainable.
  """
  net = models.efficientnet_v2_m(weights=EfficientNet_V2_M_Weights.DEFAULT)
  net.requires_grad_(grad)
  head_inputs = net.classifier[1].in_features
  head_layers = [
      nn.Dropout(0.4),
      nn.Linear(head_inputs, 64),
      nn.ReLU(),
      nn.BatchNorm1d(num_features=64),
      nn.Dropout(0.3),
      nn.Linear(64, n_classes),
  ]
  net.classifier[1] = nn.Sequential(*head_layers)
  return net

In [None]:
def get_efficientnet_v2_s_mod(grad=False):
  """Build an EfficientNetV2-S (torchvision DEFAULT weights) with a small MLP head.

  ``classifier[1]`` is replaced by Dropout(0.4) -> Linear(in, 64) -> ReLU ->
  BatchNorm1d(64) -> Dropout(0.3) -> Linear(64, n_classes).

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter; the new
      head is added afterwards and stays trainable.
  """
  net = models.efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.DEFAULT)
  net.requires_grad_(grad)
  head_inputs = net.classifier[1].in_features
  head_layers = [
      nn.Dropout(0.4),
      nn.Linear(head_inputs, 64),
      nn.ReLU(),
      nn.BatchNorm1d(num_features=64),
      nn.Dropout(0.3),
      nn.Linear(64, n_classes),
  ]
  net.classifier[1] = nn.Sequential(*head_layers)
  return net

In [None]:
def get_swin_s(grad=False):
  """Build a Swin-S (torchvision DEFAULT weights) with a new ``n_classes``-way head.

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter. The
      replacement ``head`` layer is created afterwards and is always trainable.
  """
  model = models.swin_s(weights=models.Swin_S_Weights.DEFAULT)
  for param in model.parameters():
    # Honour the argument: the backbone used to be frozen regardless of `grad`.
    param.requires_grad = grad
  model.head = nn.Linear(model.head.in_features, n_classes)
  return model

In [None]:
# def get_swin_s_mod():
#   model = models.swin_s(weights=models.Swin_S_Weights.DEFAULT)
#   for param in model.parameters():
#     param.requires_grad = True
#   model.head = nn.Sequential(nn.Dropout(0.4),nn.Linear(model.head.in_features, 64),nn.ReLU(),
#                          nn.BatchNorm1d(num_features=64),nn.Dropout(0.3), nn.Linear(64,n_classes))
#   return model

In [None]:
# def get_efficientnet_v2_m_mod(freeze_param_n=0):
#   model = models.efficientnet_v2_m(weights=EfficientNet_V2_M_Weights.DEFAULT)
#   if freeze_param_n > 0:
#     ct = 0
#     for param in model.parameters():
#       ct += 1
#       if ct < freeze_param_n:
#         param.requires_grad = False

#   model.classifier[1] = nn.Sequential(nn.Dropout(0.4),nn.Linear(model.classifier[1].in_features, 64),nn.ReLU(),
#                          nn.BatchNorm1d(num_features=64),nn.Dropout(0.3), nn.Linear(64,n_classes))

#   return model

In [None]:
# def get_efficientnet_v2_s_mod(freeze_param_n=0):
#   model = models.efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.DEFAULT)

  
#   if freeze_param_n > 0:
#     ct = 0
#     for param in model.parameters():
#       ct += 1
#       if ct < freeze_param_n:
#         param.requires_grad = False


#   model.classifier[1] = nn.Sequential(nn.Dropout(0.4),nn.Linear(model.classifier[1].in_features, 64),nn.ReLU(),
#                          nn.BatchNorm1d(num_features=64),nn.Dropout(0.3), nn.Linear(64,n_classes))

#   return model

In [None]:
def get_resnet18_mod(grad=True):
  """Build a ResNet-18 (torchvision DEFAULT weights) with a small MLP head.

  ``fc`` is replaced by Dropout(0.4) -> Linear(in, 64) -> ReLU -> BatchNorm1d(64) ->
  Dropout(0.3) -> Linear(64, n_classes).

  Args:
    grad: value written to ``requires_grad`` of every pretrained parameter. The
      default ``True`` keeps the whole network trainable, as before; accepting the
      argument lets this factory be used wherever a ``factory(grad)`` callable is
      expected (e.g. ``cross_validation``). The new head is always trainable.
  """
  model = models.resnet18(weights=ResNet18_Weights.DEFAULT)

  for param in model.parameters():
      param.requires_grad = grad

  model.fc = nn.Sequential(nn.Dropout(0.4),nn.Linear(model.fc.in_features, 64),nn.ReLU(),
                         nn.BatchNorm1d(num_features=64),nn.Dropout(0.3), nn.Linear(64,n_classes))

  return model

In [None]:
# def get_resnet50_mod():
#   model = models.resnet50(weights=ResNet50_Weights.DEFAULT)
#   for param in model.parameters():
#       param.requires_grad = True

#   model.fc = nn.Sequential(nn.Dropout(0.4),nn.Linear(model.fc.in_features, 64),nn.ReLU(),
#                          nn.BatchNorm1d(num_features=64),nn.Dropout(0.3), nn.Linear(64,n_classes))
#   return model

# **CROSS-VALIDATION**

In [None]:
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [None]:
# 5-fold comparison of three EfficientNetV2 variants, fully trainable (grad=True),
# 10 epochs per fold, scored with recall_score.
cross_validation(
    [get_efficientnet_v2_s_mod, get_efficientnet_v2_m, get_efficientnet_v2_m_mod],
    train_df,
    n_splits=5,
    batch_size=50,
    grad=True,
    transforms=[train_transforms, test_transforms],
    train_path=TRAIN_PATH,
    n_epochs=10,
    metric=recall_score,
    device=device,
)

In [None]:
# Train the model used for the submission: EfficientNet-B7 with the MLP head, fully
# trainable, Adam (lr=1e-3) with cross-entropy loss for 10 epochs.
model = get_efficientnet_b7_mod(True)
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1.0e-3)

best_model, best_loss, best_metric = train_model(
    model,
    loss_func=loss,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    optimizer=optimizer,
    n_epochs=10,
    device=device,
    metric=recall_score,
)

# **Predict and Submission** 

In [None]:
# Load the test listing; its 'class' column is overwritten with predictions below.
# NOTE(review): this is the same CSV that was already read into test_df earlier —
# confirm whether both frames are needed.
submission_df = pd.read_csv("/content/drive/MyDrive/hacksAI2022/Datasets/KarachaevoCherkessk/test.csv")
submission_df.head()

Unnamed: 0,ID_img,class
0,0.jpg,0
1,1.jpg,0
2,2.jpg,0
3,3.jpg,0
4,4.jpg,0


In [None]:
# Unlabelled test dataset: with_label=False makes every item (image, file name).
# The loader keeps the DataFrame order (no shuffling) and yields 20 images per batch.
test_dataset = ImgDataset(submission_df, TEST_PATH, transform=test_transforms, with_label=False)
test_dataloader = DataLoader(test_dataset, batch_size=20)

In [None]:
# Predict a class index for every test image with the model returned by train_model.
pred_labels, test_img_names = predict(best_model, test_dataloader, device)

100%|██████████| 107/107 [00:10<00:00, 10.05it/s]


In [None]:
# Write the predictions back by file name in a single vectorized pass; the previous
# loop re-scanned the whole frame once per test image.
label_by_name = dict(zip(test_img_names, pred_labels))
submission_df['class'] = (
    submission_df['ID_img']
    .map(label_by_name)
    # Rows without a prediction keep their existing value, as before.
    .fillna(submission_df['class'])
    .astype(int)
)

In [None]:
# Preview the filled-in predictions.
submission_df.head()

Unnamed: 0,ID_img,class
0,0.jpg,5
1,1.jpg,5
2,2.jpg,1
3,3.jpg,1
4,4.jpg,6


In [None]:
# Distribution of the predicted classes over the test set.
submission_df['class'].value_counts()

1    665
3    416
4    351
5    229
0    138
2    136
6    133
7     70
Name: class, dtype: int64

In [None]:
# Save the submission file; index=False leaves the DataFrame index out of the CSV.
submission_df.to_csv('submit.csv', index=False)