Kaggle notebook link: https://www.kaggle.com/longhainguyen/nh-m-1-chuy-n-c-ng-ngh-submission

In [None]:
import glob
import os.path as osp

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import pickle

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset

from torchvision import datasets, models
from torchvision.utils import make_grid

import os
from PIL import Image
from IPython.display import display
from tqdm import tqdm
tqdm.pandas()

import warnings
warnings.filterwarnings('ignore')

In [None]:
class Config:
    """Notebook-wide settings, read as plain class attributes (never instantiated)."""

    # --- data / batching ---
    # 12 matches the number of rows in the notebook's label lookup table.
    num_classes = 12
    # Side length used when resizing images to a square input.
    img_size = 224
    # Batch size handed to the DataLoader.
    batch_size = 64

    # --- runtime ---
    # Use the GPU when one is visible, otherwise fall back to the CPU.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # --- training-time settings ---
    # NOTE(review): none of the values below are read by the inference cells
    # in this notebook; presumably carried over from the training notebook.
    min_lr = 10 ** -12
    max_lr = 10
    pretrained = False
    criterion = nn.CrossEntropyLoss()
    epochs = 30

In [None]:
 label_dict = ({
    'encoded_label': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
    'labels': ['complex', 'frog_eye_leaf_spot', 'frog_eye_leaf_spot complex', \
                'healthy', 'powdery_mildew', 'powdery_mildew complex', 'rust', \
               'rust complex', 'rust frog_eye_leaf_spot', 'scab', \
               'scab frog_eye_leaf_spot', 'scab frog_eye_leaf_spot complex']
})
df_labels_idx = pd.DataFrame(label_dict, index=label_dict['encoded_label'])
display(df_labels_idx)

In [None]:
from sklearn.model_selection import train_test_split

def make_datapath_list(rootpath="/kaggle/input/plant-pathology-2021-fgvc8/test_images/"):
    """
    Collect the paths of all ``.jpg`` files directly inside a directory.

    Parameters
    ----------
    rootpath : str, optional
        Directory to search (non-recursive). Defaults to the competition's
        test image folder, so calling with no arguments behaves as before.

    Returns
    -------
    list of str
        Matching file paths in sorted order. ``glob`` itself makes no
        ordering guarantee, so sorting keeps runs reproducible. The list is
        empty when the directory has no ``.jpg`` files or does not exist.
    """
    # Let osp.join insert the separator instead of concatenating strings,
    # which previously produced a pattern containing "//".
    target_path = osp.join(rootpath, "*.jpg")
    return sorted(glob.glob(target_path))

In [None]:
# Gather the test image paths once; the dataset cell below is built from this list.
test_list = make_datapath_list()
# Quick sanity check that the glob actually matched files.
print(f'The length of testing set: {len(test_list)}')

In [None]:
import albumentations as A
from albumentations import Compose
from albumentations.pytorch import ToTensorV2
import cv2

In [None]:
# Deterministic test-time preprocessing (no augmentation):
#   1. resize to a square of Config.img_size pixels,
#   2. normalise each RGB channel with the usual ImageNet mean/std,
#   3. convert the array to a torch tensor.
transform = Compose([
    A.Resize(height=Config.img_size, width=Config.img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [None]:
class PlantDataset(Dataset):
    """
    Dataset that serves unlabeled images from a list of file paths.

    Attributes
    ----------
    df_labels_idx : DataFrame
        The notebook-level label lookup table, captured from the global of
        the same name at construction time. Not used by the dataset itself.
    file_list : list
        A list containing the paths to the images
    transform : callable or None
        Preprocessing transform, called as ``transform(image=img)``
        (albumentations convention). When ``None`` the raw RGB array is
        returned, wrapped in the same ``{'image': ...}`` dict shape.
    """
    def __init__(self, file_list, transform=None):
        # NOTE: relies on the module-level ``df_labels_idx`` being defined
        # before the dataset is instantiated.
        self.df_labels_idx = df_labels_idx
        self.file_list = file_list
        self.transform = transform

    def __len__(self):
        """
        Returns the number of images.
        """
        return len(self.file_list)

    @staticmethod
    def _image_name(img_path):
        """
        Return the file name component of ``img_path``.

        Uses the real basename rather than a fixed-width slice, so names of
        any length are handled.
        """
        return osp.basename(img_path)

    def __getitem__(self, index):
        """
        Load and preprocess the image at position ``index``.

        Returns
        -------
        tuple
            ``(img_transformed, label, image_name)`` where ``img_transformed``
            is the transform output (a dict holding the image under the
            ``'image'`` key), ``label`` is always ``-1`` (no ground truth is
            attached), and ``image_name`` is the file name.

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read the file (missing or not a decodable image).
        """
        img_path = self.file_list[index]

        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # OpenCV decodes to BGR; convert to RGB before preprocessing.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is None:
            img_transformed = {'image': img}
        else:
            img_transformed = self.transform(image=img)

        # Placeholder label: this dataset carries no ground truth.
        label = -1

        return img_transformed, label, self._image_name(img_path)

In [None]:
test_dataset = PlantDataset(test_list, transform=transform)

index = 0

# Fetch the sample once: every __getitem__ call reads and decodes the image
# from disk, so indexing separately for each print repeats that work.
sample = test_dataset[index]

print("\n【test dataset】")
print(f"img num : {len(test_dataset)}")
# NOTE: sample[0] is the transform's output dict, not a tensor, so the image
# shape would be read from sample[0]['image'] rather than sample[0].
print(f"label : {sample[1]}")
print(f"image name : {sample[2]}")

In [None]:
# Batches are served in file-list order (no shuffling), so each prediction
# stays aligned with the image name returned alongside it.
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=Config.batch_size,
    shuffle=False,
)

In [None]:
# Pull just the first batch for a visual sanity check.
image_data = next(iter(test_dataloader))

plt.figure(figsize=(20, 20))

# The batch was normalised by the preprocessing transform, so its values fall
# outside [0, 1] and imshow would clip them into distorted colours. Undo the
# normalisation first. These statistics must match the ones used in the
# transform's Normalize step.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
preview_batch = (image_data[0]['image'] * norm_std + norm_mean).clamp(0, 1)

im = make_grid(preview_batch, nrow=8)
# make_grid returns channels-first; imshow expects channels-last.
plt.imshow(np.transpose(im.numpy(), (1, 2, 0)))

In [None]:
# Location of the pickled, already-trained model object.
# NOTE(review): judging by the path this is a DenseNet checkpoint — confirm.
Pkl_Filename = '../input/nh-m-1-chuy-n-c-ng-ngh-densenet/2 FCs, 0.0001 Lr, 30 Epochs.pkl'
# NOTE(security): unpickling can execute arbitrary code, so only load files
# from a source you trust. Never point this at an untrusted upload.
with open(Pkl_Filename, 'rb') as file:  
    model = pickle.load(file)

In [None]:
class PlantPredictor():
    """
    Runs a trained model over batches of images and collects label names.

    Attributes
    ----------
    model : torch.nn.Module
        Model whose output has one score per class along axis 1.
    df_labels_idx : DataFrame
        DataFrame that associates INDEX with a label name. Row position must
        equal the class index and it must have a ``labels`` column.
    df_submit : DataFrame
        Filled by :meth:`inference`; columns ``image`` and ``labels``.
    """

    def __init__(self, model, df_labels_idx):
        self.model = model
        self.df_labels_idx = df_labels_idx
        self.df_submit = pd.DataFrame()

    def __predict_max(self, out):
        """
        Get the label rows with the highest score for each sample.

        Parameters
        ----------
        out : torch.Tensor
            Model output of shape ``(batch, num_classes)``.

        Returns
        -------
        DataFrame
            One row of ``df_labels_idx`` per sample, selected by position
            using the arg-max class index.
        """
        # .cpu() makes this safe for outputs that still live on the GPU.
        maxid = np.argmax(out.detach().cpu().numpy(), axis=1)
        return self.df_labels_idx.iloc[maxid]

    def _predict_batches(self, batches, device):
        """
        Predict labels for every batch and return them as one DataFrame.

        Parameters
        ----------
        batches : iterable
            Yields ``(image_dict, label, image_names)`` items, where
            ``image_dict['image']`` is the input tensor and ``image_names``
            is a sequence of file names, one per sample.
        device : torch.device
            Device on which the forward pass runs.

        Returns
        -------
        DataFrame
            Columns ``image`` and ``labels``; empty when ``batches`` is empty.
        """
        # Move the model once, and switch off dropout / batch-norm updates:
        # inference must not run in training mode.
        self.model.to(device)
        self.model.eval()

        df_pred_list = []
        # No gradients are needed for prediction; skipping them saves memory.
        with torch.no_grad():
            for data in batches:
                inputs = data[0]['image'].to(device)
                out = self.model(inputs)
                df_pred = self.__predict_max(out).reset_index(drop=True)
                df_pred["image"] = list(data[2])
                df_pred_list.append(df_pred)

        # pd.concat raises on an empty list, so handle "no batches" explicitly.
        if not df_pred_list:
            return pd.DataFrame(columns=["image", "labels"])

        df_all = pd.concat(df_pred_list, axis=0)
        return df_all[["image", "labels"]].reset_index(drop=True)

    def inference(self, dataloader=None):
        """
        Predict a label for every image and store the result in ``df_submit``.

        Parameters
        ----------
        dataloader : iterable with ``len``, optional
            Source of batches. Defaults to the notebook-level
            ``test_dataloader``, so the no-argument call behaves as before.
        """
        if dataloader is None:
            dataloader = test_dataloader

        # The device is chosen once and never reassigned, so every batch
        # (not only the first) runs on the GPU when one is available.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        batches = tqdm(dataloader, total=len(dataloader))
        self.df_submit = self._predict_batches(batches, device)

In [None]:
# Run the model over the whole test set; results land in predictor.df_submit.
predictor = PlantPredictor(model=model, df_labels_idx=df_labels_idx)
predictor.inference()

# Work on a copy so later edits to the submission frame leave the predictor's
# own result untouched.
df_submit = predictor.df_submit.copy()

# Write the submission file without the row index column.
df_submit.to_csv(path_or_buf='submission.csv', index=False)

# Trailing expression: displays the frame as the notebook cell's output.
df_submit