In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.optim import Adam, SGD
from torchvision import datasets, models, transforms
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import cv2
import timm

import time
import os
import copy
from tqdm import tqdm
import random
from madgrad import MADGRAD
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

In [17]:
device = torch.device('cuda')

In [19]:
train = pd.read_csv("train.csv")

In [22]:
# image path

In [20]:
train

Unnamed: 0,image,label
0,images/0.jpg,maclura_pomifera
1,images/1.jpg,maclura_pomifera
2,images/2.jpg,maclura_pomifera
3,images/3.jpg,maclura_pomifera
4,images/4.jpg,maclura_pomifera
...,...,...
18348,images/18348.jpg,aesculus_glabra
18349,images/18349.jpg,liquidambar_styraciflua
18350,images/18350.jpg,cedrus_libani
18351,images/18351.jpg,prunus_pensylvanica


In [23]:
species_name_list = sorted(set(train['label']))  # get label types set


In [25]:
species_name_list = sorted(set(train['label']))  # get label types set
num_class = len(species_name_list)
species_to_num = dict(zip(species_name_list, range(num_class)))  # label to num 
num_to_species = {value : key for key, value in species_to_num.items()} # reverse

In [69]:
class CFG:
    model_dir = "C:\\Users\\87985\\Downloads\\Compressed\\classify-leaves\\model\\"
    debug=False
    print_freq=100
    num_workers=4
    model_name='inception_resnet_v2'
    size=299
    epochs=50 # not to exceed 9h
    batch_size=32
    learning_rate=1e-4
    weight_decay=1e-9
    scheduler='ReduceLROnPlateau' # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    factor=0.2 # ReduceLROnPlateau
    patience=4 # ReduceLROnPlateau
    eps=1e-6 # ReduceLROnPlateau
    T_max=20 # CosineAnnealingLR
    #T_0=4 # CosineAnnealingWarmRestarts
    min_lr=1e-6 # ['CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    seed=42 #[42,2021]
    n_fold=5
    train=True

In [33]:
def init_logger(log_file="./train.log"):
    from logging import  getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__) # 
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

In [34]:
LOGGER = init_logger()

def seed_torch(seed=42):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_torch(seed=CFG.seed)

In [36]:
folds = train.copy()
Fold = StratifiedKFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)
for n, (train_index, val_index) in enumerate(Fold.split(folds, folds['label'])):
    folds.loc[val_index, 'fold'] = int(n)
folds['fold'] = folds['fold'].astype(int)
print(folds.groupby(['fold']).size())


# StratifiedKFold 和 KFold的区别在于  需要输入label,保证分层的不同类别占比与原始样本保持一致

fold
0    3671
1    3671
2    3671
3    3670
4    3670
dtype: int64


In [37]:
folds

Unnamed: 0,image,label,fold
0,images/0.jpg,maclura_pomifera,3
1,images/1.jpg,maclura_pomifera,0
2,images/2.jpg,maclura_pomifera,2
3,images/3.jpg,maclura_pomifera,2
4,images/4.jpg,maclura_pomifera,0
...,...,...,...
18348,images/18348.jpg,aesculus_glabra,0
18349,images/18349.jpg,liquidambar_styraciflua,3
18350,images/18350.jpg,cedrus_libani,4
18351,images/18351.jpg,prunus_pensylvanica,1


In [40]:
image = cv2.imread("images/0.jpg")

In [51]:
# ====================================================
# Data transforms
# ====================================================
# I applied slightly different transforms for different groups of models
# resnet50d, efficientnet_b3: flip only
# resnext50_32x4d, resnest50d tf_efficientnet_b4_ns, resnest200e, mixnet_s: add ColorJitter
# inception_resnet_v2, vit_base_patch16_224, tf_efficientnet_b3_ns: use RandomResizedCrop instead of Resize, and use [0.5, 0.5, 0.5] as mean and std
data_transforms = {
    # 融合各个transforms 
    'train': transforms.Compose([  
        transforms.ToTensor(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0),
        transforms.RandomResizedCrop([CFG.size, CFG.size]),
        #transforms.Resize([CFG.size, CFG.size]),
        transforms.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5],

        ),
        #transforms.RandomErasing(),
    ]),
    'valid': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize([CFG.size, CFG.size]),
        transforms.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5],
        ),
    ]),
}

In [None]:
# 对图像做不同的变化

In [52]:
# ====================================================
# Dataset
# ====================================================
# note: here I made a small mistake. I did not notice the cv2.imread generate BGR image until the last week when I went through other's codes, so most of my models were trained using BGR image
# I think using RGB image may make the training converge faster when we start from imagenet pre-trained model 
class TrainDataset(Dataset):
    def __init__(self, df, species_to_num, transform=None):
        super().__init__()
        self.df = df
        self.species_to_num = species_to_num
        self.file_paths = get_image_file_path(df['image'].values)
        self.labels = df['label'].values
        self.transform = transform
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        image = cv2.imread(file_path)
#         image = Image.open(file_path)
        if self.transform:
            image = self.transform(image)
        label = self.labels[idx]
        label = self.species_to_num[label]
        return image, label
    

class TestDataset(Dataset):
    def __init__(self, df, transform=None):
        super().__init__()
        self.df = df
        self.file_paths = get_image_file_path(df['image'].values)
        self.transform = transform
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        image = cv2.imread(file_path)
#         image = Image.open(file_path)
        if self.transform:
            image = self.transform(image)
        return image

In [53]:
def get_scheduler(optimizer):
    if CFG.scheduler=='ReduceLROnPlateau':
        scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
    elif CFG.scheduler=='CosineAnnealingLR':
        scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
    elif CFG.scheduler=='CosineAnnealingWarmRestarts':
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
    return scheduler

In [64]:
# ====================================================
# training 
# ====================================================
def train_model(fold):
    since = time.time()
    model_path = './models/' + CFG.model_name + '_fold' + str(fold) + '_best.pth'
    LOGGER.info(f"============================== fold: {fold} result ==============================")
    # ====================================================
    # Data Loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index
    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    valid_labels = valid_folds['label'].values
    train_dataset = TrainDataset(train_folds, species_to_num, transform=data_transforms['train'])
    valid_dataset = TrainDataset(valid_folds, species_to_num, transform=data_transforms['valid'])

    train_loader = DataLoader(train_dataset, batch_size=CFG.batch_size, 
                              shuffle=True, num_workers=CFG.num_workers, 
                              pin_memory=True, drop_last=True,)
    valid_loader = DataLoader(valid_dataset, batch_size=CFG.batch_size, 
                              shuffle=False, num_workers=CFG.num_workers,
                              pin_memory=True, drop_last=False)
    
    # ====================================================
    # model, optimizer, scheduler & loss function
    # ====================================================   
    model = timm.create_model(CFG.model_name, pretrained=True, num_classes=num_class)
    # use the following model function if you want to fine-tune the last layer only, which is not what I did here. But I explored it during the competition.
    #model = leave_classifier(CFG.model_name, num_class)
    model = model.to(device)
    
    #optimizer = Adam(model.parameters(), lr=CFG.learning_rate, weight_decay=CFG.weight_decay, amsgrad=False)
    optimizer = MADGRAD(model.parameters(), lr=CFG.learning_rate, weight_decay=CFG.weight_decay)
    scheduler = get_scheduler(optimizer)
    
    criterion = nn.CrossEntropyLoss()
    # ====================================================
    # loop
    # ====================================================  
    
    best_acc = 0.95 # do not save the model if the acc is less than 0.95

    for epoch in range(CFG.epochs):
        
        # ---------- Training ----------
        # Make sure the model is in train mode before training.
        model.train()
        # These are used to record information in training.
        train_loss = []
        train_accs = []

        global_step = 0
        # Iterate the training set by batches.
        for step, (imgs, labels) in enumerate(train_loader):
            # A batch consists of image data and corresponding labels.
            imgs = imgs.to(device)
            labels = labels.to(device)
            # Forward the data. (Make sure data and model are on the same device.)
            logits = model(imgs)
            # Calculate the cross-entropy loss.
            # We don't need to apply softmax before computing cross-entropy as it is done automatically.
            loss = criterion(logits, labels)

            # Gradients stored in the parameters in the previous step should be cleared out first.
            optimizer.zero_grad()
            # Compute the gradients for parameters.
            loss.backward()
            # Update the parameters with computed gradients.
            optimizer.step()

            # Compute the accuracy for current batch.
            acc = (logits.argmax(dim=-1) == labels).float().mean()

            # Record the loss and accuracy.
            train_loss.append(loss.item())
            train_accs.append(acc)

        # The average loss and accuracy of the training set is the average of the recorded values.
        train_loss = sum(train_loss) / len(train_loss)
        train_acc = sum(train_accs) / len(train_accs)


        # ---------- Validation ----------
        # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
        model.eval()
        # These are used to record information in validation.
        valid_loss = []
        valid_accs = []

        # Iterate the validation set by batches.
        for step, (imgs, labels) in enumerate(valid_loader):
            # We don't need gradient in validation.
            # Using torch.no_grad() accelerates the forward process.
            with torch.no_grad():
                logits = model(imgs.to(device))

            # We can still compute the loss (but not the gradient).
            loss = criterion(logits, labels.to(device))

            # Compute the accuracy for current batch.
            acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

            # Record the loss and accuracy.
            valid_loss.append(loss.item())
            valid_accs.append(acc)

        # The average loss and accuracy for entire validation set is the average of the recorded values.
        valid_loss = sum(valid_loss) / len(valid_loss)
        valid_acc = sum(valid_accs) / len(valid_accs)
        
        
        # learning rate update
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(valid_acc)
        else:
            scheduler.step()
        
        elapsed = time.time() - since
        
        # Print the information.
        LOGGER.info(f'Epoch {epoch+1}/{CFG.epochs}: train_loss = {train_loss:.4f}, valid_loss = {valid_loss:.4f}, train_acc = {train_acc:.4f}, valid_acc = {valid_acc:.4f}, time: {elapsed:.0f}s')
        #print(f'learning_rate = {scheduler.optimizer.param_groups[0]['lr']}')
        # if the model improves, save a checkpoint at this epoch
        
        if valid_acc > best_acc:
            best_acc = valid_acc
            torch.save(model.state_dict(), model_path)
            LOGGER.info(f'Save Best Score: {best_acc:.4f} Model to {model_path}')
            #print('saving model with acc {:.3f}'.format(best_acc))

In [72]:
def get_image_file_path(image_path):
    #INPUT_DIR = '../input/classify-leaves/'
    return image_path

In [73]:
def cv_prob(fold):
    test_dataset = TestDataset(folds[folds['fold']==fold], transform=data_transforms['valid'])
    test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
    model_path = CFG.model_dir + CFG.model_name + '_fold' + str(fold) + '_best.pth'
    model = timm.create_model(CFG.model_name, pretrained=False, num_classes=num_class)
    model = model.to(device)
 #   model.load_state_dict(torch.load(model_path))
    model.eval()
    
    prob_list = []

    # Iterate the testing set by batches.
    for batch in tqdm(test_loader):
        imgs = batch
        with torch.no_grad():
            logits = model(imgs.to(device))
            prob_list.append(logits.softmax(1))
    probs_np = torch.cat(prob_list, axis=0).to('cpu').numpy()
    return probs_np

In [75]:
test_dataset = TestDataset(folds[folds['fold']==fold], transform=data_transforms['valid'])
test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
model_path = CFG.model_dir + CFG.model_name + '_fold' + str(fold) + '_best.pth'
model = timm.create_model(CFG.model_name, pretrained=False, num_classes=num_class)
model = model.to(device)


In [78]:
model = timm.create_model(CFG.model_name, pretrained=False, num_classes=num_class)


In [76]:
model.load_state_dict(torch.load(model_path))
model.eval()

prob_list = []

# Iterate the testing set by batches.
for batch in tqdm(test_loader):
    imgs = batch
    with torch.no_grad():
        logits = model(imgs.to(device))
        prob_list.append(logits.softmax(1))
probs_np = torch.cat(prob_list, axis=0).to('cpu').numpy()

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\87985\\Downloads\\Compressed\\classify-leaves\\model\\inception_resnet_v2_fold0_best.pth'

In [74]:
for fold in range(CFG.n_fold):
    probs_np = cv_prob(fold)
    folds.loc[(folds['fold']==fold),CFG.model_name] = np.argmax(probs_np,axis=1)
    if(fold==0):
        probs_np_copy = probs_np
    else:
        probs_np_copy = np.concatenate((probs_np_copy, probs_np), axis=0)
    print(probs_np_copy.shape)

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\87985\\Downloads\\Compressed\\classify-leaves\\model\\inception_resnet_v2_fold0_best.pth'

In [68]:
pwd

'C:\\Users\\87985\\Downloads\\Compressed\\classify-leaves'