# Train image models Notebook

- Notebook modified from https://www.kaggle.com/code/markwijkhuizen/planttraits2024-eda-training-pub.
- Training only, EDA part not included.
- Image model only, tabular data not used.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import imageio.v3 as imageio
import albumentations as A

from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from torch import nn
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler

import torch
import timm
import glob
import torchmetrics
import time
import psutil
import os

tqdm.pandas()

In [2]:
class Config():
    """Static hyper-parameters and run switches for this notebook.

    All values are class attributes; a single shared instance is exposed as
    ``CONFIG`` and later cells attach derived values (sample/step counts) to it.
    """

    # Square input resolution fed to the backbone.
    IMAGE_SIZE = 196  # 256 # 384
    # timm model identifier; previously tried alternatives are kept for reference:
    # 'convnext_base.clip_laion2b_augreg_ft_in12k_in1k'
    # 'caformer_b36.sail_in22k_ft_in1k_384'
    # 'swin_large_patch4_window12_384.ms_in22k_ft_in1k'
    BACKBONE = 'eva_large_patch14_196.in22k_ft_in22k_in1k'
    # Regression targets read from the training frame.
    TARGET_COLUMNS = ['X4_mean', 'X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']
    N_TARGETS = len(TARGET_COLUMNS)
    # Run switches.
    TRAIN_MODEL = True
    BUILD_PKL_DATASET = False
    USE_SMALL_DATASET = False
    USE_MODIFIED_TRAIN = False
    EXTRACT_FEATURES = False
    # Optimisation settings.
    BATCH_SIZE = 10
    LR_MAX = 1e-4
    WEIGHT_DECAY = 0.01
    N_EPOCHS = 6
    # True only when Kaggle reports an interactive session. Using .get() keeps
    # the notebook importable when the variable is absent (e.g. off Kaggle),
    # in which case the run is treated as non-interactive.
    IS_INTERACTIVE = os.environ.get('KAGGLE_KERNEL_RUN_TYPE') == 'Interactive'


CONFIG = Config()

# Load the dataset and clean it a bit by removing target outliers

In [3]:
def _read_file_bytes(file_path):
    """Return the raw bytes of *file_path*, closing the file handle promptly."""
    with open(file_path, 'rb') as handle:
        return handle.read()


if CONFIG.BUILD_PKL_DATASET:
    # Build the pickled frames from the raw competition files: each row gets the
    # path of its JPEG and the undecoded JPEG bytes.
    train = pd.read_csv('/kaggle/input/planttraits2024/train.csv')
    train['file_path'] = train['id'].apply(lambda s: f'/kaggle/input/planttraits2024/train_images/{s}.jpeg')
    train['jpeg_bytes'] = train['file_path'].progress_apply(_read_file_bytes)
    train.to_pickle('train.pkl')
    test = pd.read_csv('/kaggle/input/planttraits2024/test.csv')
    test['file_path'] = test['id'].apply(lambda s: f'/kaggle/input/planttraits2024/test_images/{s}.jpeg')
    test['jpeg_bytes'] = test['file_path'].progress_apply(_read_file_bytes)
    test.to_pickle('test.pkl')
else:
    # Reuse previously pickled frames; the train variant depends on the switches.
    if CONFIG.USE_SMALL_DATASET:
        train = pd.read_pickle('/kaggle/input/dataset-with-validation/small_train.pkl')
    elif CONFIG.USE_MODIFIED_TRAIN:
        train = pd.read_pickle('/kaggle/input/dataset-with-validation/train_set.pkl')
    else:
        train = pd.read_pickle('/kaggle/input/baseline-model/train.pkl')
    test = pd.read_pickle('/kaggle/input/baseline-model/test.pkl')

# Drop target outliers column by column: keep rows between the 0.5% and 98.5%
# quantiles. The quantiles are recomputed on the already-filtered frame for
# every subsequent column.
for column in CONFIG.TARGET_COLUMNS:
    lower_quantile = train[column].quantile(0.005)
    upper_quantile = train[column].quantile(0.985)
    train = train[(train[column] >= lower_quantile) & (train[column] <= upper_quantile)]

# Derived counts used by the LR scheduler and the progress output.
CONFIG.N_TRAIN_SAMPLES = len(train)
CONFIG.N_STEPS_PER_EPOCH = (CONFIG.N_TRAIN_SAMPLES // CONFIG.BATCH_SIZE)
# NOTE(review): the extra +1 is presumably headroom so the scheduler is never
# stepped past its total — confirm.
CONFIG.N_STEPS = CONFIG.N_STEPS_PER_EPOCH * CONFIG.N_EPOCHS + 1

print('N_TRAIN_SAMPLES:', len(train), 'N_TEST_SAMPLES:', len(test))

N_TRAIN_SAMPLES: 49168 N_TEST_SAMPLES: 6545


In [4]:
# Targets that are modelled in log10 space (currently every target column).
LOG_FEATURES = [
    'X4_mean',
    'X11_mean',
    'X18_mean',
    'X50_mean',
    'X26_mean',
    'X3112_mean',
]

# One float32 column per target, in CONFIG.TARGET_COLUMNS order.
y_train = np.zeros_like(train[CONFIG.TARGET_COLUMNS], dtype=np.float32)
for column_position, column_name in enumerate(CONFIG.TARGET_COLUMNS):
    raw_values = train[column_name].values
    # Log-scaled targets are stored as log10(value); the rest are copied as-is.
    y_train[:, column_position] = (
        np.log10(raw_values) if column_name in LOG_FEATURES else raw_values
    )

# Standardise each target column; SCALER is kept so predictions can be
# mapped back with inverse_transform later.
SCALER = StandardScaler()
y_train = SCALER.fit_transform(y_train)

# Create the transforms for the train and the test datasets and make the dataloaders

In [5]:
# Per-channel normalisation statistics applied after pixels are scaled to [0, 1].
MEAN = np.array([0.485, 0.456, 0.406])
STD = np.array([0.229, 0.224, 0.225])


def _to_normalized_tensor_steps():
    """Return fresh ToFloat -> Normalize -> ToTensorV2 steps shared by both pipelines."""
    return [
        A.ToFloat(),
        A.Normalize(mean=MEAN, std=STD, max_pixel_value=1),
        ToTensorV2(),
    ]


# Training pipeline: random flip / crop / colour / JPEG-quality augmentation,
# always resized to the model input size before normalisation.
TRAIN_TRANSFORMS = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.RandomSizedCrop(
            [448, 512],
            CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE, w2h_ratio=1.0, p=0.75),
        A.Resize(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),
        A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.25),
        A.ImageCompression(quality_lower=85, quality_upper=100, p=0.25),
    ]
    + _to_normalized_tensor_steps()
)

# Test pipeline: deterministic resize followed by the same normalisation.
TEST_TRANSFORMS = A.Compose(
    [A.Resize(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE)]
    + _to_normalized_tensor_steps()
)

class Dataset(Dataset):
    """Map-style dataset that decodes in-memory JPEG bytes on access.

    NOTE(review): the class reuses the name of its base class, so after this
    definition runs the name ``Dataset`` refers to this subclass rather than
    the imported torch base class. The name is kept because other cells
    instantiate the class under it.

    Args:
        X_jpeg_bytes: indexable collection of encoded image bytes.
        y: indexable collection aligned with ``X_jpeg_bytes``; returned
            unchanged as the second element of each sample.
        transforms: optional callable invoked as ``transforms(image=...)`` and
            expected to return a mapping with an ``'image'`` entry. When
            ``None`` the decoded image is returned untransformed.
    """

    def __init__(self, X_jpeg_bytes, y, transforms=None):
        self.X_jpeg_bytes = X_jpeg_bytes
        self.y = y
        self.transforms = transforms

    def __len__(self):
        """Return the number of samples."""
        return len(self.X_jpeg_bytes)

    def __getitem__(self, index):
        """Decode sample *index* and return ``(image, y[index])``."""
        image = imageio.imread(self.X_jpeg_bytes[index])
        # transforms defaults to None, so guard the call instead of failing
        # with "NoneType is not callable".
        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        return image, self.y[index]

# Training samples: JPEG bytes paired with the standardised targets,
# augmented with the training pipeline.
train_dataset = Dataset(
    X_jpeg_bytes=train['jpeg_bytes'].values,
    y=y_train,
    transforms=TRAIN_TRANSFORMS,
)

# Test samples carry the row id in place of a target so predictions can be
# written out per id.
test_dataset = Dataset(
    X_jpeg_bytes=test['jpeg_bytes'].values,
    y=test['id'].values,
    transforms=TEST_TRANSFORMS,
)

# Shuffled loader over full batches only (the trailing partial batch is
# dropped), with one worker per CPU reported by psutil.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True,
    num_workers=psutil.cpu_count(),
    drop_last=True,
)

# Create a model to make the predictions based on a pretrained one

In [6]:
class Model(nn.Module):
    """Pretrained timm backbone whose head outputs one value per target."""

    def __init__(self):
        super().__init__()
        # timm builds the head with CONFIG.N_TARGETS outputs.
        backbone_options = {
            'num_classes': CONFIG.N_TARGETS,
            'pretrained': True,
        }
        self.backbone = timm.create_model(CONFIG.BACKBONE, **backbone_options)

    def forward(self, inputs):
        """Return the backbone output for *inputs*."""
        predictions = self.backbone(inputs)
        return predictions

# A model where we freeze the first couple of layers

In [7]:
class FreezeModel(nn.Module):
    """Pretrained headless backbone with early stages frozen plus a linear head.

    Args:
        freeze_till_stage: parameters belonging to ``layers.<n>`` with
            ``n < freeze_till_stage`` get ``requires_grad = False``.

    NOTE(review): only parameters named ``layers.<n>...`` are recognised.
    Backbones that name their stages differently (likely the case for
    ViT/EVA-style models — confirm) stay fully trainable; a warning is
    emitted in that case so it does not go unnoticed.
    """

    def __init__(self, freeze_till_stage=2):
        import warnings

        super().__init__()
        # num_classes=0 asks timm for the backbone without a classification head.
        self.backbone = timm.create_model(
            CONFIG.BACKBONE,
            num_classes=0,
            pretrained=True
        )

        # Freeze every parameter that belongs to an early stage.
        n_frozen = 0
        for name, param in self.backbone.named_parameters():
            stage = self._stage_index(name)
            if stage is not None and stage < freeze_till_stage:
                param.requires_grad = False
                n_frozen += 1
        if freeze_till_stage > 0 and n_frozen == 0:
            warnings.warn(
                f"FreezeModel: no 'layers.<n>' parameters found in "
                f"{CONFIG.BACKBONE!r}; nothing was frozen."
            )
        self.classifier = nn.Linear(self.backbone.num_features, CONFIG.N_TARGETS)

    @staticmethod
    def _stage_index(param_name):
        """Return the ``<n>`` of a ``layers.<n>`` component in *param_name*, or None.

        Names whose ``layers`` component is not followed by an integer (for
        example ``head.layers.fc.weight``) yield None instead of raising.
        """
        import re

        match = re.search(r'(?:^|\.)layers\.(\d+)(?:\.|$)', param_name)
        return int(match.group(1)) if match else None

    def forward(self, inputs):
        """Return the linear head applied to the backbone output."""
        x = self.backbone(inputs)
        return self.classifier(x)

# A model to extract features from the images based on a pretrained model

In [8]:
class FeatureExtractModel(nn.Module):
    """Pretrained timm backbone created with ``num_classes=0``."""

    def __init__(self):
        super().__init__()
        # Option left disabled in the original notebook: global_pool=''
        self.backbone = timm.create_model(CONFIG.BACKBONE, pretrained=True, num_classes=0)

    def forward(self, images):
        """Return the backbone output for *images*."""
        return self.backbone(images)

In [9]:
# Instantiate the network to train (alternative: Model()) and place it on the GPU.
model = FreezeModel().to('cuda')
# Uncomment to inspect the architecture:
# print(model)

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

# Set up the various parameters for training

In [10]:
def get_lr_scheduler(optimizer):
    """Build the one-cycle LR schedule used for training.

    The schedule peaks at ``CONFIG.LR_MAX`` after the first 10% of
    ``CONFIG.N_STEPS`` and anneals with a cosine curve; both the start and
    the final divisors are 10.
    """
    schedule_options = {
        'max_lr': CONFIG.LR_MAX,
        'total_steps': CONFIG.N_STEPS,
        'pct_start': 0.1,
        'anneal_strategy': 'cos',
        'div_factor': 10.0,
        'final_div_factor': 10.0,
    }
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer=optimizer, **schedule_options)
    return scheduler

class AverageMeter(object):
    """Running mean over all elements of the tensors passed to ``update``."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the accumulated sum, element count and average."""
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val):
        """Fold tensor *val* into the running statistics.

        Args:
            val: tensor whose elements are added to the running sum; its
                number of elements is added to the count.
        """
        # Detach first: accumulating a tensor that is still attached to the
        # autograd graph would chain every step's history into self.sum and
        # keep it alive until the next reset().
        val = val.detach()
        self.sum += val.sum()
        self.count += val.numel()
        self.avg = self.sum / self.count

# Running training metrics, kept on the GPU alongside the model outputs.
MAE = torchmetrics.regression.MeanAbsoluteError()
MAE = MAE.to('cuda')
R2 = torchmetrics.regression.R2Score(
    num_outputs=CONFIG.N_TARGETS,
    multioutput='uniform_average',
)
R2 = R2.to('cuda')
LOSS = AverageMeter()

# Constants read by r2_loss: per-target mean of the scaled training targets
# and a lower bound for the denominator.
Y_MEAN = torch.tensor(y_train).mean(dim=0).to('cuda')
EPS = torch.tensor([1e-6], device='cuda')

def r2_loss(y_pred, y_true):
    """Return the mean over targets of residual SS divided by total SS.

    Sums run over dim 0. The total sum of squares is measured against the
    global ``Y_MEAN`` and floored at ``EPS`` before dividing.
    """
    residual_ss = ((y_true - y_pred) ** 2).sum(dim=0)
    total_ss = ((y_true - Y_MEAN) ** 2).sum(dim=0)
    # Floor the denominator so the ratio stays finite.
    total_ss = torch.maximum(total_ss, EPS)
    return (residual_ss / total_ss).mean()

# Training criterion (alternative: r2_loss).
LOSS_FN = nn.SmoothL1Loss()

# AdamW over every model parameter, starting from the peak learning rate;
# the scheduler below takes over the actual per-step value.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=CONFIG.LR_MAX,
    weight_decay=CONFIG.WEIGHT_DECAY,
)

LR_SCHEDULER = get_lr_scheduler(optimizer)

# Extract features from the images based on a model of choice

In [11]:
def extract_features(dataloader, model):
    """Run *model* over *dataloader* and return its outputs as a DataFrame.

    Args:
        dataloader: iterable yielding ``(X, _)`` pairs. ``X`` may be a single
            3-D sample (as yielded by iterating a dataset directly) or an
            already batched 4-D tensor (as yielded by a DataLoader).
        model: module mapping a batch of images to a 2-D feature tensor.

    Returns:
        DataFrame with one row per sample and columns ``feature_0``,
        ``feature_1``, ...; an empty DataFrame when *dataloader* yields nothing.
    """
    model.eval()

    # Run on whatever device the model lives on; fall back to the previously
    # hard-coded 'cuda' for parameter-less models.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device('cuda')

    feature_chunks = []
    with torch.no_grad():
        for X_batch, _ in dataloader:
            # Only single samples need a batch axis; batches from a
            # DataLoader already have one.
            if X_batch.dim() == 3:
                X_batch = X_batch.unsqueeze(0)
            # Bring the features back to the CPU so they can go into the frame.
            feature_chunks.append(model(X_batch.to(device)).cpu().numpy())

    if not feature_chunks:
        return pd.DataFrame()

    features_array = np.concatenate(feature_chunks, axis=0)

    # Convert the features array into a DataFrame
    features_df = pd.DataFrame(features_array)
    features_df.columns = [f'feature_{i}' for i in range(features_array.shape[1])]

    return features_df

if CONFIG.EXTRACT_FEATURES:
    # Move the feature extractor itself to the GPU. Previously the training
    # `model` was assigned here, discarding the extractor just built.
    feature_extract_model = FeatureExtractModel()
    feature_extract_model = feature_extract_model.to('cuda')

    # Iterate the training images one by one, in frame order and with the
    # deterministic test-time transforms, so row i of the feature frame
    # corresponds to row i of `train`. The shuffled, augmented, drop_last
    # training loader would not give that alignment.
    train_feature_dataset = Dataset(
        train['jpeg_bytes'].values,
        y_train,
        TEST_TRANSFORMS,
    )

    df = extract_features(train_feature_dataset, feature_extract_model)
    df.to_pickle("swin_features.pkl")
    df = extract_features(test_dataset, feature_extract_model)
    df.to_pickle("swin_features_test.pkl")

# Train the model

In [12]:
if CONFIG.TRAIN_MODEL:
    print("Start Training:")
    for epoch in range(CONFIG.N_EPOCHS):
        # Metrics are reported per epoch, so start each epoch from scratch.
        MAE.reset()
        R2.reset()
        LOSS.reset()
        model.train()

        for step, (X_batch, y_true) in enumerate(train_dataloader):
            X_batch = X_batch.to('cuda')
            y_true = y_true.to('cuda')
            # Timing covers forward, backward, optimizer and metric updates,
            # not the host-to-device copy above.
            t_start = time.perf_counter_ns()
            y_pred = model(X_batch)
            loss = LOSS_FN(y_pred, y_true)
            # Meter a detached copy so the running sum never holds on to the
            # autograd graph of past steps.
            LOSS.update(loss.detach())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            LR_SCHEDULER.step()
            MAE.update(y_pred, y_true)
            R2.update(y_pred, y_true)

            # Interactive sessions refresh one status line every step;
            # batch runs print a single line at the end of each epoch.
            is_last_step = (step + 1) == CONFIG.N_STEPS_PER_EPOCH
            if CONFIG.IS_INTERACTIVE or is_last_step:
                status = (
                    f'EPOCH {epoch+1:02d}, {step+1:04d}/{CONFIG.N_STEPS_PER_EPOCH} | ' +
                    f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}, ' +
                    f'step: {(time.perf_counter_ns()-t_start)*1e-9:.3f}s, lr: {LR_SCHEDULER.get_last_lr()[0]:.2e}'
                )
                if CONFIG.IS_INTERACTIVE:
                    # '\r' rewrites the same line until the epoch finishes.
                    print('\r' + status, end='\n' if is_last_step else '', flush=True)
                else:
                    print(status)

    # Persist the whole trained module (not only its state dict).
    torch.save(model, 'model.pth')

Start Training:
EPOCH 01, 4916/4916 | loss: 0.3018, mae: 0.6394, r2: 0.3140, step: 1.674s, lr: 9.87e-05
EPOCH 02, 4916/4916 | loss: 0.2804, mae: 0.6096, r2: 0.3684, step: 1.119s, lr: 8.45e-05
EPOCH 03, 4916/4916 | loss: 0.2389, mae: 0.5513, r2: 0.4696, step: 1.130s, lr: 5.91e-05
EPOCH 04, 4916/4916 | loss: 0.1819, mae: 0.4675, r2: 0.6052, step: 1.133s, lr: 3.09e-05
EPOCH 05, 4916/4916 | loss: 0.1231, mae: 0.3750, r2: 0.7400, step: 1.130s, lr: 9.14e-06
EPOCH 06, 4916/4916 | loss: 0.0835, mae: 0.3049, r2: 0.8271, step: 1.134s, lr: 1.00e-06


# Submit the results

In [13]:
# Predict every test image one at a time and collect one row per id.
SUBMISSION_ROWS = []
model.eval()

with torch.no_grad():
    for X_sample_test, test_id in tqdm(test_dataset):
        # Add the batch axis, predict on the GPU, then return to numpy.
        batch_of_one = X_sample_test.unsqueeze(0).to('cuda')
        scaled_pred = model(batch_of_one).cpu().numpy()

        # Undo the target standardisation, then the log10 where it was applied.
        y_pred = SCALER.inverse_transform(scaled_pred).squeeze()
        row = {'id': test_id}
        row.update({
            target.replace('_mean', ''): (10 ** value if target in LOG_FEATURES else value)
            for target, value in zip(CONFIG.TARGET_COLUMNS, y_pred)
        })

        SUBMISSION_ROWS.append(row)

submission_df = pd.DataFrame(SUBMISSION_ROWS)
submission_df.to_csv('submission.csv', index=False)
print("Submit!")

  0%|          | 0/6545 [00:00<?, ?it/s]

Submit!
