In [1]:
# `%pip` installs into the environment of the running kernel, whereas `!pip`
# runs whichever `pip` executable happens to be first on the shell PATH.
%pip install -q evaluate

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [None]:
import os
import time
import math
from tifffile import tifffile as tif

import pandas as pd
import numpy as np

import torch
import torch.utils.data as data

import torchvision
from torchvision.transforms import v2
from torchvision.io import read_image

import evaluate
import transformers
from transformers import TrainingArguments, Trainer
from transformers.models.convnext.modeling_convnext import ConvNextForImageClassification
from sklearn.model_selection import StratifiedKFold
from sklearn.utils.class_weight import compute_class_weight

import matplotlib.pyplot as plt
import wandb


# Seed PyTorch's global random number generator.
# NOTE(review): NumPy and Python's `random` module are not seeded in this cell.
torch.manual_seed(0)

In [None]:
def feature_engineering(image):
    """Append four normalized-difference index channels to a channel-last image.

    Every index has the form ``(a - b) / (a + b + 1e-10)``; the small constant
    keeps the division finite when ``a + b`` is zero.  With ``cK`` denoting
    channel ``K`` (zero-based) of the last axis:

    * ENDMI = ((c7 + c8) - (c10 + c11)) / ((c7 + c8) + (c10 + c11))
    * NDVI  = (NIR - R) / (NIR + R)            -> (c7 - c3) / (c7 + c3)
    * NDMI  = (NIR - SWIR_1) / (NIR + SWIR_1)  -> (c7 - c10) / (c7 + c10)
    * NDWI  = (NIR - G) / (NIR + G)            -> (c7 - c2) / (c7 + c2)

    NOTE(review): the band names assume channel 7 = NIR, 3 = red, 2 = green and
    10 = SWIR-1 - confirm against the data. EVI is not computed.

    Args:
        image: array whose last axis holds at least 12 channels.

    Returns:
        A new array equal to ``image`` with ENDMI, NDVI, NDMI and NDWI appended
        (in that order) along the last axis.
    """
    eps = 1e-10

    def normalized_difference(a, b):
        # (a - b) / (a + b), guarded against a zero denominator.
        return (a - b) / (a + b + eps)

    nir = image[..., 7]
    nir_pair = nir + image[..., 8]
    swir_pair = image[..., 10] + image[..., 11]

    # The denominator is the sum of the same two terms that the numerator
    # subtracts; channel 10 must be counted once, not twice.
    endmi = normalized_difference(nir_pair, swir_pair)
    ndvi = normalized_difference(nir, image[..., 3])
    ndmi = normalized_difference(nir, image[..., 10])
    ndwi = normalized_difference(nir, image[..., 2])

    # The original bands are passed through unchanged, followed by the indices.
    index_channels = [np.expand_dims(index, axis=-1) for index in (endmi, ndvi, ndmi, ndwi)]
    return np.concatenate([image, *index_channels], axis=-1)

class MSCLF(data.Dataset):
    """Dataset of TIFF images with a binary label taken from a CSV / DataFrame.

    Column 0 of the table is the image file name (relative to
    ``cfg['data_path']``) and column 1 is the numeric label.  Each item is a
    dict ``{'pixel_values': tensor, 'label': tensor([label, 1 - label])}``, so
    index 0 of the label vector is 1 for a positive sample.

    Args:
        cfg: mapping with the keys ``data_path``, ``train_set``,
            ``feature_engineering`` and ``train_split``.
        train: when the table is read from ``cfg['train_set']`` and
            ``train_split`` is set, selects the leading (True) or trailing
            (False) part of the table.
        transforms: optional callable applied as ``transforms(image, label)``.
        msclf: optional pre-split DataFrame; when given, the CSV is not read
            and ``train`` / ``train_split`` do not affect the rows used.

    Attributes:
        labels_weight: float32 tensor ``[weight_positive, weight_negative]`` of
            "balanced" class weights, ordered like the label vector.
    """

    def __init__(self, cfg,train=True,transforms=None, msclf=None):
        self.data_path = cfg['data_path']
        self.csv_file = cfg['train_set']
        self.fe = cfg['feature_engineering']
        self.train_split = cfg['train_split']

        self.msclf = msclf
        self.transforms = transforms
        self.train = train
        # Twelve per-band constants shaped (12, 1, 1).  Not used anywhere in
        # this class.  NOTE(review): presumably per-band maxima intended for
        # normalisation - confirm before relying on them.
        self.band_scalers = torch.Tensor([
            1.2680000066757202,1.5047999620437622,
            1.5046600103378296,1.511856198310852,
            1.5050400495529175,1.5010639429092407,
            1.5003302097320557,1.5195592641830444,
            1.49590003490448,1.5194000005722046,
            1.1084500551223755,1.2992000579833984
        ]).unsqueeze(-1).unsqueeze(-1)

        if msclf is None:
            self.msclf = pd.read_csv(self.csv_file)
            if self.train_split is not None:
                split_at = math.ceil(len(self.msclf) * self.train_split)
                if self.train:
                    self.msclf = self.msclf[:split_at]
                else:
                    self.msclf = self.msclf[split_at:].reset_index(drop=True)

        # Use sorted classes so every weight can be matched to its class value;
        # `pd.unique` returns classes in order of first appearance, which makes
        # positional indexing of the weights depend on the row order.
        labels = self.msclf.iloc[:, 1].to_numpy()
        classes = np.unique(labels)
        weights = compute_class_weight(class_weight="balanced",
                                       classes=classes,
                                       y=labels)
        self.labels_weight = self._positive_first_weights(classes, weights)

    @staticmethod
    def _positive_first_weights(classes, weights):
        """Return ``[weight of class 1, weight of class 0]`` as a float32 tensor.

        A class missing from ``classes`` gets the neutral weight 1.0.
        """
        weight_of = {int(cls): float(w) for cls, w in zip(classes, weights)}
        return torch.tensor(
            [weight_of.get(1, 1.0), weight_of.get(0, 1.0)],  # index 0 is positive sample
            dtype=torch.float32,
        )

    def __len__(self):
        """Number of rows (samples) in the table."""
        return len(self.msclf)

    def set_transforms(self, transforms):
        """Replace the callable applied to ``(image, label)`` in ``__getitem__``."""
        self.transforms = transforms

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``{'pixel_values': ..., 'label': ...}``."""
        file_name, label = self.msclf.iloc[idx,0],self.msclf.iloc[idx,1]
        image = tif.imread(os.path.join(self.data_path,file_name))
        # NOTE(review): this method used to contain `image *= 2 - 1`.  Because
        # `2 - 1` is evaluated first, that statement multiplied by 1 and left
        # the pixels unchanged.  It was removed rather than turned into
        # `image * 2 - 1`, since rescaling here would change the values of the
        # ratio indices computed below.  Decide explicitly whether (and where)
        # the bands should be rescaled.
        if self.fe:
            image = feature_engineering(image)
        # One-hot style target: position 0 is the positive class.
        label = torch.from_numpy(np.array([label, 1 - label], dtype=np.float32))
        # Cast to float32 (no copy when the data already is float32).
        image = torch.from_numpy(np.asarray(image, dtype=np.float32))
        # Swap axes 0 and 2 so the channel axis comes first.
        # NOTE(review): for an (H, W, C) image this yields (C, W, H), i.e. the
        # two spatial axes are exchanged as well - confirm this is intended.
        image = torch.transpose(image,0,2)
        if self.transforms:
            image, label = self.transforms(image,label)
        return {'pixel_values':image, 'label':label}



In [None]:
def get_ds(cfg):
    """Build the training and validation ``MSCLF`` datasets from one config.

    Args:
        cfg: configuration mapping forwarded unchanged to ``MSCLF``.

    Returns:
        Tuple ``(train_ds, val_ds)``: the dataset built with ``train=True``
        followed by the one built with ``train=False``.
    """
    splits = [MSCLF(cfg, train=is_train) for is_train in (True, False)]
    return splits[0], splits[1]

def compute_metrics(eval_pred):
    """Score predictions with the combined ``clf_metrics`` evaluator.

    Both ``eval_pred.predictions`` (logits) and ``eval_pred.label_ids`` have one
    column per class, and the dataset encodes a label as ``[label, 1 - label]``,
    so column 0 is the positive class.  The argmax index is therefore mapped to
    a class id (column 0 -> 1, anything else -> 0) before scoring; otherwise
    binary metrics that treat 1 as the positive label would be computed for
    the negative class.

    Args:
        eval_pred: object with ``predictions`` and ``label_ids`` arrays of
            shape (n_samples, n_classes).

    Returns:
        Whatever ``clf_metrics.compute`` returns for the class-id arrays.
    """
    predictions = (np.argmax(eval_pred.predictions, axis=1) == 0).astype(int)
    label_ids = (np.argmax(eval_pred.label_ids, axis=1) == 0).astype(int)
    return clf_metrics.compute(predictions=predictions, references=label_ids)

def collate_fn(examples):
    """Merge per-sample dicts into a single batch dict of stacked tensors.

    Args:
        examples: list of dicts, each with a ``'pixel_values'`` tensor and a
            ``'label'`` tensor.

    Returns:
        ``{'pixel_values': ..., 'labels': ...}`` where each value is the
        samples' tensors stacked along a new leading axis.  Note the output key
        is ``'labels'`` (plural) while the per-sample key is ``'label'``.
    """
    images, targets = [], []
    for sample in examples:
        images.append(sample['pixel_values'])
        targets.append(sample['label'])
    return {'pixel_values': torch.stack(images), 'labels': torch.stack(targets)}

class MSCLFTrainer(Trainer):
    """``Trainer`` whose loss is a class-weighted, label-smoothed cross-entropy."""

    def set_labels_weight(self, labels_weight):
        """Store the per-class weight tensor used by ``compute_loss``."""
        self.labels_weight = labels_weight

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the weighted cross-entropy between the logits and the labels.

        Args:
            model: model called as ``model(**inputs)``; its output must expose
                ``"logits"``.
            inputs: batch dict; ``"labels"`` is removed from it before the
                forward pass.
            return_outputs: when True, return ``(loss, outputs)``.
            num_items_in_batch: accepted so that callers which pass this
                keyword do not fail; it is not used.

        Returns:
            The loss tensor, or ``(loss, outputs)`` if ``return_outputs``.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits").view(-1, self.model.config.num_labels)

        # Take device and dtype from the logits themselves; this works whether
        # or not `model` is wrapped (e.g. by DataParallel).
        weight = getattr(self, "labels_weight", None)
        if weight is not None:
            weight = weight.to(device=logits.device, dtype=logits.dtype)

        loss_fct = torch.nn.CrossEntropyLoss(
            weight=weight,
            label_smoothing=0.1
        )
        # CrossEntropyLoss applies log-softmax itself, so it must receive the
        # raw logits rather than softmax probabilities.
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss


In [None]:

CFG = {
    'data_path':'/kaggle/input/finding-mining-sites/train/train',
    'train_set':'/kaggle/input/finding-mining-sites/train/answer.csv',
    'feature_engineering':True,
    'train_split':None,
    'input_size':(256,256),
    'model_type':'ConvNeXt-B',
}
# Stage widths and depths for each ConvNeXt variant selectable via CFG['model_type'].
convnext_type = {
    "ConvNeXt-T": {"hidden_sizes": (96, 192, 384, 768), "depths": (3, 3, 9, 3)},
    "ConvNeXt-S": {"hidden_sizes": (96, 192, 384, 768), "depths": (3, 3, 27, 3)},
    "ConvNeXt-B": {"hidden_sizes": (128, 256, 512, 1024), "depths": (3, 3, 27, 3)},
    "ConvNeXt-L": {"hidden_sizes": (192, 384, 768, 1536), "depths": (3, 3, 27, 3)},
    "ConvNeXt-XL":{"hidden_sizes": (256, 512, 1024, 2048), "depths": (3, 3, 27, 3)},
}

# 12 raw bands, plus the 4 index channels appended by feature engineering.
# NOTE(review): the 12-band count is assumed from the previous hard-coded
# value of 16 with feature engineering enabled - confirm against the data.
num_channels = 12 + (4 if CFG['feature_engineering'] else 0)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=69)
df = pd.read_csv(CFG['train_set'])

# Training augmentation: one randomly chosen resize/crop/rotate variant,
# followed by random horizontal and vertical flips.
train_transforms = v2.Compose([
    v2.RandomChoice([
        v2.RandomResizedCrop(CFG['input_size'],antialias=True),
        v2.Resize(CFG['input_size'],antialias=True),
        v2.RandomCrop(CFG['input_size']),
        v2.Compose([
            v2.RandomRotation(degrees=45),
            v2.CenterCrop(CFG['input_size']),
        ]),
    ]),
    v2.RandomHorizontalFlip(),
    v2.RandomVerticalFlip(),
])
# Validation only resizes to the model input size.
val_transforms = v2.Compose([
        v2.Resize(CFG['input_size'],antialias=True),
])

clf_metrics = evaluate.combine(["accuracy", "f1", "precision", "recall"])


# One model is trained per stratified fold; fold `i` writes to directory `msclf{i}`.
for i, (train_index, val_index) in enumerate(skf.split(df.iloc[:,0],df.iloc[:,1].copy())):
    train_msclf, val_msclf = df.iloc[train_index,:], df.iloc[val_index,:]
    train_ds, val_ds = MSCLF(CFG, msclf=train_msclf), MSCLF(CFG, msclf=val_msclf)

    train_ds.set_transforms(train_transforms)
    val_ds.set_transforms(val_transforms)


    model_config = transformers.ConvNextConfig(
        num_channels=num_channels,
        num_labels=2,
        hidden_sizes=convnext_type[CFG['model_type']]['hidden_sizes'],
        depths=convnext_type[CFG['model_type']]['depths'],
    )

    model = transformers.ConvNextForImageClassification(model_config)

    training_args = TrainingArguments(
        f'msclf{i}',
        overwrite_output_dir=False,
        remove_unused_columns=False,
        evaluation_strategy = 'epoch',
        save_strategy = "epoch",
        learning_rate=9e-5,
        per_device_train_batch_size=700,
        per_device_eval_batch_size=300,
        gradient_accumulation_steps=4,
        auto_find_batch_size=True,
        num_train_epochs=1000,
        warmup_ratio=0.1,
        logging_strategy='epoch',
        save_total_limit=2,
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        push_to_hub=False,
    )
    trainer = MSCLFTrainer(
        model,
        training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        compute_metrics=compute_metrics,
        data_collator=collate_fn,
    )
    # The custom loss reads the class weights from the trainer, so they have
    # to be handed over before training starts.
    trainer.set_labels_weight(train_ds.labels_weight)

    # Resume only when this fold already has a checkpoint; on a fresh run
    # `resume_from_checkpoint=True` fails because there is nothing to load.
    last_checkpoint = (
        transformers.trainer_utils.get_last_checkpoint(training_args.output_dir)
        if os.path.isdir(training_args.output_dir)
        else None
    )
    trainer.train(resume_from_checkpoint=last_checkpoint)

