In [None]:
import os
import sys
import cv2
import numpy as np
import pandas as pd
import random
from glob import glob
import torch.nn as nn
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import segmentation_models_pytorch as smp
%matplotlib inline
import pickle
from pickle import load
import torch
from torch.utils.data import Dataset, DataLoader
from torch.amp import autocast, GradScaler

In [None]:
# Root folder of the dataset; both splits live in fixed sub-folders below it.
dataset_root = r'D:\datasets\ecti2021'
train_dir, test_dir = (
    os.path.join(dataset_root, sub_folder)
    for sub_folder in ('train/train', 'val_without_ref_labels/val')
)

# Every entry directly inside a split folder is counted as one temporal-region.
n_train_regions, n_test_regions = (
    len(glob(os.path.join(split_dir, '*')))
    for split_dir in (train_dir, test_dir)
)

for message, region_count in (
    ('Number of training temporal-regions: {}', n_train_regions),
    ('Number of test temporal-regions: {}', n_test_regions),
):
    print(message.format(region_count))


In [None]:
def get_filename(filepath, split_symbol='\\'):
    r"""Return the last component (the file name) of ``filepath``.

    Args:
        filepath: Path string to take the file name from.
        split_symbol: Separator to split on. When it is a path separator
            (``'\'``, the default, or ``'/'``) both ``'\'`` and ``'/'`` are
            treated as separators, so the result does not depend on which
            style the path was written in. Any other value is used verbatim
            as the only separator.

    Returns:
        The text after the last separator, or ``filepath`` unchanged when it
        contains no separator.
    """
    if split_symbol in ('\\', '/'):
        # Paths in this notebook are produced by os.path.join on top of
        # literals such as 'train/train', so they can mix both separators and
        # use '/' exclusively on non-Windows hosts. Normalise before splitting.
        filepath = filepath.replace('\\', '/')
        split_symbol = '/'
    return filepath.split(split_symbol)[-1]

def read_csv(csvpath):
    """Read a list file with one path per line and return the base names.

    Args:
        csvpath: Path of the text/CSV file to read with ``np.loadtxt``
            (space-delimited, every field read as ``str``).

    Returns:
        A list with ``os.path.basename`` of every entry, in file order.
    """
    # np.loadtxt returns a 0-d array for a one-line file; without atleast_1d,
    # .tolist() would give a bare str and the loop below would iterate over
    # its characters instead of over paths.
    path_list = np.atleast_1d(np.loadtxt(csvpath, delimiter=" ", dtype=str)).tolist()
    return [os.path.basename(p) for p in path_list]


In [None]:
def visualize(df_row, figsize=[25, 15]):
    """Plot one tile next to its label masks.

    The VV and VH images are read as grayscale, scaled by 1/255 and combined
    with ``s1_to_rgb``. If the row contains any null value only the composite
    and the water-body mask are drawn (two panels); otherwise the flood mask is
    read as well and drawn between them (three panels).

    Args:
        df_row: Row providing 'vv_image_path', 'vh_image_path',
            'flood_label_path' and 'water_body_label_path'.
        figsize: Figure size, converted to a tuple for matplotlib.
    """
    vv_path = df_row['vv_image_path']
    vh_path = df_row['vh_image_path']
    flood_path = df_row['flood_label_path']
    water_body_path = df_row['water_body_label_path']

    tile_title = get_filename(vv_path)
    composite = s1_to_rgb(
        cv2.imread(vv_path, 0) / 255.0,
        cv2.imread(vh_path, 0) / 255.0,
    )
    water_body_mask = cv2.imread(water_body_path, 0) / 255.0

    plt.figure(figsize=tuple(figsize))

    # Collect (image, title) pairs in display order, then draw them in one loop.
    panels = [(composite, tile_title)]
    if df_row.isnull().sum() == 0:
        # The flood label is only read when the row has no missing entries.
        panels.append((cv2.imread(flood_path, 0) / 255.0, 'Flood mask'))
    panels.append((water_body_mask, 'Water body mask'))

    for position, (panel_image, panel_title) in enumerate(panels, start=1):
        plt.subplot(1, len(panels), position)
        plt.imshow(panel_image)
        plt.title(panel_title)

def s1_to_rgb(vv_image, vh_image, eps=1e-06):
    """Stack the VV band, the VH band and their clipped ratio into an H x W x 3 image.

    Args:
        vv_image: 2-D array used as the first channel.
        vh_image: 2-D array used as the second channel (same shape as ``vv_image``).
        eps: Small constant added to ``vh_image`` before dividing, to avoid a
            division by zero. Defaults to the value that was previously
            hard-coded.

    Returns:
        Array of shape ``vv_image.shape + (3,)`` whose channels are
        ``vv_image``, ``vh_image`` and ``clip(vv / (vh + eps), 0, 1)``.
    """
    ratio_image = vv_image / (vh_image + eps)
    # NaNs are mapped to 0 explicitly via the `nan` keyword. The previous call
    # passed 0 positionally, which numpy interprets as `copy=False`, not as the
    # fill value. Everything outside [0, 1] is then clipped.
    ratio_image = np.clip(np.nan_to_num(ratio_image, nan=0.0), 0, 1)
    # Note kept from the original author: this differs from lab01, which used
    # np.abs(red) / np.abs(green) for the third channel.
    return np.stack((vv_image, vh_image, ratio_image), axis=2)

def visualize_result(df_row, prediction, figsize=[25, 15]):
    """Show the input composite of a tile side by side with a prediction.

    Args:
        df_row: Row providing 'vv_image_path' and 'vh_image_path'.
        prediction: Image-like object drawn in the right-hand panel.
        figsize: Figure size, converted to a tuple for matplotlib.
    """
    # Both bands are read as grayscale and scaled by 1/255 before compositing.
    bands = [
        cv2.imread(df_row[column], 0) / 255.0
        for column in ('vv_image_path', 'vh_image_path')
    ]
    rgb_input = s1_to_rgb(*bands)

    plt.figure(figsize=tuple(figsize))
    for position, (panel_image, panel_title) in enumerate(
        ((rgb_input, 'RGB w/ result'), (prediction, 'Result')), start=1
    ):
        plt.subplot(1, 2, position)
        plt.imshow(panel_image)
        plt.title(panel_title)

In [None]:
def _tile_paths(vv_name):
    """Return the (vv, vh, flood_label, water_body_label) paths for one VV tile name."""
    # The region/date folder is the first two '_'-separated fields of the name.
    region_name_date = '_'.join(os.path.basename(vv_name).split('_')[:2])
    tiles_dir = os.path.join(train_dir, region_name_date, 'tiles')
    # Both label files share the VV tile name without its '_vv' marker.
    label_name = vv_name.replace('_vv', '')
    return (
        os.path.join(tiles_dir, 'vv', vv_name),
        os.path.join(tiles_dir, 'vh', vv_name.replace('vv', 'vh')),
        os.path.join(tiles_dir, 'flood_label', label_name),
        os.path.join(tiles_dir, 'water_body_label', label_name),
    )


# The tile lists sit directly under the dataset root; derive their location
# from `dataset_root` instead of repeating the absolute path.
water_image_names = read_csv(os.path.join(dataset_root, 'water_tiles.csv'))
background_image_names = read_csv(os.path.join(dataset_root, 'background_tiles.csv'))

region_name_dates0 = ['_'.join(os.path.basename(n).split('_')[:2]) for n in water_image_names]
region_name_dates1 = ['_'.join(os.path.basename(n).split('_')[:2]) for n in background_image_names]

# Combined lists hold the water tiles first, followed by the background tiles.
vv_image_paths, vh_image_paths, flood_label_paths, water_body_label_paths = [], [], [], []

# Per-class lists of VV paths, used later for undersampling.
water_image_paths, background_image_paths = [], []

for tile_names, class_vv_paths in (
    (water_image_names, water_image_paths),
    (background_image_names, background_image_paths),
):
    for vv_name in tile_names:
        vv_path, vh_path, flood_path, water_body_path = _tile_paths(vv_name)
        vv_image_paths.append(vv_path)
        class_vv_paths.append(vv_path)
        vh_image_paths.append(vh_path)
        flood_label_paths.append(flood_path)
        water_body_label_paths.append(water_body_path)

# Quick sanity check that the constructed paths point at real files; skipped
# when a list is empty so the cell does not die with an IndexError.
if vv_image_paths:
    print(os.path.exists(vv_image_paths[0]))
if water_image_names:
    print(water_image_names[0])


In [None]:
# Fixed seed so the 70/15/15 split is identical after Restart & Run All.
SPLIT_SEED = 42

n = len(vv_image_paths)
# A local Generator is used so the global numpy random state is left untouched.
indices = np.random.default_rng(SPLIT_SEED).permutation(n)

train_end = int(0.7 * n)
valid_end = int(0.85 * n)

train_idx = indices[:train_end]
valid_idx = indices[train_end:valid_end]
test_idx  = indices[valid_end:]
print("Number of tiles in training set:",train_idx.size)
print("Number of tiles in validation set:",valid_idx.size)
print("Number of tiles in test set:",test_idx.size)
# The total covers all three splits; the previous label only mentioned two.
print("Number of tiles in the training, validation and test sets:",train_idx.size+valid_idx.size+test_idx.size)

In [None]:
# Pick the training rows out of every path list; one DataFrame column per list.
train_paths = {
    column: list(np.array(all_paths)[train_idx])
    for column, all_paths in (
        ('vv_image_path', vv_image_paths),
        ('vh_image_path', vh_image_paths),
        ('flood_label_path', flood_label_paths),
        ('water_body_label_path', water_body_label_paths),
    )
}

# The individual lists stay available under their own names for later cells.
vv_image_paths_train = train_paths['vv_image_path']
vh_image_paths_train = train_paths['vh_image_path']
flood_label_paths_train = train_paths['flood_label_path']
water_body_label_paths_train = train_paths['water_body_label_path']

train_df = pd.DataFrame(train_paths)

print(train_df.shape)

In [None]:

# Widen text columns to 200 characters so the long file paths are not truncated.
pd.options.display.max_colwidth = 200
train_df.head()

In [None]:
# Same column layout as the training frame, restricted to the validation indices.
valid_paths = {}
for column, all_paths in (
    ('vv_image_path', vv_image_paths),
    ('vh_image_path', vh_image_paths),
    ('flood_label_path', flood_label_paths),
    ('water_body_label_path', water_body_label_paths),
):
    valid_paths[column] = list(np.array(all_paths)[valid_idx])

vv_image_paths_valid = valid_paths['vv_image_path']
vh_image_paths_valid = valid_paths['vh_image_path']
flood_label_paths_valid = valid_paths['flood_label_path']
water_body_label_paths_valid = valid_paths['water_body_label_path']

valid_df = pd.DataFrame(valid_paths)

print(valid_df.shape)
valid_df.head()

In [None]:
# Held-out rows: every path list is indexed with the test indices.
test_columns = (
    ('vv_image_path', vv_image_paths),
    ('vh_image_path', vh_image_paths),
    ('flood_label_path', flood_label_paths),
    ('water_body_label_path', water_body_label_paths),
)
test_df = pd.DataFrame(
    {column: np.array(all_paths)[test_idx] for column, all_paths in test_columns}
)
print(test_df.shape)
test_df.head()

In [None]:
# Membership is tested once per tile; a set makes each lookup O(1) instead of
# scanning the whole training list for every candidate path.
train_vv_lookup = set(map(str, vv_image_paths_train))

# Background tiles that ended up in the training split (original order kept).
background_image_paths_train = [path for path in background_image_paths if path in train_vv_lookup]
background_num_train = len(background_image_paths_train)
print('Number of background tiles included in training:',background_num_train)

# Water tiles that ended up in the training split, plus their file names and
# region/date prefixes (first two '_'-separated fields of the file name).
water_image_paths_train = [path for path in water_image_paths if path in train_vv_lookup]
water_image_names_train = [get_filename(pth) for pth in water_image_paths_train]
region_name_dates2 = ['_'.join(n.split('_')[:2]) for n in water_image_names_train]
water_num_train = len(water_image_paths_train)
print('Number of water tiles included in training:',water_num_train)

In [None]:
def _companion_paths(vv_name, region_name_date):
    """Return the (vh, flood_label, water_body_label) paths that belong to a VV tile name."""
    tiles_dir = os.path.join(train_dir, region_name_date, 'tiles')
    label_name = vv_name.replace('_vv', '')
    return (
        os.path.join(tiles_dir, 'vh', vv_name.replace('vv', 'vh')),
        os.path.join(tiles_dir, 'flood_label', label_name),
        os.path.join(tiles_dir, 'water_body_label', label_name),
    )


# Seed for the background subset so the undersampled set is reproducible.
UNDERSAMPLE_SEED = 42

# Keep as many background tiles as there are water tiles (fewer if not enough exist).
num_samples = water_num_train
# Permute the indices of ALL training background tiles. Permuting only
# range(water_num_train) would always select the first N backgrounds and would
# index out of range whenever there are fewer backgrounds than water tiles.
arr = np.random.default_rng(UNDERSAMPLE_SEED).permutation(len(background_image_paths_train))
background_image_paths_train_undersampled = [background_image_paths_train[j] for j in arr[:num_samples]]
background_image_names_train_undersampled = [get_filename(pth) for pth in background_image_paths_train_undersampled]
print('Number of background tiles included in training after undersampling:',len(background_image_names_train_undersampled))
region_name_dates3 = ['_'.join(n.split('_')[:2]) for n in background_image_names_train_undersampled]

# Start from a COPY of the water paths: extending the original list in place
# would corrupt `water_image_paths_train` and make this cell fail on re-run.
vv_image_paths_train_undersampled = list(water_image_paths_train)
vh_image_paths_train_undersampled, flood_label_paths_train_undersampled, water_body_label_paths_train_undersampled = [], [], []

# Water tiles first ...
for vv_name, region_name_date in zip(water_image_names_train, region_name_dates2):
    vh_image_path, flood_label_path, water_body_label_path = _companion_paths(vv_name, region_name_date)
    vh_image_paths_train_undersampled.append(vh_image_path)
    flood_label_paths_train_undersampled.append(flood_label_path)
    water_body_label_paths_train_undersampled.append(water_body_label_path)

print('Number of water body label included in training after undersampling:',len(water_body_label_paths_train_undersampled))

# ... followed by the sampled background tiles.
vv_image_paths_train_undersampled.extend(background_image_paths_train_undersampled)
for vv_name, region_name_date in zip(background_image_names_train_undersampled, region_name_dates3):
    vh_image_path, flood_label_path, water_body_label_path = _companion_paths(vv_name, region_name_date)
    vh_image_paths_train_undersampled.append(vh_image_path)
    flood_label_paths_train_undersampled.append(flood_label_path)
    water_body_label_paths_train_undersampled.append(water_body_label_path)

assert len(vv_image_paths_train_undersampled)==len(vh_image_paths_train_undersampled)==len(flood_label_paths_train_undersampled)==len(water_body_label_paths_train_undersampled)
print('Number of overall images  included in training after undersampling:',len(water_body_label_paths_train_undersampled))

In [None]:
MAX_TRAIN_SAMPLES = 1000  # or even 500 for POC

train_paths_undersample = dict(
    vv_image_path=vv_image_paths_train_undersampled,
    vh_image_path=vh_image_paths_train_undersampled,
    flood_label_path=flood_label_paths_train_undersampled,
    water_body_label_path=water_body_label_paths_train_undersampled,
)
undersampled_full_df = pd.DataFrame(train_paths_undersample)

# Cap the frame at MAX_TRAIN_SAMPLES rows (or keep everything when it is
# already smaller), drawing the rows with a fixed seed and renumbering them.
train_df_undersample = (
    undersampled_full_df
    .sample(n=min(MAX_TRAIN_SAMPLES, len(undersampled_full_df)), random_state=42)
    .reset_index(drop=True)
)

print(train_df_undersample.shape)
train_df_undersample.head()

In [None]:
# Count null entries in the two input-image path columns.
missing_vv, missing_vh = (
    train_df_undersample[column].isnull().sum()
    for column in ('vv_image_path', 'vh_image_path')
)
print(f"Missing Values for VV: {missing_vv}, VH: {missing_vh}")

In [None]:
def _region_of(path):
    """Return the text before the first '_' in the file name of ``path``."""
    return os.path.basename(path).split('_')[0]


# Share of tiles per file-name prefix, shown from most to least frequent.
region_counts = (
    train_df_undersample['vv_image_path']
    .apply(_region_of)
    .value_counts(normalize=True)
)
region_counts.sort_values(ascending=False)

In [None]:
# Draw three distinct row positions from the undersampled training frame
# and plot each of them.
n_rows = len(train_df_undersample)
sample_indices = random.sample(range(n_rows), 3)

for idx in sample_indices:
    print(f"Visualizing sample index: {idx}")
    sample_row = train_df_undersample.iloc[idx]
    visualize(sample_row)
    plt.show()

In [None]:
class ETCIDataset(Dataset):
    """Dataset that turns rows of a path DataFrame into model inputs.

    Each item is a dict with an 'image' entry (float32, channels first) built
    from the VV and VH tiles via ``s1_to_rgb``. Unless ``split == 'test'`` the
    dict also has a 'mask' entry (int64) read from 'flood_label_path'.

    Args:
        dataframe: Frame with 'vv_image_path' and 'vh_image_path' columns, and
            'flood_label_path' for non-test splits.
        split: Split name; only the value 'test' changes behaviour (no mask is
            loaded and ``transform`` is not applied).
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with 'image' and 'mask'.
    """

    def __init__(self, dataframe, split, transform=None):
        self.split = split
        self.dataset = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return self.dataset.shape[0]

    @staticmethod
    def _read_grayscale(path):
        """Read ``path`` as a grayscale image scaled by 1/255.

        Raises:
            FileNotFoundError: If ``path`` is not an existing file.
            OSError: If OpenCV cannot decode the file.
        """
        # cv2.imread signals failure by returning None, which would otherwise
        # surface as an opaque TypeError on the division below.
        if not os.path.isfile(path):
            raise FileNotFoundError(f'Image file not found: {path}')
        image = cv2.imread(path, 0)
        if image is None:
            raise OSError(f'OpenCV could not decode image: {path}')
        return image / 255.0

    def __getitem__(self, index):
        """Load the sample at positional ``index`` and return it as a dict."""
        example = {}

        df_row = self.dataset.iloc[index]

        vv_image = self._read_grayscale(df_row['vv_image_path'])
        vh_image = self._read_grayscale(df_row['vh_image_path'])

        rgb_image = s1_to_rgb(vv_image, vh_image)

        if self.split == 'test':
            example['image'] = rgb_image.transpose((2,0,1)).astype('float32')  #HWC->CHW
        else:
            flood_mask = self._read_grayscale(df_row['flood_label_path'])

            if self.transform:
                # Image and mask go through the transform together so that
                # geometric changes stay aligned.
                augmented = self.transform(image=rgb_image, mask=flood_mask)
                rgb_image = augmented['image']
                flood_mask = augmented['mask']

            example['image'] = rgb_image.transpose((2,0,1)).astype('float32') #HWC->CHW
            example['mask'] = flood_mask.astype('int64')

        return example

In [None]:
import albumentations as A

# Training: resize to 256x256, then flip horizontally half of the time.
train_transform = A.Compose(
    [
        A.Resize(height=256, width=256),
        A.HorizontalFlip(p=0.5),
    ]
)

# Validation: resize only.
# NOTE(review): validation runs at 192x192 while training uses 256x256 - confirm this is intended.
val_transform = A.Compose([A.Resize(height=192, width=192)])

train_dataset = ETCIDataset(dataframe=train_df, split='train', transform=train_transform)
valid_dataset = ETCIDataset(dataframe=valid_df, split='valid', transform=val_transform)
test_dataset = ETCIDataset(dataframe=test_df, split='test', transform=None)

for size_label, split_dataset in (
    ('Trainining set size:', train_dataset),
    ('Validation set size:', valid_dataset),
    ('Test set size:', test_dataset),
):
    print(size_label, len(split_dataset))

In [None]:
batch_size = 16

# Settings shared by all three loaders; only the training loader shuffles.
loader_kwargs = dict(batch_size=batch_size, num_workers=0, pin_memory=False)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
valid_loader = DataLoader(valid_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [None]:
# Dataset and shuffling loader over the undersampled training frame,
# using the same training transform and batch size as the full training set.
train_undersampled_dataset = ETCIDataset(
    dataframe=train_df_undersample,
    split='train',
    transform=train_transform,
)
train_undersampled_loader = DataLoader(
    train_undersampled_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    pin_memory=False,
)
print('Undersampled Trainining set size:', len(train_undersampled_dataset))

In [None]:
# Use the GPU when one is visible, otherwise fall back to the CPU so that
# `.to(device)` calls do not fail on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


def create_model():
    """Build the segmentation network used in this notebook.

    Returns:
        An ``smp.Unet`` with a "resnet34" encoder, "imagenet" encoder weights,
        3 input channels and 2 output classes.
    """
    unet_config = dict(
        encoder_name="resnet34",
        encoder_weights="imagenet",
        in_channels=3,
        classes=2,
    )
    return smp.Unet(**unet_config)

In [None]:
from sklearn.metrics import confusion_matrix

class EvalTracker:
    """Accumulates a confusion matrix over batches and derives binary metrics.

    Args:
        n_classes: Number of classes; the confusion matrix is
            ``n_classes x n_classes``. ``get_mean`` unpacks exactly four cells,
            so it only supports ``n_classes == 2``.
        smooth: Small constant added to every denominator in ``get_mean``.
    """

    def __init__(self, n_classes=2, smooth=0.0001):
        self.n_classes = n_classes
        self.smooth = smooth
        self.reset()

    def reset(self):
        """Clear the accumulated confusion matrix and the sample counter."""
        self.cm = np.zeros((self.n_classes, self.n_classes))
        self.count = 0

    def update(self, pred, target):
        """Add one batch to the running confusion matrix.

        Args:
            pred: Tensor of per-class scores; the class is taken with
                ``argmax(dim=1)``.
            target: Tensor of integer class labels with one entry per
                predicted element.
        """
        self.count += pred.shape[0]

        pred = pred.argmax(dim=1).flatten()  # [B*H*W]
        target = target.flatten()  # [B*H*W]

        pred = pred.detach().cpu().numpy()
        target = target.detach().cpu().numpy()

        # `labels` pins the matrix to n_classes x n_classes. Without it a batch
        # that contains a single class yields a 1x1 matrix, which numpy would
        # broadcast into EVERY cell of self.cm and corrupt all metrics.
        self.cm += confusion_matrix(target, pred, labels=np.arange(self.n_classes))

    def get_mean(self):
        """Return ``(iou, precision, recall, f1)`` for the positive class (label 1)."""
        tn, fp, fn, tp = self.cm.ravel()

        iou = tp / (tp + fp + fn + self.smooth)
        prec = tp / (tp + fp + self.smooth)
        rec = tp / (tp + fn + self.smooth)
        # Count-based F1 with the same smoothing as the other metrics, so the
        # result is 0 rather than NaN when there are no positives at all.
        f1 = 2.0 * tp / (2.0 * tp + fp + fn + self.smooth)

        return iou, prec, rec, f1

In [None]:
# Optimisation settings.
epochs = 5
learning_rate = 1e-4

# Early-stopping settings: stop after `patience` epochs in a row whose
# validation loss did not beat the best one by more than `min_delta`.
patience = 5
min_delta = 1e-4

# Early-stopping state.
best_val_loss = float('inf')
counter = 0

In [None]:
# Network on the target device, stored in channels-last memory format.
model_2 = create_model().to(device).to(memory_format=torch.channels_last)

# AdamW over all model parameters.
optimizer = torch.optim.AdamW(
    params=model_2.parameters(),
    lr=learning_rate,
    weight_decay=1e-4,
)

# Unweighted cross-entropy loss and the gradient scaler used with autocast.
criteria_no_weights = nn.CrossEntropyLoss()
scaler = GradScaler()

In [None]:
# Per-epoch mean losses, keyed by the zero-based epoch index.
train_loss_dict_2 = {}
val_loss_dict_2 = {}

# (Re)initialise the early-stopping state in this cell so that re-running the
# training does not reuse the best loss / patience counter of a previous run.
best_val_loss = float('inf')
counter = 0

for epoch in range(epochs):
    print('Epoch: [{}/{}]'.format(epoch+1, epochs))

    # ---- training pass over the undersampled training set ----
    pbar = tqdm(train_undersampled_loader)
    train_loss = 0.0
    model_2.train()
    eval_logger = EvalTracker()

    for batch_idx, batch in enumerate(pbar):
        image = batch['image'].to(device, non_blocking=True)\
                              .to(memory_format=torch.channels_last)
        mask = batch['mask'].to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        # Forward pass and loss under mixed precision.
        with autocast(device_type='cuda'):
            pred = model_2(image)
            loss = criteria_no_weights(pred, mask)

        # Scaled backward pass, optimizer step, then scale-factor update.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Only update the metrics every 10 batches (very important).
        if batch_idx % 10 == 0:
            eval_logger.update(pred, mask)
            mIoU, Prec, Rec, f1 = eval_logger.get_mean()
            pbar.set_description(
                f'Loss: {loss.item():.4f} | mIoU {mIoU:.4f} | F1 {f1:.4f}'
            )

        # Weight the batch loss by the batch size to get a per-sample mean below.
        train_loss += loss.detach() * image.size(0)

    train_loss /= len(train_undersampled_loader.dataset)
    train_loss_dict_2[epoch] = float(train_loss)

    # ---- validation pass ----
    pbar = tqdm(valid_loader)
    model_2.eval()
    eval_logger = EvalTracker()
    val_loss = 0.0

    with torch.no_grad(), autocast(device_type='cuda'):
        for batch in pbar:
            image = batch['image'].to(device, non_blocking=True)\
                                  .to(memory_format=torch.channels_last)
            mask = batch['mask'].to(device, non_blocking=True).long()

            pred = model_2(image)
            loss = criteria_no_weights(pred, mask)

            val_loss += loss.detach() * image.size(0)
            eval_logger.update(pred, mask)

    val_loss /= len(valid_loader.dataset)
    # Keep a plain Python float so the early-stopping state below never holds
    # on to a device tensor.
    val_loss = float(val_loss)
    val_loss_dict_2[epoch] = val_loss

    mIoU, Prec, Rec, f1 = eval_logger.get_mean()

    print(
        f'[VAL] Epoch {epoch+1} | Loss: {val_loss:.4f} | mIoU {mIoU:.4f} | F1 {f1:.4f}'
    )

    # ---- early stopping on the validation loss ----
    if val_loss < best_val_loss - min_delta:
        best_val_loss = val_loss
        counter = 0

        # Save the best model so far.
        torch.save(model_2.state_dict(), 'best_model_2d_BCE.pt')
        print('✅ Validation loss improved → save best model')

    else:
        counter += 1
        print(f'⏸ No improvement: {counter}/{patience}')

    if counter >= patience:
        print('🛑 Early stopping triggered → model has converged')
        break

# Persist the loss curves.
with open('./train_loss_2d_BCE.pkl', 'wb') as f:
    pickle.dump(train_loss_dict_2, f)

with open('./val_loss_2d_BCE.pkl', 'wb') as f:
    pickle.dump(val_loss_dict_2, f)

# Save the weights of the last epoch (the best checkpoint is written separately
# to 'best_model_2d_BCE.pt' inside the loop).
torch.save(model_2.state_dict(), 'model_2d_BCE.pt')

In [None]:
# Sanity check: is CUDA usable, and where do the model weights actually live?
cuda_available = torch.cuda.is_available()
first_parameter = next(model_2.parameters())
print(cuda_available)
print(first_parameter.device)


In [None]:
# Fresh network of the same architecture, filled with the weights that were
# saved to 'model_2d_BCE.pt', then moved to the device and put in eval mode.
model_test = create_model()
saved_weights = torch.load('model_2d_BCE.pt', map_location=device)
model_test.load_state_dict(saved_weights)
model_test = model_test.to(device)
model_test.eval()

print("Final trained model loaded for inference")



In [None]:
# Fetch an explicit validation batch for this probe instead of relying on an
# `image` variable that happens to be left over from an earlier cell.
probe_batch = next(iter(valid_loader))
probe_image = probe_batch['image'].to(device)

with torch.no_grad():
    logits = model_test(probe_image)
    probs = torch.softmax(logits, dim=1)

# Min / max / mean of the class-1 probability map of the first sample.
print(
    probs[0, 1].min().item(),
    probs[0, 1].max().item(),
    probs[0, 1].mean().item()
)


In [None]:
import random

# Choose three distinct validation rows at random and plot the prediction for each.
sample_indices = random.sample(range(len(valid_df)), 3)

with torch.no_grad():
    for idx in sample_indices:
        row = valid_df.iloc[idx]

        # Rebuild the 3-channel input exactly as the dataset does (no resizing here).
        vv, vh = (
            cv2.imread(row[column], 0) / 255.0
            for column in ('vv_image_path', 'vh_image_path')
        )
        rgb = s1_to_rgb(vv, vh)

        # HWC -> CHW, add a batch dimension, move to the device.
        chw = rgb.transpose(2,0,1)
        image = torch.tensor(chw, dtype=torch.float32).unsqueeze(0).to(device)

        with autocast(device_type='cuda'):
            pred = model_test(image)
            # Per-pixel class with the highest score.
            pred_mask = torch.argmax(pred, dim=1).squeeze().cpu().numpy()

        print(f" Visualizing validation sample {idx}")
        visualize_result(row, pred_mask)
        plt.show()


In [None]:
# Pull a single batch from the undersampled training loader and show the
# shapes of its image and mask tensors.
batch = next(iter(train_undersampled_loader))
print(*(batch[key].shape for key in ('image', 'mask')))
