In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import tifffile as tiff
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output

# Root folder of the dataset; adjust to wherever the data is mounted
DATASET_PATH = '/kaggle/input/cloud-masking-dataset/'

# Folders holding the training images and their masks
data_path = DATASET_PATH + 'content/train/data'
mask_path = DATASET_PATH + 'content/train/masks'

# Collect the .tif files of each folder in sorted order and pair them
# position by position (zip stops at the shorter of the two lists)
image_files = sorted(name for name in os.listdir(data_path) if name.endswith('.tif'))
mask_files = sorted(name for name in os.listdir(mask_path) if name.endswith('.tif'))
image_mask_pairs = list(zip(image_files, mask_files))

# Resume from a previously exported flag file when one exists;
# otherwise start with every image marked as not mislabeled
csv_path = 'mislabeled_flags.csv'
if not os.path.exists(csv_path):
    mislabeled_status = dict.fromkeys(image_files, 'no')
else:
    df = pd.read_csv(csv_path)
    # Map each filename to its stored flag
    mislabeled_status = dict(zip(df['filename'], df['mislabeled']))
    print("✅ Loaded existing mislabeled_flags.csv")

# Controls: a slider to pick the pair, a label showing the current flag,
# buttons to flip the flag / export the CSV, and an area for the figure
index_slider = widgets.IntSlider(
    description='Index',
    min=0,
    max=len(image_mask_pairs) - 1,
    step=1,
)
status_label = widgets.Label()
toggle_button = widgets.Button(description="Toggle Mislabeled", button_style='')
export_button = widgets.Button(description="Export CSV", button_style='success')
output = widgets.Output()

def normalize_rgb(img):
    """Min-max scale the first three channels of an image independently.

    The input is converted to a float32 copy, so the caller's array is not
    modified. Each of channels 0, 1 and 2 (last axis) is rescaled with its
    own minimum and maximum; a 1e-5 term in numerator and denominator keeps
    the division defined for a constant channel.

    Args:
        img: array whose last axis holds at least three channels.

    Returns:
        A float32 array containing only the first three, rescaled channels.
    """
    scaled = img.astype('float32')
    for channel in range(3):
        band = scaled[..., channel]
        lowest = band.min()
        highest = band.max()
        scaled[..., channel] = (band - lowest + 1e-5) / (highest - lowest + 1e-5)
    return scaled[..., :3]

def _min_max_scale(band):
    """Return a float32 copy of ``band`` rescaled by its own min and max."""
    band = band.astype('float32')
    lowest, highest = band.min(), band.max()
    # The 1e-5 terms keep the division defined when the band is constant.
    return (band - lowest + 1e-5) / (highest - lowest + 1e-5)


def update_display(i, ir_threshold=0.4, diff_tolerance=0.3):
    """Redraw the output area for the image/mask pair at index ``i``.

    Shows three panels side by side: the normalized RGB image, the stored
    mask, and a heuristic "diff mask" that is 1 where the scaled band at
    index 3 exceeds ``ir_threshold`` and differs from the scaled RGB mean by
    less than ``diff_tolerance``. Afterwards the status label is refreshed
    with the current mislabeled flag of the image.

    Args:
        i: position of the pair inside ``image_mask_pairs``.
        ir_threshold: lower bound applied to the scaled band-3 values.
        diff_tolerance: maximum absolute difference between the scaled
            band-3 values and the scaled RGB mean.
    """
    output.clear_output(wait=True)
    with output:
        img_name, mask_name = image_mask_pairs[i]

        # Decode the TIFF once. normalize_rgb works on a float32 copy, so
        # ``raw`` is still intact when band 3 is sliced out afterwards.
        raw = tiff.imread(os.path.join(data_path, img_name))
        image = normalize_rgb(raw)

        # Band 3 is presumably the infrared channel (TODO confirm against the
        # dataset description). It is always rescaled, including uint8 input,
        # so that the thresholds below operate on the same scale as the
        # normalized RGB mean.
        ir = _min_max_scale(raw[..., 3])
        mean_image = _min_max_scale(np.mean(image, axis=2))

        diff = ir - mean_image
        diff_mask = (
            (ir > ir_threshold) & (np.abs(diff) < diff_tolerance)
        ).astype('float32')

        mask = tiff.imread(os.path.join(mask_path, mask_name))

        fig, axes = plt.subplots(1, 3, figsize=(12, 6))
        axes[0].imshow(image)
        axes[0].set_title(f'Image (RGB): {img_name}')
        axes[0].axis('off')

        axes[1].imshow(mask, cmap='gray')
        axes[1].set_title(f'Mask: {mask_name}')
        axes[1].axis('off')

        axes[2].imshow(diff_mask, cmap='gray')
        axes[2].set_title(f'Diff Mask: {img_name}')
        axes[2].axis('off')

        plt.show()
        # Release the figure explicitly so repeated slider moves do not pile
        # up open figures on backends that do not close them on show().
        plt.close(fig)

        # Update label
        label = mislabeled_status.get(img_name, 'no')
        status_label.value = f"Mislabeled: {label.upper()}"

def on_toggle_clicked(_):
    """Flip the mislabeled flag of the currently selected image and redraw.

    The image is the one at the slider's current index; a flag of 'yes'
    becomes 'no', and anything else (including a missing entry) becomes 'yes'.
    The button argument is unused.
    """
    position = index_slider.value
    image_name = image_mask_pairs[position][0]
    if mislabeled_status.get(image_name, 'no') == 'yes':
        mislabeled_status[image_name] = 'no'
    else:
        mislabeled_status[image_name] = 'yes'
    update_display(position)

def on_export_clicked(_):
    """Write the current flags to ``csv_path`` and confirm in the output area.

    One row is written per entry of ``image_files`` with the columns
    ``id`` (position in the list), ``filename`` and ``mislabeled``; images
    without a stored flag are exported as 'no'. The button argument is unused.
    """
    rows = []
    for row_id, image_name in enumerate(image_files):
        rows.append({
            'id': row_id,
            'filename': image_name,
            'mislabeled': mislabeled_status.get(image_name, 'no'),
        })
    pd.DataFrame(rows).to_csv(csv_path, index=False)
    with output:
        print("✅ CSV file 'mislabeled_flags.csv' has been saved.")

def on_slider_change(change):
    """Redraw the display for the slider's new value.

    Args:
        change: change notification mapping; its 'new' entry is the index
            passed on to ``update_display``.
    """
    selected_index = change['new']
    update_display(selected_index)

# Hook each control up to its callback
export_button.on_click(on_export_clicked)
toggle_button.on_click(on_toggle_clicked)
index_slider.observe(on_slider_change, names='value')

# Stack the controls above the output area and render the panel
ui = widgets.VBox(
    children=[index_slider, toggle_button, status_label, export_button, output]
)
display(ui)

# Draw the first image/mask pair
update_display(0)


✅ Loaded existing mislabeled_flags.csv


VBox(children=(IntSlider(value=0, description='Index', max=10572), Button(description='Toggle Mislabeled', sty…