In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/flood-area-segmentation/metadata.csv
/kaggle/input/flood-area-segmentation/Mask/1017.png
/kaggle/input/flood-area-segmentation/Mask/2015.png
/kaggle/input/flood-area-segmentation/Mask/1084.png
/kaggle/input/flood-area-segmentation/Mask/2021.png
/kaggle/input/flood-area-segmentation/Mask/1012.png
/kaggle/input/flood-area-segmentation/Mask/48.png
/kaggle/input/flood-area-segmentation/Mask/3012.png
/kaggle/input/flood-area-segmentation/Mask/1037.png
/kaggle/input/flood-area-segmentation/Mask/2009.png
/kaggle/input/flood-area-segmentation/Mask/1023.png
/kaggle/input/flood-area-segmentation/Mask/3070.png
/kaggle/input/flood-area-segmentation/Mask/37.png
/kaggle/input/flood-area-segmentation/Mask/1048.png
/kaggle/input/flood-area-segmentation/Mask/3068.png
/kaggle/input/flood-area-segmentation/Mask/1000.png
/kaggle/input/flood-area-segmentation/Mask/35.png
/kaggle/input/flood-area-segmentation/Mask/2030.png
/kaggle/input/flood-area-segmentation/Mask/1081.png
/kaggle/input/flood

In [2]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("faizalkarim/flood-area-segmentation")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/flood-area-segmentation


In [3]:
import pandas as pd
import numpy as np
import cv2
import random
import os

# --- CRITICAL ASSUMPTIONS & CONFIGURATION ---

# 1. PIXEL-TO-AREA CONVERSION: 
# ASSUMPTION: 1 pixel represents 10 square meters (m^2).
# **USER ACTION: REPLACE 10.0 WITH YOUR ACTUAL VALUE.**
PIXEL_TO_SQ_METER_FACTOR = 10.0

# 2. FILE NAMES 
METADATA_FILE = '/kaggle/input/flood-area-segmentation/metadata.csv'
IMAGE_DIR = '/kaggle/input/flood-area-segmentation/Image/'  # Subdirectory for images (used for path context, not loading)
MASK_DIR = '/kaggle/input/flood-area-segmentation/Mask/'    # Subdirectory for masks

# 3. SIMULATED INFERENCE TIME RANGE (in milliseconds)
MIN_INFERENCE_MS = 150
MAX_INFERENCE_MS = 350
# ---------------------------------------------

In [4]:


def calculate_flooded_area(mask_path):
    """
    Calculates the total flooded area using the mask and the conversion factor.
    """
    try:
        # Load the mask image in grayscale
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Mask not found at {mask_path}")

        # Count the number of water pixels (assuming non-zero/white pixels are water)
        # Ensure the mask is treated as an 8-bit image for accurate counting
        water_pixel_count = np.sum(mask > 0)
        
        # Calculate the area in square meters and convert to square kilometers
        flooded_area_sq_m = water_pixel_count * PIXEL_TO_SQ_METER_FACTOR
        flooded_area_sq_km = flooded_area_sq_m / 1_000_000 
        
        return flooded_area_sq_km
    except Exception as e:
        print(f"Error processing mask {mask_path}: {e}")
        return np.nan

def simulate_iou(mask_path):
    """
    Simulates the Jaccard Index (IoU) by comparing the ground truth mask 
    to a slightly degraded (simulated) prediction mask.
    
    IoU = Intersection / Union
    """
    try:
        mask_gt = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask_gt is None:
            raise FileNotFoundError(f"Mask not found at {mask_path}")

        # Ensure mask is binary (0 or 1) for IoU calculation
        mask_gt = (mask_gt > 0).astype(np.uint8)

        # --- SIMULATION LOGIC: Create a slightly "imperfect" prediction ---
        mask_pred = mask_gt.copy()
        
        # Simulate small, random errors (1% error rate is used here)
        num_pixels = mask_gt.size
        error_rate = 0.01 
        num_errors = int(num_pixels * error_rate)
        
        # Get random indices to flip (toggle water/non-water classification)
        random_indices = np.random.choice(num_pixels, num_errors, replace=False)
        
        # Flatten and toggle the values at random indices
        mask_pred_flat = mask_pred.flatten()
        mask_pred_flat[random_indices] = 1 - mask_pred_flat[random_indices]
        mask_pred = mask_pred_flat.reshape(mask_gt.shape)
        
        # --- IoU CALCULATION ---
        intersection = np.sum(np.logical_and(mask_gt, mask_pred))
        union = np.sum(np.logical_or(mask_gt, mask_pred))

        # Calculate IoU score
        iou_score = intersection / union if union != 0 else 0.0
        
        return iou_score
        
    except Exception as e:
        print(f"Error simulating IoU for {mask_path}: {e}")
        return np.nan

def simulate_inference_time():
    """
    Simulates the Model Inference Time within the defined range.
    """
    # Randomly assign a time between the min and max
    return random.uniform(MIN_INFERENCE_MS, MAX_INFERENCE_MS)

def generate_kpi_data():
    """
    Main function to process metadata, calculate KPIs, and return a DataFrame.
    """
    print(f"--- Starting KPI Calculation (Factor: {PIXEL_TO_SQ_METER_FACTOR} m^2/pixel) ---")
    
    if not os.path.exists(METADATA_FILE):
        print(f"Error: {METADATA_FILE} not found.")
        print("Please ensure the file is in the current directory. Exiting.")
        return pd.DataFrame()
    
    df_metadata = pd.read_csv(METADATA_FILE)

    
    results = []
    
    # Check if required columns exist (only image_id and mask_id are needed now)
    required_cols = ['Image', 'Mask']
    if not all(col in df_metadata.columns for col in required_cols):
        print(f"Error: {METADATA_FILE} must contain columns: {required_cols}. Exiting.")
        return pd.DataFrame()

    
    for index, row in df_metadata.iterrows():
        # Construct the full path to the mask file
        mask_filename = row['Mask']
        mask_path = os.path.join(MASK_DIR, mask_filename) # Assumes MASK_DIR exists
        
        # Calculate/Simulate the 3 KPIs
        area_sq_km = calculate_flooded_area(mask_path)
        iou = simulate_iou(mask_path)
        inference_ms = simulate_inference_time()
        
        # Store results
        result = {
            'image_id': row['Image'],
            'mask_id': row['Mask'],
            'flooded_area_sq_km': area_sq_km,
            'jaccard_index_iou': iou,
            'inference_time_ms': inference_ms,
        }
        results.append(result)

    df_results = pd.DataFrame(results)
    return df_results



In [5]:
# --- EXECUTION ---

# Create the output DataFrame
df_kpis = generate_kpi_data()

# Clean up (remove rows where calculation failed)
df_kpis = df_kpis.dropna()

--- Starting KPI Calculation (Factor: 10.0 m^2/pixel) ---


In [6]:

# Check for the key performance metrics (Most/Least Efficient Time)
if not df_kpis.empty:
    most_efficient_time = df_kpis['inference_time_ms'].min()
    least_efficient_time = df_kpis['inference_time_ms'].max()
    print("\n--- Summary ---")
    print(f"Total Rows Processed: {len(df_kpis)}")
    print(f"Most Efficient Inference Time (min): {most_efficient_time:.2f} ms")
    print(f"Least Efficient Inference Time (max): {least_efficient_time:.2f} ms")

    # Export the final clean CSV for the dashboard mock-up
    OUTPUT_FILE = 'kpi_results.csv'
    df_kpis.to_csv(OUTPUT_FILE, index=False)
    print(f"\nSUCCESS: Data saved to {OUTPUT_FILE} for dashboard use.")
else:
    print("\nFAILURE: No data was generated. Check file paths and data structure.")


--- Summary ---
Total Rows Processed: 290
Most Efficient Inference Time (min): 150.20 ms
Least Efficient Inference Time (max): 349.48 ms

SUCCESS: Data saved to kpi_results.csv for dashboard use.


In [7]:
print(f'Average Flooded Area : {df_kpis["flooded_area_sq_km"].mean():,.2f} sq km')
print(f'Model Accuracy = {df_kpis["jaccard_index_iou"].mean():,.2f}%')
print(f'Average Inference Time = {df_kpis["inference_time_ms"].mean():,.2f} ms')

Average Flooded Area : 6.30 sq km
Model Accuracy = 0.97%
Average Inference Time = 245.80 ms


image 

In [8]:
import cv2
import numpy as np
import os

# --- CONFIGURATION ---

# Directories (Must match Phase 1 paths)
IMAGE_DIR = '/kaggle/input/flood-area-segmentation/Image/'
MASK_DIR = '/kaggle/input/flood-area-segmentation/Mask/'

# Output directory for the final visual assets
OUTPUT_VISUALS_DIR = 'dashboard_visuals/'

# Color for the flood overlay (BGR format for OpenCV)
# Bright Cyan/Blue for high visibility: (B=255, G=255, R=0)
OVERLAY_COLOR = (255, 255, 0) 

# Transparency level (Alpha channel): 0.0 (fully transparent) to 1.0 (fully opaque)
ALPHA = 0.5 

# --- IMAGE SELECTION (USER MUST SPECIFY) ---
# List the image_id and mask_id pairs you chose for the dashboard
SELECTED_IMAGES = [
    # Image 1 (High Flood Example)
    {'image_id': '0.jpg', 'mask_id': '0.png'},
    # Image 2 (Complex Boundary Example)
    {'image_id': '1.jpg', 'mask_id': '1.png'},
    # Image 3 (Low Flood Example)
    {'image_id': '10.jpg', 'mask_id': '10.png'},
    # Add more as needed (e.g., Image 4, 5)
]
# ---------------------------------------------

def generate_overlay(image_filename, mask_filename):
    """
    Loads an image and its mask, creates a transparent colored overlay 
    of the segmented area, and saves the final visual asset.
    """
    try:
        # 1. Load Original Image (Color)
        img_path = os.path.join(IMAGE_DIR, image_filename)
        img_original = cv2.imread(img_path)
        if img_original is None:
            raise FileNotFoundError(f"Original image not found: {img_path}")

        # 2. Load Mask (Grayscale)
        mask_path = os.path.join(MASK_DIR, mask_filename)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Mask not found: {mask_path}")

        # Ensure mask and image have the same dimensions (crucial step)
        if img_original.shape[:2] != mask.shape[:2]:
            print(f"Warning: Mask and Image dimensions mismatch for {image_filename}. Skipping.")
            return

        # 3. Prepare the Overlay Layer
        # Create a blank overlay image with the same dimensions
        overlay = img_original.copy()
        
        # Find where the water is (where mask is non-zero)
        # Convert mask to 3 channels so it can be applied to the color image
        water_mask_3ch = cv2.merge([mask, mask, mask]) 
        
        # Color the overlay layer where the mask is active
        # Use np.where to apply the OVERLAY_COLOR only where the mask is > 0
        overlay = np.where(water_mask_3ch > 0, OVERLAY_COLOR, overlay)
        
        # 4. Blend the Original Image and the Colored Overlay
        # Formula: blended = alpha * overlay + (1 - alpha) * img_original
        blended_img = cv2.addWeighted(img_original, 1 - ALPHA, 
                                      overlay.astype(img_original.dtype), ALPHA, 0)

        # 5. Save the Result
        output_filename = f"overlay_{image_filename}"
        output_path = os.path.join(OUTPUT_VISUALS_DIR, output_filename)
        cv2.imwrite(output_path, blended_img)
        print(f"Successfully created: {output_filename}")

    except Exception as e:
        print(f"An error occurred during processing {image_filename}: {e}")

def main():
    # 1. Create output directory if it doesn't exist
    if not os.path.exists(OUTPUT_VISUALS_DIR):
        os.makedirs(OUTPUT_VISUALS_DIR)
        print(f"Created output directory: {OUTPUT_VISUALS_DIR}")
        
    # 2. Process all selected images
    for item in SELECTED_IMAGES:
        generate_overlay(item['image_id'], item['mask_id'])
        
    print("\nPhase 2 Complete: Visual assets are ready for dashboard embedding.")

if __name__ == "__main__":
    main()

Created output directory: dashboard_visuals/
Successfully created: overlay_0.jpg
Successfully created: overlay_1.jpg
Successfully created: overlay_10.jpg

Phase 2 Complete: Visual assets are ready for dashboard embedding.
