In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/tgs-salt-identification-challenge/depths.csv
/kaggle/input/tgs-salt-identification-challenge/sample_submission.csv
/kaggle/input/tgs-salt-identification-challenge/train.zip
/kaggle/input/tgs-salt-identification-challenge/competition_data.zip
/kaggle/input/tgs-salt-identification-challenge/test.zip
/kaggle/input/tgs-salt-identification-challenge/train.csv
/kaggle/input/tgs-salt-identification-challenge/flamingo.zip


In [2]:
# ===========================================
# Step 8: Prediction and Submission File Generation (Using Overall Best Model)
# ===========================================

# --- Find the Overall Best Model ---
overall_best_iou = -1.0
best_config_name = None
for config_name, metrics in best_model_metrics.items():
    if metrics['best_val_iou'] > overall_best_iou:
        overall_best_iou = metrics['best_val_iou']
        best_config_name = config_name

print(f"\n--- Overall Best Model based on Validation IoU: {best_config_name} (IoU: {overall_best_iou:.4f}) ---")

# --- Run-Length Encoding Function ---
def rle_encode(img):
    pixels = img.flatten(order='F')
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

# --- Prediction Function ---
def predict_test(model, test_loader, device, threshold=0.5):
    model.eval()
    predictions = {}
    test_pbar = tqdm(test_loader, desc="Predicting", leave=False)
    with torch.no_grad():
        for images, image_ids in test_pbar:
            images = images.to(device)
            outputs = model(images)
            probs = torch.sigmoid(outputs)
            probs_resized = F.interpolate(probs, size=(101, 101), mode='bilinear', align_corners=False)
            preds_binary = (probs_resized > threshold).cpu().numpy().astype(np.uint8)
            for i, img_id in enumerate(image_ids):
                 pred_mask = preds_binary[i].squeeze()
                 predictions[img_id] = pred_mask
    return predictions

# --- Load the Overall Best Model ---
# Re-instantiate the model architecture for the best config
best_backbone = best_config_name.split('_')[0]
best_model = UNet(backbone_name=best_backbone, pretrained=False).to(DEVICE) # No need for pretrained weights now

best_model_path = f"models/best_model_{best_config_name}.pth"
if os.path.exists(best_model_path):
    print(f"Loading overall best model from: {best_model_path}")
    best_model.load_state_dict(torch.load(best_model_path, map_location=DEVICE))
else:
    print(f"Error: Best model path not found ({best_model_path}). Cannot generate submission.")
    # Handle error appropriately, maybe exit or use a default model if available

# --- Perform Prediction with Best Model ---
if os.path.exists(best_model_path):
    test_predictions = predict_test(best_model, test_loader, DEVICE, threshold=0.5) # Adjust threshold if needed

    # --- Generate Submission File ---
    submission_data = []
    for img_id in tqdm(test_ids, desc="Encoding"): # Use sorted test_ids
        if img_id in test_predictions:
            rle = rle_encode(test_predictions[img_id])
        else:
            rle = ''
            print(f"Warning: Prediction missing for image ID: {img_id}")
        submission_data.append({'id': img_id, 'rle_mask': rle})

    submission_df = pd.DataFrame(submission_data)
    submission_df.to_csv('submission.csv', index=False)

    print("\n--- Submission File Generated: submission.csv ---")
    print(submission_df.head())
else:
    print("Submission file not generated due to missing best model.")

NameError: name 'best_model_metrics' is not defined

In [3]:
print("\nZipping output files...")
if os.path.exists("models"):
    !zip -rq models.zip /kaggle/working/models # Use -q for quiet, -r for recursive
    print("models.zip created.")
if os.path.exists("results"):
    !zip -rq results.zip /kaggle/working/results
    print("results.zip created.")

print("\n--- Script Finished ---")


Zipping output files...

--- Script Finished ---
