In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

from PIL import Image

import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Create Submission

From eval instructions:

> Your output should be a single file, submission.csv, with this run-length encoded information. This should have a header with two columns, Id and Predicted, and with one row for every directory under test/.


In [None]:
def rle(img, threshold=0.5):
    # TODO: Histogram of image to see where threshold should be
    flat_img = img.flatten()
    flat_img = np.where(flat_img > threshold, 1, 0).astype(np.uint8)
    starts = np.array((flat_img[:-1] == 0) & (flat_img[1:] == 1))
    ends = np.array((flat_img[:-1] == 1) & (flat_img[1:] == 0))
    starts_ix = np.where(starts)[0] + 2
    ends_ix = np.where(ends)[0] + 2
    lengths = ends_ix - starts_ix
    return starts_ix, lengths


def write_to_submission(
    test_dir: str = "data/test",
    image_label_filename: str = "mask.png",
    submission_filepath: str = "submission.csv",
):
    assert os.path.exists(test_dir), f"File {test_dir} does not exist"
    # Create submission file if it does not exist
    if not os.path.exists(submission_filepath):
        with open(submission_filepath, 'w') as f:
            # Write header
            f.write("Id,Predicted\n")
    # Walk through the test directory
    for subtest_name in os.listdir(test_dir):
        log.info(f"Writing submission for {subtest_name}")
        # Name of sub-directory inside test dir
        subtest_filepath = os.path.join(test_dir, subtest_name)
        # Get mask image path inside directory
        image_label_filepath = os.path.join(
            subtest_filepath, image_label_filename)
        assert os.path.exists(
            image_label_filepath), f"File {image_label_filepath} does not exist"
        inklabels = np.array(Image.open(image_label_filepath), dtype=np.uint8)
        starts_ix, lengths = rle(inklabels)
        inklabels_rle = " ".join(map(str, sum(zip(starts_ix, lengths), ())))
        with open(submission_filepath, 'a') as f:
            f.write(f"{subtest_name},{inklabels_rle}\n")

In [None]:
TEST_DIR = '/home/tren/dev/ashenvenus/data/test/'
# TEST_DIR = '/kaggle/input/vesuvius-challenge-ink-detection/test'

# Baseline is to use image mask to create guess submission
for subtest_name in os.listdir(TEST_DIR):
    # Name of sub-directory inside test dir
    subtest_filepath = os.path.join(TEST_DIR, subtest_name)
    # Get mask image path inside directory
    image_label_filepath = os.path.join(subtest_filepath, 'mask.png')
    assert os.path.exists(image_label_filepath), f"File {image_label_filepath} does not exist"
    mask = np.array(Image.open(image_label_filepath).convert('L'))
    # Binarize the mask by setting all non-zero pixels to 1
    mask = np.where(mask > 0, 1, 0)

    # Create a prediction image of the same size
    pred = np.zeros(mask.shape)

    # Generate a uniform distribution over 0 and 1 of the same size as the mask
    uniform_dist = np.random.rand(*mask.shape)

    # Multiply the uniform distribution by the mask
    pred = np.multiply(uniform_dist, mask)

    # Convert the prediction to an 8-bit grayscale image with values ranging from 0 to 255
    pred_8bit = (pred * 255).astype(np.uint8)

    # Save prediction image
    pred_img = Image.fromarray(pred_8bit, mode='L')
    pred_img.save(os.path.join(subtest_filepath, 'pred.png'))
        
write_to_submission(
    test_dir = TEST_DIR,
    image_label_filename = "pred.png",
)