In [None]:
try:
    from google.colab import drive
    drive.mount("/content/drive")
    %cd /content/drive/MyDrive/Colab\ Notebooks/kaggle
    from setup_colab import setup_colab_for_kaggle
    setup_colab_for_kaggle(check_env=False, local_working=True)
except:
    print("Not in Colab")

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks/kaggle
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Content of Drive Kaggle data dir (/content/drive/MyDrive/kaggle): ['/content/drive/MyDrive/kaggle/input', '/content/drive/MyDrive/kaggle/working', '/content/drive/MyDrive/kaggle/.ipynb_checkpoints', '/content/drive/MyDrive/kaggle/output']
Content of Kaggle data dir (/kaggle): ['/kaggle/input', '/kaggle/output', '/kaggle/working']
Content of Kaggle data subdir (/kaggle/input): ['/kaggle/input/cassava-model', '/kaggle/input/cassava-leaf-disease-classification', '/kaggle/input/googlebitemperedloss', '/kaggle/input/vbdyolo', '/kaggle/input/.ipynb_checkpoints', '/kaggle/input/vinbigdata', '/kaggle/input/vinbigdata-chest-xray-abnormalities-detection']
Content of Kaggle data subdir (/kaggle/output): ['/kaggle/output/vbdyolo_out_1_300epochs', '/kaggle/output/vbdyolo_out', '/kaggle/output/.ipynb_c

In [3]:
from pathlib import Path

import pandas as pd
import numpy as np
from tqdm import tqdm

# Dataset folders under /kaggle/input.
INPUT_FOLDER = Path("/kaggle/input") / "vinbigdata-chest-xray-abnormalities-detection"
# test_meta.csv is read from this folder further down.
INPUT_FOLDER_ORIGINAL_PNG = Path("/kaggle/input") / "vinbigdata-chest-xray-original-png"

# YOLO output folder; the per-image files in its `labels_pred` subfolder are read further down.
INPUT_FOLDER_YOLO_OUT = Path("/kaggle/output") / "vbdyolo_out"

# submission.csv is written to this folder.
WORK_FOLDER = Path("/kaggle") / "working"

In [23]:
def read_prediction_labels(filename: Path, image_w: int, image_h: int) -> str:
    """Convert one YOLO prediction file into a single prediction string.

    Every line of the file must hold six space-separated values, in the order
    ``class_id x_centre y_centre bw bh conf``. The box is given as centre and
    size; the values are multiplied by the image size, i.e. they are treated as
    fractions of the image width/height.

    Args:
        filename: Path of the per-image prediction text file.
        image_w: Image width in pixels (scales the x coordinates).
        image_h: Image height in pixels (scales the y coordinates).

    Returns:
        All rows joined with single spaces, each row written as
        ``class_id conf x_min y_min x_max y_max`` with integer pixel coordinates
        clipped to the image. If the file does not exist or contains no rows,
        the placeholder ``"14 1 0 0 1 1"`` is returned.

    Raises:
        ValueError: If the file does not have exactly six columns.
    """
    # Placeholder used when there are no boxes for the image.
    # NOTE(review): presumably the competition's "no finding" entry - confirm.
    no_prediction = "14 1 0 0 1 1"

    if not filename.exists():
        return no_prediction

    try:
        labels: pd.DataFrame = pd.read_csv(filename, delimiter=" ", header=None)
    except pd.errors.EmptyDataError:
        # The file exists but is empty: treat it like a missing file instead of crashing.
        return no_prediction
    labels.columns = ["class_id", "x_centre", "y_centre", "bw", "bh", "conf"]

    def to_pixels(fraction: pd.Series, size: int) -> pd.Series:
        # Scale to pixels, round to the nearest integer and keep within [0, size - 1].
        return (fraction * size).round().astype(np.int32).clip(0, size - 1)

    half_w = labels["bw"] / 2
    half_h = labels["bh"] / 2

    # Convert (x_centre, y_centre, bw, bh) to (x_min, y_min, x_max, y_max). The columns are
    # created in output order, with conf second, so no reordering is needed afterwards.
    boxes = pd.DataFrame(
        {
            "class_id": labels["class_id"],
            "conf": labels["conf"],
            "x_min": to_pixels(labels["x_centre"] - half_w, image_w),
            "y_min": to_pixels(labels["y_centre"] - half_h, image_h),
            "x_max": to_pixels(labels["x_centre"] + half_w, image_w),
            "y_max": to_pixels(labels["y_centre"] + half_h, image_h),
        }
    )

    # Render all rows as text and collapse every run of whitespace (including the line
    # breaks between rows) into a single space.
    return " ".join(boxes.to_string(header=False, index=False).split())

In [28]:
from IPython.display import clear_output, display

# Map each test image_id to its metadata row, e.g. {"dim0": ..., "dim1": ...}.
test_metadata = pd.read_csv(INPUT_FOLDER_ORIGINAL_PNG / "test_meta.csv")
test_metadata = test_metadata.set_index("image_id").to_dict("index")

# NOTE(review): dim0 is passed as the image width and dim1 as the image height. If
# test_meta.csv stores the dimensions in array-shape order (rows, cols), these two are
# swapped - confirm against how test_meta.csv was generated.
# Rows are collected in a list and the frame is built once: DataFrame.append in a loop
# copies the whole frame on every iteration and no longer exists in pandas >= 2.0.
submission_rows = []
for image_id, image_dims in tqdm(test_metadata.items(), total=len(test_metadata)):
    prediction_str = read_prediction_labels(
        INPUT_FOLDER_YOLO_OUT / "labels_pred" / f"{image_id}.txt", image_dims["dim0"], image_dims["dim1"]
    )
    submission_rows.append({"image_id": image_id, "PredictionString": prediction_str})

# `columns=` keeps both columns (and their order) even when there are no rows.
results_df = pd.DataFrame(submission_rows, columns=["image_id", "PredictionString"])

# Remove the progress bar output before showing the result.
clear_output()

results_df.to_csv(WORK_FOLDER / "submission.csv", index=False)
# Cap the sample size so that small result sets do not raise in .sample().
display(results_df.sample(min(10, len(results_df))))

Unnamed: 0,image_id,PredictionString
1492,82cb7ac2b8ad46f2be437a640d6744f5,0 0.787598 1313 858 1795 1309
1309,735ea124180d108f676db6adb5b1ba98,3 0.914551 1302 1500 2592 2178
2669,e413f0b40162af48d87bece11e6812b4,14 1 0 0 1 1
2181,bb5ffb34f5baa01ec1e47df249046609,0 0.77002 1425 825 1706 1102
166,0e580dda54b8d1df7e3a888c41e69704,14 1 0 0 1 1
1559,880331cec567a33bdfa422b8cae7f77c,5 0.685059 1441 480 2130 1829 3 0.802246 987 1...
1356,777e5578741dea8a02a689fe907a4215,0 0.658691 1272 669 1679 1070 3 0.665039 902 1...
174,0eca5bbff12b841c18d231fc8cda0eeb,14 1 0 0 1 1
729,42a70465f4b5859407233bb00aa64e7a,14 1 0 0 1 1
1130,6581602a6339fe446da34c3bbc628dc8,0 0.387451 1264 585 1532 902


In [29]:
# Create the destination folder; -p makes this a no-op when it already exists.
!mkdir -p /kaggle/output/vbdsubmit
# Copy the submission.csv written to WORK_FOLDER into that folder
# ({WORK_FOLDER} is interpolated from the Python variable by IPython).
!cp {WORK_FOLDER}/submission.csv /kaggle/output/vbdsubmit