**About** : This notebook downscales the images of the train and test sets in order to speed up training and inference.
  - Use the `FACTOR` parameter to specify the downscaling factor. We recommend generating data for downscaling factors 2 and 4.
  - For training data, we save extra time by also computing the downscaled RLEs. Use the `NAME` parameter to specify which RLE file to downscale.
  - The downscaled images only need to be saved once; use the `SAVE_IMG` parameter to control this.

In [14]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Imports

In [1]:
import os
import gc
import cv2
import sys
import tifffile
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from collections import Counter
from matplotlib import pyplot as plt

sys.path.append("../code/")

In [2]:
from data.dataset import load_image
from utils.rle import *
from params import *

In [3]:
# Integer downscaling factor: image and mask sizes are floor-divided by it,
# and it is embedded in the output folder / CSV names.
FACTOR = 2

### Train

In [40]:
# Folder that receives the downscaled training images.
out_dir = DATA_PATH + f"train_{FACTOR}/"
# makedirs(exist_ok=True) replaces the check-then-mkdir pair: it is safe to
# re-run and also creates missing parent folders.
os.makedirs(out_dir, exist_ok=True)

In [41]:
# Suffix selecting which training CSV gets downscaled.
NAME = "_onlyfc"  # unhealthy class
# NAME = "_fix"  # healthy class with fixed issues
# NAME = ""  # original data

# When True, the loop below also writes the downscaled images to disk.
SAVE_IMG = False

# Load the selected encodings and key them by image id.
train_csv = DATA_PATH + "train" + NAME + ".csv"
df_masks = pd.read_csv(train_csv).set_index("id")

In [42]:
# Display the loaded encodings (indexed by image id) as a sanity check.
df_masks

Unnamed: 0_level_0,encoding
id,Unnamed: 1_level_1
2f6ecfcdf,296084587 4 296115835 6 296115859 14 296147109...
8242609fa,96909968 56 96941265 60 96972563 64 97003861 6...
aaa6a05cc,30989109 59 31007591 64 31026074 68 31044556 7...
cb2d976f4,78144363 5 78179297 15 78214231 25 78249165 35...
b9a3865fc,61271840 4 61303134 13 61334428 22 61365722 30...
b2dc8411c,56157731 21 56172571 45 56187411 51 56202252 5...
0486052bb,101676003 6 101701785 8 101727568 9 101753351 ...
e79de561c,7334642 14 7350821 41 7367001 67 7383180 82 73...
095bf7a1f,113277795 21 113315936 53 113354083 87 1133922...
54f2eec69,124967057 36 124997425 109 125027828 147 12505...


In [43]:
# Downscale every training mask (and, when SAVE_IMG is set, the image too)
# by FACTOR and collect the re-encoded masks, keyed by image id.
masks = {}

for index, encs in tqdm(df_masks.iterrows(), total=len(df_masks)):
    # Load the full-resolution image; its shape defines the mask canvas.
    img = load_image(os.path.join(TIFF_PATH, index + ".tiff"))
    height, width = img.shape[:2]
    # enc2mask receives the size as (shape[1], shape[0]).
    mask = enc2mask(encs, (width, height))

    if SAVE_IMG:
        img = cv2.resize(
            img,
            (width // FACTOR, height // FACTOR),
            interpolation=cv2.INTER_AREA,
        )
        # imwrite is the current tifffile writer; imsave is its deprecated alias.
        tifffile.imwrite(out_dir + f"{index}.tiff", img)

    # Nearest-neighbour interpolation so mask values are never blended.
    mask = cv2.resize(
        mask,
        (mask.shape[1] // FACTOR, mask.shape[0] // FACTOR),
        interpolation=cv2.INTER_NEAREST,
    )

    rle = mask2enc(mask)
    masks[index] = rle

  0%|          | 0/15 [00:00<?, ?it/s]

In [44]:
# Assemble the downscaled encodings into a frame with "id" / "encoding" columns.
# A separate name is used so the frame loaded from the source CSV is not
# overwritten, which keeps the downscaling loop re-runnable.
df_masks_down = (
    pd.DataFrame.from_dict(masks)
    .T.reset_index()
    .rename(columns={0: "encoding", "index": "id"})
)

# Build the output path once so the written file and the message cannot diverge.
out_csv = f"{DATA_PATH}train_{FACTOR}{NAME}.csv"
df_masks_down.to_csv(out_csv, index=False)

print(f"Saved data to {out_csv}")

Saved data to ../input/train_2_fix.csv


### Test

In [49]:
# Folder that receives the downscaled test images.
out_dir = DATA_PATH + f"test_{FACTOR}/"

# makedirs(exist_ok=True) replaces the check-then-mkdir pair: it is safe to
# re-run and also creates missing parent folders.
os.makedirs(out_dir, exist_ok=True)

In [50]:
# The "id" column of the sample submission supplies the test image ids used below.
df = pd.read_csv(DATA_PATH + "sample_submission.csv")

In [51]:
# Downscale every test image by FACTOR and write it to out_dir.
for index in tqdm(df['id']):
    # Load the full-resolution test image (no mask is built for test data).
    img = load_image(os.path.join(TIFF_PATH_TEST, index + ".tiff"))

    img = cv2.resize(
        img,
        (img.shape[1] // FACTOR, img.shape[0] // FACTOR),
        interpolation=cv2.INTER_AREA,
    )

    # imwrite is the current tifffile writer; imsave is its deprecated alias.
    tifffile.imwrite(out_dir + f"{index}.tiff", img)

  0%|          | 0/5 [00:00<?, ?it/s]

In [52]:
# Same load-and-resize pass for the ids listed in EXTRA_IMGS.
for index in tqdm(EXTRA_IMGS):
    tiff_path = os.path.join(TIFF_PATH_TEST, index + ".tiff")
    img = load_image(tiff_path)

    full_h, full_w = img.shape[0], img.shape[1]
    target_size = (full_w // FACTOR, full_h // FACTOR)
    img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)

    # NOTE(review): the write below is disabled, so this loop currently keeps
    # nothing on disk; confirm whether that is intentional.
    # tifffile.imsave(out_dir + f"{index}.tiff", img)

  0%|          | 0/2 [00:00<?, ?it/s]