# Do I need to use `cv2.cvtColor(..., cv2.COLOR_BGR2RGB)`?
* __Reason:__
    * the 3rd place solution's code uses it:
    ```
    def preprocess_image(image_names, run_root=DATA_ROOT, out_root=OUTPUT_DIR, size=SIZE):
        for i in tqdm(range(len(image_names))):
            image_name = image_names[i]
            path = run_root+image_name
            img = cv2.imread(path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            H, W, C = img.shape
            new_H = int(SIZE)
            new_W = int(W/H*SIZE)
            img = cv2.resize(img, (new_W, new_H))
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            cv2.imwrite(OUTPUT_DIR + image_name, img)
    ```
    * Mine looks like this:
    ```
    def convert_images(filename, arch_out, file_type, out_shape=(640, 320)):
    """
    Reads an image and converts it to a desired file format
    """
    img = np.array(cv2.imread(filename))

    img = cv2.resize(img, out_shape)
    output = cv2.imencode(file_type, img)[1]
    name = f"{Path(filename).stem}{file_type}"
    arch_out.writestr(name, output)
    ```

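The experiment here tests whether there is any tangible difference between the output of my pipeline and that of the 3rd place one. In both cases the images are:
* resized to (384, 576) as (height, width); `cv2.resize` takes its size as (width, height), hence (576, 384) in the code
* meant to be saved as .jpg files in `dset_dir/output` (the functions below only return the resized arrays; nothing is written to disk in this comparison)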

In [4]:
import os
import cv2

# Directory the example image is read from (machine-specific absolute path).
dset_dir = r"C:\Users\jchen\Desktop\Datasets\Understanding Clouds"
# File name of the single example image, passed to both pipelines as `img_name`.
ex_img_fname = "test_image.jpg"

def preprocess_3rd_place(img_name, in_dir, out_dir, resize_size=(576, 384)):
    """Load an image and resize it with the 3rd place solution's steps.

    The image is converted BGR -> RGB, resized, and converted back
    RGB -> BGR before being returned.

    Args:
        img_name: File name of the image inside `in_dir`.
        in_dir: Directory that contains the image.
        out_dir: Not used by this function; nothing is written to disk.
        resize_size: Target size handed to `cv2.resize`, which expects
            (width, height).

    Returns:
        The resized image array.

    Raises:
        FileNotFoundError: If the image is missing or cannot be decoded.
    """
    img_path = os.path.join(in_dir, img_name)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising,
        # which would otherwise surface as an opaque error in cvtColor.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, resize_size)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    return img
    
def preprocess_mine(img_name, in_dir, out_dir, resize_size=(576, 384)):
    """Load an image and resize it without any colour-space conversion.

    Args:
        img_name: File name of the image inside `in_dir`.
        in_dir: Directory that contains the image.
        out_dir: Not used by this function; nothing is written to disk.
        resize_size: Target size handed to `cv2.resize`, which expects
            (width, height).

    Returns:
        The resized image array, in the channel order `cv2.imread` produced.

    Raises:
        FileNotFoundError: If the image is missing or cannot be decoded.
    """
    img_path = os.path.join(in_dir, img_name)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising,
        # which would otherwise surface as an opaque error in resize.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    return cv2.resize(img, resize_size)

# Keyword arguments shared by both preprocessing functions so that they are
# called on the same image with the same target size.
fn_kwargs = {
    "img_name": ex_img_fname,
    "in_dir": dset_dir,
    # Accepted by both functions but not used by either of them.
    "out_dir": os.path.join(dset_dir, "output"),
    # (width, height) as expected by cv2.resize.
    "resize_size": (576, 384),
}

In [6]:
import numpy as np
# Run the 3rd place pipeline first, then mine, on identical arguments and
# keep each result as its own NumPy array.
img_3rd, img_mine = (
    np.array(pipeline(**fn_kwargs))
    for pipeline in (preprocess_3rd_place, preprocess_mine)
)
# Display both shapes side by side as the cell output.
img_3rd.shape, img_mine.shape

((384, 576, 3), (384, 576, 3))

In [7]:
# True only when both arrays have the same shape and identical elements.
np.array_equal(img_3rd, img_mine)

True

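There is no difference: the two pipelines produce identical arrays for this image, so the BGR → RGB → BGR round trip around the resize has no effect on the result. Maybe resolution is the reason for the difference in performance?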

# Testing Mask Creation (3rd Place v. Mine)

# Comparing 3rd Place Pipeline with Mine