In [1]:
!pip install tqdm



# Download Kaggle dataset

In [2]:
!git clone https://github.com/SIH-22-Kyogre/EyeSea_Image-Preprocessing-Algorithms.git


Cloning into 'EyeSea_Image-Preprocessing-Algorithms'...
remote: Enumerating objects: 960, done.[K
remote: Counting objects: 100% (960/960), done.[K
remote: Compressing objects: 100% (568/568), done.[K
remote: Total 960 (delta 535), reused 776 (delta 375), pack-reused 0 (from 0)[K
Receiving objects: 100% (960/960), 9.51 MiB | 9.51 MiB/s, done.
Resolving deltas: 100% (535/535), done.


In [3]:
import os
import json
import pathlib
import httpx
from tqdm.notebook import tqdm
from threading import Thread
from queue import Queue

In [4]:
# Mount Google Drive at /content/drive; the dataset paths used in this
# notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# JSON files for the train and eval splits; both sit in the same Drive folder.
train_json, eval_json = (
    pathlib.Path('/content/drive/MyDrive/fathomnet-out-of-sample-detection/object_detection', split_file)
    for split_file in ('train.json', 'eval.json')
)

In [6]:
# Parse both split files into plain Python objects (the files are read in
# text mode with the default encoding and closed immediately afterwards).
train_data = json.loads(train_json.read_text())
eval_data = json.loads(eval_json.read_text())

In [7]:
from threading import Thread
from queue import Queue


def download_image(url, path, max_retries=5, backoff_seconds=1.0):
    """Download ``url`` and write the response body to ``path``.

    Failed attempts are retried a bounded number of times rather than forever.
    Only ``httpx.HTTPError`` (transport errors and non-2xx statuses) is treated
    as retryable, so ``KeyboardInterrupt`` and programming errors propagate
    instead of being swallowed. Because the status code is checked, an HTTP
    error page is never saved to disk as if it were an image.

    Args:
        url: Address of the image to fetch.
        path: Destination file; overwritten if it already exists.
        max_retries: Maximum number of attempts (at least one is always made).
        backoff_seconds: Base delay between attempts, multiplied by the
            attempt number.

    Returns:
        True if the file was written, False if every attempt failed. Failure
        is reported with a printed message rather than an exception so that a
        single bad URL does not abort a bulk download.
    """
    import time  # local import: only needed for the retry back-off

    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            # Follow redirects so that raise_for_status() only sees the final response.
            r = httpx.get(url, follow_redirects=True)
            r.raise_for_status()
        except httpx.HTTPError as exc:
            if attempt == attempts:
                print(f"Giving up on {url} after {attempts} attempts: {exc}")
                return False
            time.sleep(backoff_seconds * attempt)
        else:
            break

    with open(path, 'wb') as f:
        f.write(r.content)
    return True


def download_images(images, path, num_workers=32):
    """Download every image record into ``path`` using worker threads.

    Args:
        images: Iterable of dicts, each providing ``'coco_url'`` (source URL)
            and ``'file_name'`` (name of the file to create inside ``path``).
        path: Destination directory; created together with missing parents.
        num_workers: Number of downloader threads to start (at least one).

    Blocks until every queued record has been processed. A record whose
    download raises is reported with a printed message and skipped, so one
    failure cannot leave the call waiting forever.
    """
    from queue import Empty  # local import: the notebook only imports Queue

    pathlib.Path(path).mkdir(parents=True, exist_ok=True)
    q = Queue()
    for image in images:
        q.put(image)

    def worker():
        while True:
            # get_nowait() avoids the race of "check empty(), then get()":
            # another thread may take the last item between the two calls,
            # which would leave this thread blocked in get() forever.
            try:
                image = q.get_nowait()
            except Empty:
                return
            try:
                download_image(image['coco_url'], os.path.join(path, image['file_name']))
            except Exception as exc:
                print(f"Failed to download {image!r}: {exc}")
            finally:
                # Always acknowledge the item, otherwise q.join() never returns.
                q.task_done()

    for _ in range(max(1, num_workers)):
        Thread(target=worker, daemon=True).start()
    q.join()

In [None]:
# Fetch every image listed under train_data['images'] into the Drive folder below.
# NOTE(review): later cells read the unprocessed images from
# .../preprocessed/original/ - confirm this destination is the intended one.
download_images(train_data['images'], '/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train')


# Convert RGBA to RGB

The processing algorithm only works with RGB images, so any RGBA image has to be converted to RGB first.

In [None]:
from PIL import Image
import os

def convert_rgba_to_rgb(data_path, verbose=True):
    """Rewrite every RGBA image found under ``data_path`` as RGB, in place.

    The directory tree is walked recursively. Files are selected by extension
    (case-insensitively) and only those whose PIL mode is ``'RGBA'`` are
    converted and saved back to the same path; all other files are untouched.
    A file that cannot be processed is reported and skipped.

    Args:
        data_path: Root directory to scan.
        verbose: When True, print one line per converted file.

    Returns:
        The number of files that were converted.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    converted = 0
    for root, _, files in os.walk(data_path):
        for file in files:
            # Lower-case the name so files such as "IMG.PNG" are not skipped.
            if not file.lower().endswith(image_extensions):
                continue
            file_path = os.path.join(root, file)
            try:
                with Image.open(file_path) as img:
                    if img.mode != 'RGBA':
                        continue
                    if verbose:
                        print(f"Converting {file} from RGBA to RGB...")
                    rgb_img = img.convert('RGB')
                # Overwrite only after the source handle has been closed.
                rgb_img.save(file_path)
                converted += 1
            except Exception as e:
                print(f"Error processing {file}: {e}")
    return converted

convert_rgba_to_rgb('/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train/')


Converting 286d6058-1cef-4ea2-8c74-48fbb4e92da9.png from RGBA to RGB...
Converting 30be6c64-1f81-44ea-ad16-6ed5e35098a3.png from RGBA to RGB...
Converting fe44ec61-2552-4cca-9699-b0cbe4980d3d.png from RGBA to RGB...
Converting 1abec2b7-88c7-4fb1-a0f8-3ce68d0d6ec5.png from RGBA to RGB...
Converting ae1f1804-5cbd-45b8-b612-c90f2b2f59ee.png from RGBA to RGB...
Converting 024cba58-2d78-4026-bd01-0b04069c65a5.png from RGBA to RGB...
Converting 7a6d1aa3-96d9-442e-8bb8-354b3975b9d2.png from RGBA to RGB...
Converting 0f8a9ac8-79ee-4b98-9662-149dfaa977d0.png from RGBA to RGB...
Converting 0fa6d0b4-78e2-42b2-b832-52d6edf77d6a.png from RGBA to RGB...
Converting 149210bd-2127-4de3-a2ad-698df8c9179f.png from RGBA to RGB...
Converting ca0269ae-070e-4be4-8022-034a25b69395.png from RGBA to RGB...
Converting b47f3d66-2e98-4a53-9ebd-a9e1c8a4c2b3.png from RGBA to RGB...
Converting c53d6ca9-9427-44c2-94b3-1049142d9113.png from RGBA to RGB...
Converting 63aba599-b29a-4be5-9a0e-7f189be1f1c5.png from RGBA to

# Install libraries

The requirements file provided in the GitHub repository did not work on my machine, so I installed the necessary libraries manually.

In [8]:
!pip install PyWavelets

Collecting PyWavelets
  Downloading pywavelets-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Downloading pywavelets-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/4.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/4.5 MB[0m [31m66.8 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m4.5/4.5 MB[0m [31m76.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m55.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyWavelets
Successfully installed PyWavelets-1.8.0


In [9]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118


In [10]:
!pip install scikit-image



In [11]:
!pip install matplotlib



In [12]:
!pip install opencv-python




In [13]:
!pip install click



In [14]:
!git clone https://github.com/felipeinagaki/UnderWaterPreprocessing.git

Cloning into 'UnderWaterPreprocessing'...
remote: Enumerating objects: 97, done.[K
remote: Total 97 (delta 0), reused 0 (delta 0), pack-reused 97 (from 1)[K
Receiving objects: 100% (97/97), 55.87 MiB | 15.60 MiB/s, done.
Resolving deltas: 100% (37/37), done.


In [15]:
%cd UnderWaterPreprocessing

/content/UnderWaterPreprocessing


In [None]:
!ls

checkpoints	  download_data.ipynb  models		   requirements.txt  train_multi.py
Comparison.ipynb  eval.json	       MS_SSIM_L1_loss.py  results
dataset		  Images	       output.png	   test_multi.py
dataset_nyu.py	  LICENSE	       README.md	   train.json


In [None]:
os.path.exists('checkpoints/model.pth')

True

In the command below, set `--test_size` to the number of images that should be processed; otherwise only 890 images will be processed.

In [16]:
!python test_multi.py train --test_dataset train --data_path /content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/original/ --model_load_path checkpoints/model.pth --test_size 5950

  model.load_state_dict(torch.load(model_load_path))
5950it [3:02:08,  1.84s/it]


In [None]:
import os

def count_images(directory):
    """Return how many entries directly inside ``directory`` look like images.

    An entry counts when its name, compared case-insensitively, ends with one
    of the known image extensions. Sub-directories are not descended into.

    Args:
        directory: The path to the directory to inspect.

    Returns:
        The number of matching entries.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')  # Add more if needed
    return sum(
        1
        for entry in os.listdir(directory)
        if entry.lower().endswith(image_extensions)
    )

# Count the image files sitting directly in the train folder and report the total.
directory_path = '/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train/'  # Replace with your directory path
num_images = count_images(directory_path)
print(f"Number of images in '{directory_path}': {num_images}")

Number of images in '/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train/': 5950


## Change names
If desired, rename the outputs: the cell below re-saves every `*.png_out.jpg` result as a PNG file whose name ends in plain `.png`.

In [None]:
import os
from PIL import Image

input_folder = "results/eval/None/"
output_folder = "results/png/eval"
os.makedirs(output_folder, exist_ok=True)

# Re-save each "<name>.png_out.jpg" file from the input folder as
# "<name>.png" (PNG format) in the output folder.
for filename in os.listdir(input_folder):
    if not filename.endswith(".png_out.jpg"):
        continue  # skip anything without the expected suffix

    source_file = os.path.join(input_folder, filename)
    target_file = os.path.join(output_folder, filename.replace(".png_out.jpg", ".png"))

    try:
        with Image.open(source_file) as img:
            img.save(target_file, "PNG")
    except Exception as e:
        # Report the failure and carry on with the remaining files.
        print(f"Error when processing {filename}: {e}")


In [None]:
# import shutil
# import os

# # Define source and destination directories
# source_dir = '/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train/train/None'
# destination_dir = '/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train'

# # Iterate through files and directories in the source directory
# for item in os.listdir(source_dir):
#     source_path = os.path.join(source_dir, item)
#     destination_path = os.path.join(destination_dir, item)

#     # Move the item to the destination directory
#     shutil.move(source_path, destination_path)

# print(f"Files moved from '{source_dir}' to '{destination_dir}'")

Files moved from '/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train/train/None' to '/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train'


In [None]:
# Re-count the image files in the train folder; relies on count_images()
# having been defined by an earlier cell.
directory_path = '/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train/'  # Replace with your directory path
num_images = count_images(directory_path)
print(f"Number of images in '{directory_path}': {num_images}")

Number of images in '/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train/': 4874


Show specific images to compare

In [None]:
import os
from PIL import Image
import matplotlib.pyplot as plt

original_folder = "/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/original/"
processed_folder = "/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train/"

# File names expected to exist in both folders; one figure row is drawn per name.
image_files = [
    "0a1cca8f-9e74-488d-a92a-eadd0e9d529a.png",
    "0a1ddbaf-3db9-41ef-84a8-d1bba2c8329d.png",
    "0a4e1af0-c332-4211-bf10-246682ebd32d.png",
    "0a5ee259-d0e6-4885-adf9-7a62a57043fb.png",
    "0a6c68cb-28ef-4126-9615-c3bf76f7a44b.png"
]


def _load_image(image_path):
    """Read an image fully into memory and close the underlying file handle."""
    with Image.open(image_path) as opened:
        return opened.copy()


original_images = [_load_image(os.path.join(original_folder, img)) for img in image_files]
processed_images = [_load_image(os.path.join(processed_folder, img)) for img in image_files]

# squeeze=False keeps `axes` two-dimensional even when only one image is
# listed, so the axes[row, col] indexing below always works.
fig, axes = plt.subplots(
    len(image_files), 2, figsize=(10, 3 * len(image_files)), squeeze=False
)

axes[0, 0].set_title("Original", fontsize=14)
axes[0, 1].set_title("Result", fontsize=14)

# Left column: original image; right column: processed counterpart.
for i, (orig_img, proc_img) in enumerate(zip(original_images, processed_images)):
    axes[i, 0].imshow(orig_img)
    axes[i, 0].axis("off")

    axes[i, 1].imshow(proc_img)
    axes[i, 1].axis("off")

fig.suptitle("Preprocessing Results", fontsize=16)

plt.tight_layout()  # Reserve space for the title
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/fathomnet-out-of-sample-detection/preprocessed/train/0a4e1af0-c332-4211-bf10-246682ebd32d.png_out.jpg'