In [None]:
import os
import time
import urllib.request

import numpy as np

%load_ext memory_profiler

### Download a big image and plot a part of it

In [None]:
# Download image -- this can take a bit (approx. 100 MB)
url = "https://stsci-opo.org/STScI-01EVSZWCFZVP2R5ZRV7HEZAGP6.tif"

hubble_image = os.path.join("data", "hubble.tif")

# urlretrieve writes straight to the given path and does not create missing
# directories, so make sure the target directory exists first.
os.makedirs(os.path.dirname(hubble_image), exist_ok=True)

# Only download when the file is not on disk yet, so re-running the notebook
# does not fetch the image again.
if not os.path.exists(hubble_image):
    # Download to a temporary name and rename afterwards: an interrupted
    # download then never leaves a truncated file at the final path.
    partial_download = hubble_image + ".part"
    urllib.request.urlretrieve(url, partial_download)
    os.replace(partial_download, hubble_image)

In [None]:
# Load image and convert to float

from skimage.io import imread
from skimage.util import img_as_float

img = img_as_float(imread(hubble_image))

# Pixel count uses only the first two axes of the array; any further axes
# (e.g. a channel axis) are not counted.
megapixels = np.prod(img.shape[:2]) / 1e6
# nbytes is a size in bytes, so the unit below is gigabytes ("GB").
gigabytes = img.nbytes / 1e9

print(f"The image has shape {img.shape}")
print(
    f"The full image has {megapixels:.0f} MPix, and occupies {gigabytes:.0f} GB in RAM"
)

In [None]:
# Plot a small portion of the image (the first 1024 rows and columns)
import matplotlib.pyplot as plt

# Explicit figure/axes instead of the implicit pyplot state, plus a title so
# the figure can be understood on its own.
fig, ax = plt.subplots()
ax.imshow(img[:1024, :1024])
ax.set_title("Hubble image: first 1024 x 1024 pixels");

### Apply a simple Gaussian filter

Smooth the image with the `gaussian_filter` function from the `scipy.ndimage` module.

1. Check how memory usage increases (e.g., you can use the command `htop` in your terminal).
2. Check how long it takes to perform this operation (use the `%time` magic line). 

In [None]:
import scipy.ndimage as snd

%time result = snd.gaussian_filter(img, sigma=10)

This takes a really long time, and it occupies a lot of RAM.

## Let's crop it to be able to process it more efficiently later on. 

In [None]:
from pathlib import Path

from utils import chunk_image

# Crop/chunk size handed to chunk_image
# NOTE(review): presumably (rows, columns, channels) -- confirm against utils.chunk_image
chunk_size = (256, 256, 3)

# Save directory. parents=True also creates the missing "imgs" parent folder;
# exist_ok=True makes the cell safe to re-run.
output_dir = os.path.join("imgs", "hubble")
Path(output_dir).mkdir(parents=True, exist_ok=True)

# Run cropping function
chunk_image(img, chunk_size, output_dir)

In [None]:
from utils import show_images

# Number of entries found in the output directory
n_images = len(os.listdir(output_dir))

# Read some example image-crops (one loader expression instead of three copies)
example_files = ("image-00-02-00.png", "image-00-20-00.png", "image-02-10-00.png")
img1, img2, img3 = (
    img_as_float(imread(os.path.join(output_dir, file_name)))
    for file_name in example_files
)

# Plot
show_images(images=[img1, img2, img3], titles="Hubble Space")

# Pixel count uses only the first two axes of the array.
kilopixels = np.prod(img1.shape[:2]) / 1e3
# nbytes is a size in bytes, so the unit below is megabytes ("MB").
megabytes = img1.nbytes / 1e6

print(f"There are {n_images} images.")
print(f"Each image has shape {img1.shape}")
print(
    f"Each image has {kilopixels:.0f} KPix, and occupies {megabytes:.0f} MB in RAM"
)

### We can see different types of elements in the above images

1. In the first image we see a star. 

2. In the second image we see a developed galaxy. 

3. In the last image we see two galaxies in their very early stages (in red). 

##### In the following notebooks we'll see how we can process these large images efficiently in parallel to get some interesting features out of them.