In [26]:
# Install and import
import os, sys, numpy as np
from util.image_util import get_border_indices
from glob import glob
try: # Check platform (Colab or Jupyter)
  import google.colab
  my_drive = "/content/drive/My Drive/"
except:
  my_drive = os.path.expanduser("~") + "/drive/"
path = my_drive + "joklar/"
sys.path.append(path + "src")
from util.util import install_import
rasterio = install_import("rasterio")

In [2]:
# Define data to use
datafolder = "joklar/data"
dataname = "lang"
datapath = path + "data/" + dataname + "/"

In [3]:
# Define functions
def read_tile(datapath, n):
    #dataname = os.path.basename(datapath.rstrip('/'))
    imgfile = datapath + f"/img-{n:03d}.tif"
    maskfile = datapath + f"/mask-{n:03d}.tif"
    with rasterio.open(imgfile) as f:
        img_channels = f.meta["count"]
        img = np.array([f.read(b) for b in range(1, img_channels + 1)])
        x = f.meta["transform"][2]
        y = f.meta["transform"][5]
    with rasterio.open(maskfile) as f:
        mask_channels = f.meta["count"]
        masks = np.array([f.read(b) for b in range(1, mask_channels + 1)])
        mask = np.any(masks, axis=0)
    return img, mask, x, y

In [4]:
img, mask, x, y = read_tile(datapath, 21)
img.dtype, mask.dtype

  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


(dtype('uint16'), dtype('bool'))

In [23]:
def read_tiles(datapath):
    # Read a whole glacier
    nfiles = len(glob(f"{datapath}/img-*"))
    img = nfiles*[None]
    mask = np.zeros((nfiles, 256, 256), bool)
    x = np.zeros(nfiles)
    y = np.zeros(nfiles)
    for n in range(nfiles):
        img[n], mask[n], x[n], y[n] = read_tile(datapath, n)
        if n % 10 == 0:
            print(f"{n}, ", end="")
    col = ((x - x.min())/2560).astype(int)
    row = ((y - y.min())/2560).astype(int)
    I = np.lexsort((col, row))
    row = row[I]
    col = col[I]
    mask = mask[I]
    data = np.transpose(np.array(img)[I], (0, 2, 3, 1))
    return data, mask, row, col

In [27]:
image, mask, row, col = read_tiles(datapath)
print(image.dtype)

0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, uint16


In [19]:
image.dtype

dtype('uint16')

In [47]:
def print_info(border_indices, image, mask, row, col):
    # Print summary and tif-meta information
    w = (col[-1] + 1)*2.56
    h = (row[-1] + 1)*2.56
    pct_glacier = np.sum(mask)/np.size(mask)*100
    n_border = len(border_indices)
    pct_border = len(border_indices)/len(mask)*100
    print()
    print(f"Image shape: {image.shape}, data type: {image.dtype}")
    print(f"Region width {w} km, height {h} km")
    print(f"Pixels labeled glacier: {pct_glacier:.1f}%")
    print(f"Border indices: {n_border} ({pct_border:.1f}%)")
    print(f"Mask datatype: {mask.dtype}")
    print()

def save_data_and_report(outfile, border_indices, image, mask, row, col):
    np.savez_compressed(outfile, border_indices=border_indices,
                        image=image, mask=mask, row=row, col=col)
    size = os.path.getsize(outfile)
    print(size)
    compress_ratio = size/(image.nbytes + mask.nbytes + row.nbytes + col.nbytes)
    print(f"Wrote {size/2**20:.1f} Mb of data to {outfile}")
    print(f"  Compression ratio: {compress_ratio:.2f}")
    print()

In [40]:
# Read data and report
image, mask, row, col = read_tiles(datapath)
border_indices = get_border_indices(image, mask)
print_info(border_indices, image, mask, row, col)

0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 
Image shape: (197, 256, 256, 13), data type: uint16
Region width 48.64 km, height 48.64 km
Pixels labeled glacier: 68.5%
Border indices: 105 (53.3%)
Mask datatype: bool


In [48]:
# Save to compressed file and report
dataname = os.path.basename(datapath.rstrip("/"))
outfile = datapath + dataname + ".npz"
save_data_and_report(outfile, border_indices, image, mask, row, col)

152809754
Wrote 145.7 Mb of data to /Users/jonasson/drive/joklar/data/lang/lang.npz
  Compression ratio: 0.44


In [49]:
d = np.load(outfile)
list(d.keys())

['border_indices', 'image', 'mask', 'row', 'col']

In [55]:
%%time
# Test reading of the file
(image, mask) = load_data
print(outfile)
data = np.load(outfile)
border_indices = data['border_indices']
#border_indices = range(0, len(data['mask']))
print(len(data['mask']))
image = data['image'][border_indices]
mask = data['mask'][border_indices]
row = data['row']
col = data['col']
image.shape, mask.shape, row.shape, col.shape

/Users/jonasson/drive/joklar/data/lang/lang.npz
197
CPU times: user 1.09 s, sys: 87.8 ms, total: 1.18 s
Wall time: 1.24 s


((105, 256, 256, 13), (105, 256, 256), (105,), (105,))