In [1]:
# Mount Google Drive only when running on Colab. On a local Jupyter kernel the
# google.colab package does not exist, and the notebook reaches Drive through
# a locally synced folder instead, so there is nothing to mount.
try:
    from google.colab import drive
except ImportError:
    drive = None
else:
    drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
%%time
import os, numpy as np, sys
from glob import glob
try:  # Check platform (Colab or Jupyter)
    import google.colab  # imported only to detect whether we run on Colab
    my_drive = "/content/drive/My Drive/"
except ImportError:
    # Not on Colab: fall back to the Google Drive folder under the home directory.
    # Only ImportError is caught so that unrelated failures are not hidden.
    my_drive = (os.path.expanduser("~") +
                "/Library/CloudStorage/GoogleDrive-jonasson2@gmail.com/My Drive/")
path = my_drive + "jöklar/"
# Make the project's src folder importable; skip if a previous run of this
# cell already added it, so re-running does not grow sys.path.
if path + "src" not in sys.path:
    sys.path.append(path + "src")
from util.util import install_import
# install_import is a project helper; presumably it installs the package when
# missing and returns the imported module -- confirm in util/util.py.
rasterio = install_import("rasterio")
#
folder = "jöklar/data"
# my_drive already ends with "/", so join rather than inserting another separator.
os.chdir(os.path.join(my_drive, folder))
dataname = "lang"

def read_tiles(dataname=".", kind="img", n_channels=13, tile_px=256, tile_size=2560):
    """Read all tile files of one kind and order them by grid position.

    Parameters
    ----------
    dataname : str
        Folder that holds the tile files.
    kind : str
        File-name prefix; every file matching ``{dataname}/{kind}-*`` is read.
    n_channels : int
        Number of raster bands read from each file (bands 1..n_channels).
    tile_px : int
        Height and width of a tile in pixels.
    tile_size : float
        Distance between the origins of neighbouring tiles, in the units of
        the x/y offsets stored in each file's affine transform.

    Returns
    -------
    data : ndarray of uint16, shape (n_tiles, tile_px, tile_px, n_channels)
        Tile contents with the channel axis last.
    row, col : ndarray of int, shape (n_tiles,)
        Grid indices of each tile, sorted by row and then by column. Index 0
        corresponds to the smallest y (row) or x (col) offset among the tiles.

    Raises
    ------
    ValueError
        If no file matches the pattern.
    """
    # Read individual image files (sorted so the read order is reproducible)
    pattern = f"{dataname}/{kind}-*"
    files = sorted(glob(pattern))
    if not files:
        raise ValueError(f"No files found matching {pattern}")
    nfiles = len(files)
    bands = list(range(1, n_channels + 1))  # rasterio band indexes are 1-based
    data = np.zeros((nfiles, n_channels, tile_px, tile_px), "uint16")
    x = np.zeros(nfiles)
    y = np.zeros(nfiles)
    print(f"{dataname}-{kind}: ", end="")
    for (i, file) in enumerate(files):
        with rasterio.open(file) as f:
            if i % 10 == 0:
                print(f"{i}, ", end="")
            # One call reads all requested bands as a (bands, rows, cols) array
            data[i] = f.read(bands)
            transform = f.transform
            x[i] = transform[2]  # x offset of the tile
            y[i] = transform[5]  # y offset of the tile
    print()
    # Convert offsets to integer grid indices relative to the smallest offset
    col = ((x - x.min())/tile_size).astype(int)
    row = ((y - y.min())/tile_size).astype(int)
    # lexsort uses the LAST key as primary: order by row, then by col
    I = np.lexsort((col, row))
    row = row[I]
    col = col[I]
    # Move the channel axis last: (tile, channel, y, x) -> (tile, y, x, channel)
    data = np.transpose(data[I], (0, 2, 3, 1))
    return data, row, col

def print_info(image, mask, row, col, tile_km=2.56):
    """Print a short summary of a tiled image and its glacier mask.

    Parameters
    ----------
    image : ndarray
        Tile data; only its shape and dtype are reported.
    mask : ndarray
        Mask whose nonzero/True entries are counted as glacier pixels.
    row, col : ndarray of int
        Zero-based grid indices of the tiles.
    tile_km : float
        Side length of one tile in kilometres.
    """
    # Use the maximum index, not the last element: the tiles are ordered by
    # row first, so the final tile need not be in the rightmost column.
    w = (int(np.max(col)) + 1)*tile_km
    h = (int(np.max(row)) + 1)*tile_km
    pct_glacier = np.sum(mask)/np.size(mask)*100
    print()
    print(f"Image shape: {image.shape}, data type: {image.dtype}")
    print(f"Region width {w} km, height {h} km")
    print(f"Pixels labeled glacier: {pct_glacier:.1f}%")
    print()

def save_data_and_report(outfile, image, mask, row, col):
    """Save the arrays to a compressed .npz archive and print its size.

    Parameters
    ----------
    outfile : str or path-like
        Destination file. If the name does not end in ".npz" the extension is
        appended, which matches what np.savez_compressed does itself.
    image, mask, row, col : ndarray
        Arrays stored in the archive under the keys "image", "mask", "row"
        and "col".

    The printed location is prefixed with the module-level ``folder`` variable.
    """
    outfile = os.fspath(outfile)
    # np.savez_compressed silently adds ".npz" to names that lack it; add it
    # here so the size lookup below refers to the file actually written.
    if not outfile.endswith(".npz"):
        outfile += ".npz"
    np.savez_compressed(outfile, image=image, mask=mask, row=row, col=col)
    size = os.path.getsize(outfile)
    raw_bytes = image.nbytes + mask.nbytes + row.nbytes + col.nbytes
    # Guard against division by zero when all arrays are empty
    compress_ratio = size/raw_bytes if raw_bytes else float("nan")
    print(f"Wrote {size/2**20:.1f} MiB of data to {folder}/{outfile}")
    print(f"  Compression ratio: {compress_ratio:.2f}")
    print()

image, row, col = read_tiles(dataname, "img")
mask_img, mask_row, mask_col = read_tiles(dataname, "mask")
# Image and mask tiles are read and sorted independently, so verify that both
# sets cover the same grid cells in the same order before pairing them.
if not (np.array_equal(row, mask_row) and np.array_equal(col, mask_col)):
    raise ValueError("Image and mask tiles do not cover the same grid positions")
# A pixel is labeled glacier when any channel of the mask tile is nonzero
mask = np.any(mask_img, axis=3)
print_info(image, mask, row, col)
outfile = f"{dataname}/data.npz"
save_data_and_report(outfile, image, mask, row, col)

path= /content/drive/My Drive/jöklar/
sys.path= ['/content', '/env/python', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/usr/local/lib/python3.10/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.10/dist-packages/IPython/extensions', '/root/.ipython', '/content/drive/My Drive/jöklar/src']
lang-img: 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 
lang-mask: 0, 

  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 

Image shape: (197, 256, 256, 13), data type: uint16
Region width 48.64 km, height 48.64 km
Pixels labeled glacier: 68.5%

152809341
Wrote 145.7 Mb of data to jöklar/data/lang/data.npz
  Compression ratio: 0.44

CPU times: user 25.7 s, sys: 1.62 s, total: 27.3 s
Wall time: 1min


In [7]:
# Test reading of the file
%%time
print(os.getcwd())
print(outfile)
data = np.load(outfile)
[img, mask, row, col] = data.values()
img.shape, mask.shape, row.shape, col.shape

/content/drive/MyDrive/jöklar/data
lang/data.npz
CPU times: user 2.47 s, sys: 324 ms, total: 2.79 s
Wall time: 2.89 s


((197, 256, 256, 13), (197, 256, 256), (197,), (197,))