In [1]:
import imageio
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
from glob import glob
import numpy as np
import random
import xarray as xr

from convml_tt.data.dataset import TRIPLET_TILE_FILENAME_FORMAT

In [2]:
TRIPLET_TILE_FILENAME_FORMAT

'{triplet_id:05d}_{tile_type}.png'

### Make a set of tiles

In [3]:
# working folder
folder = "AquaHkmLabSea2022b"
filepath = "/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/"+folder

#### Make tiles from png images

In [4]:
# image files
filenames = glob(filepath+"/sliced/*.png")

In [5]:
len(filenames)

71

In [6]:
tile_nx = tile_ny = 256
img_tiles = []

for file in filenames:
    img = imageio.imread(file)
    img = img[:,:,:3] # last term gets rid of alpha channel
    ny, nx, _ = img.shape
    for i in range(0, nx-tile_nx, 64):
        for j in range(0, ny-tile_ny, 64):
            img_tile = img[j:j+tile_ny, i:i+tile_nx, :]
            img_tiles.append(img_tile)

len(img_tiles)

  img = imageio.imread(file)


51120

##### Singlet dataset

In [31]:
fp_path_dataset = Path(filepath+"/tiles")
fp_path_dataset.mkdir(exist_ok=True, parents=True)

In [32]:
stride = 5 # What fraction of the tiles to keep?
n = len(img_tiles)
for n, i in enumerate(tqdm(range(n)[::stride], total=len(img_tiles)/stride)):
    fn = TRIPLET_TILE_FILENAME_FORMAT.format(triplet_id=n, tile_type="anchor")
    fp_tile = fp_path_dataset / fn
    img_tile = img_tiles[i]
    imageio.imwrite(uri=fp_tile, im=img_tile, format="png")

100%|██████████| 10224/10224.0 [02:23<00:00, 71.30it/s]


##### Triplet dataset

In [10]:
fp_path_dataset = Path(filepath+"/triplets")
fp_path_dataset.mkdir(exist_ok=True, parents=True)

In [11]:
stride = 5 # What fraction of the tiles to keep?
n = len(img_tiles)-2
for n, i in enumerate(tqdm(range(n)[::stride], total=len(img_tiles)/stride)):
    #anchor
    fn = TRIPLET_TILE_FILENAME_FORMAT.format(triplet_id=n, tile_type="anchor")
    fp_tile = fp_path_dataset / fn
    img_tile = img_tiles[i]
    imageio.imwrite(uri=fp_tile, im=img_tile, format="png")
    #neighbour - overlap half of the tile
    fn = TRIPLET_TILE_FILENAME_FORMAT.format(triplet_id=n, tile_type="neighbor")
    fp_tile = fp_path_dataset / fn
    img_tile = img_tiles[i+2] # not totally satisfactory, tiles from edges may not actually be neighbours, also randomise direction?
    imageio.imwrite(uri=fp_tile, im=img_tile, format="png")
    #distant - sampled from random image (how to ensure not from same image?)
    fn = TRIPLET_TILE_FILENAME_FORMAT.format(triplet_id=n, tile_type="distant")
    fp_tile = fp_path_dataset / fn
    img_tile = img_tiles[random.randint(0, len(img_tiles)-1)]
    imageio.imwrite(uri=fp_tile, im=img_tile, format="png")


100%|██████████| 10224/10224.0 [07:04<00:00, 24.08it/s]


#### Make tiles from nc files

In [5]:
# open nc files
filenames = glob(filepath+"/true_colour_netcdf/*.nc")


In [8]:
ds = xr.open_dataset(filenames[0])
ds

In [None]:
# make tiles but also write lat and lon to csv along with tile id
# where to take lat/lon from? centre of tile? corner of tile?

#### Make tiles from hdf files?

In [1]:
# Would need satpy and convml libraries in same environment