In [1]:
import shutil
import os
import sys
from pathlib import Path
import multiprocessing
import subprocess
from osgeo import gdal
import numpy as np

In [2]:
def progress_callback(complete, message, unknown):
    print('progress: {}, message: "{}", unknown {}'.format(complete, message, unknown))
    return 1

In [3]:
path = "/home/jovyan/work/satellite_data/ask_new/"

In [4]:
n_jobs = 5
njobs_cog = int(np.floor(os.cpu_count()/n_jobs))

warp_options = "-overwrite -multi -wm 20% -t_srs WGS84 -srcnodata 0 -dstnodata 0 -co TILED=YES \
                -co COMPRESS=DEFLATE -co BIGTIFF=YES -wo NUM_THREADS="+str(njobs_cog)+" -co NUM_THREADS="+str(njobs_cog) #-co BLOCKSIZE=256"
#vrt_options = gdal.BuildVRTOptions(resampleAlg='average', addAlpha=True)
#translate_options = "-b 1 -b 2 -b 3 -mask 4 -co BIGTIFF=YES -co TILED=YES --config GDAL_TIFF_INTERNAL_MASK YES -co ALPHA=YES --config GDAL_CACHEMAX 8096 -co NUM_THREADS=ALL_CPUS -co COMPRESS=DEFLATE -co PREDICTOR=2"

cog_options = "-co BIGTIFF=YES -co NUM_THREADS="+str(njobs_cog)+" -co COMPRESS=DEFLATE --config GDAL_CACHEMAX 8096 -co PREDICTOR=2" 
gdal.SetConfigOption('GDAL_CACHEMAX', '8096')

In [5]:
parts = {i: os.path.join(path,i) for i in os.listdir(path) if os.path.isdir(os.path.join(path,i))}

In [6]:
fmapping = {}
for part,path_part in parts.items():
    flist = []
    f = os.listdir(path_part)
    for i in f:  
        if ".tif" in i:
            #print(f"COG {part} exists! Skip")
            continue
    
    for p, d, files in os.walk(path_part):
        for i in files:
            if i.endswith(".tif"):
                  flist.append(os.path.join(p,i))
        fmapping[part] = flist

In [7]:
params = []
for k,v in fmapping.items():
    params.append([k,v])

In [8]:
params = params[::-1]

In [9]:
def worker(args):
    part,file_list = args
    subpath = os.path.join(path,part)
    tmp_file = os.path.join(subpath,"tmp_"+str(part)+".tif")
    out_file = os.path.join(subpath,"cog_"+str(part)+".tif")
    
    if (os.path.isfile(tmp_file)) or os.path.isfile(out_file):
        print(f"{str(part)} exists!")
        return
    
    #### Transform in lossless format + WGS84 format
    ds = gdal.Warp(tmp_file, file_list, format="GTiff",options=warp_options,callback = progress_callback)
    ds = None
    del ds

    cmd = ['gdal_translate', '-of','COG'] + cog_options.split(" ") + [tmp_file,out_file]
    process = subprocess.Popen(cmd)
    try:
        process.wait(timeout=86400) #one day
    except subprocess.TimeoutExpired:
        print('Timed out - killing', process.pid)
        process.kill()
    
    try:
        os.kill(process.pid, 0)
    except OSError:
        pass
    finally:
        os.remove(tmp_file)

In [10]:
pool = multiprocessing.Pool(n_jobs)
try:
    _ = pool.map(worker,params)
except Exception as e:
    print(e)
    pool.close()
    pool.join()
pool.close()
pool.join()

6306 exists!6384 exists!

6275 exists!6206 exists!6327 exists!

6322 exists!
6171 exists!6137 exists!6135 exists!
6397 exists!


6157 exists!6391 exists!6347 exists!
6196 exists!


6178 exists!
6160 exists!6349 exists!6307 exists!6284 exists!




6211 exists!6107 exists!6395 exists!6270 exists!6098 exists!




6080 exists!6260 exists!6190 exists!6310 exists!6396 exists!




6285 exists!6055 exists!6213 exists!6146 exists!6333 exists!




6225 exists!6379 exists!6346 exists!6216 exists!
6224 exists!

6067 exists!

6309 exists!6255 exists!

6163 exists!6096 exists!
6166 exists!


6321 exists!6228 exists!6189 exists!6111 exists!6334 exists!



6174 exists!
6369 exists!6078 exists!
6361 exists!
6186 exists!

6368 exists!6167 exists!6398 exists!
6304 exists!

6082 exists!

6302 exists!

6210 exists!
6367 exists!6068 exists!

6052 exists!6128 exists!

6150 exists!6381 exists!
6258 exists!

6230 exists!6240 exists!

6312 exists!
6366 exists!6324 exists!

6314 exists!6305 exists!

6085 exists!

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

