In [1]:
import shutil
import os
import sys
from pathlib import Path
import multiprocessing
import subprocess
from osgeo import gdal
import numpy as np

In [2]:
def progress_callback(complete, message, unknown):
    """Print one progress line and return 1.

    Passed as the ``callback`` argument of ``gdal.Warp`` in ``worker``.

    Args:
        complete: progress value reported by the caller (printed as-is).
        message: status text reported by the caller (printed inside quotes).
        unknown: third callback argument; only echoed, never interpreted here.

    Returns:
        Always 1.  NOTE(review): presumably GDAL's "keep going" signal for
        progress callbacks -- confirm against the GDAL documentation.
    """
    line = f'progress: {complete}, message: "{message}", unknown {unknown}'
    print(line)
    return 1

In [3]:
# Root data directory. Later cells treat every immediate sub-directory of this
# path as one "part", and worker() writes its tmp_<part>.tif / cog_<part>.tif
# outputs into that sub-directory.
path = "/home/jovyan/work/satellite_data/ask_new/"

In [4]:
# Number of parts processed concurrently (size of the multiprocessing pool).
n_jobs = 5
# GDAL threads given to each pool worker: the machine's CPUs split evenly
# across the workers.  os.cpu_count() may return None, and on a machine with
# fewer CPUs than n_jobs the integer division yields 0, so fall back to 1 CPU
# and clamp the result to at least one thread.
njobs_cog = max(1, (os.cpu_count() or 1) // n_jobs)

# Command-line style option string handed to gdal.Warp() in worker():
# target SRS WGS84, 0 as source and destination nodata, tiled DEFLATE-compressed
# BigTIFF output, and njobs_cog threads for both the warp (-wo) and the
# creation (-co) options.
# NOTE: the backslash continuation below sits *inside* the string literal, so
# the leading spaces of the following line are part of the string.  The
# trailing '#-co BLOCKSIZE=256"' is a Python comment, not part of the options.
warp_options = "-overwrite -multi -wm 20% -t_srs WGS84 -srcnodata 0 -dstnodata 0 -co TILED=YES \
                -co COMPRESS=DEFLATE -co BIGTIFF=YES -wo NUM_THREADS="+str(njobs_cog)+" -co NUM_THREADS="+str(njobs_cog) #-co BLOCKSIZE=256"
#vrt_options = gdal.BuildVRTOptions(resampleAlg='average', addAlpha=True)
#translate_options = "-b 1 -b 2 -b 3 -mask 4 -co BIGTIFF=YES -co TILED=YES --config GDAL_TIFF_INTERNAL_MASK YES -co ALPHA=YES --config GDAL_CACHEMAX 8096 -co NUM_THREADS=ALL_CPUS -co COMPRESS=DEFLATE -co PREDICTOR=2"

# Extra arguments for the `gdal_translate -of COG` call in worker().  worker()
# tokenizes this with .split(" "), so tokens must stay separated by exactly one
# space (no leading/trailing/double spaces inside the string).
cog_options = "-co BIGTIFF=YES -co NUM_THREADS="+str(njobs_cog)+" -co COMPRESS=DEFLATE --config GDAL_CACHEMAX 8096 -co PREDICTOR=2" 
# Same cache setting for the in-process GDAL bindings (used by gdal.Warp).
# NOTE(review): 8096 is presumably interpreted by GDAL as megabytes -- confirm.
gdal.SetConfigOption('GDAL_CACHEMAX', '8096')

In [5]:
# Map every immediate sub-directory of `path` (name -> full path); plain files
# directly under `path` are ignored.
parts = {}
for entry_name in os.listdir(path):
    entry_path = os.path.join(path, entry_name)
    if os.path.isdir(entry_path):
        parts[entry_name] = entry_path

In [6]:
# Build {part name: [full path of every *.tif found anywhere below the part's
# directory]}.
#
# The previous version first looped over the part directory with a `continue`
# that only affected that inner loop, so it never skipped anything; that dead
# scan is removed.  Parts that were already converted are still skipped later
# by worker(), which checks for its tmp_/cog_ output files.
fmapping = {}
for part, path_part in parts.items():
    tif_paths = [
        os.path.join(dirpath, fname)
        for dirpath, _dirnames, fnames in os.walk(path_part)
        for fname in fnames
        if fname.endswith(".tif")
    ]
    if not tif_paths:
        # Nothing to mosaic: dispatching an empty file list would make the
        # worker fail and abort the whole pool.map() call.
        print(f"{part}: no .tif files found, skipping")
        continue
    # Assigned once per part (it used to be re-assigned for every directory
    # visited by os.walk).
    fmapping[part] = tif_paths

In [7]:
# One [part, tif_paths] task per fmapping entry, in dict order; each item is
# unpacked by worker() when dispatched through pool.map().
params = [[part_name, tif_list] for part_name, tif_list in fmapping.items()]

In [8]:
#params = params[::-1]

In [8]:
def worker(args):
    """Mosaic one part's tiles and convert the result to a COG.

    Steps:
      1. ``gdal.Warp`` merges/reprojects ``file_list`` into
         ``<path>/<part>/tmp_<part>.tif`` using ``warp_options``.
      2. ``gdal_translate -of COG`` (run as a subprocess, one-day timeout)
         converts that file into ``<path>/<part>/cog_<part>.tif`` using
         ``cog_options``.
      3. The temporary file is removed.

    If either output file already exists the part is skipped and nothing is
    touched.  When a step fails or times out, the temporary file and any
    partially written COG are removed so that a later run retries the part
    instead of reporting it as existing.

    Args:
        args: ``[part, file_list]`` -- the part (sub-directory) name and the
            list of source ``.tif`` paths belonging to it.

    Returns:
        None.
    """
    part, file_list = args
    subpath = os.path.join(path, part)
    tmp_file = os.path.join(subpath, "tmp_" + str(part) + ".tif")
    out_file = os.path.join(subpath, "cog_" + str(part) + ".tif")

    if os.path.isfile(tmp_file) or os.path.isfile(out_file):
        print(f"{str(part)} exists!")
        return

    try:
        #### Transform in lossless format + WGS84 format
        ds = gdal.Warp(tmp_file, file_list, format="GTiff", options=warp_options, callback=progress_callback)
        if ds is None:
            # Without GDAL exceptions enabled, a failed warp returns None.
            print(f"{part}: gdal.Warp failed, skipping COG conversion")
            return
        # Drop the only reference so GDAL flushes and closes the dataset
        # before gdal_translate reads it.
        ds = None

        # .split() (instead of .split(" ")) tolerates stray/double spaces in
        # cog_options without producing empty arguments.
        cmd = ['gdal_translate', '-of', 'COG'] + cog_options.split() + [tmp_file, out_file]
        process = subprocess.Popen(cmd)
        try:
            returncode = process.wait(timeout=86400)  # one day
        except subprocess.TimeoutExpired:
            print('Timed out - killing', process.pid)
            process.kill()
            process.wait()  # reap the killed child so no zombie is left behind
            returncode = None

        if returncode != 0:
            print(f"{part}: gdal_translate failed (return code {returncode})")
            # A partial COG would make the next run report "<part> exists!".
            if os.path.isfile(out_file):
                os.remove(out_file)
    finally:
        # Always clean up the intermediate file, but only if it was created.
        if os.path.isfile(tmp_file):
            os.remove(tmp_file)

In [None]:
# Run worker() over all parts with n_jobs processes.  An exception raised by
# any task is printed rather than propagated; either way the pool is closed
# and joined afterwards.
pool = multiprocessing.Pool(n_jobs)
try:
    _ = pool.map(worker, params)
except Exception as err:
    print(err)
pool.close()
pool.join()

6179 exists!6060 exists!

6234 exists!6050 exists!6165 exists!


6244 exists!6154 exists!

progress: 0.0, message: "Processing /home/jovyan/work/satellite_data/ask_new/6057/6/8/6057_686_1km.tif [1/39]", unknown None
progress: 0.0, message: "Processing /home/jovyan/work/satellite_data/ask_new/6372/5/8/6372_585_1km.tif [1/49]", unknown None
progress: 0.02564102564102564, message: "Processing /home/jovyan/work/satellite_data/ask_new/6057/6/8/6057_685_1km.tif [2/39]", unknown None
progress: 0.02040816326530612, message: "Processing /home/jovyan/work/satellite_data/ask_new/6372/5/8/6372_580_1km.tif [2/49]", unknown None
progress: 0.05128205128205128, message: "Processing /home/jovyan/work/satellite_data/ask_new/6057/6/8/6057_684_1km.tif [3/39]", unknown None
progress: 0.04081632653061224, message: "Processing /home/jovyan/work/satellite_data/ask_new/6372/5/8/6372_582_1km.tif [3/49]", unknown None
progress: 0.0, message: "Processing /home/jovyan/work/satellite_data/ask_new/6298/4/8/6298_484_