In [93]:
import time

## How to make a cake?

In [94]:
def makeCake():
    """Bake a fresh cake: return a new list of four slice labels on every call."""
    return [f"slice_{slice_i}" for slice_i in range(4)]

In [95]:
cake = makeCake()
cake

['slice_0', 'slice_1', 'slice_2', 'slice_3']

## But how do we eat a cake?

### The old boring way -  for loop

In [26]:
def EatSlice(cakeSlice):
    """Return the label of ``cakeSlice`` with the suffix ``_eaten`` appended.

    The input is formatted into a new string; nothing is modified in place.
    """
    eatenSlice = f"{cakeSlice}_eaten"
    return eatenSlice

In [9]:
# Sequential baseline: visit the slices one after another and overwrite each
# list entry with the value returned by EatSlice.
for cakeSlice_i, cakeSlice in enumerate(cake):
    
    print(f"Eating slice {cakeSlice_i}")
    
    # Replacing an existing index while enumerating is safe: the list length never changes.
    cake[cakeSlice_i] = EatSlice(cakeSlice)
    time.sleep(.5)  # pause half a second per slice

Eating slice 0
Eating slice 1
Eating slice 2
Eating slice 3


In [10]:
cake

['slice_0_eaten', 'slice_1_eaten', 'slice_2_eaten', 'slice_3_eaten']

### The old boring way -  for loop 2, modify cake in loop

In [97]:
bananaCake = makeCake()
bananaCake

['slice_0', 'slice_1', 'slice_2', 'slice_3']

In [101]:
def EatSliceOfCake(cakeSlice_i):
    """Mark slice number ``cakeSlice_i`` of the global ``bananaCake`` as eaten.

    The list is modified in place and nothing is returned.
    """
    uneatenSlice = bananaCake[cakeSlice_i]
    bananaCake[cakeSlice_i] = f"{uneatenSlice}_eaten"

In [102]:
# Second sequential variant: loop over indices only and let EatSliceOfCake
# update the global bananaCake list itself.
for cakeSlice_i in range(len(bananaCake)):
    
    print(f"Eating slice {cakeSlice_i}")
    
    EatSliceOfCake(cakeSlice_i)
    time.sleep(.5)  # pause half a second per slice

Eating slice 0
Eating slice 1
Eating slice 2
Eating slice 3


In [103]:
bananaCake

['slice_0_eaten', 'slice_1_eaten', 'slice_2_eaten', 'slice_3_eaten']

### The old boring way -  map and lambda functions

In [11]:
# Single-expression counterpart of EatSlice, written as a lambda so it can be passed directly to map().
EatSliceLambda = lambda cakeSlice: f"{cakeSlice}_eaten"

In [12]:
EatSliceLambda('Slice')

'Slice_eaten'

In [13]:
chocolateCake = makeCake()
chocolateCake

['slice_0', 'slice_1', 'slice_2', 'slice_3']

In [32]:
# map() is lazy; list() forces the evaluation and gives back a list that replaces the original cake.
chocolateCake = list(map(EatSliceLambda, chocolateCake))

In [32]:
chocolateCake

['slice_0_eaten', 'slice_1_eaten', 'slice_2_eaten', 'slice_3_eaten']

## Parallel computing - Multiprocessing

In [104]:
import multiprocess ## multiprocess for within notebook, otherwise you can use multiprocessing

In [105]:
strawberryCake = makeCake()
strawberryCake

['slice_0', 'slice_1', 'slice_2', 'slice_3']

In [106]:
def EatSliceParallel(cakeSlice_i):
    """Eat slice ``cakeSlice_i`` by assigning into the global ``strawberryCake``.

    Returns nothing. When this runs inside a pool worker, the assignment
    lands in that worker process's own copy of the list; the notebook's
    ``strawberryCake`` stays unchanged, which is what the following cells show.
    """
    eatenSlice = EatSlice(strawberryCake[cakeSlice_i])
    strawberryCake[cakeSlice_i] = eatenSlice

In [107]:
# Run EatSliceParallel over every index in a pool of two worker processes.
# The result of pool.map is discarded; this version relies on in-place assignment only.
with multiprocess.Pool(2) as pool:
    pool.map(EatSliceParallel, range(len(strawberryCake)))

In [108]:
strawberryCake

['slice_0', 'slice_1', 'slice_2', 'slice_3']

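<b>Nothing happened?</b> Each worker process changed its own copy of `strawberryCake`; the list in the notebook process was never modified.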

### Correctly return the result from multiprocessing

In [109]:
def EatSliceParallel(cakeSlice_i):
    """Return the eaten version of slice ``cakeSlice_i`` of the global ``strawberryCake``.

    Nothing is assigned in place; the caller collects the returned value.
    """
    chosenSlice = strawberryCake[cakeSlice_i]
    return EatSlice(chosenSlice)

In [110]:
# Collect the values returned by the workers (one per index, in input order)
# and rebind strawberryCake to that new list.
with multiprocess.Pool(2) as pool:
    strawberryCake = pool.map(EatSliceParallel, range(len(strawberryCake)))

In [111]:
strawberryCake

['slice_0_eaten', 'slice_1_eaten', 'slice_2_eaten', 'slice_3_eaten']

### More memory efficient: only pass the slice we need

In [112]:
lemonCake = makeCake()
lemonCake

['slice_0', 'slice_1', 'slice_2', 'slice_3']

In [113]:
def EatSliceParallel(cakeSlice):
    """Return the eaten version of the given slice.

    Takes the slice itself rather than an index, so it reads no global list.
    """
    eatenSlice = EatSlice(cakeSlice)
    return eatenSlice

In [114]:
# Map over the slices themselves instead of over indices; each task receives only its own slice.
with multiprocess.Pool(2) as pool:
    lemonCake = pool.map(EatSliceParallel, lemonCake)

In [115]:
lemonCake

['slice_0_eaten', 'slice_1_eaten', 'slice_2_eaten', 'slice_3_eaten']

### What if we need multiple arguments? - partial

In [132]:
from functools import partial

In [133]:
borcoliCake = makeCake()
borcoliCake

['slice_0', 'slice_1', 'slice_2', 'slice_3']

In [134]:
def EatSliceParallelWithArgument(cakeSlice, mood):
    """Eat ``cakeSlice`` and return a sentence that also reports ``mood``."""
    eatenSlice = EatSlice(cakeSlice)
    return f"Ate {eatenSlice} and was {mood}"

In [135]:
# pool.map passes exactly one argument per item, so partial() pre-fills the
# extra keyword argument (mood) and leaves cakeSlice as the only open parameter.
with multiprocess.Pool(2) as pool:
    borcoliCake = pool.map(partial(EatSliceParallelWithArgument, mood='Happy'), borcoliCake)

In [137]:
borcoliCake

['Ate slice_0_eaten and was Happy',
 'Ate slice_1_eaten and was Happy',
 'Ate slice_2_eaten and was Happy',
 'Ate slice_3_eaten and was Happy']

### What if we have different values for each element of a list? - zip

In [155]:
cheeseCake = makeCake()
cheeseCake

['slice_0', 'slice_1', 'slice_2', 'slice_3']

In [156]:
moods = ['happy', 'sad', 'full', 'yes']

In [157]:
list(zip(cheeseCake, moods))

[('slice_0', 'happy'),
 ('slice_1', 'sad'),
 ('slice_2', 'full'),
 ('slice_3', 'yes')]

In [158]:
def EatSliceParallelWithZip(cakeSliceWithMood):
    """Decide what happens to one slice based on the mood paired with it.

    ``cakeSliceWithMood`` is an indexable pair: element 0 is the slice,
    element 1 is the mood. Returns a sentence describing the outcome:
    the slice is eaten for ``'happy'``, left alone for ``'sad'``, and
    answered with ``"<slice> ?"`` for any other mood.
    """
    cakeSlice, mood = cakeSliceWithMood[0], cakeSliceWithMood[1]

    if mood == 'happy':
        outcome = f"Ate {EatSlice(cakeSlice)} and was super happy about it"
    elif mood == 'sad':
        outcome = f"Did not eat {cakeSlice} because sad"
    else:
        outcome = f"{cakeSlice} ?"

    return outcome

In [159]:
# Each task receives one (slice, mood) tuple produced by zip, so per-item
# arguments travel together as a single argument.
with multiprocess.Pool(2) as pool:
    cheeseCake = pool.map(EatSliceParallelWithZip, list(zip(cheeseCake, moods)))

In [160]:
cheeseCake

['Ate slice_0_eaten and was super happy about it',
 'Did not eat slice_1 because sad',
 'slice_2 ?',
 'slice_3 ?']

## Other library: joblib

In [163]:
import joblib

In [162]:
melonCake = makeCake()
melonCake

['slice_0', 'slice_1', 'slice_2', 'slice_3']

In [165]:
def EatSliceParallelJoblib(cakeSlice_i):
    """Return the eaten version of slice ``cakeSlice_i`` of the global ``melonCake``."""
    chosenSlice = melonCake[cakeSlice_i]
    return EatSlice(chosenSlice)

In [168]:
# joblib equivalent of the Pool example: delayed() wraps each call so that
# Parallel can run it later; the collected results replace melonCake.
with joblib.Parallel(2) as parallel:
    melonCake = parallel(joblib.delayed(EatSliceParallelJoblib)(index) for index in range(len(melonCake)))

In [169]:
melonCake

['slice_0_eaten', 'slice_1_eaten', 'slice_2_eaten', 'slice_3_eaten']

### with slices

In [180]:
orangeCake = makeCake()
orangeCake

['slice_0', 'slice_1', 'slice_2', 'slice_3']

In [181]:
def EatSliceParallelJoblibSlices(cakeSlice):
    """Return the eaten version of the slice passed in; reads no global list."""
    eatenSlice = EatSlice(cakeSlice)
    return eatenSlice

In [182]:
# Same joblib pattern, but each delayed call is given the slice itself rather than an index.
with joblib.Parallel(2) as parallel:
    orangeCake = parallel(joblib.delayed(EatSliceParallelJoblibSlices)(cakeSlice) for cakeSlice in orangeCake)

In [183]:
orangeCake

['slice_0_eaten', 'slice_1_eaten', 'slice_2_eaten', 'slice_3_eaten']

## Concrete example of what I use parallel computing for

In [34]:
import rasterio as rio
from pathlib import Path
from py_linq import Enumerable
import numpy as np
import multiprocess

### Calculate NDVI values of Landsat images for a large number of raster images

In [140]:
rasterPath = Path('Data')/'Raster'

In [141]:
landsatImagesPath = rasterPath/'LandsatImagesExample'

In [142]:
rasters = landsatImagesPath.iterdir()

In [143]:
rasters = Enumerable(rasters).where(lambda f: f.suffix == '.tif')

In [144]:
rasters

[PosixPath('Data/Raster/LandsatImagesExample/Raster_0.tif'), PosixPath('Data/Raster/LandsatImagesExample/Raster_1.tif'), PosixPath('Data/Raster/LandsatImagesExample/Raster_999.tif')]

In [148]:
outPath = landsatImagesPath.parent/'NDVI'
outPath.mkdir(exist_ok=True)

In [146]:
def GetNDVI(RED, NIR):
    """Return the normalized difference ``(NIR - RED) / (NIR + RED)``.

    Works on anything supporting ``-``, ``+`` and ``/`` (scalars or NumPy
    arrays); no special handling is applied when ``NIR + RED`` is zero.
    """
    difference = NIR - RED
    total = NIR + RED
    return difference / total

In [147]:
def generateNDVIRaster(file):
    """Write a single-band, rescaled NDVI raster for one input image.

    The output goes to ``outPath/<file.name>``. If that file already exists
    the function returns immediately without opening the input, so repeated
    or interrupted runs only process what is still missing.

    Parameters
    ----------
    file : pathlib.Path
        Input raster. Band index 2 is used as red and band index 3 as
        near-infrared (0-based indices into the array returned by ``read()``).

    Returns
    -------
    None

    Notes
    -----
    NDVI is mapped linearly from [-1, 1] to [0, 1]. Pixels whose NDVI is not
    finite (a nodata input, or a zero ``NIR + RED`` denominator) are written
    as the source raster's nodata value; if the source declares no nodata
    value they stay NaN.
    """
    ndviFile = outPath/file.name

    # Check for an existing result first so finished files cost no raster I/O.
    if ndviFile.exists():
        return

    with rio.open(file) as r:
        profile = r.profile.copy()
        nodata = r.nodata

        data = r.read()
        # Build the mask on the raw values, then switch to float so NaN can be stored.
        mask = data == nodata
        data = data * 1.0
        data[mask] = np.nan
        RED = data[2]
        NIR = data[3]

    profile.update(count=1, dtype='float32')

    # Division by zero is expected for some pixels and handled below.
    with np.errstate(divide='ignore', invalid='ignore'):
        NDVI = GetNDVI(RED, NIR)

    # Record invalid pixels BEFORE rescaling: np.interp maps +/-inf onto the
    # interval end points, so a check made afterwards could never find them.
    invalid = ~np.isfinite(NDVI)

    NDVI = np.clip(np.interp(NDVI, (-1, 1), (0.0, 1.0)), 0.0, 1.0)

    if nodata is not None:
        NDVI[invalid] = nodata

    with rio.open(ndviFile, 'w', **profile) as dst:
        # Cast explicitly so the array matches the dtype declared in the profile.
        dst.write(NDVI.astype('float32'), 1)

In [31]:
# Sequential version: process the rasters one after another.
for file in rasters:
    generateNDVIRaster(file)

In [42]:
# Parallel version: generateNDVIRaster writes its result to disk and returns
# None, so the list returned by pool.map is not needed.
with multiprocess.Pool(2) as pool:
    pool.map(generateNDVIRaster, rasters)

### Download multiple images from earth engine

In [43]:
import geemap
import ee

In [51]:
_ = geemap.Map()

In [47]:
# Four-corner polygon used as the area of interest
# (coordinate pairs are presumably longitude, latitude — confirm against the ee docs).
tucson = ee.Geometry.Polygon([[
    [-111.13183568891769, 32.314901949559655],
    [-111.13183568891769, 32.15401640825052],
    [-110.82833837446456, 32.15401640825052],
    [-110.82833837446456, 32.314901949559655],
]])

In [67]:
landsat = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2")

In [120]:
# Restrict the collection to the given date window and to images intersecting the tucson polygon.
tucsonLandsat = (
    landsat
    .filterDate(ee.Date('2023-09-03'), ee.Date('2023-09-04'))
    .filterBounds(tucson)
)

In [121]:
crs = tucsonLandsat.first().projection().crs().getInfo()

In [122]:
scale = tucsonLandsat.first().projection().nominalScale().getInfo()

In [123]:
tucsonLandsat = tucsonLandsat.mosaic().setDefaultProjection(crs=crs, scale=scale)

In [124]:
coveringGrid = tucson.coveringGrid(proj='EPSG:4326', scale=256*30)

In [125]:
Map = geemap.Map()
Map.addLayer(tucson)
Map.centerObject(tucson)
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

In [126]:
Map = geemap.Map()
Map.centerObject(tucson)
Map.addLayer(tucsonLandsat)
Map.addLayer(coveringGrid.draw('red'))
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

In [127]:
n = coveringGrid.size().getInfo()

In [128]:
gridList = coveringGrid.toList(n)

In [134]:
outputPath = rasterPath/'LandsatTucson'
outputPath.mkdir(exist_ok=True)
    
def downloadGridElement(grid_element_i):
    """Download the part of ``tucsonLandsat`` covered by one grid cell.

    ``grid_element_i`` is the position of the cell in the global ``gridList``.
    The image is saved as ``outputPath/<feature id>.tif``; when that file
    already exists nothing is downloaded. Returns None.
    """
    feature = ee.Feature(gridList.get(grid_element_i))
    # getInfo() fetches the feature id as a plain Python value for the file name.
    featureId = feature.id().getInfo()
    region = feature.geometry()

    targetFile = outputPath / f'{featureId}.tif'

    if not targetFile.exists():
        geemap.download_ee_image(
            tucsonLandsat,
            targetFile,
            crs='EPSG:4326',
            region=region,
            shape=(256, 256),
        )

In [133]:
# Sequential version: download the grid cells one after another.
for grid_element_i in range(n):
    downloadGridElement(grid_element_i)

In [135]:
# Parallel version: two worker processes share the grid-cell indices;
# cells whose output file already exists are skipped inside downloadGridElement.
with multiprocess.Pool(2) as pool:
    pool.map(downloadGridElement, range(n))

### Alternative, but does not check for existing files:

In [None]:
# geemap's own parallel tile download helper, used instead of the manual Pool loop.
geemap.download_ee_image_tiles_parallel(
    tucsonLandsat,
    coveringGrid,
    out_dir=outputPath,
    shape=(256, 256),
    crs='EPSG:4326',
    prefix="tucsonImage_",
)