# NDVI Calculation

In [None]:
import sys
sys.path.append('../')

In [None]:
import datetime as dt
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import rasterio
import lithops
import time
import shutil
import pylab
import os
import gc
from rasterio.io import MemoryFile
from concurrent.futures import ThreadPoolExecutor
from PIL import Image
from lithops import Storage

import cloudbutton_geospatial.s2froms3 as s2froms3
from cloudbutton_geospatial.utils import notebook as notebook_utils
from cloudbutton_geospatial.io_utils.ndvi import get_ndvi_params, ndvi_calculation, ndvi_tile_sentinel, get_subset_raster, lonlat_to_utm, get_poly_within
from cloudbutton_geospatial.io_utils.plot import tiff_overview, plot_map

%matplotlib inline

## Input parameters

Select the date interval in which tiles will be processed:

In [None]:
# Bucket whose keys are listed and then deleted one by one in the clean-up
# cells at the end of this notebook.
BUCKET_NAME = "daniel-lithops-geospatial"

In [None]:
# Date-range picker; the `.value` of each returned object is read further down
# as the start and end date of the scene search.
from_date, to_date = notebook_utils.pick_date_range()

Select the tile's cloud percentage threshold:

In [None]:
# Cloud-cover slider; its `.value` is later handed to the scene search as the
# `cloud_cover_le` argument.
percentage = notebook_utils.pick_percentage_slider()

Select the area that delimits the tiles you want to process (left-click to mark a point on the map, right-click to erase the current selection):

In [None]:
# Map selection widget; `get_region()` is read in the next cell to obtain the
# selected positions.
map_region = notebook_utils.MapRegion()

In [None]:
# Positions selected on the map; the last entry returned by the widget is dropped.
# NOTE(review): presumably the last entry repeats the first vertex to close
# the polygon - confirm against MapRegion.get_region().
coords = list(map_region.get_region()[:-1])
lons = [vertex[0] for vertex in coords]
lats = [vertex[1] for vertex in coords]

# Search parameters taken from the widgets.
start_date = from_date.value  # Start date to search images
end_date = to_date.value  # End date to search images
what = ['B04', 'B08']  # Bands we want to download
cc = percentage.value  # Cloud-cover limit passed as `cloud_cover_le`; 25 is 25%

# `points` keeps the selected positions as [lon, lat] pairs.
points = [[lon, lat] for lon, lat in zip(lons, lats)]

# Echo every selected position together with the search parameters.
for lon, lat in zip(lons, lats):
    print([lon, lat], start_date, end_date, what, cc)


## Generate longitude and latitude of the intermediate zones

In [None]:
import math

def distance(origin, destination):
    """Return the great-circle distance in kilometres between two positions.

    The haversine formula is evaluated on a sphere of radius 6371 km.

    Args:
        origin: ``(lat, lon)`` pair in decimal degrees.
        destination: ``(lat, lon)`` pair in decimal degrees.

    Returns:
        The distance in kilometres.
    """
    lat1, lon1 = origin
    lat2, lon2 = destination
    radius = 6371  # km

    half_dlat = math.radians(lat2 - lat1) / 2
    half_dlon = math.radians(lon2 - lon1) / 2
    a = (math.sin(half_dlat) * math.sin(half_dlat) +
         math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
         math.sin(half_dlon) * math.sin(half_dlon))

    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal positions, which would make sqrt(1 - a) raise
    # ValueError; clamp it back into the valid range.
    a = min(1.0, max(0.0, a))

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return radius * c

In [None]:
# For every pair of selected positions that are more than `segment_km` apart,
# append evenly spaced intermediate positions to `lons` / `lats` so that the
# tiles lying between the two positions are queried as well.
segment_km = 100

# Positions already present, tracked as (lon, lat) pairs. Comparing the pair
# (and not the longitude alone) keeps intermediate positions that share a
# longitude with an existing one, e.g. between two positions on one meridian.
known_positions = set(zip(lons, lats))

for i, start in enumerate(points):
    for end in points[i + 1:]:
        # `points` stores [lon, lat] while `distance` expects (lat, lon).
        dis = distance((start[1], start[0]), (end[1], end[0]))
        divisions = int(dis / segment_km)

        # Closer than `segment_km`: nothing to insert between the two.
        if divisions == 0:
            continue

        step_lon = (start[0] - end[0]) / (divisions + 1)
        step_lat = (start[1] - end[1]) / (divisions + 1)

        # Walk from the position nearest `end` back towards `start`.
        for k in range(divisions, 0, -1):
            position = (start[0] - step_lon * k, start[1] - step_lat * k)

            # Do not add duplicated positions
            if position not in known_positions:
                known_positions.add(position)
                lons.append(position[0])
                lats.append(position[1])

## Get Sentinel-2 packages from an indefinite number of cells

In [None]:
# Scene lists found for each queried position: `scenes_f1` for the start date
# and `scenes_f2` for the end date, kept index-aligned.
scenes_f1 = []
scenes_f2 = []

for longitude, latitude in zip(lons, lats):
    try:
        # Get scenes from initial date
        f1 = s2froms3.get_scene_list(lon=longitude, lat=latitude, start_date=start_date, end_date=start_date,
                                     what=what, cloud_cover_le=cc)

        # Get scenes from end date
        f2 = s2froms3.get_scene_list(lon=longitude, lat=latitude, start_date=end_date, end_date=end_date,
                                     what=what, cloud_cover_le=cc)
    except Exception as exc:
        # Best effort: one failing position must not abort the whole search,
        # but report it instead of discarding the error silently.
        print(f'Skipping lon={longitude}, lat={latitude}: {exc!r}')
        continue

    # Both dates are indexed with [0] further down, so an empty result for
    # either of them cannot be processed.
    if not f1 or not f2:
        print(f'Skipping lon={longitude}, lat={latitude}: no scenes found for both dates')
        continue

    # Do not add duplicated scenes
    if f1 in scenes_f1:
        continue

    scenes_f1.append(f1)
    scenes_f2.append(f2)

    print(f'Found scenes {start_date}:', f1)
    print(f'Found scenes {end_date}:', f2)
    print(f'Lon: {longitude}, Lat: {latitude}')
    # Path segments 2 to 4 of the first band path are printed as the cell.
    print(f'Cell: {" ".join(f1[0][0].split("/")[2:5])}\n')


if not scenes_f1:
    raise Exception('No data found')

# The block windows of one band of the last scene found are reused for every
# cell when the work items are built.
# `scene_band` is deliberately left open: its width/height are read again
# when the result mosaic is assembled.
scene = scenes_f1[-1][-1]
scene_band = rasterio.open('s3://'+scene[0])
windows = list(scene_band.block_windows())

Using the selected parameters, get the identifiers of the selected tiles from Sentinel-2:

In [None]:
# Executor options: AWS Lambda compute backend with S3 storage.
executor_options = dict(
    backend='aws_lambda',
    storage='aws_s3',
    log_level='DEBUG',
    monitoring='rabbitmq',
    #max_workers=10,
    runtime='aws_lambda/lithops-ndvi-v39:01',  # Runtime for AWS Lambda
)
fexec = lithops.FunctionExecutor(**executor_options)

# Get data from all cells: one work item per (cell, window), made of the first
# scene of each date plus the window to process.
iterdata = [
    (scene_f1[0], scene_f2[0], wd)
    for scene_f1, scene_f2 in zip(scenes_f1, scenes_f2)
    for wd in windows
]

# Execution
# NOTE(review): `compute_ndvi_diff` is neither defined nor imported in the
# visible part of this notebook - confirm where it comes from.
fexec.map(compute_ndvi_diff, iterdata)
results = fexec.get_result()
results

## 

## Get data statistics

In [None]:
# Generate plots
# NOTE(review): `dst` is a hard-coded absolute Windows path inside one user's
# profile, so this cell only works on that machine - consider a relative or
# configurable location.
# `lat` and `lon` are whatever the last iteration of the earlier
# `for lon, lat in zip(lons, lats)` loop left behind.
fexec.plot(dst='C:\\Users\\alega\\PycharmProjects\\geospatial-usecase\\ndvi-diff\\lithops' + str([lat, lon]))

In [None]:
# Function to generate throughput statistics
def create_agg_bdwth_plot(res_write, res_read, dst):
    """Plot aggregate write and read bandwidth over time and save it as a PDF.

    Args:
        res_write: sequence of dicts with 't0', 't1' and 'bandwidth' keys,
            drawn as 'Aggregate Write Bandwidth'.
        res_read: sequence of dicts with the same keys, drawn as
            'Aggregate Read Bandwidth'.
        dst: output path; a '~' in it is expanded to the user's home directory.

    The summed rates are divided by 1000 and the axis is labelled GB/sec.
    NOTE(review): this presumes 'bandwidth' is expressed in MB/sec - confirm.
    """
    def rate_histogram(origin, transfers, bins):
        # One row per transfer: its rate is written into every bin that lies
        # between the transfer's start and end (both relative to `origin`).
        table = np.array(transfers)
        begins = table[:, 0] - origin
        ends = table[:, 1] - origin
        rates = table[:, 2]

        hist = np.zeros((len(begins), len(bins)))
        for row, (begin, end) in enumerate(zip(begins, ends)):
            first, last = np.searchsorted(bins, [begin, end])
            if last > first:
                hist[row, first:last] = rates[row]
        return hist

    # Time origin: one second before the earliest 't0' of either series.
    earliest_write = min(t['t0'] for t in res_write)
    earliest_read = min(t['t0'] for t in res_read)
    start_time = min(earliest_write, earliest_read) - 1

    fig = pylab.figure(figsize=(8, 6))
    ax = fig.add_subplot(1, 1, 1)

    series = (
        (res_write, 'Aggregate Write Bandwidth'),
        (res_read, 'Aggregate Read Bandwidth'),
    )
    for records, label in series:
        transfers = [(r['t0'], r['t1'], r['bandwidth']) for r in records]

        # Latest end time plus 20 % headroom, rounded to a multiple of 8.
        latest_end = max(t1 - start_time for _, t1, _ in transfers)
        max_seconds = 8 * round(int(latest_end * 1.2) / 8)
        bins = np.linspace(0, max_seconds, max_seconds)

        hist = rate_histogram(start_time, transfers, bins)
        ax.plot(hist.sum(axis=0) / 1000, label=label)

    ax.set_xlabel('Execution Time (sec)')
    ax.set_ylabel("GB/sec")
    ax.set_xlim(0)
    ax.set_ylim(0)
    pylab.legend()
    pylab.grid(True, axis='y')

    if '~' in dst:
        dst = os.path.expanduser(dst)

    fig.tight_layout()
    fig.savefig(dst, format='pdf')

### Prepare data

In [None]:
# Prepare read data size: add up the 'content-length' of the first two band
# objects of the first start-date scene of every cell.
# NOTE(review): `storage` is not created in the visible part of this notebook
# (only the `Storage` class is imported) - confirm it exists before this cell.
source_bucket = 'sentinel-cogs'
get_sz = 0
for scene in scenes_f1:
    obj = storage.head_object(source_bucket, scene[0][0].replace('sentinel-cogs/', ''))
    obj2 = storage.head_object(source_bucket, scene[0][1].replace('sentinel-cogs/', ''))
    get_sz += float(obj.get('content-length'))
    get_sz += float(obj2.get('content-length'))

In [None]:
# Prepare throughput data
# Positions 4 and 5 of every worker result are stats dicts, each holding a
# 'get' (read) and a 'put' (write) entry.
list_throughput = []
for worker in results:
    list_throughput.extend((worker[4], worker[5]))

read_results = [stat['get'] for stat in list_throughput]
write_results = [stat['put'] for stat in list_throughput]


# Number of workers and wall-clock duration are needed below. They are derived
# here so that this cell also works when the notebook is run top to bottom
# (the cell that follows computes the same two values).
numberWorkers = len(fexec.futures)
start_stamps = [future.stats[key]
                for future in fexec.futures
                for key in future.stats
                if key.endswith("worker_func_start_tstamp")]
end_stamps = [future.stats[key]
              for future in fexec.futures
              for key in future.stats
              if key.endswith("worker_end_tstamp")]
duration = max(end_stamps) - min(start_stamps)


# Calculate throughput write data
size_total_write = 0
for value in write_results:
    size_total_write += value.get('size')

# Calculate throughput read data: the measured read size is spread evenly
# over the workers, and every read entry is annotated with that share and the
# bandwidth it implies (divided by 1e6).
size_per_worker = get_sz / numberWorkers
size_total_read = 0
for value in read_results:
    size_total_read += size_per_worker
    value['size'] = size_per_worker
    value['bandwidth'] = size_per_worker / (value.get('t1') - value.get('t0')) / 1e6


# Throughput read numeric:
throughput_interpolation_read = size_total_read / duration  # Bytes/second

# Throughput write numeric:
throughput_interpolation_write = size_total_write / duration  # Bytes/second

In [None]:
# Prepare duration data:
# Distinct start / end timestamps reported in the stats of every future.
startTime = {
    future.stats[key]
    for future in fexec.futures
    for key in future.stats.keys()
    if key.endswith("worker_func_start_tstamp")
}
endTime = {
    future.stats[key]
    for future in fexec.futures
    for key in future.stats.keys()
    if key.endswith("worker_end_tstamp")
}

# Span between the earliest start and the latest end.
duration = max(endTime) - min(startTime)


# Prepare number of workers:
numberWorkers = len(fexec.futures)

In [None]:
# Generate throughput graphic
# The function is declared as (res_write, res_read, dst): pass the write
# series first so that each curve gets the matching legend label.
create_agg_bdwth_plot(write_results, read_results, fexec.executor_id + '-storage-kpi.pdf')

### Results

In [None]:
# Print results:
# Sizes use binary multiples throughout (2**20 bytes, then 2**10 of those),
# consistent with the 1024**2 divisor used for the throughput figures below.
print(f"Data size bytes: {get_sz}")
mb = get_sz / 2**20
print(f"Data size megabytes: {mb}")
gb = mb / 2**10
print(f"Data size gigabytes: {gb}")

print(f"Number of workers: {numberWorkers}")

print(f"Throughput write: {throughput_interpolation_write / 1024**2} MiB/s")
print(f"Throughput read: {throughput_interpolation_read / 1024**2} MiB/s")

print(f"Duration: {duration}")

## Get and plot the computed jpg diff tile image

In [None]:
def get_jpg(data):
    """Download the JPG tiles of one image, stitch them and store a thumbnail.

    Args:
        data: non-empty list of ``(ij_window, co_jpg)`` pairs. ``ij_window[0]``
            is the ``(row, col)`` position of the tile and ``co_jpg`` is the
            cloud object holding the tile's JPG; its ``key`` is used to build
            the output name.

    Side effects:
        Stores the stitched image, reduced to at most 640x640, in the global
        ``images`` dict under the output name. Nothing is returned.

    Raises:
        Any exception raised while downloading or opening a tile.
    """
    first_key = data[0][1].key
    file = '_'.join(first_key.split('_')[:5])

    if 'DIFF' in first_key:
        out_file = f'AwsData/{file}_NDVI_DIFF.jpg'
    else:
        out_file = f'AwsData/{file}_NDVI.jpg'

    def get_window(item):
        # Runs in a worker thread: only fetches and opens one tile. The shared
        # row/column table is filled afterwards, in the calling thread.
        ij_window, co_jpg = item
        row = ij_window[0][0]
        col = ij_window[0][1]
        jpg_stream = fexec.storage.get_cloudobject(co_jpg, stream=True)
        return row, col, Image.open(jpg_stream)

    with ThreadPoolExecutor(max_workers=len(data)) as ex:
        # Materialise the iterator: Executor.map only re-raises a worker's
        # exception when its result is retrieved.
        tiles = list(ex.map(get_window, data))

    # Tiles grouped by row and then by column, whatever the grid size is.
    jpgs = {}
    for row, col, tile in tiles:
        jpgs.setdefault(row, {})[col] = tile

    # CAREFUL with scene_band: this is written for a single cell only.
    new_im = Image.new('RGB', (scene_band.width, scene_band.height))

    y_offset = 0
    for row in sorted(jpgs):
        x_offset = 0
        row_height = 0
        for col in sorted(jpgs[row]):
            im = jpgs[row][col]
            new_im.paste(im, (x_offset, y_offset))
            x_offset += im.size[0]
            # The row advances by the height of its last tile.
            row_height = im.size[1]
        y_offset += row_height

    thumbnail_size = (640, 640)
    new_im.thumbnail(thumbnail_size)

    images[out_file] = new_im

In [None]:
# Pair every window identifier (position 0 of a result) with the cloud object
# of the start-date image, the end-date image and their difference.
co_jpgs_f1 = [(res[0], res[1]) for res in results]
co_jpgs_f2 = [(res[0], res[2]) for res in results]
co_jpgs_diff = [(res[0], res[3]) for res in results]

# `get_jpg` stores each stitched thumbnail in this dict.
images = {}
with ThreadPoolExecutor(max_workers=3) as ex:
    # Materialise the iterator: Executor.map only re-raises an exception from
    # `get_jpg` when its result is retrieved, so without this a failed image
    # would simply be missing from the figure.
    list(ex.map(get_jpg, [co_jpgs_f1, co_jpgs_f2, co_jpgs_diff]))

f, ax = plt.subplots(1, 3, figsize=(18, 18))
for axis, name in zip(ax, sorted(images)):
    axis.set_title(name)
    axis.imshow(images[name])
plt.show()

## Delete temporary files

In [None]:
# List the keys stored in BUCKET_NAME.
# NOTE(review): `storage` is not created in the visible part of this notebook
# (only the `Storage` class is imported) - confirm it exists before this cell.
keys = storage.list_keys(bucket=BUCKET_NAME)

In [None]:
# Delete, one request per key, every key that was listed for BUCKET_NAME.
# NOTE(review): the listing is not filtered by prefix, so this removes
# whatever was listed for the bucket, not only this notebook's output - confirm.
for key in keys:
    storage.delete_object(bucket=BUCKET_NAME, key=key)