##### useful stuff
- [export function](https://gis.stackexchange.com/questions/327839/export-multiple-images-in-one-task-gee-python-api)

In [None]:
%config Completer.use_jedi = False

In [None]:
import cv2
import ee
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from uuid import uuid4
from tqdm import tqdm

In [None]:
# Run the Earth Engine authentication step (presumably interactive; only needed when no valid credentials are available).
ee.Authenticate()

In [None]:
# Initialise the Earth Engine client; the `ee.*` calls in the cells below rely on this having run.
ee.Initialize()

In [None]:
def get_center_from_image_info(im_info):
    """Estimate the centre of an image footprint.

    Reads the vertex list at
    ``im_info['properties']['system:footprint']['coordinates']`` and averages
    four extreme vertices: the first and last vertex when ordered by
    (first coordinate, second coordinate), and the first and last vertex when
    ordered by (second coordinate, first coordinate).

    Returns a numpy array holding the component-wise mean of those vertices.
    """
    footprint = im_info['properties']['system:footprint']['coordinates']
    by_first_axis = sorted(footprint, key=lambda pt: (pt[0], pt[1]))
    by_second_axis = sorted(footprint, key=lambda pt: (pt[1], pt[0]))
    extremes = [
        by_first_axis[0],
        by_first_axis[-1],
        by_second_axis[0],
        by_second_axis[-1],
    ]
    return np.mean(extremes, axis=0)

def compute_bands_perc(im, perc):
    """Fetch the lower and upper percentile of bands B4, B3 and B2.

    Two ``reduceRegion`` requests are issued over ``im.geometry()`` with
    ``bestEffort=True``: one for percentile ``perc`` and one for
    ``100 - perc``. Each result is pulled client-side with ``getInfo()``.

    Returns ``{'min': <result for perc>, 'max': <result for 100 - perc>}``.
    """
    def _percentile_of_rgb(level):
        # One server round trip per percentile level.
        return im.select(['B4', 'B3', 'B2']).reduceRegion(
            reducer=ee.Reducer.percentile([level]),
            geometry=im.geometry(),
            bestEffort=True).getInfo()

    low = _percentile_of_rgb(perc)
    high = _percentile_of_rgb(100 - perc)
    return {'min': low, 'max': high}

def compute_viz_params(im, perc=2):
    """Build visualisation parameters from a percentile stretch.

    Uses ``compute_bands_perc`` to obtain per-band percentile values, then
    takes the smallest of the low values and the largest of the high values
    so that a single min/max pair is shared by all three bands.

    Returns a dict with keys ``'bands'``, ``'min'`` and ``'max'``.
    """
    stretch = compute_bands_perc(im, perc)
    lowest = np.min(list(stretch['min'].values()))
    highest = np.max(list(stretch['max'].values()))
    return {
        'bands': ee.List(["B4", "B3", "B2"]),
        'min': lowest,
        'max': highest,
    }

def compute_bands_sigma(im, n_std=1, keys=['B4', 'B3', 'B2']):
    """Compute per-band ``mean -/+ n_std * stdDev`` bounds.

    The mean and the standard deviation of the bands in ``keys`` are each
    obtained with a ``reduceRegion`` over ``im.geometry()``
    (``bestEffort=True``) and fetched with ``getInfo()``.

    Returns ``{'min': {band: mean - n_std*std}, 'max': {band: mean + n_std*std}}``.
    """
    def _reduce_with(reducer):
        # One server round trip per statistic.
        return im.select(keys).reduceRegion(
            reducer=reducer,
            geometry=im.geometry(),
            bestEffort=True).getInfo()

    mean = _reduce_with(ee.Reducer.mean())
    std = _reduce_with(ee.Reducer.stdDev())

    lower, upper = {}, {}
    for band in keys:
        lower[band] = mean[band] - std[band] * n_std
        upper[band] = mean[band] + std[band] * n_std
    return {'min': lower, 'max': upper}

def compute_viz_params_sigma(im, sigma=1):
    """Build visualisation parameters from a mean +/- sigma stretch.

    Uses ``compute_bands_sigma`` to obtain per-band bounds, then takes the
    smallest lower bound and the largest upper bound so that a single
    min/max pair is shared by all three bands.

    Returns a dict with keys ``'bands'``, ``'min'`` and ``'max'``.
    """
    bounds = compute_bands_sigma(im, sigma)
    lowest = np.min(list(bounds['min'].values()))
    highest = np.max(list(bounds['max'].values()))
    return {
        'bands': ee.List(["B4", "B3", "B2"]),
        'min': lowest,
        'max': highest,
    }

def extend_point_to_square(p, d=1):
    """Return the corners of a square of half-side ``d`` centred on ``p``.

    Args:
        p: point as an indexable pair ``(x, y)``.
        d: offset applied to each coordinate in both directions. It used to
            be ignored (offsets were hard-coded to 1); the default keeps the
            previous output.

    Returns:
        A list of five ``[x, y]`` pairs: the four corners in the order
        (+d, +d), (+d, -d), (-d, +d), (-d, -d), followed by the first corner
        again so the sequence is closed.
    """
    # NOTE(review): this corner order crosses itself when read as a polygon
    # ring; it is kept as-is so existing callers see the same sequence.
    rect = [[p[0] + i * d, p[1] + j * d] for i in (1, -1) for j in (1, -1)]
    rect.append([p[0] + d, p[1] + d])
    return rect

def list_square_to_polygon(rect):
    """Wrap a list of vertices into an ``ee.Geometry.Polygon``.

    Each vertex in ``rect`` is converted to an ``ee.List`` and the resulting
    ring is passed, as an ``ee.List``, to ``ee.Geometry.Polygon``.
    """
    ring = ee.List([ee.List(vertex) for vertex in rect])
    return ee.Geometry.Polygon(ring)

## Single sample test

## Iterate over list of places

### missing
- filter by dates: if there are already 1/2 pictures close to that point, skip all the others.
- filter by position: if there are close matches, skip the frames.

In [None]:
def no_samples_exception(e):
    """Return True when the text of ``e`` contains 'must be positive. Got: 0.'."""
    marker = 'must be positive. Got: 0.'
    message = str(e)
    return message.find(marker) != -1

def compute_min_distance_to_samples(im_cent, df, key='center_coords'):
    """Return the smallest Euclidean distance from ``im_cent`` to ``df[key]``.

    Every entry of column ``key`` is subtracted from ``im_cent`` and the norm
    of the difference is taken. An empty ``df`` yields ``np.inf`` so that any
    distance threshold is satisfied.
    """
    if not len(df):
        return np.inf
    distances = df[key].apply(lambda centre: np.linalg.norm(centre - im_cent))
    return np.min(distances)

def filter_unique_image_id_for_list(col_list_info):
    """Keep the first entry for each distinct tag found in the entries' ids.

    The tag is the second underscore-separated token of the last
    '/'-separated segment of ``entry['id']``. Entries whose tag has already
    been seen are dropped.

    Prints how many entries were kept out of the total.

    Returns a tuple ``(kept_positions, kept_entries, kept_tags)`` of parallel
    lists, in input order.
    """
    kept_positions, kept_entries, kept_tags = [], [], []
    for position, entry in enumerate(col_list_info):
        asset_name = entry['id'].split('/')[-1]
        tag = asset_name.split('_')[1]
        if tag in kept_tags:
            continue
        kept_entries.append(entry)
        kept_tags.append(tag)
        kept_positions.append(position)
    print(f' filtered unique sats{len(kept_entries)}/{len(col_list_info)}')
    return kept_positions, kept_entries, kept_tags
    
    
def check_if_downloaded(download_path, s_id):
    """Tell whether a file for ``s_id`` exists in ``download_path``.

    A directory entry matches when the part of its name before the first
    '.' equals ``s_id``.
    """
    return any(
        file_name.split('.')[0] == s_id
        for file_name in os.listdir(download_path)
    )

class MaxExports(Exception):
    """Signal that the configured number of exports has been reached."""

    def __init__(self, message="Max exports reached"):
        super().__init__(message)
        # Kept as an attribute so handlers can read the text directly.
        self.message = message
    

In [None]:
# Directory scanned for already-downloaded images, and the JSON file in
# which the bookkeeping table is persisted between runs.
download_path = '/data/datasets/earth_images/raw_sea/'
proc_data_f_name = './proc_data_sea.json'

# Resume from the saved table when it exists; otherwise start with an empty one.
if os.path.exists(proc_data_f_name):
    proc_data = pd.read_json(proc_data_f_name)
else:
    proc_data = pd.DataFrame()

In [None]:
# Show the bookkeeping table loaded above (notebook cell output).
proc_data

In [None]:
# For every row of the bookkeeping table, look up its index label among the
# files in `download_path`, then report how many rows have a matching file.
check_dowloaded = [
    check_if_downloaded(download_path, row_label)
    for row_label in proc_data.index
]
total_downloaded = np.sum(check_dowloaded)
print(f'downloaded : {total_downloaded}/{len(proc_data)}')

In [None]:
# Keep only the rows flagged in `check_dowloaded`, then overwrite the JSON
# bookkeeping file with the trimmed table.
# NOTE(review): a non-empty list of booleans acts as a row mask here; confirm
# the behaviour is acceptable when the table (and therefore the list) is empty.
proc_data = proc_data[check_dowloaded]
# NOTE(review): bare expression in the middle of the cell, so the notebook does not display it.
proc_data.shape
proc_data.to_json(proc_data_f_name)

In [None]:
# Load the query-result CSV for the region being processed. One file is active
# at a time; the commented-out lines are the other regions, marked "done" by the author.
# The cells below read the columns 'wrs_path', 'wrs_row', 'cloud_cover' and 'product_id'.
# queries = pd.read_csv('./bigqueries/bq-pacific-results-20210528-152231-yl0b4vrxmse0.csv') # done!
# queries = pd.read_csv('./bigqueries/bq-south-atlantic-results-20210528-153242-fsdvmae1sfzm.csv') # done
# queries = pd.read_csv('./bigqueries/bq-indic-results-20210528-152727-wskd3lmyrlgb.csv') # done
queries = pd.read_csv('./bigqueries/bq-north-atlantic-results-20210530-115218-hw0b5ee061u5.csv') # done
# Alternative orderings tried while exploring the table:
# queries.sort_values('cloud_cover')
# queries.sort_values(['wrs_path', 'wrs_row'])

In [None]:
# For every distinct (wrs_path, wrs_row) pair, keep the single query row that
# comes first when that pair's rows are ordered by ascending 'cloud_cover'.
unique_path_row = queries[['wrs_path', 'wrs_row']].drop_duplicates()
sea_filteded = pd.DataFrame([
    queries[(queries['wrs_path'] == wrs_path) & (queries['wrs_row'] == wrs_row)]
    .sort_values('cloud_cover')
    .iloc[0]
    for wrs_path, wrs_row in unique_path_row.values
]).reset_index(drop=True)

In [None]:
# Show (rows, columns) of the per-path/row selection (notebook cell output).
sea_filteded.shape

In [None]:
# for _, entry in sea_filteded.iterrows():
#     pid = entry['product_id']
#     print(pid)
#     pids = pid.split('_')
#     im_full_id = f'LANDSAT/LC08/C{pids[5]}/{pids[6]}/LC08_{pids[2]}_{pids[3]}'
#     print(im_full_id)

In [None]:
# Settings consumed by the export loop in the next cell.
im_scale = 30 # passed as `scale` to the export; Landsat 8 has 30 (presumably metres per pixel — confirm)
max_total_exports = 50 # the loop stops once this many export tasks have been started in one run
min_distance_neigh_thresh = 0.02 # minimum centre-to-centre distance to already recorded samples, in footprint coordinate units
export_max_pixels = 1e10 # passed as `maxPixels` to the export
export_folder = 'earthengine/python' # passed as `folder` to the Drive export and recorded with each sample

In [None]:
# Export loop: for each selected query row, rebuild the Earth Engine asset id,
# skip images that are already on disk or too close to a recorded sample, and
# otherwise start a Drive export and append a bookkeeping row to `proc_data`.
# The loop stops as soon as `max_total_exports` tasks have been started.
counter = 0
try:
    for _, entry in sea_filteded.iterrows():
        pid = entry['product_id']
        print(pid)
        # The asset id is assembled from underscore-separated fields of the product id.
        pids = pid.split('_')
        im_full_id = f'LANDSAT/LC08/C{pids[5]}/{pids[6]}/LC08_{pids[2]}_{pids[3]}'
        print(im_full_id)
        try:
            im = ee.Image(im_full_id)
            im_info = im.getInfo()
            im_cent = get_center_from_image_info(im_info)

            im_id = im.id().getInfo()
            min_dist_to_nei = compute_min_distance_to_samples(im_cent, proc_data, 'center_coords')
            is_downloaded = check_if_downloaded(download_path, im_id)
            print(f'{im_id} is downloaded: {is_downloaded}')
            if min_dist_to_nei >= min_distance_neigh_thresh and not is_downloaded:
                print(f'{im_id} distance check passed')
                im_coords = im_info['properties']['system:footprint']['coordinates']
                im_roi = ee.Geometry.Polygon(ee.List([ee.List(x) for x in im_coords]))
                viz_params = compute_viz_params_sigma(im, sigma=1)
                im_ready = im.visualize(**viz_params)

                # preview
                # im_ready.getThumbURL(dimensions='512x512')

                export_task = ee.batch.Export.image.toDrive(
                    im_ready,
                    description=im_id,
                    scale=im_scale,
                    region=im_roi,
                    maxPixels=export_max_pixels,
                    folder=export_folder,
                )
                export_task.start()
                export_status = export_task.status()

                sample = {
                    'id': im_id,
                    'center_coords': im_cent,
                    'folder': export_folder,
                    'min_distance_to_neighbour': min_dist_to_nei,
                    'export_status': export_status,
                    **im_info['properties']}
                # DataFrame.append was removed in pandas 2.0; build the
                # one-row frame it used to build internally and concatenate.
                new_row = pd.Series(sample, name=im_id).to_frame().T.infer_objects()
                proc_data = pd.concat([proc_data, new_row])
                # Persist after every export so an interrupted run can resume.
                proc_data.to_json(proc_data_f_name)
                counter += 1

                if counter >= max_total_exports:
                    raise MaxExports
                print(f' counter: {counter}')
        except MaxExports:
            # Not an Earth Engine failure: let the outer handler end the run.
            raise
        except Exception as e:
            # Known, skippable failures are reported and the loop moves on;
            # anything else propagates with its original traceback.
            # (Previously the second test was an independent `if`, so a
            # "no samples" error was printed and then re-raised anyway.)
            if no_samples_exception(e):
                print('no samples for params:')
                print(str(e))
            elif 'Image.load: Image asset' in str(e):
                print('image not found!')
            else:
                raise
        print('_________')
except MaxExports:
    print('max exports reached, done!')

                