# Sentinel-2 Cloud Statistics

In [None]:
# Load Data Cube Configuration
# from odc_gee import earthengine
# dc = earthengine.Datacube(app='Cloud_Statistics')

# Import Data Cube API
# import utils.data_cube_utilities.data_access_api as dc_api  
# api = dc_api.DataAccessApi()

# Import Data Cube Utilities
import datacube
import sys, os
os.environ['USE_PYGEOS'] = '0'
from dea_tools.plotting import rgb, display_map
from dea_tools.bandindices import calculate_indices

### EASI tools
sys.path.append(os.path.expanduser('/home/jovyan/hub-notebooks/scripts'))
from ceos_utils.data_cube_utilities.clean_mask import landsat_clean_mask_invalid, landsat_qa_clean_mask
from easi_tools import EasiNotebooks
import notebook_utils
easi = EasiNotebooks()

# Import Common Utilities
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Start Dask for this session without going through a gateway (use_gateway=False),
# show whichever handle came back (the cluster if there is one, otherwise the client),
# then print the dashboard address reported by the notebook helper.
cluster, client = notebook_utils.initialize_dask(use_gateway=False)
display(cluster or client)
print(notebook_utils.localcluster_dashboard(client, server=easi.hub))

In [None]:
# Set up S3 access for the data reads: signed requests (aws_unsigned=False)
# against requester-pays buckets. The Dask client is handed over as well,
# presumably so the same settings reach the workers - confirm against the
# datacube documentation.
from datacube.utils.aws import configure_s3_access

configure_s3_access(
    aws_unsigned=False,
    requester_pays=True,
    client=client,
)

In [None]:
# Select the Data Cube product to analyse.
# This name is passed as the `product` argument of the data load further down.
# NOTE(review): "s2_l2a" is presumably the Sentinel-2 Level-2A product of this
# Data Cube instance - confirm with `dc.list_products()`.
product = "s2_l2a"

In [None]:
# Select the analysis region as latitude and longitude bounds.
#
# The active region is taken from the EASI notebook defaults (the last two
# assignments in this cell). The commented-out alternatives below show two ways
# of defining a custom region:
#   1. explicit (min, max) bounds for latitude and longitude, or
#   2. a centre point (lat, lon) plus a box size in degrees, from which the
#      bounds are calculated.
# To use one of them, uncomment it AND remove the two `easi` assignments at the
# end of the cell, otherwise the defaults overwrite your selection.

# Mombasa, Kenya
# lat_long = (-4.03, 39.62)
# box_size_deg = 0.15

# Calculate the latitude and longitude bounds of the analysis box
# latitude = (lat_long[0]-box_size_deg/2, lat_long[0]+box_size_deg/2)
# longitude = (lat_long[1]-box_size_deg/2, lat_long[1]+box_size_deg/2)

# Sydney Cricket Ground
# latitude = (-33.8951, -33.8902)
# longitude = (151.2219, 151.2276)

# Sydney, Australia
# latitude = (-34.039, -33.668)
# longitude = (150.867, 151.350)

# Suva, Fiji
# latitude = (-18.1725, -18.0492)
# longitude = (178.3881, 178.5190)

# An Giang Province - Vietnam
# Test Region for EY Data Challenge
# SMALL RICE CROP AREA #23
# lat_long = (10.404, 105.236)
# box_size_deg = 0.005
# Calculate the latitude and longitude bounds of the analysis box
# latitude = (lat_long[0]-box_size_deg/2, lat_long[0]+box_size_deg/2)
# longitude = (lat_long[1]-box_size_deg/2, lat_long[1]+box_size_deg/2)

# Default region provided by the EASI notebook helper.
# NOTE(review): presumably (min, max) tuples like the alternatives above - confirm.
latitude = easi.latitude
longitude = easi.longitude

In [None]:
# Select the time range to analyse.
# The value is a (Min, Max) pair of dates in YYYY-MM-DD format and is passed as
# the `time` query of the data load further down.
# The Sentinel-2 allowable time range is: 2017-03-28 to current
time_extents = ('2018-01-01', '2018-12-31')

In [None]:
# Display the analysis region on a map.
# Note the argument order: the longitude bounds are passed first, then the
# latitude bounds.
# Click on the plot to get Lat-Lon coordinates to adjust the region
# Zoom in/out on the plot to move around the globe for other regions

display_map(longitude,latitude)

## Calculate cloud coverage percentage for each scene

In [None]:
# Create a custom cloud coverage table here

def build_cloud_coverage_table_sentinel(product,platform,latitude,longitude,
                                        time=None,dc=None,extra_band='green',extra_load_params=None):
    """Load a Sentinel-2 cube and summarise its clean / cloud / no-data coverage per scene.

    The cube is loaded with ``dc.load`` (grouped by solar day) with two
    measurements: ``extra_band`` and the ``SCL`` scene-classification band.
    Every pixel of every time slice is then classified from its SCL value:

    * no-data: SCL == 0
    * cloud:   SCL in {1, 3, 8, 9, 10}
      (presumably saturated/defective, cloud shadow, medium- and
      high-probability cloud, and thin cirrus - confirm against the
      Sentinel-2 L2A scene classification legend)
    * clean:   everything else

    Parameters
    ----------
    product : str
        Name of the Data Cube product to load.
    platform : str
        Not used by this function; kept so existing callers keep working.
    latitude, longitude : tuple
        Spatial query bounds, forwarded unchanged to ``dc.load``.
    time : tuple, optional
        Time query forwarded to ``dc.load``; omitted from the query when None.
    dc : object
        Data Cube instance providing ``load(**kwargs)``. Required.
    extra_band : str
        Additional measurement loaded next to ``SCL`` and returned in the
        dataset; it does not take part in the statistics.
    extra_load_params : dict, optional
        Extra keyword arguments for ``dc.load`` (for example ``output_crs``,
        ``resolution`` or ``dask_chunks``).

    Returns
    -------
    tuple
        ``(geo_data, table)`` where ``geo_data`` is whatever ``dc.load``
        returned and ``table`` is a ``pandas.DataFrame`` with one row per time
        slice and the columns ``Date``, ``Clean_percent``, ``Cloud_percent``,
        ``NoData_percent`` (percentages of all pixels in the slice),
        ``Clean_count`` (number of clean pixels) and ``Total_count`` (number
        of pixels that are not no-data).

    Raises
    ------
    ValueError
        If ``dc`` is None.
    """
    if dc is None:
        raise ValueError("A Data Cube instance must be supplied through the 'dc' argument")

    # `extra_load_params` defaults to None rather than a shared mutable dict.
    load_params = dict(product=product, latitude=latitude, longitude=longitude,
                       group_by='solar_day', measurements=[extra_band, 'SCL'],
                       **(extra_load_params or {}))
    if time is not None:
        load_params["time"] = time

    geo_data = dc.load(**load_params)

    # Truncate every acquisition timestamp to its calendar day for the table.
    dates = [dt.astype('datetime64[D]') for dt in geo_data.time.values]

    # Read the SCL values exactly once: for a dask-backed cube each `.values`
    # access triggers a full computation of the band.
    scl = geo_data.SCL.values

    nodata_masks = scl == 0
    cloud_masks = np.isin(scl, (1, 3, 8, 9, 10))
    clean_masks = ~(nodata_masks | cloud_masks)

    # Iterating a mask walks its first axis, i.e. one 2-D mask per time slice.
    data = {"Date": dates,
            "Clean_percent": [mask.mean()*100 for mask in clean_masks],
            "Cloud_percent": [mask.mean()*100 for mask in cloud_masks],
            "NoData_percent": [mask.mean()*100 for mask in nodata_masks],
            "Clean_count": [np.sum(mask) for mask in clean_masks],
            "Total_count": [np.sum(mask) for mask in ~nodata_masks]}

    return geo_data, pd.DataFrame(data=data, columns=list(data.keys()))

In [None]:
# Open a Data Cube connection with default arguments; `dc` is used for every data load below.
dc = datacube.Datacube()

In [None]:
# Build the cube and the per-scene coverage table in one call.
# The extra load parameters request a 10-unit grid in EPSG:6933, skip datasets
# that cannot be read, and chunk the cube one time slice at a time for Dask.
sentinel_dataset, coverage_table = build_cloud_coverage_table_sentinel(
    product=product,
    platform="SENTINEL-2",
    latitude=latitude,
    longitude=longitude,
    time=time_extents,
    dc=dc,
    extra_band='green',
    extra_load_params=dict(
        output_crs='EPSG:6933',
        resolution=(-10, 10),
        skip_broken_datasets=True,
        dask_chunks=dict(time=1),
    ),
)

## Create a table of cloud coverage percentage for each date

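This table lists one row per time slice in the cube (the row index starts at 0). "Clean_percent" is the percentage of pixels in the scene that contain valid data and are NOT flagged as cloud, so low values indicate cloudy scenes and high values indicate clearer scenes. "Cloud_percent" and "NoData_percent" are the percentages of pixels flagged as cloud and as no-data, respectively. "Clean_count" is the number of clean pixels in the scene and "Total_count" is the number of pixels in the scene that contain data (no-data pixels are excluded).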

Typically, Sentinel-2 scenes for a single location are 5 days apart. This assumes that both missions (Sentinel-2A and Sentinel-2B) are available, which is the case for most places in the world. When there is significant cloud cover, scenes may be missing from the time series due to issues with processing and geolocation.

In [None]:
# Raise the pandas row limit to the number of rows in the table so that every
# time slice is listed, then show the table as the cell output.
pd.set_option('display.max_rows', len(coverage_table.index))
coverage_table

## Create a plot of cloud coverage percentage for each date

In [None]:
# Plot the percentage of clean pixels for every acquisition date.
# High values are clear scenes, low values are cloudy scenes. The axes are
# labelled explicitly because the plotted quantity is the CLEAN percentage,
# not the cloud percentage.
plt.figure(figsize=(15, 5))
plt.plot(coverage_table["Date"].values, coverage_table["Clean_percent"].values, 'bo', markersize=8)
plt.ylim([0, 105])  # a little headroom so markers at 100% are not clipped
plt.xlabel("Date")
plt.ylabel("Clean pixels (%)")
plt.title("Clean (cloud-free) pixel percentage per scene")
plt.show()

## Review an RGB scene for a selected time slice

In [None]:
# Load the bands needed for the RGB images onto the same grid as the
# cloud-statistics cube (`like=`).
# `group_by` and `skip_broken_datasets` repeat the settings of the first load
# so that the time slices of this cube line up with the rows of the coverage
# table; without them, a time-slice index could point at a different scene.
# The product comes from the `product` variable instead of a second hard-coded name.
# NOTE(review): with `like=` the time range is presumably derived from the
# time coordinates of the existing cube - confirm against the datacube docs.
sentinel_dataset = dc.load(like=sentinel_dataset,
                           product=product,
                           measurements=['red', 'green', 'blue', 'nir', 'swir_1', 'swir_2'],
                           group_by='solar_day',
                           skip_broken_datasets=True,
                           dask_chunks={'time': 1})

In [None]:
# Pick one time slice and draw it as two RGB composites, side by side.
# Time slices are numbered from 0 and shown in the table above; use the
# Clean_percent column there to find scenes with few clouds.
# Clouds show up in WHITE in an RGB image.
#
# Composites:
#   True Color  = red, green, blue
#   False Color = swir_2, nir, green
#
# NOTE(review): `slice` shadows the Python builtin of the same name. It is
# kept because other cells may refer to it.

slice = 0

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 8))

# Stack the three chosen bands of the selected scene into one array per composite.
true_rgb = sentinel_dataset[['red', 'green', 'blue']].isel(time=slice).to_array()
false_rgb = sentinel_dataset[['swir_2', 'nir', 'green']].isel(time=slice).to_array()

# The value ranges are clipped to fixed limits for display.
true_rgb.plot.imshow(ax=ax[0], vmin=0, vmax=2000)
false_rgb.plot.imshow(ax=ax[1], vmin=0, vmax=5000)

# Title each panel and hide both of its axes.
ax[0].set_title('True Color')
ax[0].xaxis.set_visible(False)
ax[0].yaxis.set_visible(False)

ax[1].set_title('False Color')
ax[1].xaxis.set_visible(False)
ax[1].yaxis.set_visible(False)

plt.show()