# Patch downloading

This notebook contains the code to download the training patches. The steps are as follows:

1. Filter the leak data to only contain leaks that were present in the main distribution pipe. 
2. Set the coordinates of the leak to the right CRS in order to be compatible with GEE.
3. Define the functions for the download task: 
    - `get_image`: Get the collection of images that contain the leak coordinate and were captured before the leak was repaired.
    - `bands_clip_image`: Select the bands that will be downloaded and the patch size (in meters).
    - `download_image`: Download and write the patch in the folder named patches_raw.
4. Download the patches. Each patch is downloaded in a zip file which contains one tif file per band. 

In [1]:
import pandas as pd
from datetime import timedelta, datetime
import ee
import pyproj
import requests
import winsound

In [2]:
# Read the cleaned leak records; every download below is driven by the rows of this table
leaks_clean = pd.read_csv("../data/clean/leaks20-22_clean.csv")

## Functions to download patches

In [26]:
# Get image from image collection
def get_image(start, end, poi_leak):
    """Select one Sentinel-2 surface-reflectance image for a point and date window.

    Parameters
    ----------
    start, end : str or date-like
        Bounds of the acquisition window handed to ``filterDate``. Any value
        that is not already a string is converted with ``str()``.
    poi_leak : ee.Geometry
        Geometry used to filter the collection by location.

    Returns
    -------
    ee.Image
        The first image of the filtered collection after sorting by
        ``CLOUDY_PIXEL_PERCENTAGE`` (ascending).

    Raises
    ------
    ValueError
        If no image matches the location and date window.
    """
    # Convert each bound independently: the original only converted `end`
    # when `start` was not a string, so mixed inputs slipped through.
    if not isinstance(start, str):
        start = str(start)
    if not isinstance(end, str):
        end = str(end)

    # The last sort (cloud percentage) decides the final order. The earlier
    # date sort can only act as a tie-breaker if the server-side sort is
    # stable -- NOTE(review): confirm that is guaranteed.
    collection = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")\
    .filterBounds(poi_leak)\
    .filterDate(ee.Date(start), ee.Date(end))\
    .sort("system:time_start", opt_ascending = False)\
    .sort("CLOUDY_PIXEL_PERCENTAGE")

    # Fail with a readable message instead of an opaque Earth Engine error
    # raised from the property look-ups below when nothing matched.
    n_images = collection.size().getInfo()
    if n_images == 0:
        raise ValueError("No Sentinel-2 images found between " + start + " and " + end)

    img = collection.first()

    print("Date of selected image (Sentinel): ", ee.Date(img.get("system:time_start")).format("yyyy-MM-dd").getInfo(),
          "\nSentinel images found:", n_images,
          "\nCloud %: ", img.get("CLOUDY_PIXEL_PERCENTAGE").getInfo())

    return img

In [4]:
# Define the bands to select and the patch size (radius in meters respect to leak point)
def bands_clip_image(image, buffer_size = 100, bands = ["B4", "B3", "B2"], poi_leak = None):
    """Clip an image to a square patch around a point and keep the chosen bands.

    Parameters
    ----------
    image : ee.Image
        Image to clip.
    buffer_size : number
        Buffer distance passed to ``buffer`` on the point before taking its
        bounds (the notebook uses it as a radius in meters).
    bands : list of str
        Band names passed to ``select``. The default list is never mutated.
    poi_leak : ee.Geometry, optional
        Center of the patch. When omitted, the notebook-level variable ``poi``
        is used, which is what the original implementation always did.

    Returns
    -------
    ee.Image
        The clipped image restricted to ``bands``.
    """
    # Prefer the explicit argument; fall back to the global for existing callers.
    if poi_leak is None:
        poi_leak = poi

    # Bounding box of the buffered point, expressed in EPSG:32613
    patch_region = poi_leak.buffer(buffer_size).bounds(proj = "EPSG:32613", maxError = 0.001)

    # Clip first, then select bands (same order as before)
    image = image.clip(patch_region)
    image = image.select(bands)

    return image

In [5]:
# Download patch image
def download_image(image, path, date_label):
    """Download an Earth Engine image and write it to ``<path>S2_<date_label>.zip``.

    Parameters
    ----------
    image : ee.Image
        Image to download; its download URL is requested at 10 m scale in
        EPSG:32613.
    path : str
        Prefix of the output file. It is concatenated as-is, so it must end
        with a path separator when it names a folder.
    date_label : str
        Label inserted in the file name after ``S2_``.

    Raises
    ------
    requests.HTTPError
        If the download request returns an error status.
    """
    # NOTE(review): "fileFormat" and "maxPixels" are passed through unchanged;
    # confirm they are parameters getDownloadURL actually honours.
    url = image.getDownloadURL(
        {
        "scale": 10,
        "crs": "EPSG:32613",
        "fileFormat": "GeoTIFF",
        "maxPixels": 1e13
        }
    )

    r = requests.get(url, allow_redirects = True)
    # Without this check an error response body would be saved as a ".zip".
    r.raise_for_status()

    # Context manager guarantees the file handle is flushed and closed.
    with open(path + "S2" + "_" + date_label + ".zip", "wb") as zip_file:
        zip_file.write(r.content)
    print("Download complete")

## Functions to estimate LST 10m band

In [16]:
def get_landsat_images(start, end, poi_leak):
    """Select the least-cloudy Landsat 8 Level-2 image for a point and date window.

    Parameters
    ----------
    start, end : str or date-like
        Bounds of the acquisition window handed to ``filterDate``. Any value
        that is not already a string is converted with ``str()``.
    poi_leak : ee.Geometry
        Geometry used to filter the collection by location.

    Returns
    -------
    ee.Image
        The first image of the collection after sorting by ``CLOUD_COVER``.

    Raises
    ------
    ValueError
        If no image matches the location and date window.
    """
    # Convert each bound independently (the original tied `end` to `start`).
    if not isinstance(start, str):
        start = str(start)
    if not isinstance(end, str):
        end = str(end)

    selected_Landsat_collection = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2")\
                                    .filterBounds(poi_leak)\
                                    .filterDate(ee.Date(start), ee.Date(end))\
                                    .sort("CLOUD_COVER")

    # Check for an empty result BEFORE touching the first image: the original
    # check came after a print that already failed on an empty collection.
    # The size is fetched once and reused below.
    n_images = selected_Landsat_collection.size().getInfo()
    if n_images == 0:
        print("THERES NO LANDSAT IMAGES FOR THE SELECTED DATES")
        raise ValueError("No Landsat images found between " + start + " and " + end)

    img_landsat = selected_Landsat_collection.first()

    print("Date of selected image (Landsat):", ee.Date(img_landsat.get("system:time_start")).format("yyyy-MM-dd").getInfo(),
        "\nLandsat images found: ", n_images,
        "\nCloud %: ", img_landsat.get("CLOUD_COVER").getInfo())

    return img_landsat

$LST_{10m} = \beta_0 + \beta_1 NDVI_{10m} + \beta_2 NDBI_{10m} + \beta_3 NDWI_{10m}$

where: 

$\beta_0, \beta_1, \beta_2, \beta_3$ are the coefficients estimated by fitting the following linear regression model on the 30 m Landsat data:

$LST_{30m} = \beta_0 + \beta_1 NDVI_{30m} + \beta_2 NDBI_{30m} + \beta_3 NDWI_{30m}$


In [27]:
# Create compound bands in order to perform linear regression
def lst_regression(landsat_image, sentinel_image, buffer_size, residuals = True, poi_leak = None):
    """Estimate a 10 m LST image from Landsat LST and Sentinel-2 spectral indices.

    A linear model ``lst ~ constant + ndvi + ndbi + ndwi`` is fitted on the
    Landsat image inside the patch region (scale 30), and the fitted
    coefficients are then applied to the same indices computed from the
    Sentinel-2 image.

    Parameters
    ----------
    landsat_image : ee.Image
        Image providing ``SR_B3``, ``SR_B4``, ``SR_B5``, ``SR_B6`` and ``ST_B10``.
    sentinel_image : ee.Image
        Image providing ``B3``, ``B4``, ``B8`` and ``B11``.
    buffer_size : number
        Buffer distance applied to the point before taking its bounds.
    residuals : bool
        When true, the Gaussian-smoothed regression residuals are added to the
        downscaled image before it is returned.
    poi_leak : ee.Geometry, optional
        Center of the patch. When omitted, the notebook-level variable ``poi``
        is used, as the original implementation did.

    Returns
    -------
    ee.Image
        The downscaled LST image clipped to the patch region.
    """
    # Prefer the explicit argument; fall back to the global for existing callers.
    if poi_leak is None:
        poi_leak = poi

    # Patch region, computed once and reused for the regression and the clips
    region = poi_leak.buffer(buffer_size).bounds(proj = "EPSG:32613", maxError = 0.001)

    # NOTE(review): the Landsat bands are used as stored, with no scale/offset
    # applied -- confirm this is intended for this collection.
    ndvi_landsat = landsat_image.normalizedDifference(["SR_B5", "SR_B4"]).rename("ndvi")
    ndwi_landsat = landsat_image.normalizedDifference(["SR_B3", "SR_B5"]).rename("ndwi")
    ndbi_landsat = landsat_image.normalizedDifference(["SR_B6", "SR_B5"]).rename("ndbi")
    lst_landsat_30m = landsat_image.select("ST_B10").rename("Landsat_LST_30m")

    # NOTE(review): the Sentinel "ndwi" pairs B3 with B11, while the Landsat
    # one pairs SR_B3 with SR_B5 -- confirm the two indices are meant to differ.
    ndvi_sentinel = sentinel_image.normalizedDifference(["B8", "B4"]).rename("s2_ndvi")
    ndwi_sentinel = sentinel_image.normalizedDifference(["B3", "B11"]).rename("s2_ndwi")
    ndbi_sentinel = sentinel_image.normalizedDifference(["B11", "B8"]).rename("s2_ndbi")

    # Linear regression: 4 independent bands (constant, ndvi, ndbi, ndwi), 1 dependent (lst)
    bands = ee.Image(1).addBands(ndvi_landsat).addBands(ndbi_landsat).addBands(ndwi_landsat).addBands(lst_landsat_30m).rename(["constant", "ndvi", "ndbi", "ndwi", "lst"])

    img_landsat_regression = bands.reduceRegion(
        reducer = ee.Reducer.linearRegression(4, 1),
        geometry = region,
        scale = 30,
        maxPixels = 1e13
    )

    # Coefficients come back in the same order as the independent bands above
    coef_list = ee.Array(img_landsat_regression.get("coefficients")).toList()

    def _coef_image(index):
        # Constant image holding the index-th regression coefficient
        return ee.Image(ee.Number(ee.List(coef_list.get(index)).get(0))).reproject(crs = "EPSG:32613")

    intercept = _coef_image(0)
    slope_ndvi = _coef_image(1)
    slope_ndbi = _coef_image(2)
    slope_ndwi = _coef_image(3)

    # Model evaluated on the Landsat indices, used only to derive the residuals
    LST_model = intercept.add(slope_ndvi.multiply(ndvi_landsat))\
                .add(slope_ndbi.multiply(ndbi_landsat))\
                .add(slope_ndwi.multiply(ndwi_landsat)).clip(region)

    # Residuals are kept in their own variable: the original reused the name
    # `residuals`, clobbering the flag so the residual branch never ran.
    residual_image = lst_landsat_30m.subtract(LST_model)

    # Define gaussian kernel in order to perform convolution for image smoothness
    gaussian = ee.Kernel.gaussian(radius = 1.5, units = "pixels")

    # Apply convolution
    residuals_gaussian = residual_image.resample("bicubic").convolve(gaussian)

    # Final downscaled image: same coefficients applied to the Sentinel indices
    lst_landsat_10m_final = intercept.add(slope_ndvi.multiply(ndvi_sentinel))\
                                     .add(slope_ndbi.multiply(ndbi_sentinel))\
                                     .add(slope_ndwi.multiply(ndwi_sentinel))

    if residuals:
        # Final downscaled image with the smoothed residuals added back
        return lst_landsat_10m_final.add(residuals_gaussian).clip(region)

    return lst_landsat_10m_final.clip(region)

In [8]:
def add_lst10m_band(sentinel_image, lst_image):
    """Return ``sentinel_image`` with ``lst_image`` appended as band ``LST_10m``."""
    # Rename first so the appended band carries a stable name
    lst_band = lst_image.rename("LST_10m")
    stacked_image = sentinel_image.addBands(lst_band)
    return stacked_image

## Automate Download for all the leaks

In [9]:
# Initialize the Google Earth Engine (GEE) client before any ee.* object is evaluated.
# NOTE(review): presumably requires credentials from a prior authentication step -- confirm.
ee.Initialize()

In [10]:
# Show the available columns; the download loops read `id`, `fechalegal`, `x` and `y`
leaks_clean.columns

Index(['Unnamed: 0', 'id', 'fecha_de_i', 'fecha_fina', 'fechaasign',
       'fechalegal', 'nombreunid', 'codestado', 'codtrabajo', 'actividad',
       'barrio', 'com1', 'com2', 'causal', 'orden', 'solicitud', 'cliente',
       'producto', 'consecruta', 'codruta', 'x', 'y'],
      dtype='object')

### Download leak patches

In [32]:
# Download one patch per leak, using imagery acquired BEFORE the leak date.
# For each row of leaks_clean: pick a Landsat image in the window
# [leak - 30 days, leak - 5 days], pick a Sentinel-2 image in the 5 days before
# that Landsat image, estimate the 10 m LST band and download the patch.
bands = ["B4", "B3", "B2", "B1", "B5", "B6", "B7", "B8", "B8A", "B9", "B11", "B12", "WVP"]
i = 0
download_path = "../data/patches_raw/leak/"
patch_size = 100
index_progress = []
# NOTE: `id` shadows the builtin; the name is kept because the metadata cell
# further down reads it.
id = []
sentinel_img_date = []
landsat_img_date = []
date_diff = []
date_diff_leak = []
sentinel_cloud = []
landsat_cloud = []

# Build the transformer once instead of on every iteration. always_xy = True
# makes it take (x, y) and return (lon, lat), so the names below are truthful
# (the original relied on two swaps cancelling each other out).
transformer = pyproj.Transformer.from_crs("epsg:32613", "epsg:4326", always_xy = True)

# Initialised unconditionally so the loop also works when restarted from an
# index other than 0 (the original only set it when leak == 0).
previous_end_date = ""

prev_rep = 0
# Rows are processed in batches; a beep marks the end of each batch.
for rep in [100, 200, 300, 400, 500, 600, 700, 800, 900, 985]:

    for leak in range(prev_rep, rep):
        print("="*100)
        print("Leak index: ", leak)
        date_leak = datetime.strptime(leaks_clean.fechalegal[leak], "%Y-%m-%d").date()
        id_leak = leaks_clean.id[leak]
        id.append(id_leak)

        # Landsat search window: from 30 to 5 days before the leak date
        end_date_landsat = date_leak - timedelta(days = 5)
        start_date_landsat = end_date_landsat - timedelta(days = 25)

        print("Date of leak: ", date_leak)

        # Count consecutive leaks that share the same date
        if previous_end_date == date_leak:
            i += 1
            print("Leak detected at the same date")
        else:
            i = 0

        # Label for the zip file: leak id, leak date and row index
        date_lab = "i" + str(int(id_leak)) + "d" + "_" + str(leaks_clean.fechalegal[leak])[:10] + "_" + str(leak)

        # Projected coordinates of the leak (EPSG:32613)
        leak_x_ogcrs = leaks_clean["x"][leak]
        leak_y_ogcrs = leaks_clean["y"][leak]

        # Transform to degrees
        leak_lon, leak_lat = transformer.transform(leak_x_ogcrs, leak_y_ogcrs)

        # Point of leak, as [longitude, latitude]. The name `poi` must be kept:
        # bands_clip_image and lst_regression read it as a global.
        poi = ee.Geometry.Point([leak_lon, leak_lat])

        print("Leak coords: ", (leak_lon, leak_lat), "\n")

        # Get sentinel and landsat collection of images according to leak coord
        img_landsat = get_landsat_images(start = start_date_landsat, end = end_date_landsat, poi_leak = poi)
        landsat_date = ee.Date(img_landsat.get("system:time_start")).format("yyyy-MM-dd").getInfo()
        landsat_date = datetime.strptime(landsat_date, "%Y-%m-%d").date()

        # Sentinel image is selected after using landsat image date in order to get the minimum time between images
        end_date_sentinel = landsat_date
        start_date_sentinel = end_date_sentinel - timedelta(days = 5)

        img = get_image(start = start_date_sentinel, end = end_date_sentinel, poi_leak = poi)
        sentinel_date = ee.Date(img.get("system:time_start")).format("yyyy-MM-dd").getInfo()
        sentinel_date = datetime.strptime(sentinel_date, "%Y-%m-%d").date()

        # Append date of images
        sentinel_img_date.append(sentinel_date)
        landsat_img_date.append(landsat_date)

        # Get difference of days between dates
        date_diff.append(abs(sentinel_date - landsat_date))
        date_diff_leak.append(sentinel_date - date_leak)
        print("Difference between image dates:", abs(sentinel_date - landsat_date))
        print("Difference between date of leak and date of image:" , abs(sentinel_date - date_leak))

        # Append cloud %
        sentinel_cloud.append(img.get("CLOUDY_PIXEL_PERCENTAGE").getInfo())
        landsat_cloud.append(img_landsat.get("CLOUD_COVER").getInfo())

        # Select bands and reduce the whole image to a patch centered on the leak
        img = bands_clip_image(img, buffer_size = patch_size, bands = bands)

        # Estimate lst from landsat image and add its lst band to sentinel image
        lst_image = lst_regression(landsat_image = img_landsat, sentinel_image = img, buffer_size = patch_size, residuals = True)
        img = add_lst10m_band(img, lst_image = lst_image)

        # Download image
        download_image(image = img, path = download_path, date_label = date_lab)

        previous_end_date = date_leak

        index_progress.append(leak)

    print("+"*100)
    print("PATCH DOWNLOAD COMPLETED", "Batch:", rep)
    prev_rep = rep
    # Audible end-of-batch signal. No explicit break is needed: the loop ends
    # on its own after the last batch.
    winsound.Beep(2000, 1000)


Leak index:  0
Date of leak:  2020-01-07
Leak coords:  (-102.25322875767611, 21.885447715909184) 

Date of selected image (Landsat): 2019-12-13 
Landsat images found:  2 
Cloud %:  0.09
Date of selected image (Sentinel):  2019-12-09 
Sentinel images found: 2 
Cloud %:  0.150207
Difference between image dates: 4 days, 0:00:00
Difference between date of leak and date of image: 29 days, 0:00:00
Download complete
Leak index:  1
Date of leak:  2020-01-13
Leak coords:  (-102.27553239136687, 21.877245545501356) 

Date of selected image (Landsat): 2020-01-05 
Landsat images found:  2 
Cloud %:  1.89
Date of selected image (Sentinel):  2020-01-03 
Sentinel images found: 3 
Cloud %:  0.055907
Difference between image dates: 2 days, 0:00:00
Difference between date of leak and date of image: 10 days, 0:00:00
Download complete
Leak index:  2
Date of leak:  2020-01-15
Leak coords:  (-102.41360879741895, 21.717590021696104) 

Date of selected image (Landsat): 2020-01-05 
Landsat images found:  2 
Clo

### Download non leak patches

In [None]:
# Download one "non leak" patch per leak location, using imagery acquired AFTER
# the leak date. For each row of leaks_clean: pick a Landsat image in the window
# [leak + 5 days, leak + 25 days], pick a Sentinel-2 image in the 5 days before
# that Landsat image, estimate the 10 m LST band and download the patch.
bands = ["B4", "B3", "B2", "B1", "B5", "B6", "B7", "B8", "B8A", "B9", "B11", "B12", "WVP"]
i = 0
download_path = "../data/patches_raw/non_leak/"
patch_size = 100

index_progress_nonleak = []
id_nonleak = []
sentinel_img_date_nonleak = []
landsat_img_date_nonleak = []
date_diff_nonleak = []
date_diff_leak_nonleak = []
sentinel_cloud_nonleak = []
landsat_cloud_nonleak = []

# Build the transformer once instead of on every iteration. always_xy = True
# makes it take (x, y) and return (lon, lat), so the names below are truthful
# (the original relied on two swaps cancelling each other out).
transformer = pyproj.Transformer.from_crs("epsg:32613", "epsg:4326", always_xy = True)

# Initialised unconditionally so the loop also works when restarted from an
# index other than 0 (the original only set it when leak == 0).
previous_end_date = ""

prev_rep = 0
# Rows are processed in batches; a beep marks the end of each batch.
for rep in [100, 200, 300, 400, 500, 600, 700, 800, 900, 985]:

    for leak in range(prev_rep, rep):
        print("="*100)
        print("Leak index: ", leak)

        date_leak = datetime.strptime(leaks_clean.fechalegal[leak], "%Y-%m-%d").date()
        id_leak = leaks_clean.id[leak]
        id_nonleak.append(id_leak)

        # Landsat search window after the leak: from 5 to 25 days after the leak date
        end_date_landsat = date_leak + timedelta(days = 25)
        start_date_landsat = end_date_landsat - timedelta(days = 20)

        print("Date of leak: ", date_leak)

        # Count consecutive leaks that share the same date
        if previous_end_date == date_leak:
            i += 1
            print("Leak detected at the same date")
        else:
            i = 0

        # Label for the zip file: leak id, leak date and row index
        date_lab = "i" + str(int(id_leak)) + "d" + "_" + str(leaks_clean.fechalegal[leak])[:10] + "_" + str(leak)

        # Projected coordinates of the leak (EPSG:32613)
        leak_x_ogcrs = leaks_clean["x"][leak]
        leak_y_ogcrs = leaks_clean["y"][leak]

        # Transform to degrees
        leak_lon, leak_lat = transformer.transform(leak_x_ogcrs, leak_y_ogcrs)

        # Point of leak, as [longitude, latitude]. The name `poi` must be kept:
        # bands_clip_image and lst_regression read it as a global.
        poi = ee.Geometry.Point([leak_lon, leak_lat])

        print("Leak coords: ", (leak_lon, leak_lat), "\n")

        # Get sentinel and landsat collection of images according to leak coord
        img_landsat = get_landsat_images(start = start_date_landsat, end = end_date_landsat, poi_leak = poi)
        landsat_date = ee.Date(img_landsat.get("system:time_start")).format("yyyy-MM-dd").getInfo()
        landsat_date = datetime.strptime(landsat_date, "%Y-%m-%d").date()

        # Sentinel image is selected after using landsat image date in order to get the minimum time between images
        end_date_sentinel = landsat_date
        start_date_sentinel = end_date_sentinel - timedelta(days = 5)

        img = get_image(start = start_date_sentinel, end = end_date_sentinel, poi_leak = poi)
        sentinel_date = ee.Date(img.get("system:time_start")).format("yyyy-MM-dd").getInfo()
        sentinel_date = datetime.strptime(sentinel_date, "%Y-%m-%d").date()

        # Append date of images
        sentinel_img_date_nonleak.append(sentinel_date)
        landsat_img_date_nonleak.append(landsat_date)

        # Get difference of days between dates. These go to the *_nonleak
        # lists: the original appended to the leak lists (date_diff,
        # date_diff_leak), corrupting the leak metadata and leaving the
        # non-leak lists empty.
        date_diff_nonleak.append(abs(sentinel_date - landsat_date))
        date_diff_leak_nonleak.append(sentinel_date - date_leak)
        print("Difference between image dates:", abs(sentinel_date - landsat_date))
        print("Difference between date of leak and date of image:" , abs(sentinel_date - date_leak))

        # Append cloud %
        sentinel_cloud_nonleak.append(img.get("CLOUDY_PIXEL_PERCENTAGE").getInfo())
        landsat_cloud_nonleak.append(img_landsat.get("CLOUD_COVER").getInfo())

        # Select bands and reduce the whole image to a patch centered on the leak
        img = bands_clip_image(img, buffer_size = patch_size, bands = bands)

        # Estimate lst from landsat image and add its lst band to sentinel image
        lst_image = lst_regression(landsat_image = img_landsat, sentinel_image = img, buffer_size = patch_size, residuals = True)
        img = add_lst10m_band(img, lst_image = lst_image)

        # Download image
        download_image(image = img, path = download_path, date_label = date_lab)

        previous_end_date = date_leak

        # Record progress, mirroring the leak download loop
        index_progress_nonleak.append(leak)

    prev_rep = rep
    # Audible end-of-batch signal. No explicit break is needed: the loop ends
    # on its own after the last batch.
    winsound.Beep(2000, 1000)

In [33]:
# Attach the metadata collected during the leak download to leaks_clean,
# one new column per list (insertion order of the dict is the column order)
download_columns = {
    "id_download": id,
    "sentinel_img_date": sentinel_img_date,
    "landsat_img_date": landsat_img_date,
    "date_diff_img": date_diff,
    "date_diff_leak": date_diff_leak,
    "sentinel_cloud": sentinel_cloud,
    "landsat_cloud": landsat_cloud,
}

for column_name, column_values in download_columns.items():
    leaks_clean[column_name] = column_values

# Non-leak counterparts, currently disabled:
#leaks_clean["id_download_nonleak"] = id_nonleak
#leaks_clean["sentinel_img_date_nonleak"] = sentinel_img_date_nonleak
#leaks_clean["landsat_img_date_nonleak"] = landsat_img_date_nonleak
#leaks_clean["date_diff_img_nonleak"] = date_diff_nonleak
#leaks_clean["date_diff_leak_nonleak"] = date_diff_leak_nonleak
#leaks_clean["sentinel_cloud_nonleak"] = sentinel_cloud_nonleak
#leaks_clean["landsat_cloud_nonleak"] = landsat_cloud_nonleak

In [34]:
# Save the leak table, including the download metadata columns, to a new CSV.
# NOTE(review): `index` is left at its default, so the row index is presumably written
# as an extra unnamed column (the input already has an 'Unnamed: 0' column) -- confirm this is wanted.
leaks_clean.to_csv("../data/clean/leaks_clean.csv")