In [2]:
%config IPCompleter.greedy=True
import ast
import os
import subprocess

import openeo
import pandas as pd
import pygc
from openeo.processes import ProcessBuilder
from osgeo import gdal
from tqdm import tqdm

In [2]:
# Path of the CSV file that the next cell loads with pandas.
main_datafile_path = "batch1.csv"

In [3]:
# Load the site table; later cells read its 'code' and 'box' columns.
batch = pd.read_csv(main_datafile_path)
batch

Unnamed: 0.1,Unnamed: 0,code,geom,box
0,0,S_k_1,ge...,"{'lats': [-13.085938878464573, -13.08593821972..."
1,1,S_k_2,ge...,"{'lats': [-14.139638101050135, -14.13963738722..."
2,2,S_k_3,ge...,"{'lats': [-14.168238079189077, -14.16823736385..."
3,3,S_k_4,ge...,"{'lats': [-15.267837208631624, -15.26783643526..."
4,4,S_k_5,ge...,"{'lats': [-12.503239284731974, -12.50323865626..."
...,...,...,...,...
195,195,S_k_2339,ge...,"{'lats': [-19.79613303038875, -19.796132011205..."
196,196,S_k_2340,ge...,"{'lats': [-19.79433303223093, -19.794332013148..."
197,197,S_k_2341,ge...,"{'lats': [-19.38713344540091, -19.387132448930..."
198,198,S_k_2709,ge...,"{'lats': [-19.988632832578688, -19.98863180266..."


In [9]:
def latLonBoxByWandH(lat,lon,ew_width,ns_height):
    """Return the four corners of a box of the given size centred on (lat, lon).

    The box is traced with successive ``pygc.great_circle`` steps, each one
    starting where the previous step ended: half the width at azimuth 90 and
    half the height at azimuth 180 to reach the first corner, then a full
    width at 270, a full height at 0 and a full width at 90 for the remaining
    three corners. With compass azimuths (0 = north, 90 = east) that yields
    the corners in the order SE, SW, NW, NE.

    Parameters
    ----------
    lat, lon : centre of the box, in degrees.
    ew_width, ns_height : box extent, in the distance unit expected by
        ``pygc.great_circle`` (metres).

    Returns
    -------
    dict
        ``{'lats': [...], 'lons': [...]}`` with four entries each, one per
        corner, in the order described above.
    """
    # (distance, azimuth in degrees, record the end point as a corner?)
    legs = (
        (ew_width/2, 90, False),   # centre -> middle of the eastern edge
        (ns_height/2, 180, True),  # -> first corner
        (ew_width, 270, True),     # -> second corner
        (ns_height, 0, True),      # -> third corner
        (ew_width, 90, True),      # -> fourth corner
    )

    corner_lats, corner_lons = [], []
    cur_lat, cur_lon = lat, lon
    for leg_distance, leg_azimuth, is_corner in legs:
        step = pygc.great_circle(distance=leg_distance, azimuth=leg_azimuth,
                                 latitude=cur_lat, longitude=cur_lon)
        cur_lat, cur_lon = step['latitude'], step['longitude']
        if is_corner:
            corner_lats.append(cur_lat)
            corner_lons.append(cur_lon)

    return {'lats': corner_lats, 'lons': corner_lons}

In [10]:
# Draw at most 100 sites. The fixed seed makes Restart & Run All pick the same
# sites every time; min() keeps the cell working for tables with < 100 rows.
samplelist = batch.sample(n=min(100, len(batch)), random_state=42)

In [11]:
# Box dimensions (east-west width, north-south height) - presumably metres,
# matching the distance unit noted in latLonBoxByWandH. No cell shown here
# reads them; the boxes come pre-computed from the 'box' column.
ew_width = ns_height = 2000

# Parallel lists: boxlist[k] is the box string belonging to site sitelist[k].
sitelist, boxlist = (samplelist[column].tolist() for column in ('code', 'box'))

### Downloading Sentinel-2 files using AOI

In [3]:
# Open a connection to the Copernicus Data Space openEO back-end.
# It is unauthenticated at this point (the recorded repr shows NullAuth).
connection = openeo.connect(url="https://openeo.dataspace.copernicus.eu/openeo/1.1")
connection

<Connection to 'https://openeo.dataspace.copernicus.eu/openeo/1.1' with NullAuth>

In [4]:
# Authenticate the connection via OIDC (the recorded run reused a cached refresh token).
connection.authenticate_oidc()

Authenticated using refresh token.


<Connection to 'https://openeo.dataspace.copernicus.eu/openeo/1.1' with OidcBearerAuth>

In [61]:
# Temporal extent for load_collection. openEO intervals are half-open:
# the start is included and the end is excluded, so covering all of 2020
# (including 31 December) needs 1 January 2021 as the end.
t = ("2020-01-01", "2021-01-01")

In [62]:
# Child process handed to `apply`; expressed with ProcessBuilder.
def scale_function(x: ProcessBuilder):
    """Linearly rescale ``x`` from the input range 0..6000 to the output range 0..255.

    Thin wrapper around ``x.linear_scale_range`` so the rescaling can be
    passed as a callback when building an openEO process graph.
    """
    input_range = (0, 6000)
    output_range = (0, 255)
    return x.linear_scale_range(*input_range, *output_range)

In [63]:
# Download one cloud-masked, time-averaged Sentinel-2 composite per sampled site.
#
# NOTE(review): the output name says "2017" while `t` selects 2020 imagery.
# The name is kept as-is because other steps may rely on it - confirm intent.

# download() writes straight to the given path, so the folder has to exist.
os.makedirs("sent2_8bands", exist_ok=True)

# The loop variables deliberately avoid the name `box`: a later cell imports
# `box` from shapely.geometry, and rebinding it here would break that cell
# whenever this one is re-run afterwards.
for site, box_repr in tqdm(zip(sitelist, boxlist), total = len(sitelist)):
    # The 'box' column stores a dict literal with 'lats' and 'lons' lists.
    site_box = ast.literal_eval(box_repr)

    # Axis-aligned bounding box around the site's corner points.
    spatial_extent = {
        "west": min(site_box["lons"]),
        "south": min(site_box["lats"]),
        "east": max(site_box["lons"]),
        "north": max(site_box["lats"]),
        "crs": "EPSG:4326",
    }

    raw_cube = connection.load_collection(
        "SENTINEL2_L2A",
        temporal_extent= t,
        spatial_extent = spatial_extent,
        bands=["B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B11", "B12", "SCL"],
        max_cloud_cover=50,
    )

    # Build a mask from the scene-classification band. Classes 3, 8 and 9 are
    # presumably cloud shadow and medium/high-probability cloud - confirm
    # against the Sentinel-2 L2A SCL legend.
    scl_band = raw_cube.band("SCL")
    cloud_mask = (scl_band == 3) | (scl_band == 8) | (scl_band == 9)

    # Bring the mask onto the grid of the full cube before applying it.
    cloud_mask = cloud_mask.resample_cube_spatial(raw_cube)
    masked_cube = raw_cube.mask(cloud_mask)

    # Collapse the time dimension to a mean composite, then rescale every band
    # with scale_function (note: this includes the averaged SCL band).
    composite = masked_cube.mean_time()
    composite = composite.apply(scale_function)

    composite.download(f"sent2_8bands/{site}_2017.tif")

  0%|          | 0/100 [00:00<?, ?it/s]

### Clipping AOI from CCI files

In [3]:
import rioxarray as rio
from tqdm import tqdm
from shapely.geometry import box
import geopandas as gpd

In [29]:
# Paths of all regular files (sub-directories are skipped) in the CCI tile folder.
filelist = [
    tile_path
    for tile_path in (
        os.path.join('./ccidataset(2020)/', entry)
        for entry in os.listdir('./ccidataset(2020)')
    )
    if os.path.isfile(tile_path)
]
filelist

['./ccidataset(2020)/S10E120_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S10E130_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S10E140_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S20E130_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S20E140_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S20E150_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S30E110_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S30E120_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S30E130_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S30E140_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S30E150_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S30E170_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff',
 './ccidataset(2020)/S40E140

In [20]:
# Open a single CCI biomass tile and display it for inspection.
da = rio.open_rasterio("./ccidataset(2020)/S10E140_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.tiff")
da

# batch1 = treecoords.loc[(treecoords["latitude"] <= -10) &(treecoords["latitude"] > -20)]
# batch2 = treecoords.loc[(treecoords["latitude"] <= -20) &(treecoords["latitude"] > -30)]
# batch3 = treecoords.loc[(treecoords["latitude"] <= -30) &(treecoords["latitude"] > -40)]
# batch4 = treecoords.loc[(treecoords["latitude"] <= -40) &(treecoords["latitude"] > -50)]

In [30]:
# Clip every sampled site's bounding box out of each CCI tile and save the
# result as outputCCI/<site>_2017_agbm.tif.
#
# NOTE(review): a site that overlaps two tiles is written once per tile, so
# the later tile's (partial) clip overwrites the earlier one.

# to_raster() needs the target folder to exist.
os.makedirs("outputCCI", exist_ok=True)

# Parse each site's box string and build its geometry once, not once per tile.
# A malformed box string raises here instead of being silently skipped.
site_geoms = []
for site, box_repr in zip(sitelist, boxlist):
    corners = ast.literal_eval(box_repr)
    geof = gpd.GeoDataFrame(
        geometry=[
            box(min(corners["lons"]),
                min(corners["lats"]),
                max(corners["lons"]),
                max(corners["lats"])
                )
            ],
        crs="EPSG:4326")
    site_geoms.append((site, geof))

for tile_path in filelist:
    # The context manager closes the raster handle before the next tile opens.
    with rio.open_rasterio(tile_path) as da:
        for site, geof in tqdm(site_geoms, total = len(site_geoms)):
            try:
                clipped = da.rio.clip(geof.geometry.values, geof.crs, drop=True)
                clipped.rio.to_raster(f"outputCCI/{site}_2017_agbm.tif", dtype="float32")
            except rio.exceptions.NoDataInBounds:
                # Expected case: the site lies outside this tile.
                continue
            except Exception as exc:
                # Stay best-effort, but report the failure instead of hiding it.
                # KeyboardInterrupt is no longer swallowed.
                tqdm.write(f"{site}: clipping from {tile_path} failed: {exc!r}")

 36%|███▌      | 36/100 [00:26<00:47,  1.36it/s]

### Resizing Downloaded Files

In [9]:
# file resize
def resizefiles(directory, outdirectory, width=200, height=200):
    """Resample every file in ``directory`` to ``width`` x ``height`` pixels.

    Each regular file is passed to ``gdal_translate -outsize <width> <height>``
    and written under the same file name into ``outdirectory``.
    Sub-directories are skipped.

    Parameters
    ----------
    directory : str
        Folder holding the input rasters (trailing slash optional).
    outdirectory : str
        Folder for the resized rasters; created if it does not exist.
    width, height : int, optional
        Output raster size in pixels. Defaults to 200 x 200.

    Notes
    -----
    A non-zero exit code from ``gdal_translate`` is reported with ``print``
    and processing continues with the next file.
    """
    os.makedirs(outdirectory, exist_ok=True)

    # sorted() gives a stable processing order across platforms.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        if not os.path.isfile(file_path):
            continue
        output_path = os.path.join(outdirectory, filename)

        # Arguments are passed as a list (no shell), so paths containing
        # spaces or parentheses need no quoting.
        cmd = ["gdal_translate", "-outsize", str(width), str(height),
               file_path, output_path]
        result = subprocess.run(cmd, capture_output=True, text=True)

        # Echo the tool's progress output into the notebook cell.
        if result.stdout:
            print(result.stdout, end="")
        if result.returncode != 0:
            print(f"gdal_translate failed for {file_path}: {result.stderr.strip()}")

In [10]:
# Resize every clipped CCI raster from ./outputCCI/ into ./outputCCIresized/.
resizefiles("./outputCCI/", "./outputCCIresized/")

Input file size is 21, 21
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 24, 20
0...10...20...30...40...50...60...70...80...90...100 - done.


In [11]:
# Resize every downloaded Sentinel-2 raster from ./sent2_8bands/ into ./sent2_8bandsresized/.
resizefiles("./sent2_8bands/", "./sent2_8bandsresized/")

Input file size is 203, 202
0...10...20...30...40...50...60...70...80...90...100 - done.




Input file size is 203, 202
0...10...20...30...40...50...60...70...80...90...100 - done.


