In [31]:
import os
from osgeo import gdal, ogr, osr
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

gdal.UseExceptions()

In [69]:
os.environ['PROJ_DATA']='/srv/conda/envs/env_visual/share/proj/' # os.environ["GDAL_DATA"] = "/opt/conda/envs/env_label/share/gdal"
os.environ['GDAL_DATA']='/srv/conda/envs/env_visual/share/gdal/'

In [32]:
import pystac
from pystac import Link, Asset
from datetime import datetime

In [33]:
from pystac.extensions.label import LabelExtension
from pystac.extensions.label import LabelType
from pystac.extensions.label import LabelClasses
from pystac.extensions.label import LabelStatistics

from pystac.extensions.version import ItemVersionExtension

In [35]:
# import matplotlib.pyplot as plt
# import matplotlib.colors as colors

In [5]:
def pixel_to_coords(source, x, y):
    """Returns global coordinates in EPSG:4326 from pixel x, y coords"""

    geo_transform = source.GetGeoTransform()

    x_min = geo_transform[0]
    x_size = geo_transform[1]
    y_min = geo_transform[3]
    y_size = geo_transform[5]
    px = x * x_size + x_min
    py = y * y_size + y_min

    srs = osr.SpatialReference()
    srs.ImportFromWkt(source.GetProjection())

    srs_4326 = srs.CloneGeogCS()
    ct = osr.CoordinateTransformation(srs, srs_4326)

    long, lat, _ = ct.TransformPoint(px, py)

    return long, lat

## Read STAC Item

In [57]:
aws_url = "https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2B_10TFK_20210713_0_L2A"

In [58]:
item = pystac.read_file(aws_url)
display(item.properties)

{'datetime': '2021-07-13T19:03:24Z',
 'platform': 'sentinel-2b',
 'constellation': 'sentinel-2',
 'instruments': ['msi'],
 'gsd': 10,
 'view:off_nadir': 0,
 'proj:epsg': 32610,
 'sentinel:utm_zone': 10,
 'sentinel:latitude_band': 'T',
 'sentinel:grid_square': 'FK',
 'sentinel:sequence': '0',
 'sentinel:product_id': 'S2B_MSIL2A_20210713T184919_N0301_R113_T10TFK_20210713T213143',
 'sentinel:data_coverage': 100,
 'eo:cloud_cover': 0,
 'sentinel:valid_cloud_cover': True,
 'created': '2021-07-13T23:57:53.846Z',
 'updated': '2021-07-13T23:57:53.846Z'}

In [59]:
print(f'Available bands: {list(item.assets.keys())}')

Available bands: ['thumbnail', 'overview', 'info', 'metadata', 'visual', 'B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12', 'AOT', 'WVP', 'SCL']


## Get `SCL` and `B04` bands

In [None]:
# scl_href = item.assets['SCL'].href
# b4_href = item.assets['B04'].href

In [61]:
for band in ['SCL', 'B04']:
    # Extract bands
    b_href = item.assets[band].href

    # Get gdal object
    b_g = gdal.Open(b_href)
    scl_rst = scl_g.GetRasterBand(1)

    # Info about the image
    scl_arr = scl_rst.ReadAsArray()
    print('Shape:', np.shape(scl_arr))
    n_cl = len(np.unique(scl_arr))
    print(f'Number of LC classes: {n_cl}')

In [66]:
band = 'SCL'

In [68]:
print('band:', band)

# Extract band
b_href = item.assets[band].href
print('href:', b_href)

# Get gdal object
b_g = gdal.Open(b_href)
b_rst = scl_g.GetRasterBand(1)

# Get array
b_arr = scl_rst.ReadAsArray()
print('Shape:', np.shape(b_arr))
n_cl = len(np.unique(b_arr))
print(f'Number of LC classes: {n_cl}')

band: SCL
href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2021/7/S2B_10TFK_20210713_0_L2A/SCL.tif
Shape: (5490, 5490)
Number of LC classes: 11


In [9]:
pixel_to_coords(scl_g, 0, np.shape(scl_arr)[0])
# pixel_to_coords(scl_g, 861, 5391)

(-121.83433447992233, 39.65575725226219)

In [10]:
# Generate 
np.random.seed(42) # keep this fixed 
xy = np.random.randint(1, np.shape(scl_arr)[0], size=(500, 2))
xy[:5]

array([[ 861, 5391],
       [5227, 5192],
       [3773, 3093],
       [ 467, 5335],
       [4427, 3445]])

In [11]:
x_values = []
y_values = []

# note that this will become a function, and as main paramtere will be source_g which is the gdal object of my input tif image  

for pos in xy:
    
    x_values.append([*pixel_to_coords(source_g, pos[0], pos[1])])

    y_values.append(
        int(
            scl_rst.ReadAsArray(
                xoff=int(pos[0]), yoff=int(pos[1]), win_xsize=1, win_ysize=1
            )[0][0]
        )
    )
print(x_values[:10])
print(y_values[:10])

[[-121.63330522527042, 39.6714049509491], [-120.61464030376956, 39.69079991941494], [-120.94227874261767, 40.07526633324874], [-121.7249722758687, 39.68253875396601], [-120.791033392279, 40.009059535172696], [-121.08229242170857, 40.10884005933172], [-121.79178069747134, 40.340761546680845], [-121.64389382043105, 40.21191559767241], [-121.26629118244627, 39.68074004844452], [-120.63056000456179, 40.413057814721775]]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 5]


In [89]:
[(np.round(x_[0],2),np.round(x_[1],2)) for x_ in x_values[:10]]

[(-121.63, 39.67),
 (-120.61, 39.69),
 (-120.94, 40.08),
 (-121.72, 39.68),
 (-120.79, 40.01),
 (-121.08, 40.11),
 (-121.79, 40.34),
 (-121.64, 40.21),
 (-121.27, 39.68),
 (-120.63, 40.41)]

In [86]:
df = pd.DataFrame(x_values, y_values, columns=['long', 'lat', 'SCL'])
df

TypeError: unhashable type: 'list'

TypeError: unhashable type: 'list'

## Make Pandas dataframe

In [None]:
# Function to generate a dataframe from the selected STAC items
def makeDataFrame(items_json):
    result = []

    for elem in items_json:

        geom_poly = Polygon([tuple(l) for l in elem['geometry']['coordinates'][0]]) # converts the geom from json to POLYGON WKT 
        
        result.append(
            {
                "aoi_intersect": np.round((polygon.intersection(geom_poly).area / polygon.area) * 100.0, 2),
                "id": elem["id"],  # .split('_')[5],
                "datetime": pd.to_datetime(elem["properties"]["datetime"]),
                "orbit_state": elem["properties"]["sat:orbit_state"],
                "relative_orbit": elem["properties"]["sat:relative_orbit"],
                "polarization": elem["properties"]["sar:polarizations"],
            }
        )

    df = pd.DataFrame(
        result
    )  # .sort_values(by=['aoi_intersec'], ascending=False, ignore_index=True)
    return df

In [None]:
data_df = makeDataFrame(items_sec_json["features"])


In [None]:
np.array(y_values).shape

## Split dataset into train and validation

In [78]:
print(len(x_values), len(y_values))

500 500


In [77]:
x_train, x_rem, y_train, y_rem = train_test_split(
    np.array(x_values), np.array(y_values), train_size=0.8
)
print(len(x_train),len(y_train))
print(len(x_rem),len(y_rem))

400 400
100 100


In [76]:
x_valid, x_test, y_valid, y_test = train_test_split(
    np.array(x_rem), np.array(y_rem), test_size=0.5
)
print(len(x_valid),len(y_valid))
print(len(x_test),len(y_test))

50 50
50 50


In [79]:
x_valid.shape, x_train.shape, x_test.shape

((50, 2), (400, 2), (50, 2))

In [80]:
y_valid.shape, y_train.shape, y_test.shape

((50,), (400,), (50,))

In [82]:
def to_geojson(t, x, y):
    """Converts the given x, y, and split dataset type (train, test, validate ) to a geojson file
    The geojson file is saved in the current directory with the name label-{t}.geojson
    """

    field_name = "class"
    field_type = ogr.OFTInteger

    # Create the output Driver
    out_driver = ogr.GetDriverByName("GeoJSON")

    geojson_filename = f"label-{t}.geojson"
    # Create the output GeoJSON
    out_datasource = out_driver.CreateDataSource(geojson_filename)
    out_layer = out_datasource.CreateLayer("labels", geom_type=ogr.wkbPolygon)
    id_field = ogr.FieldDefn(field_name, field_type)
    out_layer.CreateField(id_field)
    # Get the output Layer's Feature Definition
    feature_def = out_layer.GetLayerDefn()

    for index, v in enumerate(y):
        point = ogr.Geometry(ogr.wkbPoint)
        point.AddPoint(x[index][0], x[index][1])

        # create a new feature
        out_feature = ogr.Feature(feature_def)

        # Set new geometry
        out_feature.SetGeometry(point)

        out_feature.SetField(field_name, int(v))
        # Add new feature to output Layer
        out_layer.CreateFeature(out_feature)

        # dereference the feature
        out_feature = None

    # Save and close DataSources
    out_datasource = None

In [83]:
to_geojson("train", x_train, y_train)
to_geojson("test", x_test, y_test)
to_geojson("validate", x_valid, y_valid)