In [1]:
import os
from osgeo import gdal, ogr, osr
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# Make the bindings raise Python exceptions instead of returning None / error
# codes. On older GDAL releases each submodule keeps its own flag, so switch it
# on for ogr and osr as well (harmless repetition on newer releases).
gdal.UseExceptions()
ogr.UseExceptions()
osr.UseExceptions()

In [2]:
# STAC item URL on the Earth Search API (collection sentinel-s2-l2a-cogs).
# NOTE(review): `aws_url` is not referenced by any other cell in view, and it
# names scene S2A_10TFK_20220524 while the raster opened in the next cell is
# S2B_10TFK_20210713 -- confirm which scene is intended, or drop this cell.
aws_url = "https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_10TFK_20220524_0_L2A"

In [3]:
# Open the scene's SCL raster directly over HTTP through GDAL's /vsicurl/
# virtual file system and keep a handle on its first (1-based) band.
scl_path = "/vsicurl/https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2021/7/S2B_10TFK_20210713_0_L2A/SCL.tif"
source = gdal.Open(scl_path)
scl = source.GetRasterBand(1)

In [4]:
# Read the whole band into memory and summarise it:
# array shape and the number of distinct class codes present.
img = scl.ReadAsArray()
print('Shape:', img.shape)
n_cl = np.unique(img).size
print(f'Number of LC classes: {n_cl}')

Shape: (5490, 5490)
Number of LC classes: 11


In [5]:
def pixel_to_coords(source, x, y):
    """Return the geographic (long, lat) of pixel position ``x``, ``y``.

    The pixel position is mapped to the dataset's own CRS with the full
    six-term affine geotransform (rotation terms included) and then
    reprojected to the geographic CRS underlying the dataset's projection
    (EPSG:4326 when the projection is based on WGS 84).

    Parameters
    ----------
    source : gdal.Dataset
        Open raster providing the geotransform and projection.
    x : int or float
        Column index. Integer values address the pixel's upper-left corner.
    y : int or float
        Row index. Integer values address the pixel's upper-left corner.

    Returns
    -------
    tuple of float
        ``(long, lat)`` in degrees, always in that order.
    """
    # GDAL geotransform: (x origin, pixel width, row rotation,
    #                     y origin, column rotation, pixel height).
    # The y origin is the TOP edge; pixel height is negative for north-up rasters.
    origin_x, x_size, row_rot, origin_y, col_rot, y_size = source.GetGeoTransform()

    # The rotation terms are 0 for north-up rasters, so this reduces to the
    # plain origin + index * pixel-size form in that case.
    px = origin_x + x * x_size + y * row_rot
    py = origin_y + x * col_rot + y * y_size

    srs = osr.SpatialReference()
    srs.ImportFromWkt(source.GetProjection())
    srs_4326 = srs.CloneGeogCS()

    # GDAL >= 3 follows the CRS authority's axis order by default, which for a
    # geographic CRS can mean (lat, long). Force the traditional x/y =
    # long/lat order on both ends so the result does not depend on the
    # installed GDAL version. The constant does not exist before GDAL 3.
    if hasattr(osr, "OAMS_TRADITIONAL_GIS_ORDER"):
        srs.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
        srs_4326.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    ct = osr.CoordinateTransformation(srs, srs_4326)

    # float() so NumPy scalars coming from array indices are accepted as-is.
    long, lat, _ = ct.TransformPoint(float(px), float(py))

    return long, lat

In [8]:
# PROJ / GDAL support-file directories of the conda environment this notebook
# was written in (another setup used "/opt/conda/envs/env_label/share/gdal"
# for GDAL_DATA).
# Only override a variable when its directory actually exists: pointing
# PROJ_DATA / GDAL_DATA at a missing path breaks CRS lookups on any other
# machine, whereas leaving them unset lets the libraries use their defaults.
for _var, _path in (
    ('PROJ_DATA', '/srv/conda/envs/env_visual/share/proj/'),
    ('GDAL_DATA', '/srv/conda/envs/env_visual/share/gdal/'),
):
    if os.path.isdir(_path):
        os.environ[_var] = _path

In [9]:
# Sanity check: x=0 with y equal to the row count addresses the raster's
# lower-left corner (left edge, one row past the last line).
pixel_to_coords(source, 0, np.shape(img)[0])
# Alternative probe at an interior pixel:
# pixel_to_coords(source, 861, 5391)

(-121.83433447992233, 39.65575725226219)

In [10]:
# Generate 500 random (x, y) pixel positions to sample labels from.
# NOTE(review): both columns are drawn from [1, row count), so this assumes a
# square raster and never picks column/row 0. Changing the bounds would change
# every sampled position, so it is left as is -- confirm this is intended.
np.random.seed(42) # keep this fixed so the sampled positions are reproducible
xy = np.random.randint(1, np.shape(img)[0], size=(500, 2))
xy[:5]

array([[ 861, 5391],
       [5227, 5192],
       [3773, 3093],
       [ 467, 5335],
       [4427, 3445]])

In [11]:
# Build the sample set:
#   x_values -- [long, lat] of every sampled pixel
#   y_values -- class code stored in the band at that pixel
# The class code is looked up in `img`, the full band already held in memory,
# instead of issuing one 1x1 windowed read against the remote file per point.
# `img` is indexed [row, col], i.e. [y, x].
x_values = []
y_values = []

for col, row in xy:
    x_values.append([*pixel_to_coords(source, col, row)])
    y_values.append(int(img[row, col]))

print(x_values[:10])
print(y_values[:10])

[[-121.63330522527042, 39.6714049509491], [-120.61464030376956, 39.69079991941494], [-120.94227874261767, 40.07526633324874], [-121.7249722758687, 39.68253875396601], [-120.791033392279, 40.009059535172696], [-121.08229242170857, 40.10884005933172], [-121.79178069747134, 40.340761546680845], [-121.64389382043105, 40.21191559767241], [-121.26629118244627, 39.68074004844452], [-120.63056000456179, 40.413057814721775]]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 5]


In [None]:
# One label was appended per sampled position; check the resulting length.
np.array(y_values).shape

In [None]:
# Keep 80 % of the samples for training; the remaining 20 % (x_rem / y_rem)
# is divided again afterwards.
# random_state pins the shuffle so re-running this cell reproduces the same
# split, regardless of how much of the global NumPy random stream has been
# consumed by earlier cells.
x_train, x_rem, y_train, y_rem = train_test_split(
    np.array(x_values), np.array(y_values), train_size=0.8, random_state=42
)

In [None]:
# Halve the held-out remainder into validation and test sets.
# x_rem / y_rem are already arrays, so they are passed through without another
# copy; random_state makes the split repeatable when the cell is re-run.
x_valid, x_test, y_valid, y_test = train_test_split(
    x_rem, y_rem, test_size=0.5, random_state=42
)

In [None]:
# Shapes of the coordinate arrays, in the order validation, train, test.
x_valid.shape, x_train.shape, x_test.shape

In [None]:
# Shapes of the label arrays, in the order validation, train, test.
y_valid.shape, y_train.shape, y_test.shape

In [None]:
# Inspect the class codes that ended up in the validation split.
y_valid

In [None]:
def to_geojson(t, x, y):
    """Write labelled sample points to ``label-{t}.geojson`` in the current directory.

    Each sample becomes one 2D point feature carrying an integer ``class``
    attribute. An existing file of the same name is replaced.

    Parameters
    ----------
    t : str
        Name of the dataset split (e.g. ``"train"``, ``"test"``,
        ``"validate"``); used only to build the file name.
    x : sequence of (long, lat) pairs
        Point coordinates, one pair per sample.
    y : sequence of int
        Class label for each sample, in the same order as ``x``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same length.
    RuntimeError
        If the output file cannot be created.
    """
    if len(x) != len(y):
        raise ValueError(
            f"x and y must have the same length, got {len(x)} and {len(y)}"
        )

    field_name = "class"
    field_type = ogr.OFTInteger
    geojson_filename = f"label-{t}.geojson"

    # Create the output Driver
    out_driver = ogr.GetDriverByName("GeoJSON")

    # The GeoJSON driver does not overwrite existing files, so a second run of
    # the notebook would fail here; remove any previous output first.
    if os.path.exists(geojson_filename):
        os.remove(geojson_filename)

    # Create the output GeoJSON
    out_datasource = out_driver.CreateDataSource(geojson_filename)
    if out_datasource is None:
        # Only reachable when OGR exceptions are disabled.
        raise RuntimeError(f"Could not create {geojson_filename}")

    # The features written below are points, so declare the layer accordingly.
    out_layer = out_datasource.CreateLayer("labels", geom_type=ogr.wkbPoint)
    out_layer.CreateField(ogr.FieldDefn(field_name, field_type))
    # Get the output Layer's Feature Definition
    feature_def = out_layer.GetLayerDefn()

    for coords, label in zip(x, y):
        point = ogr.Geometry(ogr.wkbPoint)
        # AddPoint_2D keeps the geometry two-dimensional; plain AddPoint would
        # attach a z value of 0 to every coordinate.
        point.AddPoint_2D(float(coords[0]), float(coords[1]))

        out_feature = ogr.Feature(feature_def)
        out_feature.SetGeometry(point)
        out_feature.SetField(field_name, int(label))
        out_layer.CreateFeature(out_feature)

        # dereference the feature
        out_feature = None

    # Dropping the references (layer first, then datasource) flushes the
    # features to disk and closes the file.
    out_layer = None
    out_datasource = None

In [None]:
# Write one GeoJSON label file per split: train, then test, then validate.
for split_name, split_coords, split_labels in (
    ("train", x_train, y_train),
    ("test", x_test, y_test),
    ("validate", x_valid, y_valid),
):
    to_geojson(split_name, split_coords, split_labels)