In [1]:
import os
from osgeo import gdal, ogr, osr
import pyproj
import numpy as np
import pandas as pd
import json

gdal.UseExceptions()

In [2]:
os.environ['PROJ_DATA']='/workspace/.conda/envs/env_labels/share/proj' 
# os.environ['GDAL_DATA']='/workspace/.conda/envs/env_labels/share/gdal'
# os.environ['GTIFF_SRS_SOURCE'] = 'EPSG'

In [3]:
import pystac
from pystac import Link, Asset
from pystac.extensions.label import LabelExtension
from pystac.extensions.label import LabelType
from pystac.extensions.label import LabelClasses
from pystac.extensions.label import LabelStatistics
from pystac.extensions.version import ItemVersionExtension

In [4]:
# Set dataframe to None 
df = None

In [5]:
# Read GeoJSON file and extract point coordinates
def read_geojson_coordinates(geojson_file):
    with open(geojson_file, 'r') as file:
        geojson_data = json.load(file)
    #for f in geojson_data['features'][:10]: print(f)
    
    points = []
    luc = []
    for feature in geojson_data['features']:
        if feature['geometry']['type'] == 'Point':
            # Add lon and lat
            lon, lat, _ = feature['geometry']['coordinates']
            points.append((lon, lat))
            
            # Add classification 
            luc.append(feature['properties']['class'])
    return points, luc

In [6]:
# Function to transform unprojected coordinates to projected coordinates
def transform_coordinates(coordinates, epsg_s, epsg_t):
    source_crs = pyproj.CRS(f'EPSG:{epsg_s}') 
    target_crs = pyproj.CRS(f'EPSG:{epsg_t}')  
    transformer = pyproj.Transformer.from_crs(source_crs, target_crs, always_xy=True)
    transformed_coords = [transformer.transform(lon, lat) for lon, lat in coordinates]
    
    transformed_coords_int = [[int(tc[0]), int(tc[1])] for tc in transformed_coords]
    return transformed_coords_int

In [7]:
# Function to extract pixel values from a GeoTIFF at given coordinates
def extract_pixel_values(b_g, transformed_coords):
    gt = b_g.GetGeoTransform()
    b_rst = b_g.GetRasterBand(1)
    
    values = []

    for lon, lat in transformed_coords:
        px = int((lon - gt[0]) / gt[1])  # Convert longitude to pixel x
        py = int((lat - gt[3]) / gt[5])  # Convert latitude to pixel y

        value = b_rst.ReadAsArray(px, py, 1, 1)[0][0]
        values.append(value)
    
    # Empty raster 
    b_rst = None
    
    return values

# Read ML STAC Item

In [8]:
# Define name of the ML item
item_label_fname = "item-label-train.json"

In [9]:
item = pystac.read_file(item_label_fname)
display(item.properties)

{'ml-aoi:split': 'train',
 'label:description': 'Land cover labels',
 'label:type': 'vector',
 'label:properties': ['class'],
 'label:classes': [{'NO_DATA': 0,
   'SATURATED_OR_DEFECTIVE': 1,
   'CAST_SHADOWS': 2,
   'CLOUD_SHADOWS': 3,
   'VEGETATION': 4,
   'NOT_VEGETATED': 5,
   'WATER': 6,
   'UNCLASSIFIED': 7,
   'CLOUD_MEDIUM_PROBABILITY': 8,
   'CLOUD_HIGH_PROBABILITY': 9,
   'THIN_CIRRUS': 10,
   'SNOW or ICE': 11}],
 'label:tasks': ['segmentation', 'regression'],
 'label:methods': ['manual'],
 'version': '0.1',
 'deprecated': False,
 'datetime': '2023-05-29T13:57:36.729558Z'}

## Load S2 scene

In [10]:
s2_href = [l.target for l in item.links if l.rel == 'source'][0]
print('href of the S2 scene:', s2_href)

href of the S2 scene: https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_10TFK_20220524_0_L2A


In [11]:
# Read STAC Item
s2item = pystac.read_file(s2_href)
display(s2item.properties)

{'datetime': '2022-05-24T19:03:29Z',
 'platform': 'sentinel-2a',
 'constellation': 'sentinel-2',
 'instruments': ['msi'],
 'gsd': 10,
 'view:off_nadir': 0,
 'proj:epsg': 32610,
 'sentinel:utm_zone': 10,
 'sentinel:latitude_band': 'T',
 'sentinel:grid_square': 'FK',
 'sentinel:sequence': '0',
 'sentinel:product_id': 'S2A_MSIL2A_20220524T184921_N0400_R113_T10TFK_20220525T004817',
 'sentinel:data_coverage': 100,
 'eo:cloud_cover': 0.24,
 'sentinel:valid_cloud_cover': True,
 'sentinel:processing_baseline': '04.00',
 'sentinel:boa_offset_applied': True,
 'created': '2022-05-25T03:20:43.295Z',
 'updated': '2022-05-25T03:20:43.295Z'}

In [12]:
epsg_t = s2item.properties['proj:epsg']
print(f'- Target EPSG:{epsg_t}')
print(f'- Available bands: {list(s2item.assets.keys())}')

- Target EPSG:32610
- Available bands: ['thumbnail', 'overview', 'info', 'metadata', 'visual', 'B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12', 'AOT', 'WVP', 'SCL']


## Load geojson points

In [13]:
geojson_href = item.assets['labels'].href
print('href of the geojson file:', geojson_href)

href of the geojson file: label-train.geojson


In [14]:
# Open asset (geojson format) and read all the coordinates within 
coordinates, luc = read_geojson_coordinates(geojson_href)
coordinates[:5], luc[:5]

([(-121.27859163156747, 40.38218701036475),
  (-121.19953674120562, 39.91585786896506),
  (-121.33843034984275, 40.4972903822101),
  (-121.68743345874819, 40.07457719936124),
  (-121.26139230895694, 40.290492371933254)],
 [4, 4, 5, 4, 5])

In [15]:
# Transofrm coordinates
epsg_s = '4326'
transformed_coords = transform_coordinates(coordinates, epsg_s, epsg_t)
transformed_coords[:5]

[[646120, 4471599],
 [653880, 4419970],
 [640800, 4484280],
 [611920, 4436859],
 [647780, 4461450]]

## Extract values of selected band(s) for each point in the geojson file


In [16]:
# Define dictionary of other Common Band Names, for those that are missing in the metadata
other_cbn = {
    'B05': 'rededge70', 
    'B06': 'rededge74', 
    'B07': 'rededge78', 
    'B8A': 'nir08', 
    'B09': 'nir09'
}

In [17]:
for band in list(s2item.assets.keys()): 
    b_metadata = s2item.assets[band].to_dict()
    if 'eo:bands' in b_metadata.keys() and len(b_metadata['eo:bands']) == 1:
        if 'common_name' in b_metadata['eo:bands'][0].keys(): 
            cbn = b_metadata['eo:bands'][0]['common_name']
        else: 
            # cbn does not exist in metadata - use dictionary of other_cbn
            cbn = other_cbn[b_metadata['eo:bands'][0]['name']]
    else: 
        print(f'{band} is not eo band, skipping.')
        continue

    if (df is not None) and (cbn in df.columns): 
        print(f'Band {cbn} exists already in the dataframe, skipping.')
        continue
    
    print('Band:', band)
    print(f'- Common Band Name: {cbn}')
    print(f'- Res: {b_metadata["gsd"]}m')
    print(f'- Center Wavelenght: {b_metadata["eo:bands"][0]["center_wavelength"]}')

    # Extract band
    b_href = s2item.assets[band].href
    print('- href:', b_href)

    # Get gdal object
    b_g = gdal.Open(b_href)
    
    # Extract pixel values
    pixel_values = extract_pixel_values(b_g, transformed_coords)
    # for (lon0, lat0), (lon1, lat1), value, lu in zip(coordinates[:5], transformed_coords[:5], pixel_values[:5], luc[:5]):
    #     print(f"Coords (Unprj): {np.round(lon0,3)}, {np.round(lat0,3)} - Coords (Prj): {lon1}, {lat1} - Pixel Value: {value} - LUC: {lu}")
    
    # Empty b_g
    b_g = None
    
    # Make or Append to Pandas dataframe
    data = {
        'long': [x[0] for x in transformed_coords], 
        'lat': [x[1] for x in transformed_coords], 
        'LUC': luc,
        cbn: pixel_values,
    }

    if df is None: 
        print('Creating Dataframe')
        # Create a DataFrame from the dictionary
        df = pd.DataFrame(data)
        df.index.name = 'Index'

    else: 
        print('Adding to existing Dataframe')

        # Create temp dataframe
        df2 = pd.DataFrame(data)
        df2.index.name = 'Index'

        # Assert the two dataframes have the same long and lat values
        assert df['long'].isin(df2['long']).value_counts().values[0] == len(pixel_values)
        assert df['lat'].isin(df2['lat']).value_counts().values[0] == len(pixel_values)

        # Merge temp dataframe with original dataframe, based on matching columns
        df = pd.merge(df, df2, on=['Index', 'long', 'lat', 'LUC'])   
        # Empty memory
        df2 = None

    display(df)
    print()

print('\n--- Complete Dataframe with all Sentinel-2 bands ---')
display(df)

thumbnail is not eo band, skipping.
overview is not eo band, skipping.
info is not eo band, skipping.
metadata is not eo band, skipping.
visual is not eo band, skipping.
Band: B01
- Common Band Name: coastal
- Res: 60m
- Center Wavelenght: 0.4439
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B01.tif
Creating Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,646120,4471599,4,318
1,653880,4419970,4,365
2,640800,4484280,5,833
3,611920,4436859,4,308
4,647780,4461450,5,261
...,...,...,...,...
395,668920,4432610,4,156
396,679400,4444339,5,241
397,672460,4433990,4,416
398,635849,4404110,4,91



Band: B02
- Common Band Name: blue
- Res: 10m
- Center Wavelenght: 0.4966
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B02.tif
Adding to existing Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal,blue
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,646120,4471599,4,318,363
1,653880,4419970,4,365,448
2,640800,4484280,5,833,432
3,611920,4436859,4,308,312
4,647780,4461450,5,261,394
...,...,...,...,...,...
395,668920,4432610,4,156,1
396,679400,4444339,5,241,260
397,672460,4433990,4,416,462
398,635849,4404110,4,91,246



Band: B03
- Common Band Name: green
- Res: 10m
- Center Wavelenght: 0.56
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B03.tif
Adding to existing Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,646120,4471599,4,318,363,542
1,653880,4419970,4,365,448,552
2,640800,4484280,5,833,432,645
3,611920,4436859,4,308,312,507
4,647780,4461450,5,261,394,650
...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78
396,679400,4444339,5,241,260,574
397,672460,4433990,4,416,462,640
398,635849,4404110,4,91,246,480



Band: B04
- Common Band Name: red
- Res: 10m
- Center Wavelenght: 0.6645
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B04.tif
Adding to existing Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,646120,4471599,4,318,363,542,754
1,653880,4419970,4,365,448,552,738
2,640800,4484280,5,833,432,645,872
3,611920,4436859,4,308,312,507,533
4,647780,4461450,5,261,394,650,1044
...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29
396,679400,4444339,5,241,260,574,710
397,672460,4433990,4,416,462,640,891
398,635849,4404110,4,91,246,480,443



Band: B05
- Common Band Name: rededge70
- Res: 20m
- Center Wavelenght: 0.7039
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B05.tif
Adding to existing Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,646120,4471599,4,318,363,542,754,947
1,653880,4419970,4,365,448,552,738,874
2,640800,4484280,5,833,432,645,872,1147
3,611920,4436859,4,308,312,507,533,821
4,647780,4461450,5,261,394,650,1044,1251
...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173
396,679400,4444339,5,241,260,574,710,1338
397,672460,4433990,4,416,462,640,891,1147
398,635849,4404110,4,91,246,480,443,768



Band: B06
- Common Band Name: rededge74
- Res: 20m
- Center Wavelenght: 0.7402
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B06.tif
Adding to existing Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70,rededge74
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,646120,4471599,4,318,363,542,754,947,1013
1,653880,4419970,4,365,448,552,738,874,942
2,640800,4484280,5,833,432,645,872,1147,1474
3,611920,4436859,4,308,312,507,533,821,1679
4,647780,4461450,5,261,394,650,1044,1251,1417
...,...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173,392
396,679400,4444339,5,241,260,574,710,1338,2327
397,672460,4433990,4,416,462,640,891,1147,1183
398,635849,4404110,4,91,246,480,443,768,2077



Band: B07
- Common Band Name: rededge78
- Res: 20m
- Center Wavelenght: 0.7825
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B07.tif
Adding to existing Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70,rededge74,rededge78
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,646120,4471599,4,318,363,542,754,947,1013,1094
1,653880,4419970,4,365,448,552,738,874,942,1027
2,640800,4484280,5,833,432,645,872,1147,1474,1612
3,611920,4436859,4,308,312,507,533,821,1679,1896
4,647780,4461450,5,261,394,650,1044,1251,1417,1406
...,...,...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173,392,462
396,679400,4444339,5,241,260,574,710,1338,2327,2761
397,672460,4433990,4,416,462,640,891,1147,1183,1290
398,635849,4404110,4,91,246,480,443,768,2077,2467



Band: B08
- Common Band Name: nir
- Res: 10m
- Center Wavelenght: 0.8351
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B08.tif
Adding to existing Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70,rededge74,rededge78,nir
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,646120,4471599,4,318,363,542,754,947,1013,1094,1115
1,653880,4419970,4,365,448,552,738,874,942,1027,1062
2,640800,4484280,5,833,432,645,872,1147,1474,1612,1534
3,611920,4436859,4,308,312,507,533,821,1679,1896,1952
4,647780,4461450,5,261,394,650,1044,1251,1417,1406,1454
...,...,...,...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173,392,462,61
396,679400,4444339,5,241,260,574,710,1338,2327,2761,2927
397,672460,4433990,4,416,462,640,891,1147,1183,1290,1265
398,635849,4404110,4,91,246,480,443,768,2077,2467,2766



Band: B8A
- Common Band Name: nir08
- Res: 20m
- Center Wavelenght: 0.8648
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B8A.tif
Adding to existing Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70,rededge74,rededge78,nir,nir08
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,646120,4471599,4,318,363,542,754,947,1013,1094,1115,1209
1,653880,4419970,4,365,448,552,738,874,942,1027,1062,1213
2,640800,4484280,5,833,432,645,872,1147,1474,1612,1534,1706
3,611920,4436859,4,308,312,507,533,821,1679,1896,1952,2051
4,647780,4461450,5,261,394,650,1044,1251,1417,1406,1454,1547
...,...,...,...,...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173,392,462,61,525
396,679400,4444339,5,241,260,574,710,1338,2327,2761,2927,3125
397,672460,4433990,4,416,462,640,891,1147,1183,1290,1265,1320
398,635849,4404110,4,91,246,480,443,768,2077,2467,2766,2668



Band: B09
- Common Band Name: nir09
- Res: 60m
- Center Wavelenght: 0.945
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B09.tif
Adding to existing Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70,rededge74,rededge78,nir,nir08,nir09
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,646120,4471599,4,318,363,542,754,947,1013,1094,1115,1209,1182
1,653880,4419970,4,365,448,552,738,874,942,1027,1062,1213,1142
2,640800,4484280,5,833,432,645,872,1147,1474,1612,1534,1706,2020
3,611920,4436859,4,308,312,507,533,821,1679,1896,1952,2051,2317
4,647780,4461450,5,261,394,650,1044,1251,1417,1406,1454,1547,1343
...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173,392,462,61,525,1654
396,679400,4444339,5,241,260,574,710,1338,2327,2761,2927,3125,2833
397,672460,4433990,4,416,462,640,891,1147,1183,1290,1265,1320,1416
398,635849,4404110,4,91,246,480,443,768,2077,2467,2766,2668,2291



Band: B11
- Common Band Name: swir16
- Res: 20m
- Center Wavelenght: 1.6137
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B11.tif
Adding to existing Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70,rededge74,rededge78,nir,nir08,nir09,swir16
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,646120,4471599,4,318,363,542,754,947,1013,1094,1115,1209,1182,2141
1,653880,4419970,4,365,448,552,738,874,942,1027,1062,1213,1142,2328
2,640800,4484280,5,833,432,645,872,1147,1474,1612,1534,1706,2020,2059
3,611920,4436859,4,308,312,507,533,821,1679,1896,1952,2051,2317,1694
4,647780,4461450,5,261,394,650,1044,1251,1417,1406,1454,1547,1343,2629
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173,392,462,61,525,1654,622
396,679400,4444339,5,241,260,574,710,1338,2327,2761,2927,3125,2833,2009
397,672460,4433990,4,416,462,640,891,1147,1183,1290,1265,1320,1416,2100
398,635849,4404110,4,91,246,480,443,768,2077,2467,2766,2668,2291,1426



Band: B12
- Common Band Name: swir22
- Res: 20m
- Center Wavelenght: 2.22024
- href: https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/T/FK/2022/5/S2A_10TFK_20220524_0_L2A/B12.tif
Adding to existing Dataframe


Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70,rededge74,rededge78,nir,nir08,nir09,swir16,swir22
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,646120,4471599,4,318,363,542,754,947,1013,1094,1115,1209,1182,2141,2069
1,653880,4419970,4,365,448,552,738,874,942,1027,1062,1213,1142,2328,2275
2,640800,4484280,5,833,432,645,872,1147,1474,1612,1534,1706,2020,2059,1696
3,611920,4436859,4,308,312,507,533,821,1679,1896,1952,2051,2317,1694,977
4,647780,4461450,5,261,394,650,1044,1251,1417,1406,1454,1547,1343,2629,2482
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173,392,462,61,525,1654,622,431
396,679400,4444339,5,241,260,574,710,1338,2327,2761,2927,3125,2833,2009,1191
397,672460,4433990,4,416,462,640,891,1147,1183,1290,1265,1320,1416,2100,2119
398,635849,4404110,4,91,246,480,443,768,2077,2467,2766,2668,2291,1426,788



AOT is not eo band, skipping.
WVP is not eo band, skipping.
SCL is not eo band, skipping.

--- Dataframe ---


Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70,rededge74,rededge78,nir,nir08,nir09,swir16,swir22
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,646120,4471599,4,318,363,542,754,947,1013,1094,1115,1209,1182,2141,2069
1,653880,4419970,4,365,448,552,738,874,942,1027,1062,1213,1142,2328,2275
2,640800,4484280,5,833,432,645,872,1147,1474,1612,1534,1706,2020,2059,1696
3,611920,4436859,4,308,312,507,533,821,1679,1896,1952,2051,2317,1694,977
4,647780,4461450,5,261,394,650,1044,1251,1417,1406,1454,1547,1343,2629,2482
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173,392,462,61,525,1654,622,431
396,679400,4444339,5,241,260,574,710,1338,2327,2761,2927,3125,2833,2009,1191
397,672460,4433990,4,416,462,640,891,1147,1183,1290,1265,1320,1416,2100,2119
398,635849,4404110,4,91,246,480,443,768,2077,2467,2766,2668,2291,1426,788


## Add NDVI and NDWI bands

In [18]:
# Add NDVI
assert 'nir' in df.columns and 'red' in df.columns
df['ndvi'] = ((df['nir'].astype(int) - df['red'].astype(int)) / (df['nir'].astype(int) + df['red'].astype(int)) * 10000).astype(int)
df

Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70,rededge74,rededge78,nir,nir08,nir09,swir16,swir22,ndvi
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,646120,4471599,4,318,363,542,754,947,1013,1094,1115,1209,1182,2141,2069,1931
1,653880,4419970,4,365,448,552,738,874,942,1027,1062,1213,1142,2328,2275,1800
2,640800,4484280,5,833,432,645,872,1147,1474,1612,1534,1706,2020,2059,1696,2751
3,611920,4436859,4,308,312,507,533,821,1679,1896,1952,2051,2317,1694,977,5710
4,647780,4461450,5,261,394,650,1044,1251,1417,1406,1454,1547,1343,2629,2482,1641
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173,392,462,61,525,1654,622,431,3555
396,679400,4444339,5,241,260,574,710,1338,2327,2761,2927,3125,2833,2009,1191,6095
397,672460,4433990,4,416,462,640,891,1147,1183,1290,1265,1320,1416,2100,2119,1734
398,635849,4404110,4,91,246,480,443,768,2077,2467,2766,2668,2291,1426,788,7239


In [19]:
# Add NDWI
# Formula for S2: NDWI = (NIR - MIR) / (NIR + MIR) using Sentinel-2 Band 8 (NIR) and Band 12 (MIR=SWIR for Sentinel2)
assert 'nir' in df.columns and 'swir16' in df.columns and 'swir22' in df.columns
df['ndwi1'] = ((df['nir'].astype(int) - df['swir16'].astype(int)) / (df['nir'].astype(int) + df['swir16'].astype(int)) * 10000).astype(int)
df['ndwi2'] = ((df['nir'].astype(int) - df['swir22'].astype(int)) / (df['nir'].astype(int) + df['swir22'].astype(int)) * 10000).astype(int)
df

Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70,rededge74,rededge78,nir,nir08,nir09,swir16,swir22,ndvi,ndwi1,ndwi2
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,646120,4471599,4,318,363,542,754,947,1013,1094,1115,1209,1182,2141,2069,1931,-3151,-2996
1,653880,4419970,4,365,448,552,738,874,942,1027,1062,1213,1142,2328,2275,1800,-3734,-3635
2,640800,4484280,5,833,432,645,872,1147,1474,1612,1534,1706,2020,2059,1696,2751,-1461,-501
3,611920,4436859,4,308,312,507,533,821,1679,1896,1952,2051,2317,1694,977,5710,707,3328
4,647780,4461450,5,261,394,650,1044,1251,1417,1406,1454,1547,1343,2629,2482,1641,-2877,-2611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173,392,462,61,525,1654,622,431,3555,-8213,-7520
396,679400,4444339,5,241,260,574,710,1338,2327,2761,2927,3125,2833,2009,1191,6095,1859,4215
397,672460,4433990,4,416,462,640,891,1147,1183,1290,1265,1320,1416,2100,2119,1734,-2481,-2523
398,635849,4404110,4,91,246,480,443,768,2077,2467,2766,2668,2291,1426,788,7239,3196,5565


## Add Water Label

In [20]:
# Add Water label from the LUC value (LUC=6 is water)
df.loc[df['LUC'] == 6, 'water'] = 1
df.loc[df['LUC'] != 6, 'water'] = 0
df

Unnamed: 0_level_0,long,lat,LUC,coastal,blue,green,red,rededge70,rededge74,rededge78,nir,nir08,nir09,swir16,swir22,ndvi,ndwi1,ndwi2,water
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,646120,4471599,4,318,363,542,754,947,1013,1094,1115,1209,1182,2141,2069,1931,-3151,-2996,0.0
1,653880,4419970,4,365,448,552,738,874,942,1027,1062,1213,1142,2328,2275,1800,-3734,-3635,0.0
2,640800,4484280,5,833,432,645,872,1147,1474,1612,1534,1706,2020,2059,1696,2751,-1461,-501,0.0
3,611920,4436859,4,308,312,507,533,821,1679,1896,1952,2051,2317,1694,977,5710,707,3328,0.0
4,647780,4461450,5,261,394,650,1044,1251,1417,1406,1454,1547,1343,2629,2482,1641,-2877,-2611,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,668920,4432610,4,156,1,78,29,173,392,462,61,525,1654,622,431,3555,-8213,-7520,0.0
396,679400,4444339,5,241,260,574,710,1338,2327,2761,2927,3125,2833,2009,1191,6095,1859,4215,0.0
397,672460,4433990,4,416,462,640,891,1147,1183,1290,1265,1320,1416,2100,2119,1734,-2481,-2523,0.0
398,635849,4404110,4,91,246,480,443,768,2077,2467,2766,2668,2291,1426,788,7239,3196,5565,0.0


## Show some statistics

In [21]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
long,400.0,655988.8,30513.350503,600060.0,630404.0,656454.5,680209.0,709669.0
lat,400.0,4445340.0,31139.597892,4390280.0,4421010.0,4445129.0,4471669.75,4499950.0
LUC,400.0,4.3375,0.586903,2.0,4.0,4.0,5.0,6.0
coastal,400.0,356.275,356.209246,1.0,182.75,285.5,404.75,3580.0
blue,400.0,435.51,431.492043,1.0,232.0,352.0,524.25,3776.0
green,400.0,657.905,465.474378,40.0,430.0,567.0,752.5,4288.0
red,400.0,759.8225,508.406924,26.0,423.75,671.5,992.25,4484.0
rededge70,400.0,1076.737,528.559819,1.0,785.75,993.0,1278.75,6044.0
rededge74,400.0,1666.027,627.470202,1.0,1359.25,1666.0,1975.5,6087.0
rededge78,400.0,1870.037,683.736576,1.0,1513.25,1884.0,2241.75,5512.0


## Export Dataframe to CSV

In [22]:
# Export dataframe 
df.to_csv('df_extractedpixels.csv')

In [None]:
print('END')