In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

import rasterstats

import stackstac
import pystac_client

In [2]:
path_to_aoi = r"\\storage2\production\Finland\RD_projects\KUOPUS_2022\Species_classification\geopackage\stands_AOI.gpkg"
aoi_layer_name = "stands_AOI"

aoi_data = gpd.read_file(path_to_aoi, layer=aoi_layer_name)
aoi_data_wgs = aoi_data.to_crs(crs="EPSG:4326")

bbox = list(aoi_data.bounds.loc[0])
bbox_wgs = list(aoi_data_wgs.bounds.loc[0])

In [3]:
print(bbox)
print(bbox_wgs)

[596000.0000000009, 6912000.000000004, 614000.0000000009, 6936000.0]
[28.85293582102271, 62.32208325626651, 29.215953867109075, 62.542453897106675]


In [4]:
URL = "https://paituli.csc.fi/geoserver/ogc/stac/v1"
catalog = pystac_client.Client.open(URL)
catalog

In [5]:
collections = [collection.id for collection in catalog.get_all_collections()]
collections.sort()

_ = [print(collection) for collection in collections]

2m_digital_terrain_model_products_at_fmi
building_at_geocubes
canopy_cover_at_geocubes
canopy_height_model_at_fmi
corine_land_cover_at_geocubes
country_at_geocubes
country_land_at_geocubes
daily_wind_damage_risk_at_fmi
digital_surface_model_at_geocubes
digital_terrain_model_10m_at_geocubes
digital_terrain_model_2m_aspect_at_geocubes
digital_terrain_model_2m_at_geocubes
digital_terrain_model_2m_slope_at_geocubes
fmi_daily_global_radiation_10km_at_paituli
fmi_daily_max_temperature_10km_at_paituli
fmi_daily_max_temperature_predictions_10km_at_paituli
fmi_daily_mean_temperature_10km_at_paituli
fmi_daily_mean_temperature_predictions_10km_at_paituli
fmi_daily_min_temperature_10km_at_paituli
fmi_daily_min_temperature_predictions_10km_at_paituli
fmi_daily_precipitation_10km_at_paituli
fmi_daily_precipitation_predictions_10km_at_paituli
fmi_daily_relative_humidity_10km_at_paituli
fmi_daily_sea_level_pressure_10km_at_paituli
fmi_daily_snow_depth_10km_at_paituli
fmi_monthly_avg_temperature_1km_at

In [6]:
def get_data(*, catalog, params, assets, bbox, epsg, resolution, time_period):

    search = catalog.search(**params)
    item_collection = search.item_collection()
    
    num_found_items = len(item_collection.items)
    print(f'Found items: {num_found_items}')

    gdf = None
    if num_found_items > 0:
        # ItemCollection as GeoJson 
        stac_json = search.item_collection_as_dict()
        
        # Add Item ID to properties to have access to it in GeoPandas
        for a in stac_json['features']:
            a['properties']['title']=a['id']
        
        # GeoJson as GeoPandas dataframe
        gdf = gpd.GeoDataFrame.from_features(stac_json, f"epsg:{epsg}")

    stack = stackstac.stack(
        items=item_collection,
        assets=assets,
        bounds=bbox,
        epsg=epsg,
        resolution=resolution
    ).squeeze()

    resampled = stack.resample(time=time_period).median("time", skipna=True, keep_attrs=True)

    return gdf, stack, resampled

In [7]:
s2_11_d_params = {
    "bbox": bbox_wgs,
    "collections": "sentinel_2_11_days_mosaics_at_fmi",
    "datetime": "2023-05-01/2023-10-31"
}

s2_m_index_params = {
    "bbox": bbox_wgs,
    "collections": "sentinel_2_monthly_index_mosaics_at_fmi",
    "datetime": "2023-05-01/2023-10-31"
}

s2_11_d_gdf, s2_11_d_stack, s2_11_d_monthly = get_data(
    catalog=catalog,
    params=s2_11_d_params,
    assets=['b02', 'b03', 'b04', 'b05', 'b06', 'b07', 'b08', 'b8a', 'b11', 'b12'],
    bbox=bbox,
    epsg=3067,
    resolution=10,
    time_period="MS"
)

s2_m_index_gdf, s2_m_index_stack, s2_m_index_monthly = get_data(
    catalog=catalog,
    params=s2_m_index_params,
    assets=["meta", "ndvi", "ndti", "ndsi", "ndmi", "ndbi"],
    bbox=bbox,
    epsg=3067,
    resolution=10,
    time_period="MS"
)

Found items: 12
Found items: 12


In [8]:
display(s2_11_d_gdf)
display(s2_m_index_gdf)

Unnamed: 0,geometry,start_datetime,end_datetime,datetime,gsd,proj:epsg,proj:transform,title
0,"POLYGON ((19.117 58.68, 31.59 58.84, 33.993 70...",2023-10-21T00:00:00.000+00:00,2023-10-31T23:59:59.000+00:00,2023-10-21T00:00:00.000+00:00,20.0,32635,"[20, 0, 43547, 0, -20, 7795461, 0, 0, 1]",Sentinel-2_global_mosaic_dekadi_2023-10-21_202...
1,"POLYGON ((19.117 58.68, 31.59 58.84, 33.993 70...",2023-10-11T00:00:00.000+00:00,2023-10-20T23:59:59.000+00:00,2023-10-11T00:00:00.000+00:00,20.0,32635,"[20, 0, 43547, 0, -20, 7795461, 0, 0, 1]",Sentinel-2_global_mosaic_dekadi_2023-10-11_202...
2,"POLYGON ((19.117 58.68, 31.59 58.84, 33.993 70...",2023-09-21T00:00:00.000+00:00,2023-09-30T23:59:59.000+00:00,2023-09-21T00:00:00.000+00:00,20.0,32635,"[20, 0, 43547, 0, -20, 7795461, 0, 0, 1]",Sentinel-2_global_mosaic_dekadi_2023-09-21_202...
3,"POLYGON ((19.117 58.68, 31.59 58.84, 33.993 70...",2023-09-11T00:00:00.000+00:00,2023-09-20T23:59:59.000+00:00,2023-09-11T00:00:00.000+00:00,20.0,32635,"[20, 0, 43547, 0, -20, 7795461, 0, 0, 1]",Sentinel-2_global_mosaic_dekadi_2023-09-11_202...
4,"POLYGON ((19.117 58.68, 31.59 58.84, 33.993 70...",2023-09-01T00:00:00.000+00:00,2023-09-10T23:59:59.000+00:00,2023-09-01T00:00:00.000+00:00,20.0,32635,"[20, 0, 43547, 0, -20, 7795461, 0, 0, 1]",Sentinel-2_global_mosaic_dekadi_2023-09-01_202...
5,"POLYGON ((19.117 58.68, 31.59 58.84, 33.993 70...",2023-08-21T00:00:00.000+00:00,2023-08-31T23:59:59.000+00:00,2023-08-21T00:00:00.000+00:00,20.0,32635,"[20, 0, 43547, 0, -20, 7795461, 0, 0, 1]",Sentinel-2_global_mosaic_dekadi_2023-08-21_202...
6,"POLYGON ((19.117 58.68, 31.59 58.84, 33.993 70...",2023-08-11T00:00:00.000+00:00,2023-08-20T23:59:59.000+00:00,2023-08-11T00:00:00.000+00:00,20.0,32635,"[20, 0, 43547, 0, -20, 7795461, 0, 0, 1]",Sentinel-2_global_mosaic_dekadi_2023-08-11_202...
7,"POLYGON ((19.117 58.68, 31.59 58.84, 33.993 70...",2023-08-01T00:00:00.000+00:00,2023-08-10T23:59:59.000+00:00,2023-08-01T00:00:00.000+00:00,20.0,32635,"[20, 0, 43547, 0, -20, 7795461, 0, 0, 1]",Sentinel-2_global_mosaic_dekadi_2023-08-01_202...
8,"POLYGON ((19.117 58.68, 31.59 58.84, 33.993 70...",2023-07-21T00:00:00.000+00:00,2023-07-31T23:59:59.000+00:00,2023-07-21T00:00:00.000+00:00,20.0,32635,"[20, 0, 43547, 0, -20, 7795461, 0, 0, 1]",Sentinel-2_global_mosaic_dekadi_2023-07-21_202...
9,"POLYGON ((19.117 58.68, 31.59 58.84, 33.993 70...",2023-07-11T00:00:00.000+00:00,2023-07-20T23:59:59.000+00:00,2023-07-11T00:00:00.000+00:00,20.0,32635,"[20, 0, 43547, 0, -20, 7795461, 0, 0, 1]",Sentinel-2_global_mosaic_dekadi_2023-07-11_202...


Unnamed: 0,geometry,start_datetime,end_datetime,datetime,gsd,proj:epsg,proj:transform,title
0,"POLYGON ((19.087 59.298, 33.028 59.399, 36.022...",2023-10-01T00:00:00.000+00:00,2023-10-31T23:59:59.000+00:00,2023-10-01T00:00:00.000+00:00,10.0,3067,"[10, 0, 50000, 0, -10, 7800000, 0, 0, 1]",Sentinel-2_indeksimosaiikki_2023-10-01_2023-10-31
1,"POLYGON ((19.087 59.298, 33.028 59.399, 36.022...",2023-09-15T00:00:00.000+00:00,2023-10-15T23:59:59.000+00:00,2023-09-15T00:00:00.000+00:00,10.0,3067,"[10, 0, 50000, 0, -10, 7800000, 0, 0, 1]",Sentinel-2_indeksimosaiikki_2023-09-15_2023-10-15
2,"POLYGON ((19.087 59.298, 33.028 59.399, 36.022...",2023-09-01T00:00:00.000+00:00,2023-09-30T23:59:59.000+00:00,2023-09-01T00:00:00.000+00:00,10.0,3067,"[10, 0, 50000, 0, -10, 7800000, 0, 0, 1]",Sentinel-2_indeksimosaiikki_2023-09-01_2023-09-30
3,"POLYGON ((19.087 59.298, 33.028 59.399, 36.022...",2023-08-15T00:00:00.000+00:00,2023-09-15T23:59:59.000+00:00,2023-08-15T00:00:00.000+00:00,10.0,3067,"[10, 0, 50000, 0, -10, 7800000, 0, 0, 1]",Sentinel-2_indeksimosaiikki_2023-08-15_2023-09-15
4,"POLYGON ((19.087 59.298, 33.028 59.399, 36.022...",2023-08-01T00:00:00.000+00:00,2023-08-31T23:59:59.000+00:00,2023-08-01T00:00:00.000+00:00,10.0,3067,"[10, 0, 50000, 0, -10, 7800000, 0, 0, 1]",Sentinel-2_indeksimosaiikki_2023-08-01_2023-08-31
5,"POLYGON ((19.087 59.298, 33.028 59.399, 36.022...",2023-07-15T00:00:00.000+00:00,2023-08-15T23:59:59.000+00:00,2023-07-15T00:00:00.000+00:00,10.0,3067,"[10, 0, 50000, 0, -10, 7800000, 0, 0, 1]",Sentinel-2_indeksimosaiikki_2023-07-15_2023-08-15
6,"POLYGON ((19.087 59.298, 33.028 59.399, 36.022...",2023-07-01T00:00:00.000+00:00,2023-07-31T23:59:59.000+00:00,2023-07-01T00:00:00.000+00:00,10.0,3067,"[10, 0, 50000, 0, -10, 7800000, 0, 0, 1]",Sentinel-2_indeksimosaiikki_2023-07-01_2023-07-31
7,"POLYGON ((19.087 59.298, 33.028 59.399, 36.022...",2023-06-15T00:00:00.000+00:00,2023-07-15T23:59:59.000+00:00,2023-06-15T00:00:00.000+00:00,10.0,3067,"[10, 0, 50000, 0, -10, 7800000, 0, 0, 1]",Sentinel-2_indeksimosaiikki_2023-06-15_2023-07-15
8,"POLYGON ((19.087 59.298, 33.028 59.399, 36.022...",2023-06-01T00:00:00.000+00:00,2023-06-30T23:59:59.000+00:00,2023-06-01T00:00:00.000+00:00,10.0,3067,"[10, 0, 50000, 0, -10, 7800000, 0, 0, 1]",Sentinel-2_indeksimosaiikki_2023-06-01_2023-06-30
9,"POLYGON ((19.087 59.298, 33.028 59.399, 36.022...",2023-05-15T00:00:00.000+00:00,2023-06-15T23:59:59.000+00:00,2023-05-15T00:00:00.000+00:00,10.0,3067,"[10, 0, 50000, 0, -10, 7800000, 0, 0, 1]",Sentinel-2_indeksimosaiikki_2023-05-15_2023-06-15


In [9]:
s2_11_d_monthly

Unnamed: 0,Array,Chunk
Bytes,1.61 GiB,8.00 MiB
Shape,"(5, 10, 2400, 1801)","(1, 1, 1024, 1024)"
Dask graph,300 chunks in 22 graph layers,300 chunks in 22 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.61 GiB 8.00 MiB Shape (5, 10, 2400, 1801) (1, 1, 1024, 1024) Dask graph 300 chunks in 22 graph layers Data type float64 numpy.ndarray",5  1  1801  2400  10,

Unnamed: 0,Array,Chunk
Bytes,1.61 GiB,8.00 MiB
Shape,"(5, 10, 2400, 1801)","(1, 1, 1024, 1024)"
Dask graph,300 chunks in 22 graph layers,300 chunks in 22 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [10]:
s2_m_index_monthly

Unnamed: 0,Array,Chunk
Bytes,1.35 GiB,8.00 MiB
Shape,"(7, 6, 2400, 1801)","(1, 1, 1024, 1024)"
Dask graph,252 chunks in 28 graph layers,252 chunks in 28 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.35 GiB 8.00 MiB Shape (7, 6, 2400, 1801) (1, 1, 1024, 1024) Dask graph 252 chunks in 28 graph layers Data type float64 numpy.ndarray",7  1  1801  2400  6,

Unnamed: 0,Array,Chunk
Bytes,1.35 GiB,8.00 MiB
Shape,"(7, 6, 2400, 1801)","(1, 1, 1024, 1024)"
Dask graph,252 chunks in 28 graph layers,252 chunks in 28 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [11]:
s2_11_d_data = s2_11_d_monthly.compute()
s2_m_index_data = s2_m_index_monthly.compute()

  return func(*args, **kwargs)


In [12]:
path_to_training_data = r"\\storage2\production\Finland\RD_projects\KUOPUS_2022\Species_classification\geopackage\stands_Training_data.gpkg"
td_layer_name = "stands_Training_data"

training_data = gpd.read_file(path_to_training_data, layer=td_layer_name)
training_data.head()

Unnamed: 0,fid_1,ID,XCOORD,YCOORD,HEIGHT,SPECIES,Shape_Length,Shape_Area,INV_UNIT,PAR_UNIT,...,L_Ch3_LI5_IP2,L_Ch3_LI5_IP3,L_Ch3_LI5_IA1,L_Ch3_LI5_IA2,L_Ch3_LI5_IA3,L_Ch3_D_RF,L_Ch3_D_RL,L_Ch3_D_RI,L_Ch3_D_RO,geometry
0,2322012.0,67330,604767.87,6913829.0,20.5926,2.0,16.0,9.875,22454000,,...,52234.0,56286.0,16731.787879,25634.823529,49347.0,0.3,0.303333,0.35,0.046667,"MULTIPOLYGON (((604766.37 6913830.75, 604766.8..."
1,2322222.0,8147,604771.495,6913828.125,25.119101,2.0,19.5,14.375,22454001,,...,49151.0,57475.0,31289.02,30309.875,35478.857143,0.337349,0.251004,0.319277,0.092369,"MULTIPOLYGON (((604771.37 6913830.5, 604771.87..."
2,2322523.0,8184,604776.995,6913824.375,25.944401,2.0,29.0,25.0,22454002,,...,42809.0,49283.0,18965.697248,33808.674157,34692.321429,0.259259,0.328704,0.180556,0.231481,"MULTIPOLYGON (((604775.62 6913827.75, 604776.1..."
3,2322537.0,67317,604767.87,6913823.625,20.515301,2.0,17.0,10.375,22454003,,...,38845.0,44482.0,14135.75,22624.043478,29995.1875,0.348571,0.314286,0.2,0.137143,"MULTIPOLYGON (((604769.12 6913825.75, 604769.3..."
4,2322718.0,8200,604771.87,6913822.875,25.156799,2.0,26.5,21.875,22454004,,...,42721.0,49283.0,15544.655172,23551.405797,37208.435897,0.320863,0.292086,0.323741,0.063309,"MULTIPOLYGON (((604772.62 6913826.25, 604772.8..."


In [14]:
def calculate_statistics(vector_df, raster_data):
    stats_list = []

    # Loop through time dimension
    for t in raster_data:
        date_str = np.datetime_as_string(t.time.values, unit="M")
        print("Calculating statistics for", date_str)

        # Loop through bands
        for band in t:
            band_title = str(band.title.values).upper()
            print("\tCalculating statistics for", band_title)
            
            column_name = f"S2_{date_str}_{band_title}"
        
            transform = band.transform
            band_data = band.data

            stats = rasterstats.zonal_stats(vector_df.geometry,
                                            band_data,
                                            affine=transform,
                                            stats="mean",
                                            all_touched=True)
            
            stats_list.append(pd.Series(
                # Get the calculated mean values out of the dictionaries
                data=[item["mean"] for item in stats],
                index=training_data.index,
                name=f"{column_name}_MEAN"
            ))
        
        print("")

    # Combine all the series into a single DataFrame
    stats_df = pd.concat(stats_list, axis=1)
    return stats_df

In [15]:
s2_11_d_stats = calculate_statistics(training_data, s2_11_d_data)

Calculating statistics for 2023-06
	Calculating statistics for B02




	Calculating statistics for B03
	Calculating statistics for B04
	Calculating statistics for B05
	Calculating statistics for B06
	Calculating statistics for B07
	Calculating statistics for B08
	Calculating statistics for B8A
	Calculating statistics for B11
	Calculating statistics for B12

Calculating statistics for 2023-07
	Calculating statistics for B02
	Calculating statistics for B03
	Calculating statistics for B04
	Calculating statistics for B05
	Calculating statistics for B06
	Calculating statistics for B07
	Calculating statistics for B08
	Calculating statistics for B8A
	Calculating statistics for B11
	Calculating statistics for B12

Calculating statistics for 2023-08
	Calculating statistics for B02
	Calculating statistics for B03
	Calculating statistics for B04
	Calculating statistics for B05
	Calculating statistics for B06
	Calculating statistics for B07
	Calculating statistics for B08
	Calculating statistics for B8A
	Calculating statistics for B11
	Calculating statistics for B12


In [28]:
s2_11_d_stats

Unnamed: 0,S2_2023-06_B02_MEAN,S2_2023-06_B03_MEAN,S2_2023-06_B04_MEAN,S2_2023-06_B05_MEAN,S2_2023-06_B06_MEAN,S2_2023-06_B07_MEAN,S2_2023-06_B08_MEAN,S2_2023-06_B8A_MEAN,S2_2023-06_B11_MEAN,S2_2023-06_B12_MEAN,...,S2_2023-10_B02_MEAN,S2_2023-10_B03_MEAN,S2_2023-10_B04_MEAN,S2_2023-10_B05_MEAN,S2_2023-10_B06_MEAN,S2_2023-10_B07_MEAN,S2_2023-10_B08_MEAN,S2_2023-10_B8A_MEAN,S2_2023-10_B11_MEAN,S2_2023-10_B12_MEAN
0,1264.500000,1395.5,1222.500000,1577.000000,2809.000000,3172.000000,3353.000000,,1853.000000,,...,1351.750000,1390.500000,1380.000000,1549.500000,2035.750000,2179.250000,2458.50,2338.250000,1761.250000,1582.750000
1,1259.333333,1383.0,1214.000000,1562.666667,2769.333333,3122.666667,3277.000000,,1840.000000,,...,1361.500000,1395.500000,1388.333333,1556.833333,2056.833333,2202.166667,2476.00,2365.333333,1769.000000,1590.166667
2,1249.000000,1358.0,1197.000000,1534.000000,2690.000000,3024.000000,3125.000000,,1814.000000,,...,1381.000000,1405.500000,1405.000000,1571.500000,2099.000000,2248.000000,2511.00,2419.500000,1784.500000,1605.000000
3,1280.000000,1433.0,1248.000000,1620.000000,2928.000000,3320.000000,3581.000000,,1892.000000,,...,1322.500000,1375.500000,1355.000000,1527.500000,1972.500000,2110.500000,2406.00,2257.000000,1738.000000,1560.500000
4,1257.333333,1375.0,1210.666667,1570.000000,2778.333333,3138.333333,3260.333333,,1837.666667,,...,1352.833333,1385.333333,1367.166667,1551.500000,2049.666667,2200.000000,2418.00,2363.333333,1757.666667,1591.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3914,1298.000000,1519.0,1370.000000,1803.000000,2982.000000,3274.000000,3532.000000,,2413.000000,,...,1969.000000,1987.500000,1998.500000,2173.500000,2591.000000,2714.500000,3030.50,2868.500000,2527.500000,2431.000000
3915,1325.000000,1541.0,1439.333333,1954.333333,2949.666667,3231.000000,3335.666667,,2717.333333,,...,1836.333333,1891.333333,1873.000000,2075.166667,2579.333333,2732.333333,3009.00,2860.833333,2313.500000,2173.666667
3916,1294.000000,1501.0,1375.000000,1875.000000,3004.000000,3277.000000,3337.000000,,2525.000000,,...,2026.000000,2008.000000,2024.500000,2259.500000,2739.500000,2873.500000,3104.50,2994.500000,2637.500000,2491.500000
3917,1310.000000,1521.0,1412.000000,1935.000000,2961.000000,3252.000000,3318.000000,,2670.000000,,...,1983.500000,2043.500000,2042.000000,2354.000000,2866.500000,2987.000000,3202.00,3128.000000,2709.000000,2590.500000


In [16]:
s2_11_d_stats.isna().sum()

S2_2023-06_B02_MEAN       0
S2_2023-06_B03_MEAN       0
S2_2023-06_B04_MEAN       0
S2_2023-06_B05_MEAN       0
S2_2023-06_B06_MEAN       0
S2_2023-06_B07_MEAN       0
S2_2023-06_B08_MEAN       0
S2_2023-06_B8A_MEAN    3919
S2_2023-06_B11_MEAN       0
S2_2023-06_B12_MEAN    3919
S2_2023-07_B02_MEAN       0
S2_2023-07_B03_MEAN       0
S2_2023-07_B04_MEAN       0
S2_2023-07_B05_MEAN       0
S2_2023-07_B06_MEAN       0
S2_2023-07_B07_MEAN      60
S2_2023-07_B08_MEAN      60
S2_2023-07_B8A_MEAN      60
S2_2023-07_B11_MEAN       0
S2_2023-07_B12_MEAN       0
S2_2023-08_B02_MEAN       0
S2_2023-08_B03_MEAN       0
S2_2023-08_B04_MEAN       0
S2_2023-08_B05_MEAN       0
S2_2023-08_B06_MEAN       0
S2_2023-08_B07_MEAN       0
S2_2023-08_B08_MEAN       0
S2_2023-08_B8A_MEAN       0
S2_2023-08_B11_MEAN       0
S2_2023-08_B12_MEAN       0
S2_2023-09_B02_MEAN      72
S2_2023-09_B03_MEAN      72
S2_2023-09_B04_MEAN      72
S2_2023-09_B05_MEAN      72
S2_2023-09_B06_MEAN      72
S2_2023-09_B07_MEAN 

In [17]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(s2_11_d_stats.describe())

Unnamed: 0,S2_2023-06_B02_MEAN,S2_2023-06_B03_MEAN,S2_2023-06_B04_MEAN,S2_2023-06_B05_MEAN,S2_2023-06_B06_MEAN,S2_2023-06_B07_MEAN,S2_2023-06_B08_MEAN,S2_2023-06_B11_MEAN,S2_2023-07_B02_MEAN,S2_2023-07_B03_MEAN,S2_2023-07_B04_MEAN,S2_2023-07_B05_MEAN,S2_2023-07_B06_MEAN,S2_2023-07_B07_MEAN,S2_2023-07_B08_MEAN,S2_2023-07_B8A_MEAN,S2_2023-07_B11_MEAN,S2_2023-07_B12_MEAN,S2_2023-08_B02_MEAN,S2_2023-08_B03_MEAN,S2_2023-08_B04_MEAN,S2_2023-08_B05_MEAN,S2_2023-08_B06_MEAN,S2_2023-08_B07_MEAN,S2_2023-08_B08_MEAN,S2_2023-08_B8A_MEAN,S2_2023-08_B11_MEAN,S2_2023-08_B12_MEAN,S2_2023-09_B02_MEAN,S2_2023-09_B03_MEAN,S2_2023-09_B04_MEAN,S2_2023-09_B05_MEAN,S2_2023-09_B06_MEAN,S2_2023-09_B07_MEAN,S2_2023-09_B08_MEAN,S2_2023-09_B8A_MEAN,S2_2023-09_B11_MEAN,S2_2023-09_B12_MEAN,S2_2023-10_B02_MEAN,S2_2023-10_B03_MEAN,S2_2023-10_B04_MEAN,S2_2023-10_B05_MEAN,S2_2023-10_B06_MEAN,S2_2023-10_B07_MEAN,S2_2023-10_B08_MEAN,S2_2023-10_B8A_MEAN,S2_2023-10_B11_MEAN,S2_2023-10_B12_MEAN
count,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3859.0,3859.0,3859.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3919.0,3847.0,3847.0,3847.0,3847.0,3847.0,3847.0,3847.0,3847.0,3847.0,3847.0,3908.0,3908.0,3908.0,3908.0,3908.0,3908.0,3908.0,3908.0,3908.0,3908.0
mean,1251.119801,1375.654015,1228.368738,1583.80871,2579.654674,2867.520201,2970.854321,1901.489432,1365.109892,1465.526016,1320.583184,1644.429021,2642.815769,2955.026183,3019.105468,3104.335881,1880.360785,1472.885993,1181.408586,1272.905057,1167.009941,1444.34959,2350.243738,2652.02709,2690.39283,2803.262163,1734.685911,1334.316375,1477.232324,1552.927346,1441.824994,1701.823954,2470.0348,2707.797331,2746.096785,2856.327864,1837.587308,1534.547721,1379.404565,1403.839794,1346.352962,1544.314174,2104.827992,2268.356256,2397.74887,2402.802702,1661.837939,1466.529342
std,36.098334,55.389814,54.07854,97.873814,227.611089,266.330616,295.246674,202.357706,186.002786,178.610576,182.741023,194.187097,305.004814,421.53191,449.3309,453.688519,208.331768,178.19924,52.459287,61.17267,48.275739,94.941824,234.854393,282.176006,302.698629,305.058304,168.619799,89.614146,275.289228,265.276294,248.886105,279.180285,396.481743,441.977211,468.180539,482.679748,339.148311,332.836907,302.920697,287.532956,289.401939,309.987727,394.329155,433.962495,498.046252,474.695834,338.521784,323.541037
min,1177.0,1275.0,1156.333333,1410.0,2099.0,2308.0,2334.0,1541.666667,1129.25,1187.5,1092.0,1314.0,1901.0,1584.0,1553.0,1603.0,1391.0,1177.0,1084.5,1156.0,1085.0,1280.0,1879.0,2080.0,2018.5,2185.0,1453.5,1194.0,1042.0,1078.0,1036.0,1212.0,1598.0,1733.0,1656.0,1807.5,1288.5,1117.0,1001.0,1008.0,1006.0,1084.0,1214.0,1197.0,1188.0,1190.0,1040.0,1005.0
25%,1229.0,1338.0,1195.5,1522.0,2422.0,2682.0,2772.0,1767.0,1242.666667,1348.625,1208.0,1517.4375,2431.0,2682.125,2730.0,2822.0,1736.083333,1357.125,1147.0,1231.0,1136.0,1375.5,2171.833333,2440.0,2464.0,2578.0,1613.0,1274.0,1298.5,1385.0,1285.5,1519.583333,2192.416667,2382.1875,2404.833333,2505.125,1571.25,1291.25,1184.104167,1225.5,1163.0,1355.75,1842.0,1982.3125,2086.1875,2101.0,1440.0,1238.75
50%,1247.0,1362.0,1213.0,1563.5,2532.666667,2817.0,2916.5,1856.5,1296.333333,1405.5,1248.0,1605.5,2620.0,2937.0,3015.0,3097.0,1875.5,1435.5,1175.0,1263.5,1159.0,1428.0,2320.0,2622.5,2658.5,2761.5,1710.0,1321.0,1407.0,1489.75,1376.25,1624.0,2414.5,2655.0,2668.625,2828.5,1781.5,1416.5,1304.5,1327.5,1263.375,1458.5,2076.0,2247.75,2376.5,2390.0,1587.0,1370.0
75%,1268.0,1399.583333,1242.5,1627.416667,2714.0,3010.0,3144.5,1995.0,1412.0,1535.0,1369.416667,1730.0,2819.5,3185.0,3264.0,3367.0,2013.75,1534.0,1203.0,1298.0,1184.0,1487.0,2509.0,2838.0,2893.0,3004.75,1828.666667,1377.0,1637.5,1675.0,1551.8125,1856.5,2699.0,2978.125,3041.0,3134.5,2026.5,1712.0,1491.25,1483.5,1414.0,1651.0,2349.6875,2555.125,2740.5,2721.125,1801.0,1577.0
max,1501.333333,1723.0,1815.0,2163.666667,4051.0,4608.0,4743.0,3587.666667,2191.5,2317.5,2169.5,2475.5,4172.5,4756.0,4899.0,4975.0,2736.5,2203.0,1604.0,1579.666667,1565.0,2020.333333,3479.0,3938.0,4104.0,4287.0,3252.666667,2322.333333,3031.0,2903.0,2752.0,3225.0,4084.0,4344.0,4364.0,4458.0,3325.0,3167.0,3176.0,3144.5,3104.5,3379.0,3908.5,3937.0,4259.0,4065.5,3227.5,2858.0


In [18]:
s2_m_index_stats = calculate_statistics(training_data, s2_m_index_data)

Calculating statistics for 2023-04
	Calculating statistics for META
	Calculating statistics for NDVI
	Calculating statistics for NDTI
	Calculating statistics for NDSI
	Calculating statistics for NDMI
	Calculating statistics for NDBI

Calculating statistics for 2023-05
	Calculating statistics for META
	Calculating statistics for NDVI
	Calculating statistics for NDTI
	Calculating statistics for NDSI
	Calculating statistics for NDMI
	Calculating statistics for NDBI

Calculating statistics for 2023-06
	Calculating statistics for META
	Calculating statistics for NDVI
	Calculating statistics for NDTI
	Calculating statistics for NDSI
	Calculating statistics for NDMI
	Calculating statistics for NDBI

Calculating statistics for 2023-07
	Calculating statistics for META
	Calculating statistics for NDVI
	Calculating statistics for NDTI
	Calculating statistics for NDSI
	Calculating statistics for NDMI
	Calculating statistics for NDBI

Calculating statistics for 2023-08
	Calculating statistics for M

In [29]:
s2_m_index_stats

Unnamed: 0,S2_2023-04_META_MEAN,S2_2023-04_NDVI_MEAN,S2_2023-04_NDTI_MEAN,S2_2023-04_NDSI_MEAN,S2_2023-04_NDMI_MEAN,S2_2023-04_NDBI_MEAN,S2_2023-05_META_MEAN,S2_2023-05_NDVI_MEAN,S2_2023-05_NDTI_MEAN,S2_2023-05_NDSI_MEAN,...,S2_2023-09_NDTI_MEAN,S2_2023-09_NDSI_MEAN,S2_2023-09_NDMI_MEAN,S2_2023-09_NDBI_MEAN,S2_2023-10_META_MEAN,S2_2023-10_NDVI_MEAN,S2_2023-10_NDTI_MEAN,S2_2023-10_NDSI_MEAN,S2_2023-10_NDMI_MEAN,S2_2023-10_NDBI_MEAN
0,133.0,151.500000,133.000000,95.250000,131.250000,17.500000,151.500000,163.500000,138.125000,91.375000,...,140.750000,102.5,140.250000,8.250000,293.0,141.000000,113.000000,101.000000,131.500000,28.000000
1,133.0,152.000000,133.000000,96.000000,132.000000,16.333333,151.500000,162.833333,138.000000,91.500000,...,140.333333,103.0,139.666667,9.000000,293.0,141.000000,113.000000,100.666667,131.000000,28.333333
2,133.0,153.500000,134.500000,98.000000,135.000000,12.000000,151.500000,161.750000,138.000000,93.500000,...,140.500000,105.0,140.500000,6.500000,293.0,143.500000,113.000000,101.000000,132.000000,25.500000
3,133.0,150.000000,133.000000,95.000000,129.000000,22.000000,151.500000,162.500000,138.000000,90.500000,...,140.000000,102.0,141.000000,10.000000,293.0,138.000000,113.000000,100.000000,128.000000,34.000000
4,133.0,152.333333,133.333333,97.000000,133.000000,15.000000,151.500000,162.166667,138.000000,92.833333,...,140.000000,104.0,140.666667,7.666667,293.0,141.000000,112.666667,100.666667,130.333333,29.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3914,133.0,140.000000,129.000000,69.000000,101.000000,60.000000,153.000000,157.500000,134.000000,71.000000,...,132.000000,103.5,131.000000,29.000000,,138.000000,137.000000,107.000000,133.000000,28.000000
3915,133.0,144.000000,129.333333,67.666667,105.666667,51.333333,151.666667,156.333333,131.333333,69.500000,...,131.500000,107.5,134.500000,23.500000,293.0,125.333333,106.333333,82.000000,107.000000,68.333333
3916,133.0,139.500000,129.500000,71.000000,104.000000,57.000000,153.000000,158.250000,133.750000,72.750000,...,132.500000,105.0,131.500000,29.000000,,137.000000,137.000000,109.000000,132.000000,32.000000
3917,133.0,144.000000,129.000000,67.500000,104.000000,52.500000,152.250000,157.000000,131.750000,69.250000,...,131.500000,106.5,132.500000,27.500000,,128.500000,116.000000,93.000000,116.500000,56.000000


In [19]:
s2_m_index_stats.isna().sum()

S2_2023-04_META_MEAN       0
S2_2023-04_NDVI_MEAN       0
S2_2023-04_NDTI_MEAN       0
S2_2023-04_NDSI_MEAN       0
S2_2023-04_NDMI_MEAN       0
S2_2023-04_NDBI_MEAN       1
S2_2023-05_META_MEAN       0
S2_2023-05_NDVI_MEAN       0
S2_2023-05_NDTI_MEAN       0
S2_2023-05_NDSI_MEAN       0
S2_2023-05_NDMI_MEAN       0
S2_2023-05_NDBI_MEAN     441
S2_2023-06_META_MEAN       0
S2_2023-06_NDVI_MEAN       0
S2_2023-06_NDTI_MEAN       0
S2_2023-06_NDSI_MEAN       0
S2_2023-06_NDMI_MEAN       0
S2_2023-06_NDBI_MEAN    1555
S2_2023-07_META_MEAN       0
S2_2023-07_NDVI_MEAN       0
S2_2023-07_NDTI_MEAN       0
S2_2023-07_NDSI_MEAN       0
S2_2023-07_NDMI_MEAN       0
S2_2023-07_NDBI_MEAN    1476
S2_2023-08_META_MEAN       0
S2_2023-08_NDVI_MEAN       0
S2_2023-08_NDTI_MEAN       0
S2_2023-08_NDSI_MEAN       0
S2_2023-08_NDMI_MEAN       0
S2_2023-08_NDBI_MEAN    1816
S2_2023-09_META_MEAN     179
S2_2023-09_NDVI_MEAN     163
S2_2023-09_NDTI_MEAN     163
S2_2023-09_NDSI_MEAN     163
S2_2023-09_NDM

In [20]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(s2_m_index_stats.describe())

Unnamed: 0,S2_2023-04_META_MEAN,S2_2023-04_NDVI_MEAN,S2_2023-04_NDTI_MEAN,S2_2023-04_NDSI_MEAN,S2_2023-04_NDMI_MEAN,S2_2023-04_NDBI_MEAN,S2_2023-05_META_MEAN,S2_2023-05_NDVI_MEAN,S2_2023-05_NDTI_MEAN,S2_2023-05_NDSI_MEAN,S2_2023-05_NDMI_MEAN,S2_2023-05_NDBI_MEAN,S2_2023-06_META_MEAN,S2_2023-06_NDVI_MEAN,S2_2023-06_NDTI_MEAN,S2_2023-06_NDSI_MEAN,S2_2023-06_NDMI_MEAN,S2_2023-06_NDBI_MEAN,S2_2023-07_META_MEAN,S2_2023-07_NDVI_MEAN,S2_2023-07_NDTI_MEAN,S2_2023-07_NDSI_MEAN,S2_2023-07_NDMI_MEAN,S2_2023-07_NDBI_MEAN,S2_2023-08_META_MEAN,S2_2023-08_NDVI_MEAN,S2_2023-08_NDTI_MEAN,S2_2023-08_NDSI_MEAN,S2_2023-08_NDMI_MEAN,S2_2023-08_NDBI_MEAN,S2_2023-09_META_MEAN,S2_2023-09_NDVI_MEAN,S2_2023-09_NDTI_MEAN,S2_2023-09_NDSI_MEAN,S2_2023-09_NDMI_MEAN,S2_2023-09_NDBI_MEAN,S2_2023-10_META_MEAN,S2_2023-10_NDVI_MEAN,S2_2023-10_NDTI_MEAN,S2_2023-10_NDSI_MEAN,S2_2023-10_NDMI_MEAN,S2_2023-10_NDBI_MEAN
count,3919.0,3919.0,3919.0,3919.0,3919.0,3918.0,3919.0,3919.0,3919.0,3919.0,3919.0,3478.0,3919.0,3919.0,3919.0,3919.0,3919.0,2364.0,3919.0,3919.0,3919.0,3919.0,3919.0,2443.0,3919.0,3919.0,3919.0,3919.0,3919.0,2103.0,3740.0,3756.0,3756.0,3756.0,3756.0,3541.0,3896.0,3905.0,3905.0,3905.0,3905.0,3350.0
mean,132.543697,153.798269,135.520222,89.46517,128.884643,17.707057,151.863826,162.163264,137.418729,90.382698,136.862061,6.893509,171.802447,164.491675,138.565716,91.123841,139.80653,4.337017,204.711651,163.094965,140.853428,94.515384,140.928394,5.46623,233.188887,162.886281,142.559773,96.772572,141.875032,5.797987,261.776649,148.240049,137.281794,113.721778,143.725861,11.851325,290.290533,147.207981,134.961353,118.437196,145.765749,13.462488
std,1.331648,3.600641,2.082724,8.478218,7.061031,9.080573,2.547408,3.455146,1.851928,7.665305,5.979198,4.911279,5.26387,3.41069,1.921823,6.966536,5.213624,3.599205,5.540039,4.571936,2.418707,8.270009,5.119889,4.427283,4.546601,4.237408,2.212503,9.376171,5.684823,5.108855,8.161823,8.49758,4.697192,12.9052,6.132129,8.22043,5.317392,8.11727,7.723347,12.894862,8.930892,11.393088
min,115.0,128.0,124.666667,56.666667,86.666667,1.5,138.0,149.5,127.333333,56.0,97.0,1.0,162.5,153.5,128.125,58.166667,103.5,1.0,189.5,145.5,124.75,61.666667,106.333333,1.0,218.0,146.5,132.375,60.5,103.25,1.0,243.0,115.875,110.0,80.75,122.5,1.0,278.0,117.5,96.0,82.0,106.75,1.0
25%,133.0,152.0,134.5,83.5,125.0,12.0,150.5,159.75,136.5,85.0,133.5,4.0,168.0,162.25,137.5,86.5,137.0,2.0,201.125,160.5,139.5,89.0,138.0625,2.75,230.5,160.5,141.5,90.0,139.0,2.5,256.0,144.5,135.5,104.5,140.125,5.375,287.729167,143.75,134.666667,110.0,141.666667,6.0
50%,133.0,154.0,136.0,90.5,129.5,16.5,151.5,162.0,137.5,91.0,138.0,5.75,172.625,164.5,138.666667,91.833333,140.5,3.5,205.5,163.5,141.0,94.5,141.25,4.25,234.25,163.333333,142.75,97.0,142.25,4.25,260.0,149.25,138.0,112.0,144.0,9.5,293.0,148.333333,136.5,118.0,147.25,10.125
75%,133.0,156.0,137.0,96.0,134.0,21.5,153.75,164.0,138.5,96.25,140.666667,8.25,175.75,166.5,140.0,96.5,143.0,5.0,206.5,166.0,142.5,100.0,144.0,6.75,238.0,166.0,144.0,103.5,145.5,7.5,265.0,154.5,140.0,121.0,147.5,17.0,293.0,153.0,138.5,126.0,151.5,17.5
max,133.0,161.25,140.0,113.0,148.0,79.0,156.5,174.0,142.5,106.5,152.0,53.75,183.0,176.5,144.0,108.0,154.625,42.5,223.0,176.5,147.0,117.5,154.375,40.75,241.25,173.375,148.0,131.0,155.5,43.166667,285.0,164.0,148.0,158.5,169.0,50.666667,301.0,168.0,150.0,159.25,166.0,73.75


In [21]:
training_data_all_stats = pd.concat([training_data, s2_11_d_stats, s2_m_index_stats], axis=1)

In [22]:
training_data_all_stats

Unnamed: 0,fid_1,ID,XCOORD,YCOORD,HEIGHT,SPECIES,Shape_Length,Shape_Area,INV_UNIT,PAR_UNIT,...,S2_2023-09_NDTI_MEAN,S2_2023-09_NDSI_MEAN,S2_2023-09_NDMI_MEAN,S2_2023-09_NDBI_MEAN,S2_2023-10_META_MEAN,S2_2023-10_NDVI_MEAN,S2_2023-10_NDTI_MEAN,S2_2023-10_NDSI_MEAN,S2_2023-10_NDMI_MEAN,S2_2023-10_NDBI_MEAN
0,2322012.0,67330,604767.870,6913829.000,20.592600,2.0,16.0,9.8750,22454000,,...,140.750000,102.5,140.250000,8.250000,293.0,141.000000,113.000000,101.000000,131.500000,28.000000
1,2322222.0,8147,604771.495,6913828.125,25.119101,2.0,19.5,14.3750,22454001,,...,140.333333,103.0,139.666667,9.000000,293.0,141.000000,113.000000,100.666667,131.000000,28.333333
2,2322523.0,8184,604776.995,6913824.375,25.944401,2.0,29.0,25.0000,22454002,,...,140.500000,105.0,140.500000,6.500000,293.0,143.500000,113.000000,101.000000,132.000000,25.500000
3,2322537.0,67317,604767.870,6913823.625,20.515301,2.0,17.0,10.3750,22454003,,...,140.000000,102.0,141.000000,10.000000,293.0,138.000000,113.000000,100.000000,128.000000,34.000000
4,2322718.0,8200,604771.870,6913822.875,25.156799,2.0,26.5,21.8750,22454004,,...,140.000000,104.0,140.666667,7.666667,293.0,141.000000,112.666667,100.666667,130.333333,29.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3914,24213332.0,7286,607496.370,6935673.500,21.703899,1.0,19.0,17.0625,22457914,,...,132.000000,103.5,131.000000,29.000000,,138.000000,137.000000,107.000000,133.000000,28.000000
3915,24213454.0,7303,607451.370,6935670.875,22.870600,1.0,15.5,10.6250,22457915,,...,131.500000,107.5,134.500000,23.500000,293.0,125.333333,106.333333,82.000000,107.000000,68.333333
3916,24213483.0,7296,607483.745,6935671.875,23.334999,2.0,18.5,12.7500,22457916,,...,132.500000,105.0,131.500000,29.000000,,137.000000,137.000000,109.000000,132.000000,32.000000
3917,24213693.0,7322,607466.370,6935668.375,26.629000,2.0,28.0,31.3750,22457917,,...,131.500000,106.5,132.500000,27.500000,,128.500000,116.000000,93.000000,116.500000,56.000000


In [None]:
#td_out_path = r"\\storage2\production\Finland\RD_projects\KUOPUS_2022\Species_classification\All_stats\training_data_all_stats.gpkg"
#training_data_all_stats.to_file(td_out_path, layer=td_layer_name, driver="GPKG")

In [27]:
import rioxarray
s2_m_index_monthly.sel(time="2023-07-01").sel(band="ndvi").rio.to_raster(
    r"\\storage2\production\Finland\RD_projects\KUOPUS_2022\Species_classification\Sentinel\Stack\S2_2023_07_NDVI.tif"
)

In [30]:
s2_m_index_monthly.sel(time="2023-05-01").sel(band="ndsi").rio.to_raster(
    r"\\storage2\production\Finland\RD_projects\KUOPUS_2022\Species_classification\Sentinel\Stack\S2_2023_05_NDSI.tif"
)