In [1]:
import rasterio as rio
import pandas as pd
import geopandas as gpd
from pathlib import Path
from datetime import datetime
import json
import os

In [2]:
s2_image_path = r"C:\Users\Kostas\Downloads\sentinel2_images_mean_2019-04-01_to_2019-05-01-0000000000-0000000000.tif"
envelopes_gdf = gpd.read_file(r"C:\Users\Kostas\Desktop\GIMA\Module_7\Data\PEP725\After_2016_sent_from_PEP725\pep725_outputs\PEP725_envelopes.geojson")

In [3]:
with rio.open(s2_image_path) as src:
    print(src.bounds)
    print(src.crs)
    src.close

BoundingBox(left=280320.0, bottom=5686440.0, right=695040.0, top=6101160.0)
EPSG:32632


In [4]:
envelopes_gdf.set_crs(32632, inplace=True, allow_override=True)
envelopes_gdf.tail(10)

Unnamed: 0,s_id,lon,lat,alt,alt_dem,gss_id,genus,species,phase_id,year,day,date,Label,geometry
129319,4240,10.3333,49.3833,440,463,10000,perm_grass,,111,2021,224,2021-08-12,M,"POLYGON ((592769.082 5466921.590, 600769.082 5..."
129320,21516,10.8667,52.5667,75,64,10000,perm_grass,,131,2021,225,2021-08-13,M,"POLYGON ((622519.644 5821708.065, 630519.644 5..."
129321,26751,7.6958,50.5622,290,276,10000,perm_grass,,111,2021,225,2021-08-13,M,"POLYGON ((403627.815 5597953.720, 411627.815 5..."
129322,4240,10.3333,49.3833,440,463,10000,perm_grass,,131,2021,228,2021-08-16,M,"POLYGON ((592769.082 5466921.590, 600769.082 5..."
129323,1963,9.0,49.9167,150,168,10000,perm_grass,,131,2021,231,2021-08-19,M,"POLYGON ((496000.000 5525369.095, 504000.000 5..."
129324,5456,13.75,50.7333,875,864,10000,perm_grass,,131,2021,244,2021-09-01,M,"POLYGON ((831136.405 5626933.921, 839136.405 5..."
129325,961,7.5,52.7667,30,33,10000,perm_grass,,111,2021,257,2021-09-14,M,"POLYGON ((394795.749 5843373.189, 402795.749 5..."
129326,20595,8.5,49.4667,95,86,10000,perm_grass,,131,2021,258,2021-09-15,M,"POLYGON ((459771.720 5475458.856, 467771.720 5..."
129327,19312,8.36667,48.6,490,622,10000,perm_grass,,131,2021,285,2021-10-12,M,"POLYGON ((449306.954 5379184.811, 457306.954 5..."
129328,8197,10.9833,48.2333,525,504,10000,perm_grass,,111,2021,289,2021-10-16,M,"POLYGON ((643273.716 5340132.606, 651273.716 5..."


In [5]:
# Separate the gdfs by year
#envelopes_gdf_2019 = envelopes_gdf[envelopes_gdf['year'] == 2019]
#envelopes_gdf_2020 = envelopes_gdf[envelopes_gdf['year'] == 2020]

In [6]:
#envelopes_gdf_2019.head()

## 1. Temporal filter

Now that everything is loaded the temporal filter should be applied

In [7]:
# Function to extract the dates from the filename of GEE S2 images

def imageNamingGEEfiles(raster_path):
    # Example file name: sentinel2_images_mean_2019-07-01_to_2019-08-01-0000006912-0000006912.tif
    string_parts = raster_path.split("_")
    start_date = string_parts[3]
    token = string_parts[5]
    token_string_parts = token.split("-")
    end_date = token_string_parts[0] + "-" + token_string_parts[1] + "-" + token_string_parts[2]
    # Save the month and year to variables
    s2month = datetime.strptime(start_date, '%Y-%m-%d').month
    s2year = datetime.strptime(start_date, '%Y-%m-%d').year
    return start_date, end_date, s2month, s2year

a, b, c, d = imageNamingGEEfiles("sentinel2_images_mean_2019-07-01_to_2019-08-01-0000006912-0000006912.tif")
print(a, b, c, d)

2019-07-01 2019-08-01 7 2019


In [8]:
# Pathlib stuff: Path methods: anchor, parent, name, stem, suffixes
# This can be used for easier extraction of dates from the filename
print("anchor: ", Path(s2_image_path).anchor)
print("parent: ", Path(s2_image_path).parent)
print("name: ", Path(s2_image_path).name)
print("stem: ", Path(s2_image_path).stem)
print("suffixes: ", Path(s2_image_path).suffixes)
print("Normal print: ", s2_image_path)


anchor:  C:\
parent:  C:\Users\Kostas\Downloads
name:  sentinel2_images_mean_2019-04-01_to_2019-05-01-0000000000-0000000000.tif
stem:  sentinel2_images_mean_2019-04-01_to_2019-05-01-0000000000-0000000000
suffixes:  ['.tif']
Normal print:  C:\Users\Kostas\Downloads\sentinel2_images_mean_2019-04-01_to_2019-05-01-0000000000-0000000000.tif


In [9]:
# Get the start and end dates of the image from its name
s2_image_start_date, s2_image_end_date, s2month, s2year = imageNamingGEEfiles(Path(s2_image_path).name)
print(s2_image_start_date, s2_image_end_date, s2month, s2year)

2019-04-01 2019-05-01 4 2019


__________

Datetime stuff in order to do date comparisons and find all the dates that are represented in an image

In [10]:
# Converting the date column to datetime data type

envelopes_gdf['date'] = pd.to_datetime(envelopes_gdf['date'], format='%Y-%m-%d').dt.date

In [11]:
# Converting the outputs to datetime.date dtype
s2_image_start_date = datetime.strptime(s2_image_start_date, '%Y-%m-%d').date()
s2_image_end_date = datetime.strptime(s2_image_end_date, '%Y-%m-%d').date()


In [12]:
# Creating a mask to filter the dates that are needed
temporal_mask = (envelopes_gdf.date > s2_image_start_date) & (envelopes_gdf.date < s2_image_end_date)
display(envelopes_gdf.loc[temporal_mask])
s2_image_gdf = envelopes_gdf.loc[temporal_mask]


Unnamed: 0,s_id,lon,lat,alt,alt_dem,gss_id,genus,species,phase_id,year,day,date,Label,geometry
49410,110,10.48330,53.5000,40,54,1030100,Aesculus,Aesculus hippocastanum,11,2019,92,2019-04-02,DBL,"POLYGON ((594387.807 5924917.608, 602387.807 5..."
49411,148,10.83330,54.2500,55,35,1180100,Betula,Betula pendula,11,2019,92,2019-04-02,DBL,"POLYGON ((615444.341 6008888.454, 623444.341 6..."
49412,299,10.25000,53.5167,35,41,1580100,Fraxinus,Fraxinus excelsior,60,2019,92,2019-04-02,DBL,"POLYGON ((578881.130 5926478.618, 586881.130 5..."
49413,354,8.50000,52.7000,35,41,2290100,Sorbus,Sorbus aucuparia,11,2019,92,2019-04-02,DBL,"POLYGON ((462212.807 5835016.122, 470212.807 5..."
49414,370,8.58333,52.5500,40,40,1050100,Alnus,Alnus glutinosa,11,2019,92,2019-04-02,DBL,"POLYGON ((467747.325 5818295.203, 475747.325 5..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123541,8212,13.88330,51.1333,250,236,10000,perm_grass,,111,2019,120,2019-04-30,M,"POLYGON ((837588.487 5671994.839, 845588.487 5..."
123542,8227,7.26667,53.0500,3,4,10000,perm_grass,,111,2019,120,2019-04-30,M,"POLYGON ((379814.909 5875237.296, 387814.909 5..."
123543,20320,8.63333,52.1500,110,82,10000,perm_grass,,111,2019,120,2019-04-30,M,"POLYGON ((470912.168 5773785.241, 478912.168 5..."
123544,20637,6.53333,51.7333,22,24,10000,perm_grass,,111,2019,120,2019-04-30,M,"POLYGON ((325669.511 5730255.241, 333669.511 5..."


## Target extraction

In [13]:
def addMonths(gdf):
    # Convert the date to datetime type to work later
    gdf['date'] = pd.to_datetime(gdf['date'])

    # Create a Series with the month (1-12)
    # It finds the month (int 1-12) based on the .month method of the datetime property
    # It achieves that by mapping a lambda function on each element of the date column. Therefore the result is just the month number

    getmonth = gdf['date'].map(lambda x:x.month)
    # Another way
    # test_df = test_df.assign(month=test_df['date'].map(lambda x: x.month))

    # Merge this into the gdf
    gdf = gdf.merge(getmonth, left_index=True, right_index=True)

    # Rename the column
    gdf.rename(columns = {'date_y':'month'}, inplace = True)
    return gdf

In [14]:
envelopes_gdf = addMonths(envelopes_gdf)

In [15]:
# Check for indexing using specific observations
station = 4240
year = 2021
month = 8
envelopes_gdf[(envelopes_gdf['s_id'] == station) & (envelopes_gdf['year'] == year) & (envelopes_gdf['month'] == month)]

Unnamed: 0,s_id,lon,lat,alt,alt_dem,gss_id,genus,species,phase_id,year,day,date_x,Label,geometry,month
107280,4240,10.3333,49.3833,440,463,2290100,Sorbus,Sorbus aucuparia,286,2021,233,2021-08-21,DBL,"POLYGON ((592769.082 5466921.590, 600769.082 5...",8
107657,4240,10.3333,49.3833,440,463,2230200,Sambucus,Sambucus nigra,286,2021,242,2021-08-30,DBL,"POLYGON ((592769.082 5466921.590, 600769.082 5...",8
129319,4240,10.3333,49.3833,440,463,10000,perm_grass,,111,2021,224,2021-08-12,M,"POLYGON ((592769.082 5466921.590, 600769.082 5...",8
129322,4240,10.3333,49.3833,440,463,10000,perm_grass,,131,2021,228,2021-08-16,M,"POLYGON ((592769.082 5466921.590, 600769.082 5...",8


In [16]:
envelopes_gdf.tail()

Unnamed: 0,s_id,lon,lat,alt,alt_dem,gss_id,genus,species,phase_id,year,day,date_x,Label,geometry,month
129324,5456,13.75,50.7333,875,864,10000,perm_grass,,131,2021,244,2021-09-01,M,"POLYGON ((831136.405 5626933.921, 839136.405 5...",9
129325,961,7.5,52.7667,30,33,10000,perm_grass,,111,2021,257,2021-09-14,M,"POLYGON ((394795.749 5843373.189, 402795.749 5...",9
129326,20595,8.5,49.4667,95,86,10000,perm_grass,,131,2021,258,2021-09-15,M,"POLYGON ((459771.720 5475458.856, 467771.720 5...",9
129327,19312,8.36667,48.6,490,622,10000,perm_grass,,131,2021,285,2021-10-12,M,"POLYGON ((449306.954 5379184.811, 457306.954 5...",10
129328,8197,10.9833,48.2333,525,504,10000,perm_grass,,111,2021,289,2021-10-16,M,"POLYGON ((643273.716 5340132.606, 651273.716 5...",10


In [17]:
# Group observations by s_id and month, and calculate the label with the maximum frequency for each group
#freqresults_df = envelopes_gdf.groupby(['s_id', pd.Grouper(key='month'), pd.Grouper(key='year')])['Label'].apply(lambda x: x.value_counts().index[0]).reset_index()
freqresults_df = envelopes_gdf.groupby(['s_id', pd.Grouper(key='month'), pd.Grouper(key='year')])\
    .apply(lambda x: pd.Series({'Label': x['Label'].value_counts().index[0],
                                'phase_id': x['phase_id'].value_counts().index[0]}))\
    .reset_index()

# Rename the column with the label
freqresults_df = freqresults_df.rename(columns={'Label': 'max_label', 'phase_id': 'max_phase_id'})


In [18]:
freqresults_df.head()

Unnamed: 0,s_id,month,year,max_label,max_phase_id
0,21,2,2017,DBL,60
1,21,2,2020,DBL,60
2,21,2,2021,DBL,60
3,21,3,2017,DBL,60
4,21,3,2018,DBL,60


In [19]:
# Check if it works correctly
freqresults_df.sort_values(['year', 'month']).head(50)

Unnamed: 0,s_id,month,year,max_label,max_phase_id
6853,1554,1,2017,DBL,60
15589,3120,1,2017,DBL,60
27370,5363,1,2017,DBL,60
35846,19544,1,2017,M,182
0,21,2,2017,DBL,60
96,43,2,2017,DBL,60
106,66,2,2017,DBL,60
120,94,2,2017,DBL,60
162,106,2,2017,DBL,60
366,148,2,2017,DBL,60


In [20]:
freqresults_df['max_label'].value_counts()

DBL    37914
M       1744
EC        16
Name: max_label, dtype: int64

In [21]:
freqresults_df[freqresults_df['max_label'] == 'M'].head(10)

Unnamed: 0,s_id,month,year,max_label,max_phase_id
18,21,6,2017,M,131
19,21,6,2018,M,131
20,21,6,2019,M,131
59,32,6,2017,M,131
60,32,6,2018,M,131
62,32,6,2020,M,131
113,66,7,2017,M,131
174,110,3,2021,M,182
186,110,6,2018,M,131
187,110,6,2019,M,131


In [22]:
freqresults_df['max_phase_id'].value_counts()

60     13869
286    10477
205     6161
11      4952
95      2578
131     1122
182      385
111      113
10        17
Name: max_phase_id, dtype: int64

___________

## Spatial filter

In [23]:
s2_image_gdf = addMonths(s2_image_gdf)

In [24]:
# Convert the envelopes_gdf to a list to work with the functions
s2_image_gdf_list = s2_image_gdf.geometry.tolist()

# This is used to save the indices and then extract the targets directly from the gdf
s2_image_gdf_index_list = s2_image_gdf.index.values.tolist()


In [25]:
s2_image_gdf_index_list[150:160]

[49560, 49561, 49562, 49563, 49564, 49565, 49566, 49567, 49568, 49569]

In [26]:
s2_image_gdf.iloc[150:160]

Unnamed: 0,s_id,lon,lat,alt,alt_dem,gss_id,genus,species,phase_id,year,day,date_x,Label,geometry,month
49560,5714,12.1167,51.6,85,72,1030100,Aesculus,Aesculus hippocastanum,11,2019,92,2019-04-02,DBL,"POLYGON ((711840.653 5717153.292, 719840.653 5...",4
49561,5756,12.0333,52.5,20,32,1010300,Acer,Acer platanoides,60,2019,92,2019-04-02,DBL,"POLYGON ((701884.866 5816977.455, 709884.866 5...",4
49562,5764,12.55,51.6833,150,158,1580100,Fraxinus,Fraxinus excelsior,60,2019,92,2019-04-02,DBL,"POLYGON ((741389.232 5727782.823, 749389.232 5...",4
49563,5935,11.15,52.85,25,39,1010300,Acer,Acer platanoides,60,2019,92,2019-04-02,DBL,"POLYGON ((640777.689 5853750.012, 648777.689 5...",4
49564,6090,11.0833,51.4,380,400,1030100,Aesculus,Aesculus hippocastanum,11,2019,92,2019-04-02,DBL,"POLYGON ((640916.759 5692367.343, 648916.759 5...",4
49565,6216,10.7,51.1833,240,222,1580100,Fraxinus,Fraxinus excelsior,60,2019,92,2019-04-02,DBL,"POLYGON ((614813.970 5667582.414, 622813.970 5...",4
49566,6248,10.3333,51.1667,400,445,1010300,Acer,Acer platanoides,60,2019,92,2019-04-02,DBL,"POLYGON ((589219.692 5665207.793, 597219.692 5...",4
49567,6360,11.0833,51.2167,140,126,2290100,Sorbus,Sorbus aucuparia,11,2019,92,2019-04-02,DBL,"POLYGON ((641495.452 5671985.544, 649495.452 5...",4
49568,8133,8.73333,52.5,40,31,1180100,Betula,Betula pendula,60,2019,92,2019-04-02,DBL,"POLYGON ((477897.639 5812685.428, 485897.639 5...",4
49569,8187,11.5667,49.35,585,575,1510100,Fagus,Fagus sylvatica,11,2019,92,2019-04-02,DBL,"POLYGON ((682406.583 5465533.574, 690406.583 5...",4


In [27]:
output_dir = Path(r'C:\Users\Kostas\Desktop\GIMA\Module_7\Data\filtered_patches_GEE')

In [28]:
"""Function to parse features from GeoDataFrame in such a manner that rasterio wants them"""

def getFeatures(gdf):
        return [json.loads(gdf.to_json())['features'][0]['geometry']]

In [29]:
'''
This function reads the envelope list and a raster, checks if the polygons are fully contained in the raster 
and returns 5 lists, 4 with the boundary coordinates for all the envelopes that are fully contained in the raster 
and one of their indexes from the full_index_list.
'''

def getContainedEnvelopeCoords (raster, envelope_list, full_index_list):
    with rio.open(raster, driver='GTiff') as src:
        raster_extent = src.bounds
        
        # List initialization
        minx_list = []
        miny_list = []
        maxx_list = []
        maxy_list = []
        index_list = []
        for i in range(0, len(envelope_list)):
            poly_extent = envelope_list[i].bounds

            # Check if the polygon is fully inside the raster's extent
            if (poly_extent[0] >= raster_extent[0] and poly_extent[2] <= raster_extent[2] and
                poly_extent[1] >= raster_extent[1] and poly_extent[3] <= raster_extent[3]):
                    minx_list.append(poly_extent[0])
                    miny_list.append(poly_extent[1])
                    maxx_list.append(poly_extent[2])
                    maxy_list.append(poly_extent[3])
                    index_list.append(full_index_list[i])
    return minx_list, miny_list, maxx_list, maxy_list, index_list

In [30]:
for poly in s2_image_gdf_list:
    print(poly)
    break

for i in range(0, len(s2_image_gdf_list)):
    print(i)
    break


POLYGON ((594387.8070813899 5924917.6078205975, 602387.8070813899 5924917.6078205975, 602387.8070813899 5932917.6078205975, 594387.8070813899 5932917.6078205975, 594387.8070813899 5924917.6078205975))
0


In [31]:
'''
This function receives a raster file (.tif) and the boundary coordinates for a polygon. 
It then clips the raster to the extent of the polygon. 
The polygon has to intersect the raster for the operation to be completed
'''

from shapely.geometry import box
from rasterio.mask import mask

def exportImage(raster, output_path, minx, miny, maxx, maxy):
    # open the raster file (Single Band)
    data = rio.open(raster, driver='GTiff')

    # Create a bounding box from the polygon min-max coordinates    
    bbox = box(minx, miny, maxx, maxy)
    # Create a geodataframe with a single polygon so that it can be used with rasterio
    geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs='32632')
    # Transform the geodataframe to a GeoJSON-like object that can be used as an input in the rasterio mask function
    coords = getFeatures(geo)
    #print(coords)
    
    # Mask and crop the raster AOI where polygon overlaps the whole raster
    out_img, out_transform = mask(data, shapes=coords, crop=True)
    # Define resolution and more
    out_profile = data.profile.copy()
    
    out_profile.update({'driver':'GTiff', 'width': out_img.shape[2],'height': out_img.shape[1], 'transform': out_transform})
    
    # Write the extracted raster patch to a file
    with rio.open(output_path, 'w', **out_profile) as dst:
        dst.write(out_img)
    
    # data.close()
    # data = None

In [32]:
minx_list, miny_list, maxx_list, maxy_list, contained_index_list = getContainedEnvelopeCoords(s2_image_path, s2_image_gdf_list, s2_image_gdf_index_list)

In [33]:
contained_index_list[30:35]

[49440, 49442, 49447, 49448, 49449]

In [38]:
envelopes_gdf.loc[49440]



KeyboardInterrupt



In [35]:
# Test to check if the index list works. It works.

for test in range(0, 10):
    print('Coordinates: ', minx_list[test], miny_list[test], maxx_list[test], maxy_list[test], '\nIndex: \t', contained_index_list[test])
    print("---------")

print("Check if the elements on the index and coord lists are the ones from the actual gdf")
s2_image_gdf.loc[49419]

Coordinates:  594387.8070813899 5924917.6078205975 602387.8070813899 5932917.6078205975 
Index: 	 49410
---------
Coordinates:  615444.3410472146 6008888.454367091 623444.3410472146 6016888.454367091 
Index: 	 49411
---------
Coordinates:  578881.1304628432 5926478.618106418 586881.1304628432 5934478.618106418 
Index: 	 49412
---------
Coordinates:  462212.8066672371 5835016.122399375 470212.8066672371 5843016.122399375 
Index: 	 49413
---------
Coordinates:  467747.3246259597 5818295.20291999 475747.3246259597 5826295.20291999 
Index: 	 49414
---------
Coordinates:  546005.9353980366 5790659.128762336 554005.9353980366 5798659.128762336 
Index: 	 49415
---------
Coordinates:  546005.9353980366 5790659.128762336 554005.9353980366 5798659.128762336 
Index: 	 49416
---------
Coordinates:  530095.2360452933 5790523.630913248 538095.2360452933 5798523.630913248 
Index: 	 49417
---------
Coordinates:  526616.562715307 5801624.127417376 534616.562715307 5809624.127417376 
Index: 	 49418
----

s_id                                                      460
lon                                                       9.4
lat                                                     52.45
alt                                                        45
alt_dem                                                    46
gss_id                                                1580100
genus                                                Fraxinus
species                                    Fraxinus excelsior
phase_id                                                   60
year                                                     2019
day                                                        92
date_x                                    2019-04-02 00:00:00
Label                                                     DBL
geometry    POLYGON ((523183.964558654 5807165.645333859, ...
month                                                       4
Name: 49419, dtype: object

In [42]:
# Create a list with the contained s_id's. It works
contained_s_id_list = []
for i in range(0, len(contained_index_list)):
    sid = s2_image_gdf.loc[contained_index_list[i], 's_id']
    contained_s_id_list.append(sid)


## Combination of everything to mine the labels

In [52]:
freqresults_df.head()

Unnamed: 0,s_id,month,year,max_label,max_phase_id
0,21,2,2017,DBL,60
1,21,2,2020,DBL,60
2,21,2,2021,DBL,60
3,21,3,2017,DBL,60
4,21,3,2018,DBL,60


# CONTINUE FROM HERE

In [63]:
for station in contained_s_id_list:
    a = freqresults_df.loc[freqresults_df['s_id'] == station, 'max_label']
    print(a)

169    DBL
170    DBL
171    DBL
172    DBL
173    DBL
174      M
175    DBL
176    DBL
177    DBL
178    DBL
179    DBL
180    DBL
181    DBL
182    DBL
183    DBL
184    DBL
185    DBL
186      M
187      M
188    DBL
189    DBL
190    DBL
191    DBL
192    DBL
193    DBL
194    DBL
195    DBL
196    DBL
197    DBL
198    DBL
199    DBL
200    DBL
201    DBL
202    DBL
Name: max_label, dtype: object
366    DBL
367    DBL
368    DBL
369    DBL
370    DBL
371    DBL
372    DBL
373    DBL
374    DBL
375    DBL
376    DBL
377    DBL
378    DBL
379    DBL
380    DBL
381    DBL
382    DBL
383    DBL
384    DBL
385      M
386      M
387    DBL
388    DBL
389    DBL
390    DBL
391    DBL
392    DBL
393    DBL
394    DBL
395    DBL
396    DBL
397    DBL
398    DBL
399    DBL
400    DBL
401    DBL
402    DBL
403    DBL
404    DBL
405    DBL
406    DBL
407    DBL
408    DBL
409    DBL
410    DBL
Name: max_label, dtype: object
1088    DBL
1089    DBL
1090    DBL
1091    DBL
1092    DBL
1093    D

KeyboardInterrupt: 

In [55]:
# # Test with freqresult filter
# f = {'s_id': [5363, 1554, 3120, 2021, 1521], 'month': [2, 4, 4, 4, 4], 'year': [2019, 2019, 2019, 2019, 2020], 'max_label': ['DBL', 'EC', 'M', 'DBL','DBL']}
# freqtest_df = pd.DataFrame(data=f)
# freqtest_df.head()

Unnamed: 0,s_id,month,year,max_label
0,5363,2,2019,DBL
1,1554,4,2019,EC
2,3120,4,2019,M
3,2021,4,2019,DBL
4,1521,4,2020,DBL


In [56]:
result_list = []
print(f'Getting all the maxfreq labels for month {s2month} and year {s2year}')
for i in range(0, len(contained_s_id_list)):
    result = freqtest_df.loc[(freqtest_df['s_id'].isin(contained_s_id_list)) & (freqtest_df['year'] == s2year) & (freqtest_df['month'] == s2month), 'max_label']

    result_list.append(result)

Getting all the maxfreq labels for month 4 and year 2019


In [None]:
#freqtest_df[freqtest_df['s_id'].isin(s_id_list_test)]
freqtest_df[(freqtest_df['s_id'].isin(s_id_list_test)) & (freqtest_df['year'] == s2year) & (freqtest_df['month'] == s2month)]


In [None]:
freqresults_df['max_label'].value_counts()

In [None]:
freqresults_df[freqresults_df['max_label'] == 'EC']

In [None]:
# I think that this works great! Maybe not. I see only DBL
freqresults_df[(freqresults_df['s_id'].isin(s_id_list_test)) & (freqresults_df['year'] == s2year) & (freqresults_df['month'] == s2month)]

In [None]:
print(result_list[0])

In [None]:
for x in result_list:
    print(x)

In [None]:
def unique(list1):
    x = np.array(list1)
    print(np.unique(x))

unique(result_list)

In [None]:
print("Creating patches for the image: ", Path(s2_image_path).name)

# Iterating over each envelope in the gdf

for i in range(0, len(minx_list)):
    # Get the station id for the station with the index in the i-th position
    station_id = s2_image_gdf.loc[index_list[i], 's_id']
    output_name = os.path.join(output_dir, Path(s2_image_path).stem + f'_station_{station_id}_index_{index_list[i]}.tif')
    print(f"\t Patch {i+1} out of {len(minx_list) + 1}")
    exportImage(s2_image_path, output_name, minx_list[i], miny_list[i], maxx_list[i], maxy_list[i])
print('Patch creation completed!') 



    

In [None]:
envelopes_gdf[(envelopes_gdf['year'] == 2019) & (envelopes_gdf['month'] == 4) & (envelopes_gdf['Label'] == 'EC')]