# Patch Classifier Query
We are now running both a pixel classifier and a patch classifier on Descartes. This notebook takes a set of patch classifier outputs and a set of pixel classifier candidates, and finds the candidate points that intersect patch classifier predictions above a given threshold.

In [None]:
import geopandas as gpd
# pygeos is required to use geopandas spatial indexing
# (the `.sindex` lookups in the cells below rely on it).
# NOTE(review): the `use_pygeos` option appears to exist only in older
# geopandas releases -- confirm against the pinned geopandas version.
gpd.options.use_pygeos = True

In [None]:
# Region and date window for the single-region run; all three values are
# interpolated into the input file names built in the following cells.
region_name = 'africa_new'
start, end = '2021-01-01', '2022-07-01'

In [None]:
# Load the patch classifier outputs for the selected region and date window.
patch_model_version = '3.0'
# Build the file name from `patch_model_version` rather than repeating the
# literal, so the input file and the output name written later cannot drift
# apart when the version is bumped.
patch_classifier_fname = f'{region_name}_patch_weak_labels_{patch_model_version}_{start}_{end}_stride_8'
patch = gpd.read_file(f'../data/model_outputs/patch_classifier/{patch_classifier_fname}.geojson')

In [None]:
# Load the pixel classifier candidate sites for the selected region.
pixel_model_version = '0.0.11'
# Pixel-classifier settings that are embedded in the candidate file name.
# NOTE: `threshold` is rebound to the patch cutoff by a later cell, so this
# file name must be built before that cell runs.
threshold = 0.6
min_sigma = 5.0
# Use `pixel_model_version` for the `v...` part as well as the directory, so
# the two can never disagree.
pixel_classifier_fname = f'{region_name}_v{pixel_model_version}_{start}_{end}_blobs_thresh_{threshold}_min-sigma_{min_sigma}_area-thresh_0.0025_band-median'
pixel = gpd.read_file(f'../data/model_outputs/candidate_sites/{pixel_model_version}/{pixel_classifier_fname}.geojson')

In [None]:
# Patch-classifier cutoff. This rebinds `threshold`; from here on it refers
# to the patch cutoff used for filtering and for the output file name.
threshold = 0.3
# Keep only the patches whose `mean` value exceeds the cutoff, then build a
# spatial index over their geometries for the lookups in the next cell.
above_cutoff = patch['mean'].gt(threshold)
patch_threshold = patch.loc[above_cutoff]
patch_index = patch_threshold['geometry'].sindex

In [None]:
# Flag every pixel-classifier candidate that intersects at least one
# above-threshold patch.
# `predicate='intersects'` makes the index check the actual geometries; with
# no predicate, `query` only compares bounding boxes and can report
# candidates that merely fall inside a patch's envelope.
overlap = [
    len(patch_index.query(candidate, predicate='intersects')) > 0
    for candidate in pixel['geometry']
]
# NOTE: despite its name, `union` holds the candidates that *intersect* the
# patches. The name is kept because the save cell reads it.
# An empty mask list would be interpreted by pandas as "select no columns",
# so fall back to an empty row slice when there are no candidates at all.
union = pixel[overlap] if overlap else pixel.iloc[:0]
# Guard the percentage against an empty candidate set (division by zero).
overlap_fraction = len(union) / len(pixel) if len(pixel) else 0.0
print(f"{len(union)} ({overlap_fraction:.0%}) candidate points intersect with patch classifier predictions greater than {threshold}")
union

In [None]:
# Write the filtered candidates next to the pixel classifier outputs.
# When the cells are run in order, `threshold` here is the patch cutoff, so
# the file name records both model versions and that cutoff.
file_path = (
    f'../data/model_outputs/candidate_sites/{pixel_model_version}/{pixel_classifier_fname}_patch_v{patch_model_version}_threshold_{threshold}_intersection.geojson'
)
union.to_file(filename=file_path, driver='GeoJSON')
print("Saved to", file_path)

# Batched run

In [None]:
from pathlib import Path

# Regions and date windows to process: one patch file and one candidate file
# are expected per (roi, start, end) combination.
roi_list = countries = ['Portugal', 'Spain', 'Gibraltar', 'France', 'Monaco', 'Italy', 'san_marino', 'malta', 'slovenia', 'croatia', 'macedonia', 'montenegro', 'greece', 'iraq', 'syria', 'lebanon', 'jordan', 'israel']
date_list = [['2021-01-01', '2022-07-01']]

# Pixel classifier settings; these are embedded in the candidate file names.
pixel_model_version = '0.0.11'
threshold = 0.6
min_sigma = 5.0

# Not referenced in this cell; kept so the notebook namespace is unchanged.
pred_threshold = 0.6
band = 'median'

# Patch classifier settings.
patch_model_version = '3.0'
patch_threshold_value = 0.3

for roi in roi_list:
    print("Processing", roi)
    for start, end in date_list:
        # File names are derived from the version variables so that inputs
        # and outputs stay consistent when a version is changed.
        patch_classifier_fname = f'{roi}_patch_weak_labels_{patch_model_version}_{start}_{end}_stride_8'
        patch_path = f'../data/model_outputs/patch_classifier/{patch_classifier_fname}.geojson'
        pixel_classifier_fname = f'{roi}_v{pixel_model_version}_{start}_{end}_blobs_thresh_{threshold}_min-sigma_{min_sigma}_area-thresh_0.0025_band-median'
        pixel_path = f'../data/model_outputs/candidate_sites/{pixel_model_version}/{pixel_classifier_fname}.geojson'

        # A missing input is the expected "skip this region" case; test for
        # it explicitly instead of relying on a catch-all exception handler.
        missing = [path for path in (patch_path, pixel_path) if not Path(path).is_file()]
        if missing:
            print("No data found for", roi, "- missing:", ", ".join(missing))
            continue

        try:
            patch = gpd.read_file(patch_path)
            patch_threshold = patch[patch['mean'] > patch_threshold_value]
            patch_index = patch_threshold['geometry'].sindex

            pixel = gpd.read_file(pixel_path)
            if pixel.empty:
                # Nothing to filter, and the percentage below would divide
                # by zero; skip without writing an output file.
                print("No candidate points for", roi)
                continue

            # `predicate='intersects'` tests the real geometries; without a
            # predicate the index only compares bounding boxes.
            overlap = [
                len(patch_index.query(candidate, predicate='intersects')) > 0
                for candidate in pixel['geometry']
            ]
            # NOTE: `union` actually holds the intersecting candidates.
            union = pixel[overlap]

            print(f"{len(union)} ({len(union) / len(pixel):.0%}) candidate points intersect with patch classifier predictions greater than {patch_threshold_value}")
            file_path = f'../data/model_outputs/candidate_sites/{pixel_model_version}/{pixel_classifier_fname}_patch_v{patch_model_version}_threshold_{patch_threshold_value}_intersection.geojson'
            union.to_file(file_path, driver='GeoJSON')
            print("Saved to", file_path)
        except Exception as err:
            # Best-effort batch: keep going, but report the real failure
            # instead of disguising it as missing data.
            print(f"Failed to process {roi} ({start} to {end}): {err!r}")

In [None]:
from pathlib import Path

# Regions and date windows whose saved intersection files should be gathered.
roi_list = countries = ['Portugal', 'Spain', 'Gibraltar', 'France', 'Monaco', 'Italy', 'san_marino', 'malta', 'slovenia', 'croatia', 'macedonia', 'montenegro', 'greece', 'iraq', 'syria', 'lebanon', 'jordan', 'israel']
date_list = [['2021-01-01', '2022-07-01']]

# Pixel classifier settings; these are embedded in the file names.
pixel_model_version = '0.0.11'
threshold = 0.6
min_sigma = 5.0

# Not referenced in this cell; kept so the notebook namespace is unchanged.
pred_threshold = 0.6
band = 'median'

# Patch classifier settings; these are embedded in the file names.
patch_model_version = '3.0'
patch_threshold_value = 0.3

# One GeoDataFrame per intersection file that could be loaded.
candidate_gdfs = []

for roi in roi_list:
    # Take the date window from `date_list` rather than from whatever
    # `start`/`end` values a previously executed cell left behind.
    for start, end in date_list:
        # Only the saved intersection file is needed here. The patch
        # classifier outputs are not re-read or re-filtered, so this cell
        # no longer depends on a `patch` variable from an earlier cell.
        pixel_classifier_fname = f'{roi}_v{pixel_model_version}_{start}_{end}_blobs_thresh_{threshold}_min-sigma_{min_sigma}_area-thresh_0.0025_band-median'
        file_path = f'../data/model_outputs/candidate_sites/{pixel_model_version}/{pixel_classifier_fname}_patch_v{patch_model_version}_threshold_{patch_threshold_value}_intersection.geojson'

        # A missing file is the expected "skip this region" case.
        if not Path(file_path).is_file():
            print("No data found for", roi)
            continue

        try:
            candidate_gdfs.append(gpd.read_file(file_path))
        except Exception as err:
            # Keep collecting the other regions, but surface the real error.
            print(f"Failed to read {file_path}: {err!r}")


In [None]:
# merge candidate gdfs into a single geodataframe
import pandas as pd

# Fail with a clear message when nothing was loaded, instead of the generic
# error pandas raises for an empty list.
if not candidate_gdfs:
    raise ValueError("No candidate GeoDataFrames were loaded; nothing to merge")

# ignore_index=True gives the merged frame fresh, unique row labels; the
# frames were loaded separately, so their original labels may collide.
candidate_gdf = gpd.GeoDataFrame(pd.concat(candidate_gdfs, ignore_index=True))

In [None]:
# Notebook echo: display the most recently assigned `file_path` value.
file_path

In [None]:
# Write the merged candidates to a single combined GeoJSON file.
# The destination is a fixed literal; it is not derived from the version and
# threshold variables set in the earlier cells.
combined_path = '../data/model_outputs/candidate_sites/0.0.11/mediterranean_combined_v0.0.11_2021-01-01_2022-07-01_blobs_thresh_0.6_min-sigma_5.0_area-thresh_0.0025_band-median_patch_v3.0_threshold_0.3_intersection.geojson'
candidate_gdf.to_file(combined_path, driver='GeoJSON')