### Deploy semantic search using with finetuned model 
The deployment architecture includes: 
- Choose a pretrain BERT model, here we use all-MiniLM-L6-v2 model
- Save the ML models in S3 bucket
- Host the ML models using SageMaker endpoints 
- Create Vector index and load data into the index 
- Create API gateway handels queries from web applications and pass it to lambda 
- Create a Lambda function to call SageMaker endpoints to generate embeddings from user query, and send the query results back to API gateway 
- API gateway sends the search results to frontend, and return search results to the users 

![Semantic_search_finetuned_fullstack](image/Semantic_search_finetune_fullstack.png)

In [1]:
import torch 
print(torch.__version__)

2.1.0


In [3]:
#installed in the previous notebook
!pip install -q boto3
!pip install -q requests
!pip install -q requests-aws4auth
!pip install -q opensearch-py
!pip install -q tqdm
!pip install -q install transformers[torch]
!pip install -q transformers
!pip install -q sentence-transformers rank_bm25
!pip install -q sagemaker

### 1.Preprocess and embed the text 

In [11]:
import pandas as pd 
from sentence_transformers import SentenceTransformer, util
import boto3
import torch
import io
import json
import os 
from tqdm import tqdm
#from io import BytesIO
from inference import model_fn, predict_fn

In [5]:
# Load metadata
def read_parquet_from_s3_as_df(region, s3_bucket, s3_key):
    """
    Load a Parquet file from an S3 bucket into a pandas DataFrame.

    Parameters:
    - region: AWS region where the S3 bucket is located.
    - s3_bucket: Name of the S3 bucket.
    - s3_key: Key (path) to the Parquet file within the S3 bucket.

    Returns:
    - df: pandas DataFrame containing the data from the Parquet file.
    """

    # Setup AWS session and clients
    session = boto3.Session(region_name=region)
    s3 = session.resource('s3')

    # Load the Parquet file as a pandas DataFrame
    object = s3.Object(s3_bucket, s3_key)
    body = object.get()['Body'].read()
    df = pd.read_parquet(io.BytesIO(body))
    return df


# Upload the duplicate date to S3 as a parquet file 
def upload_df_to_s3_as_parquet(df, bucket_name, file_key):
    # Save DataFrame as a Parquet file locally
    parquet_file_path = 'temp.parquet'
    df.to_parquet(parquet_file_path)

    # Create an S3 client
    s3_client = boto3.client('s3')

    # Upload the Parquet file to S3 bucket
    try:
        response = s3_client.upload_file(parquet_file_path, bucket_name, file_key)
        os.remove(parquet_file_path)
        print(f'Uploading {file_key} to {bucket_name} as parquet file')
        # Delete the local Parquet file
        return True
    except Exception as e:
        print(e)
        return False

# Create new column 'organization_en' required by the API JSON response 
def extract_organisation_en(contact_str):
    try:
        # Parse the stringified JSON into Python objects
        contact_data = json.loads(contact_str)
        # If the parsed data is a list, iterate through it
        if isinstance(contact_data, list):
            for item in contact_data:
                # Check if 'organisation' and 'en' keys exist
                if 'organisation' in item and 'en' in item['organisation']:
                    return item['organisation']['en']
        elif isinstance(contact_data, dict):
            # If the data is a dictionary, extract 'organisation' in 'en' directly
            return contact_data.get('organisation', {}).get('en', None)
    except json.JSONDecodeError:
        # Handle cases where the contact string is not valid JSON
        return None
    except Exception as e:
        # Catch-all for any other unexpected errors
        return f"Error: {str(e)}"


# Text preprocess
def preprocess_records_into_text(df):
    selected_columns = ['features_properties_title_en','features_properties_description_en','features_properties_keywords_en']
    df = df[selected_columns]
    return df.apply(lambda x: f"{x['features_properties_title_en']}\n{x['features_properties_description_en']}\nkeywords:{x['features_properties_keywords_en']}",axis=1 )

Note, change the parquet bucket name to'-dev', '-stage', or '-prod' based on the environment you are running the file.

In [6]:
#1) Step1: Load the data 
df_parquet = read_parquet_from_s3_as_df('ca-central-1', 'webpresence-geocore-geojson-to-parquet-stage', 'records.parquet')
df_sentinel1 = read_parquet_from_s3_as_df('ca-central-1', 'webpresence-geocore-geojson-to-parquet-stage', 'sentinel1.parquet')
df = pd.concat([df_parquet, df_sentinel1], ignore_index=True)

#2) Step2: Clean the data  
col_names_list = [
    'features_properties_id','features_geometry_coordinates','features_properties_title_en',
    'features_properties_description_en','features_properties_date_published_date',
    'features_properties_keywords_en','features_properties_options','features_properties_contact',
    'features_properties_topicCategory','features_properties_date_created_date',
    'features_properties_spatialRepresentation','features_properties_type',
    'features_properties_temporalExtent_begin','features_properties_temporalExtent_end',
    'features_properties_graphicOverview','features_properties_language','features_popularity',
    'features_properties_sourceSystemName','features_properties_eoCollection',
    'features_properties_eoFilters'
]
df_en = df[col_names_list]
df_en['organisation_en'] = df_en['features_properties_contact'].apply(extract_organisation_en)

# Create a new column 'temporalExtent' as a dictionary of {'begin': ..., 'end': ...}
values_to_replace = {'Present': None, 'Not Available; Indisponible': None}
columns_to_replace = ['features_properties_temporalExtent_begin', 'features_properties_temporalExtent_end']
df_en[columns_to_replace] = df_en[columns_to_replace].replace(values_to_replace)

df_en['temporalExtent'] = df_en.apply(lambda row: {'begin': row['features_properties_temporalExtent_begin'], 'end': row['features_properties_temporalExtent_end']}, axis=1)
df_en = df_en.drop(columns =['features_properties_temporalExtent_begin', 'features_properties_temporalExtent_end'])

values_to_replace = {'Not Available; Indisponible': None} # modifies dates to acceptable values
columns_to_replace = ['features_properties_date_published_date', 'features_properties_date_created_date']
df_en[columns_to_replace] = df_en[columns_to_replace].replace(values_to_replace)

#3) Step 3: Preprocess text 
df_en['text'] = preprocess_records_into_text(df_en)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_en['organisation_en'] = df_en['features_properties_contact'].apply(extract_organisation_en)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_en[columns_to_replace] = df_en[columns_to_replace].replace(values_to_replace)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_en['temporalExtent'] = df_

In [7]:
df_en.describe()
print(type(df_en['text'].head(6)[1]))
print(df_en['text'].head(6)[1])


<class 'str'>
National Road Network - NRN - GeoBase Series
Notice - Format decommissioning\n\nGML (Geography Markup Language) and KML (Keyhole Markup Language) distribution formats will no longer be included in NRN releases produced after April 1, 2022. After this date, NRN versions will still be available in GeoPackage and ESRI Shapefile formats. This change does not impact the currently available NRN releases on this portal.\n\nThe NRN product is distributed in the form of thirteen provincial or territorial datasets and consists of two linear entities (Road Segment and Ferry Connection Segment) and three punctual entities (Junction, Blocked Passage, Toll Point) with which is associated a series of descriptive attributes such as, among others: First House Number, Last House Number, Street Name Body, Place Name, Functional Road Class, Pavement Status, Number Of Lanes, Structure Type, Route Number, Route Name, Exit Number. The development of the NRN was realized by means of individual m

Before running step 4, download the finetuned model data (here 'all-mpnet-base-v2-huggingface') from the S3 bucket 'cloudformation-templates/lambda/semantic-search/model/' and upload the model data to the project folder 'semantic-search-with-amazon-opensearch/model' 

In [8]:
# Step 4: Embedding text 
tqdm.pandas()
model_directory ="/home/ec2-user/SageMaker/semantic-search-with-amazon-opensearch/model/all-mpnet-base-v2-mpf-huggingface-reupload"
model = model_fn(model_directory)
df_en['vector'] = df_en['text'].progress_apply(lambda x: predict_fn({"inputs": x}, model))

100%|██████████| 61302/61302 [3:22:36<00:00,  5.04it/s]  


In [9]:
vector = df_en['vector'].head(6) 
#print(vector[0])
print(type(vector[0]))
print(len(vector[0]))
print(df_en['vector'].shape)

<class 'list'>
768
(61302,)


In [37]:
df_en.head(4)

Unnamed: 0,features_properties_id,features_geometry_coordinates,features_properties_title_en,features_properties_description_en,features_properties_date_published_date,features_properties_keywords_en,features_properties_options,features_properties_contact,features_properties_topicCategory,features_properties_date_created_date,...,features_properties_graphicOverview,features_properties_language,features_popularity,features_properties_sourceSystemName,features_properties_eoCollection,features_properties_eoFilters,organisation_en,temporalExtent,text,vector
0,d3881c4c-650d-4070-bf9b-1e00aabf0a1d,"[[[-143, 39.05], [-47, 39.05], [-47, 85], [-14...",Canadian Hydrographic Service Non-Navigational...,"**CHS NONNA data has been updated: April 21, 2...",2018-10-11,"Bathymetry, Depth, Hydrography","[{""url"": ""https://data.chs-shc.ca/"", ""protocol...","[{""individual"": ""null"", ""position"": {""en"": ""nu...","oceans, inlandWaters",2018-10-01,...,"[{""overviewFileName"": ""https://pacgis01.dfo-mp...",eng; CAN,3929,cgp,,[],Government of Canada; Fisheries and Oceans Can...,"{'begin': <NA>, 'end': <NA>}",Canadian Hydrographic Service Non-Navigational...,"[0.026849709451198578, 0.023685483261942863, -..."
1,3d282116-e556-400c-9306-ca1a3cada77f,"[[[-141.0027151, 41.7], [-52.6, 41.7], [-52.6,...",National Road Network - NRN - GeoBase Series,Notice - Format decommissioning\n\nGML (Geogra...,2015,"Canada, Geographic Infrastructure, NRN, Nation...","[{""url"": ""https://geo.statcan.gc.ca/geo_wa/ser...","[{""individual"": ""null"", ""position"": {""en"": ""nu...",transportation,2010,...,"[{""overviewFileName"": ""http://ftp.geogratis.gc...",eng; CAN,1991,cgp,,[],Government of Canada; Statistics Canada,"{'begin': '1979-07', 'end': '2020-05'}",National Road Network - NRN - GeoBase Series\n...,"[0.056445203721523285, 0.06621260195970535, 0...."
2,b6567c5c-8339-4055-99fa-63f92114d9e4,"[[[-141.003, 41.6755], [-52.6174, 41.6755], [-...",First Nations Location,The First Nations geographic location dataset ...,2015-05-01,"First Nation, Band, Aboriginal, Indian and Nor...","[{""url"": ""https://data.aadnc-aandc.gc.ca/geoma...","[{""individual"": ""null"", ""position"": {""en"": ""Me...","location, society",2007-06-01,...,"[{""overviewFileName"": ""https://data.aadnc-aand...",eng; CAN,1881,cgp,,[],Government of Canada;Indigenous Services Canad...,"{'begin': '2007-06-01', 'end': <NA>}",First Nations Location\nThe First Nations geog...,"[-0.0003571372071746737, 0.02866480126976967, ..."
3,522b07b9-78e2-4819-b736-ad9208eb1067,"[[[-141.003, 41.6755], [-52.6174, 41.6755], [-...",Aboriginal Lands of Canada Legislative Boundaries,The Aboriginal Lands of Canada Legislative Bou...,2017-07-28,"Canada Lands, Indian reserves, Land management...","[{""url"": ""https://proxyinternet.nrcan-rncan.gc...","[{""individual"": ""null"", ""position"": {""en"": ""nu...",boundaries,2016-02-03,...,[],eng; CAN,1832,cgp,,[],Government of Canada; Natural Resources Canada...,"{'begin': '2004-04-02', 'end': <NA>}",Aboriginal Lands of Canada Legislative Boundar...,"[0.03291967511177063, -0.02788965404033661, -0..."


In [12]:
# Step 5 Upload the embeddings as a parquet file to S3 bucket 
upload_df_to_s3_as_parquet(df=df_en, bucket_name='webpresence-nlp-data-preprocessing-stage', file_key='semantic_search_embeddings-mpnet-mpf.parquet') 

Uploading semantic_search_embeddings-mpnet-mpf.parquet to webpresence-nlp-data-preprocessing-stage as parquet file


True

### 2. Create OpenSearch index and load text/vector data into the index 


In [45]:
# import json
# import time
# import boto3

# from tqdm import tqdm
# from urllib.parse import urlparse
# from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
from opensearch import get_awsauth_from_secret, create_opensearch_connection, delete_aos_index_if_exists, load_data_to_opensearch_index
from Preprocess_and_embed_text import read_parquet_from_s3_as_df

In [39]:
#Optional: read the embedding data from the S3 bucket 
# df_en = read_parquet_from_s3_as_df('ca-central-1', 'webpresence-nlp-data-preprocessing-stage', 'semantic_search_embeddings-mpnet-mpf.parquet')

In [46]:
df_en.head(4)

Unnamed: 0,features_properties_id,features_geometry_coordinates,features_properties_title_en,features_properties_description_en,features_properties_date_published_date,features_properties_keywords_en,features_properties_options,features_properties_contact,features_properties_topicCategory,features_properties_date_created_date,...,features_properties_graphicOverview,features_properties_language,features_popularity,features_properties_sourceSystemName,features_properties_eoCollection,features_properties_eoFilters,organisation_en,temporalExtent,text,vector
0,d3881c4c-650d-4070-bf9b-1e00aabf0a1d,"[[[-143, 39.05], [-47, 39.05], [-47, 85], [-14...",Canadian Hydrographic Service Non-Navigational...,"**CHS NONNA data has been updated: April 21, 2...",2018-10-11,"Bathymetry, Depth, Hydrography","[{""url"": ""https://data.chs-shc.ca/"", ""protocol...","[{""individual"": ""null"", ""position"": {""en"": ""nu...","oceans, inlandWaters",2018-10-01,...,"[{""overviewFileName"": ""https://pacgis01.dfo-mp...",eng; CAN,3929,cgp,,[],Government of Canada; Fisheries and Oceans Can...,"{'begin': <NA>, 'end': <NA>}",Canadian Hydrographic Service Non-Navigational...,"[0.026849709451198578, 0.023685483261942863, -..."
1,3d282116-e556-400c-9306-ca1a3cada77f,"[[[-141.0027151, 41.7], [-52.6, 41.7], [-52.6,...",National Road Network - NRN - GeoBase Series,Notice - Format decommissioning\n\nGML (Geogra...,2015,"Canada, Geographic Infrastructure, NRN, Nation...","[{""url"": ""https://geo.statcan.gc.ca/geo_wa/ser...","[{""individual"": ""null"", ""position"": {""en"": ""nu...",transportation,2010,...,"[{""overviewFileName"": ""http://ftp.geogratis.gc...",eng; CAN,1991,cgp,,[],Government of Canada; Statistics Canada,"{'begin': '1979-07', 'end': '2020-05'}",National Road Network - NRN - GeoBase Series\n...,"[0.056445203721523285, 0.06621260195970535, 0...."
2,b6567c5c-8339-4055-99fa-63f92114d9e4,"[[[-141.003, 41.6755], [-52.6174, 41.6755], [-...",First Nations Location,The First Nations geographic location dataset ...,2015-05-01,"First Nation, Band, Aboriginal, Indian and Nor...","[{""url"": ""https://data.aadnc-aandc.gc.ca/geoma...","[{""individual"": ""null"", ""position"": {""en"": ""Me...","location, society",2007-06-01,...,"[{""overviewFileName"": ""https://data.aadnc-aand...",eng; CAN,1881,cgp,,[],Government of Canada;Indigenous Services Canad...,"{'begin': '2007-06-01', 'end': <NA>}",First Nations Location\nThe First Nations geog...,"[-0.0003571372071746737, 0.02866480126976967, ..."
3,522b07b9-78e2-4819-b736-ad9208eb1067,"[[[-141.003, 41.6755], [-52.6174, 41.6755], [-...",Aboriginal Lands of Canada Legislative Boundaries,The Aboriginal Lands of Canada Legislative Bou...,2017-07-28,"Canada Lands, Indian reserves, Land management...","[{""url"": ""https://proxyinternet.nrcan-rncan.gc...","[{""individual"": ""null"", ""position"": {""en"": ""nu...",boundaries,2016-02-03,...,[],eng; CAN,1832,cgp,,[],Government of Canada; Natural Resources Canada...,"{'begin': '2004-04-02', 'end': <NA>}",Aboriginal Lands of Canada Legislative Boundar...,"[0.03291967511177063, -0.02788965404033661, -0..."


Under the cloudformation template 'geocore-semantic-search-with-opensearch-stage; Output tab, find the values for region, aos_host, and os_secret_id

In [47]:
# Create a opensearch connection 
# region = environ['MY_AWS_REGION']
# aos_host = environ['OS_ENDPOINT'] 
# os_secret_id = environ['OS_SECRET_ID']

region = "ca-central-1"
aos_host = "search-semantic-search-arieibeskhrn6vn2qd7gf5br7q.ca-central-1.es.amazonaws.com"
os_secret_id = "OpenSearchSecret-geocore-semantic-search-with-opensearch-stage"

#awsauth = get_awsauth_from_secret(region, secret_id=os_secret_id)
awsauth = ("admin", "Semantic123!")
aos_client =create_opensearch_connection(aos_host, awsauth)


Connection to OpenSearch established: <OpenSearch([{'host': 'search-semantic-search-arieibeskhrn6vn2qd7gf5br7q.ca-central-1.es.amazonaws.com', 'port': 443}])>


In [48]:
#Create an index 
index_name = "mpnet-mpf-knn"
knn_index = {
    "settings": {
        "index.knn": True, #This enables the k-nearest neighbor (KNN) search capability on the index.
        "index.knn.space_type": "cosinesimil", #cosine similarity 
        "analysis": {
          "analyzer": {
            "default": {
              "type": "standard",
              "stopwords": "_english_"
            }
          }
        }
    },
    "mappings": {
        "properties": {
            "vector": {
                "type": "knn_vector",
                "dimension": 768,
                "store": True
            },
            "coordinates":{
              "type": "geo_shape", 
              "store": True 
            }  
        }
    }
}

In [49]:
#Delete index if it exists 
delete_aos_index_if_exists(aos_client, index_to_delete=index_name)

Current indexes: ['minilm-knn', 'minilm-pretrain-knn', '.opensearch-observability', '.plugins-ml-config', '.ql-datasources', '.kibana_1', '.opendistro_security', 'mpnet-mpf-knn']
Deleted index: mpnet-mpf-knn
Response: {'acknowledged': True}
Indexes after deletion attempt: ['minilm-knn', 'minilm-pretrain-knn', '.opensearch-observability', '.plugins-ml-config', '.ql-datasources', '.kibana_1', '.opendistro_security']


In [50]:
#Create a index 
aos_client.indices.create(index=index_name,body=knn_index,ignore=400)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'mpnet-mpf-knn'}

In [None]:
#Load data to OpenSearch Index 
load_data_to_opensearch_index(df_en, aos_client, index_name)

In [52]:
#Check 
res = aos_client.search(index=index_name, body={"query": {"match_all": {}}})
print(f"Records loaded into the index {index_name} is {res['hits']['total']['value']}.")

Records loaded into the index mpnet-mpf-knn is 10000.


### 3. Deploy all-mpnet-base-v2-mpf-huggingface model using sagemaker 

In [53]:
import boto3
import re
import time
import sagemaker
from sagemaker import get_execution_role
from sagemaker.huggingface.model import HuggingFaceModel

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [57]:
!cd /home/ec2-user/SageMaker/semantic-search-with-amazon-opensearch/model/all-mpnet-base-v2-mpf-huggingface-reupload && tar czvf ../all-mpnet-base-v2-mpf-huggingface.tar.gz *

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


1_Pooling/
1_Pooling/.ipynb_checkpoints/
1_Pooling/config.json
2_Normalize/
code/
code/inference.py
code/requirements.txt
config_2.json
config.json
config_sentence_transformers.json
eval/
model.safetensors
modules.json
README.md
sentence_bert_config.json
special_tokens_map.json
tokenizer_config.json
tokenizer.json
vocab.txt


In [59]:
sagemaker_session = sagemaker.Session()
inputs = sagemaker_session.upload_data(path='/home/ec2-user/SageMaker/semantic-search-with-amazon-opensearch/model/all-mpnet-base-v2-mpf-huggingface.tar.gz', key_prefix='sentence-transformers-model')
print(f"Response from model upload: {inputs}") 

# Create a SageMaker session and get the execution role to be used later 
role = sagemaker.get_execution_role()

# Deploy with model data 
hub = {
    'HF_TASK':'feature-extraction'
}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data=inputs,  # path to your trained SageMaker model
   role=role,                                            # IAM role with permissions to create an endpoint
   transformers_version="4.26",                           # Transformers version used
   pytorch_version="1.13",                                # PyTorch version used
   py_version='py39',                                    # Python version used
   env=hub
)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
   initial_instance_count=1,
   instance_type="ml.t2.medium",
   endpoint_name = f'all-mpnet-base-v2-mpf-huggingface-test'
)


Response from model upload: s3://sagemaker-ca-central-1-759472643633/sentence-transformers-model/all-mpnet-base-v2-mpf-huggingface.tar.gz
---------!

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (0) from primary with message "Your invocation timed out while waiting for a response from container primary. Review the latency metrics for each container in Amazon CloudWatch, resolve the issue, and try again.". See https://ca-central-1.console.aws.amazon.com/cloudwatch/home?region=ca-central-1#logEventViewer:group=/aws/sagemaker/Endpoints/all-mpnet-base-v2-mpf-huggingface-test in account 759472643633 for more information.

In [None]:
# example request: you always need to define "inputs"
data = {"inputs":" Today is a sunny and nice day in Ottawa"} 
# request
vector = predictor.predict(data)
len(vector)

### 4. Test the model endpoints and perform search in OpenSearch index 

In [61]:
from sagemaker_fn import invoke_sagemaker_endpoint_ft

In [None]:
endpoint_name ='all-mpnet-base-v2-mpf-huggingface-test'
payload = {"inputs": "floods event in Canada"}
vector = invoke_sagemaker_endpoint_ft(endpoint_name, payload)
print(len(vector))

In [42]:
region = "ca-central-1"
aos_host = "search-semantic-search-dfcizxxxuj62dusl5skmeu3czu.ca-central-1.es.amazonaws.com"
os_secret_id = "dev/OpenSearch/SemanticSearch"

awsauth = get_awsauth_from_secret(region, secret_id=os_secret_id)
aos_client =create_opensearch_connection(aos_host, awsauth)

query={
    "size": 20,
    "query": {
        "knn": {
            "vector":{
                "vector":vector,
                "k":20
            }
        }
    }
}

res = aos_client.search(index='mpnet-mpf-knn', size=20, body=query, request_timeout=55)
query_result=[]
for hit in res['hits']['hits']:
    row=[hit['_id'],hit['_score'],hit['_source']['title'],hit['_source']['id']]
    query_result.append(row)
query_result_df = pd.DataFrame(data=query_result,columns=["_id","relevancy_score","title",'uuid'])
display(query_result_df)

Connection to OpenSearch established: <OpenSearch([{'host': 'search-semantic-search-dfcizxxxuj62dusl5skmeu3czu.ca-central-1.es.amazonaws.com', 'port': 443}])>


Unnamed: 0,_id,relevancy_score,title,uuid
0,15eQT5ABdooaaHUe8g0S,0.727605,Floods in Canada - Cartographic Product Collec...,08b810c2-7c81-40f1-adb1-c32c8a2c9f50
1,65eST5ABdooaaHUeTiL6,0.664807,High tides December 2010: breaking waves,39bdcc75-dbaf-424d-9dbd-265c282f14f5
2,GZeST5ABdooaaHUeZCRK,0.653705,CGDIWH-142543,CGDIWH-142543
3,_ZeRT5ABdooaaHUeixbR,0.643081,Flood Risk Areas Database (BDZI),3ac8ddff-fe0a-4a7a-8393-d5938e8f35e5
4,bJeRT5ABdooaaHUeghbl,0.63736,Flooding zones,12c51ab4-e22a-4abd-bf90-eaeb274a98c9
5,C5eRT5ABdooaaHUe-B5j,0.624253,Flood Risk Areas and Historical Floods,35782937-d7ac-b721-7fb3-bf51f18903ba
6,QJeRT5ABdooaaHUezBvt,0.623981,Forest Abiotic Damage Event,c32dfe71-bb89-4301-a8a3-4f97d1629c00
7,35eTT5ABdooaaHUeeTT-,0.621082,2023 - Dynamic Surface Water Maps of Canada fr...,ccmeo-dynamic-surface-water-compilation-dsw-19...
8,WpeST5ABdooaaHUe5ywG,0.620454,Government of Qc - 2019 Flood,CGDIWH-117987
9,gZeST5ABdooaaHUeaiRp,0.620081,Flood_Inondation_EGS_Flood_Product_Active_en,CGDIWH-150532
