1 change: 1 addition & 0 deletions backend/app/keycloak_auth.py
@@ -3,6 +3,7 @@
import logging
from datetime import datetime

from typing import Optional
from fastapi import Security, HTTPException, Depends
from fastapi.security import OAuth2AuthorizationCodeBearer, APIKeyHeader
from itsdangerous.exc import BadSignature
8 changes: 1 addition & 7 deletions backend/app/main.py
@@ -224,13 +224,7 @@ async def startup_elasticsearch():
# create elasticsearch indices
es = await connect_elasticsearch()
create_index(
es, "file", settings.elasticsearch_setting, indexSettings.file_mappings
)
create_index(
es, "dataset", settings.elasticsearch_setting, indexSettings.dataset_mappings
)
create_index(
es, "metadata", settings.elasticsearch_setting, indexSettings.metadata_mappings
es, "clowder", settings.elasticsearch_setting, indexSettings.es_mappings
)
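The three per-type indices ("file", "dataset", "metadata") are collapsed here into a single "clowder" index, so one es_mappings definition covers every resource type. A minimal sketch of what such a unified mapping might contain, assuming the field names of the ElasticsearchEntry model below (the authoritative es_mappings lives in the search config and may differ):

    # Hypothetical sketch only; the real mapping is indexSettings.es_mappings.
    es_mappings = {
        "properties": {
            "resource_type": {"type": "keyword"},  # "file", "dataset", ...
            "creator": {"type": "keyword"},
            "created": {"type": "date"},
            "user_ids": {"type": "keyword"},  # emails with view permission
            "name": {"type": "text"},
            "content_type": {"type": "keyword"},
        }
    }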


51 changes: 10 additions & 41 deletions backend/app/models/search.py
@@ -30,50 +30,19 @@ class ElasticsearchEntry(BaseModel):
"""These Entries are used to generate the JSON for files/datasets/etc. that go into Elasticsearch index.
user_ids is a list of email addresses that have permission to view the document."""

resource_type: str
creator: str
created: datetime
modified: Optional[datetime] = None
user_ids: List[str] = []


class ESFileEntry(ElasticsearchEntry):
"""See file_mappings in search/config.py to change how ES indexes the fields."""

name: str
content_type: str
content_type_main: str
dataset_id: str
folder_id: str
bytes: int
description: Optional[str]
downloads: int


class ESDatasetEntry(ElasticsearchEntry):
"""See dataset_mappings in search/config.py to change how ES indexes the fields."""

name: str
description: str
downloads: int


class ESMetadataEntry(ElasticsearchEntry):
"""See metadata_mappings in search/config.py to change how ES indexes the fields."""

resource_id: str
resource_type: str = "file"
resource_created: datetime
resource_creator: str
content: dict
context_url: Optional[str] = None
context: Optional[List[dict]] = []
definition: Optional[str] = None
# File fields (for UI display)
name: Optional[str] = None
content_type: Optional[str] = None
content_type_main: Optional[str] = None
dataset_id: Optional[str] = None
folder_id: Optional[str] = None
bytes: Optional[int] = None
downloads: Optional[int] = None
# Dataset fields (for UI display)
description: Optional[str] = None
# file-specific fields
content_type: Optional[str]
content_type_main: Optional[str]
dataset_id: Optional[str]
folder_id: Optional[str]
bytes: Optional[int]
# metadata fields
metadata: Optional[List[dict]] = []
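With a single model, one document shape now covers files, datasets, and metadata, and per-type queries reduce to a term filter on resource_type. A hedged construction example (all field values are made up; real documents are built by the app.search.index helpers):

    # Hypothetical example of the unified entry for a file.
    from datetime import datetime

    entry = ElasticsearchEntry(
        resource_type="file",
        creator="alice@example.com",
        created=datetime.utcnow(),
        user_ids=["alice@example.com", "bob@example.com"],
        content_type="text/csv",
        content_type_main="text",
        dataset_id="64a1f0c2a9b8c4d5e6f70123",  # hypothetical ObjectId string
        bytes=2048,
        metadata=[{"description": "A sample CSV"}],  # attached metadata records
    )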
32 changes: 3 additions & 29 deletions backend/app/routers/datasets.py
@@ -45,16 +45,15 @@
)
from app.models.files import FileOut, FileDB, FileDBViewList
from app.models.folders import FolderOut, FolderIn, FolderDB, FolderDBViewList
from app.models.metadata import MetadataDB, MetadataOut
from app.models.metadata import MetadataDB
from app.models.pyobjectid import PyObjectId
from app.models.users import UserOut
from app.rabbitmq.listeners import submit_dataset_job
from app.routers.files import add_file_entry, remove_file_entry
from app.search.connect import (
delete_document_by_id,
delete_document_by_query,
)
from app.search.index import index_dataset, index_dataset_metadata
from app.search.index import index_dataset

router = APIRouter()
security = HTTPBearer()
@@ -290,18 +289,6 @@ async def edit_dataset(

# Update the dataset entry in the index
await index_dataset(es, DatasetOut(**dataset.dict()), update=True)
# updating metadata in elasticsearch
if (
metadata := await MetadataDB.find_one(
MetadataDB.resource.resource_id == ObjectId(dataset_id)
)
) is not None:
await index_dataset_metadata(
es,
DatasetOut(**dataset.dict()),
MetadataOut(**metadata.dict()),
update=True,
)
return dataset.dict()
raise HTTPException(status_code=404, detail=f"Dataset {dataset_id} not found")

@@ -325,18 +312,6 @@ async def patch_dataset(

# Update the dataset entry in the index
await index_dataset(es, DatasetOut(**dataset.dict()), update=True)
# updating metadata in elasticsearch
if (
metadata := await MetadataDB.find_one(
MetadataDB.resource.resource_id == ObjectId(dataset_id)
)
) is not None:
await index_dataset_metadata(
es,
DatasetOut(**dataset.dict()),
MetadataOut(**metadata.dict()),
update=True,
)
return dataset.dict()


@@ -349,8 +324,7 @@ async def delete_dataset(
):
if (dataset := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
# delete from elasticsearch
delete_document_by_id(es, "dataset", dataset_id)
delete_document_by_query(es, "metadata", {"match": {"resource_id": dataset_id}})
delete_document_by_id(es, "clowder", dataset_id)
# delete the dataset first to minimize files/folders being uploaded to a deleted dataset
await dataset.delete()
await MetadataDB.find(
64 changes: 39 additions & 25 deletions backend/app/routers/elasticsearch.py
@@ -1,40 +1,54 @@
import json

from fastapi import Depends
from fastapi.routing import APIRouter, Request

from app.keycloak_auth import get_current_username
from app.search.connect import connect_elasticsearch, search_index

router = APIRouter()


@router.put("/search", response_model=str)
async def search(index_name: str, query: str):
es = await connect_elasticsearch()
return search_index(es, index_name, query)


@router.post("/file/_msearch")
async def search_file(request: Request):
es = await connect_elasticsearch()
query = await request.body()
return search_index(es, "file", query)
def _add_permissions_clause(query, username: str):
"""Append filter to Elasticsearch object that restricts permissions based on the requesting user."""
# TODO: Add public filter once added
user_clause = {
"bool": {
"should": [
{"term": {"creator": username}},
{"term": {"user_ids": username}},
]
}
}

updated_query = ""
for content in query.decode().split("\n"):
# An msearch body can contain multiple newline-separated JSON objects
# (e.g., aggregations or reactivesearch GUI queries).
if len(content) == 0:
continue  # skip empty lines, such as the trailing newline
json_content = json.loads(content)
if "query" in json_content:
json_content["query"] = {
"bool": {"must": [user_clause, json_content["query"]]}
}
updated_query += json.dumps(json_content) + "\n"
return updated_query.encode()
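A usage sketch for the helper above (the email and query are made-up values). Every newline-delimited JSON line that carries a "query" key is wrapped in a bool/must together with the user clause, so the permission filter applies to each sub-query of an msearch body:

    # Hypothetical msearch body: one header line, one query line.
    body = b'{"preference": "results"}\n{"query": {"match": {"name": "sample"}}}\n'
    wrapped = _add_permissions_clause(body, "alice@example.com")
    # The query line becomes:
    # {"query": {"bool": {"must": [
    #     {"bool": {"should": [
    #         {"term": {"creator": "alice@example.com"}},
    #         {"term": {"user_ids": "alice@example.com"}}]}},
    #     {"match": {"name": "sample"}}]}}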


@router.post("/dataset/_msearch")
async def search_dataset(request: Request):
es = await connect_elasticsearch()
query = await request.body()
return search_index(es, "dataset", query)


@router.post("/metadata/_msearch")
async def search_metadata(request: Request):
@router.put("/search", response_model=str)
async def search(index_name: str, query: str, username=Depends(get_current_username)):
es = await connect_elasticsearch()
query = await request.body()
return search_index(es, "metadata", query)
query = _add_permissions_clause(query.encode(), username)  # helper expects bytes
return search_index(es, index_name, query)


@router.post("/file,dataset,metadata/_msearch")
async def search_file_dataset_and_metadata(request: Request):
@router.post("/all/_msearch")
async def msearch(
request: Request,
username=Depends(get_current_username),
):
es = await connect_elasticsearch()
query = await request.body()
r = search_index(es, ["file", "dataset", "metadata"], query)
query = _add_permissions_clause(query, username)
r = search_index(es, ["clowder"], query)
return r
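A hypothetical client-side call against the combined endpoint (the mount path, port, and token are assumptions; the body follows the ndjson convention that _msearch expects):

    import requests  # sketch only

    ndjson = '{}\n{"query": {"term": {"resource_type": "dataset"}}}\n'
    resp = requests.post(
        "http://localhost:8000/elasticsearch/all/_msearch",  # assumed mount path
        data=ndjson,
        headers={
            "Authorization": "Bearer <token>",  # Keycloak bearer token
            "Content-Type": "application/x-ndjson",
        },
    )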
13 changes: 4 additions & 9 deletions backend/app/routers/files.py
@@ -36,7 +36,6 @@
delete_document_by_id,
insert_record,
update_record,
delete_document_by_query,
)
from app.search.index import index_file

@@ -104,11 +103,6 @@ async def add_file_entry(
file: bytes to upload
"""

# Check all connection and abort if any one of them is not available
if fs is None or es is None:
raise HTTPException(status_code=503, detail="Service not available")
return

await new_file.insert()
new_file_id = new_file.id
content_type_obj = get_content_type(content_type, file)
@@ -120,6 +114,7 @@
file,
length=-1,
part_size=settings.MINIO_UPLOAD_CHUNK_SIZE,
content_type=new_file.content_type.content_type,
) # async write chunk to minio
version_id = response.version_id
bytes = len(fs.get_object(settings.MINIO_BUCKET_NAME, str(new_file_id)).data)
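Passing content_type through to put_object means MinIO stores and serves the file's original MIME type rather than a generic default. A hypothetical sanity check with the minio client:

    # Confirm the stored MIME type survived the upload (sketch only).
    stat = fs.stat_object(settings.MINIO_BUCKET_NAME, str(new_file_id))
    assert stat.content_type == new_file.content_type.content_type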
@@ -163,8 +158,7 @@ async def remove_file_entry(
return
fs.remove_object(settings.MINIO_BUCKET_NAME, str(file_id))
# delete from elasticsearch
delete_document_by_id(es, "file", str(file_id))
delete_document_by_query(es, "metadata", {"match": {"resource_id": str(file_id)}})
delete_document_by_id(es, "clowder", str(file_id))
if (file := await FileDB.get(PydanticObjectId(file_id))) is not None:
await file.delete()
await MetadataDB.find(MetadataDB.resource.resource_id == ObjectId(file_id)).delete()
@@ -205,6 +199,7 @@ async def update_file(
file.file,
length=-1,
part_size=settings.MINIO_UPLOAD_CHUNK_SIZE,
content_type=updated_file.content_type.content_type,
) # async write chunk to minio
version_id = response.version_id

@@ -407,7 +402,7 @@ async def resubmit_file_extractions(


@router.patch("/{file_id}/thumbnail/{thumbnail_id}", response_model=FileOut)
async def add_dataset_thumbnail(
async def add_file_thumbnail(
file_id: str,
thumbnail_id: str,
allow: bool = Depends(FileAuthorization("editor")),
14 changes: 5 additions & 9 deletions backend/app/routers/metadata_datasets.py
@@ -25,7 +25,7 @@
MetadataDefinitionDB,
)
from app.search.connect import delete_document_by_id
from app.search.index import index_dataset_metadata
from app.search.index import index_dataset

router = APIRouter()

@@ -118,7 +118,7 @@ async def add_dataset_metadata(
await md.insert()

# Reindex the dataset so the new metadata entry is searchable
await index_dataset_metadata(es, dataset, MetadataOut(**md.dict()))
await index_dataset(es, dataset)
return md.dict()


@@ -170,9 +170,7 @@ async def replace_dataset_metadata(
await md.replace()

# Reindex the dataset so the replaced metadata is searchable
await index_dataset_metadata(
es, dataset, MetadataOut(**md.dict()), update=True
)
await index_dataset(es, dataset, update=True)
return md.dict()
else:
raise HTTPException(status_code=404, detail=f"Dataset {dataset_id} not found")
@@ -238,9 +236,7 @@ async def update_dataset_metadata(

md = await MetadataDB.find_one(*query)
if md is not None:
await index_dataset_metadata(
es, dataset, MetadataOut(**md.dict()), update=True
)
await index_dataset(es, dataset, update=True)
return await patch_metadata(md, content, es)
else:
raise HTTPException(
@@ -327,7 +323,7 @@ async def delete_dataset_metadata(
query.append(MetadataDB.agent.creator.id == agent.creator.id)

# delete from elasticsearch
delete_document_by_id(es, "metadata", str(metadata_in.metadata_id))
delete_document_by_id(es, "clowder", str(metadata_in.metadata_id))

md = await MetadataDB.find_one(*query)
if md is not None:
14 changes: 5 additions & 9 deletions backend/app/routers/metadata_files.py
@@ -29,7 +29,7 @@
MetadataDefinitionDB,
)
from app.search.connect import delete_document_by_id
from app.search.index import index_file_metadata
from app.search.index import index_file

router = APIRouter()

@@ -150,7 +150,7 @@ async def add_file_metadata(
await md.insert()

# Reindex the file so the new metadata entry is searchable
await index_file_metadata(es, FileOut(**file.dict()), MetadataOut(**md.dict()))
await index_file(es, FileOut(**file.dict()))
return md.dict()


@@ -220,9 +220,7 @@ async def replace_file_metadata(
await md.save()

# Reindex the file so the replaced metadata is searchable
await index_file_metadata(
es, FileOut(**file.dict()), MetadataOut(**md.dict()), update=True
)
await index_file(es, FileOut(**file.dict()), update=True)
return md.dict()
else:
raise HTTPException(status_code=404, detail=f"No metadata found to update")
@@ -320,9 +318,7 @@ async def update_file_metadata(

md = await MetadataDB.find_one(query)
if md:
await index_file_metadata(
es, FileOut(**file.dict()), MetadataOut(**md.dict()), update=True
)
await index_file(es, FileOut(**file.dict()), update=True)
return await patch_metadata(md, content, es)
else:
raise HTTPException(status_code=404, detail=f"No metadata found to update")
@@ -455,7 +451,7 @@ async def delete_file_metadata(
query.append(MetadataDB.agent.creator.id == agent.creator.id)

# delete from elasticsearch
delete_document_by_id(es, "metadata", str(metadata_in.metadata_id))
delete_document_by_id(es, "clowder", str(metadata_in.metadata_id))

if (md := await MetadataDB.find_one(*query)) is not None:
await md.delete()
1 change: 1 addition & 0 deletions backend/app/routers/visualization.py
@@ -70,6 +70,7 @@ async def add_Visualization(
file.file,
length=-1,
part_size=settings.MINIO_UPLOAD_CHUNK_SIZE,
content_type=visualization_db.content_type.content_type,
) # async write chunk to minio
visualization_db.bytes = len(
fs.get_object(settings.MINIO_BUCKET_NAME, str(visualization_id)).data