From 73fd1c8965a9399e6f66a8b9a9c0e9067fac1a73 Mon Sep 17 00:00:00 2001 From: Wille Marcel Date: Wed, 24 Sep 2025 14:06:05 -0300 Subject: [PATCH] Add Tabular and Vector data filters based on data/metadata JSON fields --- requirements.txt | 2 +- vbos/datasets/filters.py | 68 +++++++++++++++++++++++- vbos/datasets/test/test_tabular_views.py | 46 ++++++++++++++-- vbos/datasets/test/test_vector_views.py | 27 ++++++++-- vbos/datasets/views.py | 10 +++- 5 files changed, 142 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1c5bae5..cad6e2a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ django_unique_upload==0.2.1 # Rest apis djangorestframework==3.16.1 djangorestframework-gis==1.2.0 -django-filter==24.3 +django-filter==25.1 drf_spectacular==0.28.0 django-cors-headers==4.7.0 drf-excel==2.5.3 diff --git a/vbos/datasets/filters.py b/vbos/datasets/filters.py index a35997a..9dc83d1 100644 --- a/vbos/datasets/filters.py +++ b/vbos/datasets/filters.py @@ -1,13 +1,19 @@ from django_filters import ( FilterSet, - BooleanFilter, CharFilter, OrderingFilter, DateFromToRangeFilter, ModelChoiceFilter, ) -from .models import RasterDataset, VectorDataset, TabularDataset, Cluster +from .models import ( + RasterDataset, + TabularItem, + VectorDataset, + TabularDataset, + Cluster, + VectorItem, +) class DatasetFilter(FilterSet): @@ -42,3 +48,61 @@ class TabularDatasetFilter(DatasetFilter): class Meta: model = TabularDataset fields = ["name", "source", "cluster", "created", "updated"] + + +class TabularItemFilter(FilterSet): + filter = CharFilter( + field_name="data", + method="filter_metadata", + help_text="""Filter by the content of the data JSONField.""", + ) + + def split_values(self, value): + return [ + [i.strip() for i in t.split("=")] # remove leading and ending spaces + for t in value.split(",") + if len(t.split("=")) == 2 + ] + + def filter_metadata(self, queryset, name, value): + queries = self.split_values(value) + + if not queries: + return queryset + + for key, val in queries: + # For exact matching (current behavior) + try: + # Try numeric types + if "." in val: + filter_value = float(val) + else: + filter_value = int(val) + except ValueError: + # Handle booleans + if val.lower() in ["true", "false"]: + filter_value = val.lower() == "true" + else: + filter_value = val + + # Use exact lookup + lookup = f"{name}__{key}" + queryset = queryset.filter(**{lookup: filter_value}) + + return queryset + + class Meta: + model = TabularItem + fields = ["filter", "id"] + + +class VectorItemFilter(TabularItemFilter): + filter = CharFilter( + field_name="metadata", + method="filter_metadata", + help_text="""Filter by the content of the metadata JSONField.""", + ) + + class Meta: + model = VectorItem + fields = ["filter", "id"] diff --git a/vbos/datasets/test/test_tabular_views.py b/vbos/datasets/test/test_tabular_views.py index 8c122c6..813672e 100644 --- a/vbos/datasets/test/test_tabular_views.py +++ b/vbos/datasets/test/test_tabular_views.py @@ -80,19 +80,39 @@ def setUp(self): ) TabularItem.objects.create( dataset=self.dataset_2, - data={"employed_population": 0.75, "year": 2025, "month": 1}, + data={ + "employed_population": 0.75, + "year": 2025, + "month": 1, + "region": "North", + }, ) TabularItem.objects.create( dataset=self.dataset_2, - data={"employed_population": 0.85, "year": 2024, "month": 7}, + data={ + "employed_population": 0.85, + "year": 2024, + "month": 7, + "region": "North", + }, ) TabularItem.objects.create( dataset=self.dataset_2, - data={"employed_population": 0.82, "year": 2024, "month": 1}, + data={ + "employed_population": 0.82, + "year": 2024, + "month": 1, + "region": "South", + }, ) TabularItem.objects.create( dataset=self.dataset_2, - data={"employed_population": 0.80, "year": 2023, "month": 7}, + data={ + "employed_population": 0.80, + "year": 2023, + "month": 7, + "region": "East", + }, ) self.url = reverse("datasets:tabular-data", args=[self.dataset_1.id]) @@ -114,3 +134,21 @@ def test_tabular_datasets_data(self): assert req.data.get("results")[0]["employed_population"] == 0.75 assert req.data.get("results")[0]["month"] == 1 assert req.data.get("results")[0]["year"] == 2025 + + def test_filter_data(self): + url = reverse("datasets:tabular-data", args=[self.dataset_2.id]) + req = self.client.get(url, {"filter": "year=2024"}) + assert req.status_code == status.HTTP_200_OK + assert req.data.get("count") == 2 + + req = self.client.get(url, {"filter": "year=2024,month=1"}) + assert req.status_code == status.HTTP_200_OK + assert req.data.get("count") == 1 + + req = self.client.get(url, {"filter": "year__gte=2024,region__icontains=south"}) + assert req.status_code == status.HTTP_200_OK + assert req.data.get("count") == 1 + + req = self.client.get(url, {"filter": "region__icontains=north"}) + assert req.status_code == status.HTTP_200_OK + assert req.data.get("count") == 2 diff --git a/vbos/datasets/test/test_vector_views.py b/vbos/datasets/test/test_vector_views.py index d6dfb54..fd0db41 100644 --- a/vbos/datasets/test/test_vector_views.py +++ b/vbos/datasets/test/test_vector_views.py @@ -60,17 +60,17 @@ def setUp(self): VectorItem.objects.create( dataset=self.dataset_1, geometry=Point(80.5, 10.232), - metadata={"type": "administrative", "name": "Point 1"}, + metadata={"type": "administrative", "name": "Point 1", "area": 5000}, ) VectorItem.objects.create( dataset=self.dataset_1, geometry=LineString([(0, 0), (0, 3), (3, 3), (3, 0), (6, 6), (0, 0)]), - metadata={"type": "administrative", "name": "Line 123"}, + metadata={"type": "administrative", "name": "Line 123", "area": 5321}, ) VectorItem.objects.create( dataset=self.dataset_2, geometry=Polygon([(0, 0), (0, 3), (3, 3), (3, 0), (0, 0)]), - metadata={"type": "administrative", "name": "Area 1"}, + metadata={"type": "administrative", "name": "Area 1", "area": 3432}, ) self.url = reverse("datasets:vector-data", args=[self.dataset_1.id]) @@ -109,3 +109,24 @@ def test_filters(self): "coordinates": [80.5, 10.232], } assert req.data.get("features")[0]["properties"]["name"] == "Point 1" + + # filter by metadata + req = self.client.get(self.url, {"filter": "name__icontains=Point"}) + assert req.status_code == status.HTTP_200_OK + assert req.data.get("count") == 1 + + req = self.client.get(self.url, {"filter": "area__lt=5000"}) + assert req.status_code == status.HTTP_200_OK + assert req.data.get("count") == 0 + + req = self.client.get( + self.url, {"filter": "area__gte=5000, type=administrative"} + ) + assert req.status_code == status.HTTP_200_OK + assert req.data.get("count") == 2 + + req = self.client.get( + self.url, {"filter": "area__gte=5000", "in_bbox": "80,10,81,11"} + ) + assert req.status_code == status.HTTP_200_OK + assert req.data.get("count") == 1 diff --git a/vbos/datasets/views.py b/vbos/datasets/views.py index 85a0c8c..645144d 100644 --- a/vbos/datasets/views.py +++ b/vbos/datasets/views.py @@ -1,4 +1,5 @@ from django.shortcuts import render +import django_filters.rest_framework from rest_framework.generics import ListAPIView, RetrieveAPIView from rest_framework.permissions import IsAuthenticatedOrReadOnly from rest_framework_gis.pagination import GeoJsonPagination @@ -7,7 +8,9 @@ from vbos.datasets.filters import ( RasterDatasetFilter, TabularDatasetFilter, + TabularItemFilter, VectorDatasetFilter, + VectorItemFilter, ) from .models import ( @@ -70,7 +73,11 @@ class VectorDatasetDataView(ListAPIView): permission_classes = [IsAuthenticatedOrReadOnly] pagination_class = GeoJsonPagination bbox_filter_field = "geometry" - filter_backends = (InBBoxFilter,) + filterset_class = VectorItemFilter + filter_backends = ( + InBBoxFilter, + django_filters.rest_framework.DjangoFilterBackend, + ) def get_queryset(self): return VectorItem.objects.filter(dataset=self.kwargs.get("pk")) @@ -91,6 +98,7 @@ class TabularDatasetDetailView(RetrieveAPIView): class TabularDatasetDataView(ListAPIView): + filterset_class = TabularItemFilter permission_classes = [IsAuthenticatedOrReadOnly] def get_queryset(self):