Skip to content

Commit

Permalink
Merge pull request #788 from cortex-lab/v1.12
Browse files Browse the repository at this point in the history
* Batched process of datasets frame
* subject, revision and collection validation
* Validate nickname field only
  • Loading branch information
k1o0 committed May 11, 2023
2 parents d89c6b8 + b96cb31 commit e8ebe8e
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 124 deletions.
2 changes: 1 addition & 1 deletion alyx/alyx/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
VERSION = __version__ = '1.11.0'
VERSION = __version__ = '1.12.0'
2 changes: 2 additions & 0 deletions alyx/alyx/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import uuid
from collections import OrderedDict
from rest_framework import serializers
import one.alf.spec
from datetime import datetime

from django import forms
Expand All @@ -34,6 +35,7 @@

DATA_DIR = op.abspath(op.join(op.dirname(__file__), '../../data'))
DISABLE_MAIL = False # used for testing
ALF_SPEC = dict(one.alf.spec._DEFAULT) # Regex for ALF part validation


class CharNullField(models.CharField):
Expand Down
21 changes: 17 additions & 4 deletions alyx/data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from alyx.settings import TIME_ZONE, AUTH_USER_MODEL
from actions.models import Session
from alyx.base import BaseModel, modify_fields, BaseManager, CharNullField, BaseQuerySet
from alyx.base import BaseModel, modify_fields, BaseManager, CharNullField, BaseQuerySet, ALF_SPEC

logger = structlog.get_logger(__name__)

Expand Down Expand Up @@ -246,7 +246,11 @@ class Revision(BaseModel):
Dataset revision information
"""
objects = NameManager()
name = models.CharField(max_length=255, blank=True, help_text="Long name", unique=True)
name_validator = RegexValidator(f"^{ALF_SPEC['revision']}$",
"Revisions must only contain letters, "
"numbers, hyphens, underscores and forward slashes.")
name = models.CharField(max_length=255, blank=True, help_text="Long name",
unique=True, validators=[name_validator])
description = models.CharField(max_length=1023, blank=True)
created_datetime = models.DateTimeField(blank=True, null=True, default=timezone.now,
help_text="created date")
Expand All @@ -257,6 +261,10 @@ class Meta:
def __str__(self):
return "<Revision %s>" % self.name

def save(self, *args, **kwargs):
    # Run field validators (e.g. the RegexValidator on `name`) before saving;
    # Django does not call clean_fields() automatically from save().
    self.clean_fields()
    return super(Revision, self).save(*args, **kwargs)


class DatasetQuerySet(BaseQuerySet):
"""A Queryset that checks for protected datasets before deletion"""
Expand Down Expand Up @@ -316,9 +324,13 @@ class Dataset(BaseExperimentalData):
version = models.CharField(blank=True, null=True, max_length=64,
help_text="version of the algorithm generating the file")

# while the collection is seen more as a data revision
# the collection comprises session sub-folders
collection_validator = RegexValidator(f"^{ALF_SPEC['collection']}$",
"Collections must only contain letters, "
"numbers, hyphens, underscores and forward slashes.")
collection = models.CharField(blank=True, null=True, max_length=255,
help_text='file subcollection or subfolder')
help_text='file subcollection or subfolder',
validators=[collection_validator])

dataset_type = models.ForeignKey(
DatasetType, blank=False, null=False, on_delete=models.SET_DEFAULT,
Expand Down Expand Up @@ -372,6 +384,7 @@ def save(self, *args, **kwargs):
super(Dataset, self).save(*args, **kwargs)
if self.collection is None:
return
self.clean_fields() # Validate collection field
from experiments.models import ProbeInsertion
parts = self.collection.rsplit('/')
if len(parts) > 1:
Expand Down
14 changes: 13 additions & 1 deletion alyx/data/tests.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from django.test import TestCase
from django.core.exceptions import ValidationError
from django.db import transaction
from django.db.utils import IntegrityError
from django.db.models import ProtectedError

from data.models import Dataset, DatasetType, Tag
from data.models import Dataset, DatasetType, Tag, Revision
from subjects.models import Subject
from misc.models import Lab
from data.transfers import get_dataset_type
Expand All @@ -25,6 +26,11 @@ def test_generic_foreign_key(self):

self.assertIs(dset.content_object, subj)

def test_validation(self):
    # Expect raises when using special characters.
    # Dataset.save() calls clean_fields() when a collection is set, so a
    # collection containing characters outside the ALF collection spec
    # ('~', '.', '*') should raise a ValidationError on create.
    self.assertRaises(ValidationError, Dataset.objects.create,
                      name='toto.npy', collection='~alf/.*')

def test_delete(self):
(dset, _) = Dataset.objects.get_or_create(name='foo.npy')
(tag, _) = Tag.objects.get_or_create(name='protected_tag', protected=True)
Expand Down Expand Up @@ -69,3 +75,9 @@ def test_model_methods(self):
for filename, dataname in filename_typename:
with self.subTest(filename=filename):
self.assertEqual(get_dataset_type(filename, dtypes).name, dataname)


class TestRevisionModel(TestCase):
    """Tests for name validation on the Revision model."""

    def test_validation(self):
        # Expect raises when using special characters: Revision.save() runs
        # clean_fields(), which applies the RegexValidator on the name field,
        # so '#' and '.' in the name should raise a ValidationError.
        self.assertRaises(ValidationError, Revision.objects.create, name='#2022-01-01.#')
8 changes: 4 additions & 4 deletions alyx/data/tests_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,12 +371,12 @@ def _assert_registration(self, r, data):
self.assertEqual(d1['data_format'], 'e2')

self.assertEqual(d0['file_records'][0]['data_repository'], 'dr')
self.assertEqual(d0['file_records'][0]['relative_path'],
PurePosixPath(data['path'], 'a.b.e1').as_posix())
self.assertEqual(PurePosixPath(data['path'], 'a.b.e1').as_posix(),
d0['file_records'][0]['relative_path'])

self.assertEqual(d1['file_records'][0]['data_repository'], 'dr')
self.assertEqual(d1['file_records'][0]['relative_path'],
PurePosixPath(data['path'], 'a.c.e2').as_posix())
self.assertEqual(PurePosixPath(data['path'], 'a.c.e2').as_posix(),
d1['file_records'][0]['relative_path'])

def test_register_existence_options(self):

Expand Down
58 changes: 29 additions & 29 deletions alyx/data/transfers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from django.db.models import Case, When, Count, Q, F
import globus_sdk
import numpy as np
from one.alf.files import add_uuid_string
from one.alf.files import add_uuid_string, folder_parts
from one.registration import get_dataset_type

from alyx import settings
Expand Down Expand Up @@ -187,36 +187,36 @@ def _get_repositories_for_labs(labs, server_only=False):
return list(repositories)


def _get_name_collection_revision(file, rel_dir_path):
    """
    Extract subject, date, number, collection and revision parts from the full file path.

    :param file: The filename
    :param rel_dir_path: The relative path (subject/date/number/collection/revision)
    :return: a tuple of (dict of path parts, None) on success, or
             (None, REST Response object with status 400) if the ALF path is invalid
    """
    fullpath = Path(rel_dir_path).joinpath(file)
    try:
        # Parse subject/date/number/collection/revision from the directory part
        info = folder_parts(fullpath.parent, as_dict=True)
        if info['revision'] is not None:
            # A revision folder ('#<revision>#') must be the last directory in the path
            path_parts = fullpath.parent.parts
            assert path_parts.index(f"#{info['revision']}#") == len(path_parts) - 1
    except AssertionError:
        # NB: trailing space before 'sub folders' is required — these adjacent
        # literals are concatenated into a single message.
        data = {'status_code': 400,
                'detail': 'Invalid ALF path. There must be only 1 revision and it cannot contain '
                          'sub folders. A revision folder must be surrounded by pound signs (#).'}
        return None, Response(data=data, status=400)
    except ValueError:
        data = {'status_code': 400,
                'detail': 'Invalid ALF path. Only letters, numbers, hyphens and underscores '
                          'allowed. A revision folder must be surrounded by pound signs (#).'}
        return None, Response(data=data, status=400)

    info['full_path'] = fullpath.as_posix()
    info['filename'] = fullpath.name
    info['rel_dir_path'] = '{subject}/{date}/{number}'.format(**info)
    return info, None


Expand Down
20 changes: 8 additions & 12 deletions alyx/data/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from rest_framework import generics, viewsets, mixins, serializers
from rest_framework.response import Response
import django_filters
from one.alf.spec import regex

from alyx.base import BaseFilterSet, rest_permission_classes
from subjects.models import Subject, Project
Expand Down Expand Up @@ -312,18 +313,13 @@ def _make_dataset_response(dataset):


def _parse_path(path):
    """
    Parse a session-relative path of the form 'subject/YYYY-MM-DD/number[/...]'.

    :param path: the relative path string to parse
    :return: tuple of (Subject instance, date string, session number int)
    :raises ValueError: if the path does not match the ALF session pattern
    :raises Subject.DoesNotExist: if no subject with the parsed nickname exists
    """
    # Build the session-path regex from the ALF spec instead of hard-coding it
    pattern = regex(spec='{subject}/{date}/{number}').pattern + '.*'
    m = re.match(pattern, path)
    if not m:
        raise ValueError("The path %s should be `nickname/YYYY-MM-DD/n/...`" % path)
    date = m.group('date')
    nickname = m.group('subject')
    session_number = int(m.group('number'))
    # An error is raised if the subject or data repository do not exist.
    subject = Subject.objects.get(nickname=nickname)
    return subject, date, session_number
Expand Down Expand Up @@ -468,7 +464,7 @@ def create(self, request):
protected = []
for file in filenames:

info, resp = _get_name_collection_revision(file, rel_dir_path, subject, date)
info, resp = _get_name_collection_revision(file, rel_dir_path)

if resp:
return resp
Expand All @@ -488,7 +484,7 @@ def create(self, request):
for filename, hash, fsize, version in zip(filenames, hashes, filesizes, versions):
if not filename:
continue
info, resp = _get_name_collection_revision(filename, rel_dir_path, subject, date)
info, resp = _get_name_collection_revision(filename, rel_dir_path)

if resp:
return resp
Expand Down Expand Up @@ -575,11 +571,11 @@ class DownloadDetail(generics.RetrieveUpdateAPIView):


class DownloadFilter(BaseFilterSet):
json = django_filters.CharFilter(field_name='json', lookup_expr=('icontains'))
json = django_filters.CharFilter(field_name='json', lookup_expr='icontains')
dataset = django_filters.CharFilter('dataset__name')
user = django_filters.CharFilter('user__username')
dataset_type = django_filters.CharFilter(field_name='dataset__dataset_type__name',
lookup_expr=('icontains'))
lookup_expr='icontains')

class Meta:
model = Download
Expand Down
Loading

0 comments on commit e8ebe8e

Please sign in to comment.