From 682db04441878146f836ba5ffa3dd29622af5bb3 Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Wed, 28 Feb 2024 22:16:33 +0530 Subject: [PATCH 1/4] Add file upload validation --- backend/file_management/serializer.py | 16 +++++- backend/utils/FileValidator.py | 78 +++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 backend/utils/FileValidator.py diff --git a/backend/file_management/serializer.py b/backend/file_management/serializer.py index 5efa0a665d..2c8982f446 100644 --- a/backend/file_management/serializer.py +++ b/backend/file_management/serializer.py @@ -1,5 +1,7 @@ from rest_framework import serializers +from utils.FileValidator import FileValidator + class FileInfoSerializer(serializers.Serializer): name = serializers.CharField() @@ -15,13 +17,23 @@ class FileListRequestSerializer(serializers.Serializer): class FileUploadSerializer(serializers.Serializer): - file = serializers.ListField(child=serializers.FileField(), required=True) + file = serializers.ListField( + child=serializers.FileField(), required=True, + validators=[FileValidator(allowed_extensions=['pdf'], + allowed_mimetypes=['application/pdf'], + min_size=0, + max_size=(10*1024*1024*1024))]) + # FileExtensionValidator(allowed_extensions=['pdf']) connector_id = serializers.UUIDField() path = serializers.CharField() class FileUploadIdeSerializer(serializers.Serializer): - file = serializers.ListField(child=serializers.FileField(), required=True) + file = serializers.ListField(child=serializers.FileField(), required=True, + validators=[FileValidator(allowed_extensions=['pdf'], + allowed_mimetypes=['application/pdf'], + min_size=0, + max_size=(10*1024*1024*1024))]) class FileInfoIdeSerializer(serializers.Serializer): diff --git a/backend/utils/FileValidator.py b/backend/utils/FileValidator.py new file mode 100644 index 0000000000..e902bb74b7 --- /dev/null +++ b/backend/utils/FileValidator.py @@ -0,0 +1,78 @@ +import magic +from os.path import splitext + +from django.core.exceptions import ValidationError +from django.template.defaultfilters import filesizeformat +from django.utils.translation import gettext_lazy as _ +from django.utils.translation import ngettext_lazy + + +class FileValidator(object): + """ + Validator for files, checking the size, extension and mimetype. + + Initialization parameters: + allowed_extensions: iterable with allowed file extensions + ie. ('txt', 'doc') + allowed_mimetypes: iterable with allowed mimetypes + ie. ('image/png', ) + min_size: minimum number of bytes allowed + ie. 100 + max_size: maximum number of bytes allowed + ie. 24*1024*1024 for 24 MB + + """ + + extension_message = _("Extension '%(extension)s' not allowed. Allowed extensions are: '%(allowed_extensions)s.'") + mime_message = _("MIME type '%(mimetype)s' is not valid. Allowed types are: %(allowed_mimetypes)s.") + min_size_message = _('The current file %(size)s, which is too small. The minumum file size is %(allowed_size)s.') + max_size_message = _('The current file %(size)s, which is too large. The maximum file size is %(allowed_size)s.') + + def __init__(self, *args, **kwargs): + self.allowed_extensions = kwargs.pop('allowed_extensions', None) + self.allowed_mimetypes = kwargs.pop('allowed_mimetypes', None) + self.min_size = kwargs.pop('min_size', 0) + self.max_size = kwargs.pop('max_size', None) + + def __call__(self, value): + """ + Check the extension, content type and file size for each file + """ + for file in value: + # Check the extension + ext = splitext(file.name)[1][1:].lower() + if self.allowed_extensions and not ext in self.allowed_extensions: + message = self.extension_message % { + 'extension' : ext, + 'allowed_extensions': ', '.join(self.allowed_extensions) + } + + raise ValidationError(message) + + # Check the content type + mimetype = magic.from_buffer(file.read(2048), mime=True) + if self.allowed_mimetypes and not mimetype in self.allowed_mimetypes: + message = self.mime_message % { + 'mimetype': mimetype, + 'allowed_mimetypes': ', '.join(self.allowed_mimetypes) + } + + raise ValidationError(message) + + # Check the file size + filesize = len(file) + if self.max_size and filesize > self.max_size: + message = self.max_size_message % { + 'size': filesizeformat(filesize), + 'allowed_size': filesizeformat(self.max_size) + } + + raise ValidationError(message) + + elif filesize < self.min_size: + message = self.min_size_message % { + 'size': filesizeformat(filesize), + 'allowed_size': filesizeformat(self.min_size) + } + + raise ValidationError(message) \ No newline at end of file From 7317bb27979cccae5783bdbbcd258925d1cc7b56 Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Wed, 28 Feb 2024 22:18:27 +0530 Subject: [PATCH 2/4] Code formatting --- backend/utils/FileValidator.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/backend/utils/FileValidator.py b/backend/utils/FileValidator.py index e902bb74b7..15331ab0e9 100644 --- a/backend/utils/FileValidator.py +++ b/backend/utils/FileValidator.py @@ -23,10 +23,14 @@ class FileValidator(object): """ - extension_message = _("Extension '%(extension)s' not allowed. Allowed extensions are: '%(allowed_extensions)s.'") - mime_message = _("MIME type '%(mimetype)s' is not valid. Allowed types are: %(allowed_mimetypes)s.") - min_size_message = _('The current file %(size)s, which is too small. The minumum file size is %(allowed_size)s.') - max_size_message = _('The current file %(size)s, which is too large. The maximum file size is %(allowed_size)s.') + extension_message = _("Extension '%(extension)s' not allowed. " + "Allowed extensions are: '%(allowed_extensions)s.'") + mime_message = _("MIME type '%(mimetype)s' is not valid. " + "Allowed types are: %(allowed_mimetypes)s.") + min_size_message = _('The current file %(size)s, which is too small. ' + 'The minumum file size is %(allowed_size)s.') + max_size_message = _('The current file %(size)s, which is too large. ' + 'The maximum file size is %(allowed_size)s.') def __init__(self, *args, **kwargs): self.allowed_extensions = kwargs.pop('allowed_extensions', None) From c78eba173c086fe2e0adcdec6598fe7e9abaece4 Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Wed, 28 Feb 2024 22:20:41 +0530 Subject: [PATCH 3/4] Code formatting --- backend/utils/FileValidator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/utils/FileValidator.py b/backend/utils/FileValidator.py index 15331ab0e9..62bbf22a56 100644 --- a/backend/utils/FileValidator.py +++ b/backend/utils/FileValidator.py @@ -55,7 +55,8 @@ def __call__(self, value): # Check the content type mimetype = magic.from_buffer(file.read(2048), mime=True) - if self.allowed_mimetypes and not mimetype in self.allowed_mimetypes: + if (self.allowed_mimetypes and + not mimetype in self.allowed_mimetypes): message = self.mime_message % { 'mimetype': mimetype, 'allowed_mimetypes': ', '.join(self.allowed_mimetypes) From bbbfa11d5702a916c2af10e3b1831603dfe1f0ef Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Thu, 29 Feb 2024 15:04:39 +0530 Subject: [PATCH 4/4] Change values to constants --- backend/file_management/constants.py | 3 +++ backend/file_management/serializer.py | 34 +++++++++++++++++++-------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/backend/file_management/constants.py b/backend/file_management/constants.py index 82c16e045d..e12a31d9b8 100644 --- a/backend/file_management/constants.py +++ b/backend/file_management/constants.py @@ -3,3 +3,6 @@ class FileInformationKey: FILE_TYPE = "type" FILE_LAST_MODIFIED = "LastModified" FILE_SIZE = "size" + FILE_UPLOAD_MAX_SIZE = 100 * 1024 * 1024 + FILE_UPLOAD_ALLOWED_EXTENSIONS = ['pdf'] + FILE_UPLOAD_ALLOWED_MIMETYPES = ['application/pdf'] \ No newline at end of file diff --git a/backend/file_management/serializer.py b/backend/file_management/serializer.py index 2c8982f446..dfd220cfa7 100644 --- a/backend/file_management/serializer.py +++ b/backend/file_management/serializer.py @@ -1,5 +1,6 @@ from rest_framework import serializers +from file_management.constants import FileInformationKey from utils.FileValidator import FileValidator @@ -18,22 +19,35 @@ class FileListRequestSerializer(serializers.Serializer): class FileUploadSerializer(serializers.Serializer): file = serializers.ListField( - child=serializers.FileField(), required=True, - validators=[FileValidator(allowed_extensions=['pdf'], - allowed_mimetypes=['application/pdf'], - min_size=0, - max_size=(10*1024*1024*1024))]) + child=serializers.FileField(), + required=True, + validators=[ + FileValidator( + allowed_extensions=FileInformationKey.FILE_UPLOAD_ALLOWED_EXTENSIONS, + allowed_mimetypes=FileInformationKey.FILE_UPLOAD_ALLOWED_MIMETYPES, + min_size=0, + max_size=FileInformationKey.FILE_UPLOAD_MAX_SIZE, + ) + ], + ) # FileExtensionValidator(allowed_extensions=['pdf']) connector_id = serializers.UUIDField() path = serializers.CharField() class FileUploadIdeSerializer(serializers.Serializer): - file = serializers.ListField(child=serializers.FileField(), required=True, - validators=[FileValidator(allowed_extensions=['pdf'], - allowed_mimetypes=['application/pdf'], - min_size=0, - max_size=(10*1024*1024*1024))]) + file = serializers.ListField( + child=serializers.FileField(), + required=True, + validators=[ + FileValidator( + allowed_extensions=FileInformationKey.FILE_UPLOAD_ALLOWED_EXTENSIONS, + allowed_mimetypes=FileInformationKey.FILE_UPLOAD_ALLOWED_MIMETYPES, + min_size=0, + max_size=FileInformationKey.FILE_UPLOAD_MAX_SIZE, + ) + ], + ) class FileInfoIdeSerializer(serializers.Serializer):