diff --git a/.github/workflows/py_analysis-coverage.yml b/.github/workflows/py_analysis-coverage.yml index 2b33147e..27739eee 100644 --- a/.github/workflows/py_analysis-coverage.yml +++ b/.github/workflows/py_analysis-coverage.yml @@ -2,7 +2,7 @@ name: (Py)Analysis & Coverage on: pull_request: - branches: [master] + branches: [main] types: [opened, edited, reopened, synchronize] paths: - 'nc_py_api/*.*' @@ -10,7 +10,7 @@ on: - 'setup.*' - 'pyproject.toml' push: - branches: [master] + branches: [main] paths: - 'nc_py_api/*.*' - 'tests/nc_py_api/**' diff --git a/CHANGELOG.md b/CHANGELOG.md index 2fca498c..8d07775a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,20 @@ _# Changelog All notable changes to this project will be documented in this file. -## [0.0.7 - 2022-12-11] +## [0.0.7 - 2022-12-12] ### Added -- Python, FS: `list_directory`, `get_file_data` functions. +- FS functions: + * `fs_apply_exclude_lists` + * `fs_apply_ignore_flags` + * `fs_extract_sub_dirs` + * `fs_filter_by` + * `fs_get_file_data` + * `fs_get_obj_info` + * `fs_get_objs_info` + * `fs_list_directory` + * `fs_sort_by_id` ### Changed diff --git a/nc_py_api/__init__.py b/nc_py_api/__init__.py index 398b1410..6676c641 100644 --- a/nc_py_api/__init__.py +++ b/nc_py_api/__init__.py @@ -1,9 +1,20 @@ -from . import signal_handler +from . 
# Column list shared by the file-cache SELECT queries in this module.
# Each column is listed once; `fcache.storage` used to be selected twice.
FIELD_NAME_LIST = (
    "fcache.fileid, fcache.storage, fcache.path, fcache.name, "
    "fcache.mimetype, fcache.mimepart, "
    "fcache.size, fcache.mtime, fcache.encrypted, fcache.etag, fcache.permissions, fcache.checksum"
)


def get_paths_by_ids(file_ids: list) -> list:
    """For each element of list in file_ids return [path, fileid, storage].

    Order of file_ids is not preserved: rows are ordered by ascending fileid.
    An empty `file_ids` returns an empty list without querying the database.
    """

    if not file_ids:
        # "IN ()" is not valid SQL on every backend, so skip the query entirely.
        return []
    query = (
        "SELECT fcache.path, fcache.fileid, fcache.storage "
        f"FROM {TABLES.file_cache} AS fcache "
        f"WHERE fileid IN ({','.join(str(x) for x in file_ids)}) "
        "ORDER BY fcache.fileid ASC;"
    )
    return execute_fetchall(query)


def get_fileid_info(file_id: int) -> dict:
    """Returns dictionary with information for given file id.

    The dictionary is the first row fetched for `file_id` with the columns of
    `FIELD_NAME_LIST`; an empty dictionary is returned when no row is fetched.
    """

    # NOTE(review): `file_id` is interpolated into the SQL text, callers must pass an integer.
    query = f"SELECT {FIELD_NAME_LIST} FROM {TABLES.file_cache} AS fcache WHERE fcache.fileid = {file_id};"
    result = execute_fetchall(query)
    return result[0] if result else {}


def get_fileids_info(file_ids: list[int]) -> list[dict]:
    """Returns dictionaries with information for given file ids.

    Each fetched row carries the columns of `FIELD_NAME_LIST`. No ordering is requested
    from the database. An empty `file_ids` returns an empty list without querying.
    """

    if not file_ids:
        # "IN ()" is not valid SQL on every backend, so skip the query entirely.
        return []
    query = (
        f"SELECT {FIELD_NAME_LIST} FROM {TABLES.file_cache} AS fcache "
        f"WHERE fcache.fileid IN ({','.join(str(x) for x in file_ids)});"
    )
    return execute_fetchall(query)
def fs_get_obj_info(file_id: int) -> Optional[FsNodeInfo]:
    """Return the node description for `file_id`, or ``None`` when the lookup gives an empty record."""

    record = get_fileid_info(file_id)
    return db_record_to_fs_node(record) if record else None


def fs_get_objs_info(file_ids: list[int]) -> list[FsNodeInfo]:
    """Return one node description per record fetched for `file_ids`."""

    return list(map(db_record_to_fs_node, get_fileids_info(file_ids)))
def fs_apply_exclude_lists(fs_objs: list[FsNodeInfo], excl_file_ids: list[int], excl_mask: list[str]) -> None:
    """Remove from `fs_objs`, in place, every entry matched by one of the exclude lists.

    An entry is removed when its ``id`` is in `excl_file_ids`, or when the base name of its
    ``internal_path`` matches one of the fnmatch patterns in `excl_mask`.

    :param fs_objs: list to purge; the same list object is modified, order of the rest is kept.
    :param excl_file_ids: file ids to remove.
    :param excl_mask: fnmatch patterns tested against the base name of ``internal_path``.
    """

    excluded_ids = set(excl_file_ids)  # constant-time membership instead of a list scan per entry
    # Slice assignment keeps the caller's list object while dropping all matches in one pass.
    fs_objs[:] = [
        fs_obj
        for fs_obj in fs_objs
        if fs_obj["id"] not in excluded_ids and not is_path_in_exclude(fs_obj["internal_path"], excl_mask)
    ]


def fs_extract_sub_dirs(fs_objs: list[FsNodeInfo]) -> list[FsNodeInfo]:
    """Move all directory entries out of `fs_objs` and return them.

    Entries whose ``mimetype`` equals `mimetype.DIR` are removed from `fs_objs` (in place)
    and returned as a new list; both lists keep the original relative order.
    """

    sub_dirs = []
    remaining = []
    for fs_obj in fs_objs:
        if fs_obj["mimetype"] == mimetype.DIR:
            sub_dirs.append(fs_obj)
        else:
            remaining.append(fs_obj)
    fs_objs[:] = remaining
    return sub_dirs


def fs_apply_ignore_flags(fs_objs: list[FsNodeInfo]) -> None:
    """Apply the ``.noimage`` / ``.nomedia`` flag files to `fs_objs`, in place.

    When an entry named ``.noimage`` or ``.nomedia`` is present, all entries whose ``mimepart``
    is `mimetype.IMAGE` or `mimetype.VIDEO` are removed, and so are the flag files themselves.
    Without a flag file the list is left untouched.
    """

    flag_names = (".noimage", ".nomedia")
    if any(fs_obj["name"] in flag_names for fs_obj in fs_objs):
        fs_filter_by(fs_objs, "mimepart", [mimetype.IMAGE, mimetype.VIDEO], reverse_filter=True)
        fs_apply_exclude_lists(fs_objs, [], list(flag_names))


def fs_filter_by(fs_objs: list[FsNodeInfo], field: FsNodeInfoField, values: list, reverse_filter=False) -> None:
    """Filter `fs_objs` in place by the value of one field.

    :param fs_objs: list to filter; the same list object is modified, order of the rest is kept.
    :param field: key of the entries to look at.
    :param values: values `field` is compared against.
    :param reverse_filter: when false (default) only entries whose `field` value is in `values`
        are kept; when true those entries are the ones removed.
    """

    drop_listed = bool(reverse_filter)
    # Keep an entry when "its value is listed" disagrees with "listed values are dropped".
    fs_objs[:] = [fs_obj for fs_obj in fs_objs if (fs_obj[field] in values) != drop_listed]


def fs_sort_by_id(fs_objs: list[FsNodeInfo]) -> list[FsNodeInfo]:
    """Return a new list with the entries of `fs_objs` ordered by ascending ``id``; the input is not modified."""

    return sorted(fs_objs, key=lambda i: i["id"])


def fs_get_file_data(file_info: FsNodeInfo) -> bytes:
    """Return the content of the file described by `file_info`.

    When ``direct_access`` is set, the file at ``abs_path`` is read in binary mode. Any exception
    raised while reading is logged and, as for entries without ``direct_access``, the result of
    `request_file_from_php` is returned instead.
    """

    if file_info["direct_access"]:
        try:
            with open(file_info["abs_path"], "rb") as h_file:
                return h_file.read()
        except Exception:  # noqa # pylint: disable=broad-except
            # Deliberate best effort: a failed direct read falls through to the php request below.
            log.exception("Exception during reading %s", file_info["abs_path"])
    return request_file_from_php(file_info)


def db_record_to_fs_node(fs_record: dict) -> FsNodeInfo:
    """Build an `FsNodeInfo` dictionary from one file-cache record.

    :param fs_record: record with the keys ``fileid``, ``storage``, ``path``, ``name``, ``mimetype``,
        ``mimepart``, ``size``, ``permissions``, ``mtime``, ``checksum``, ``encrypted`` and ``etag``.
    """

    storage_id = fs_record["storage"]
    return {
        "id": fs_record["fileid"],
        "is_dir": fs_record["mimetype"] == mimetype.DIR,
        "is_local": is_local_storage(storage_id),
        "mimetype": fs_record["mimetype"],
        "mimepart": fs_record["mimepart"],
        "name": fs_record["name"],
        "internal_path": fs_record["path"],
        "abs_path": get_file_full_path(storage_id, fs_record["path"]),
        "size": fs_record["size"],
        "permissions": fs_record["permissions"],
        "mtime": fs_record["mtime"],
        "checksum": fs_record["checksum"],
        "encrypted": fs_record["encrypted"],
        "etag": fs_record["etag"],
        "ownerName": get_storage_user_id(storage_id),
        "storageId": storage_id,
        "mountId": get_storage_root_id(storage_id),
        "direct_access": can_directly_access_file(fs_record),
    }


def is_path_in_exclude(fs_path: str, exclude_patterns: list[str]) -> bool:
    """Checks with fnmatch if the base name of `fs_path` matches any of `exclude_patterns`.

    Only the last path component is tested, directory parts are ignored.
    Returns ``True`` if yes, ``False`` otherwise (also for an empty pattern list).
    """

    name = path.basename(fs_path)
    return any(fnmatch(name, pattern) for pattern in exclude_patterns)