Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new file info paths #14

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions opsml/app/routes/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import json
from pathlib import Path
from typing import Optional, cast

from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse, RedirectResponse, StreamingResponse
from fastapi.templating import Jinja2Templates

from opsml import DataInterface
from opsml.app.routes.files import download_artifacts_ui, download_file
from opsml.app.routes.pydantic_models import CardRequest, DataCardMetadata
from opsml.app.routes.route_helpers import DataRouteHelper
from opsml.app.routes.utils import error_to_500
from opsml.cards.data import DataCard
Expand Down Expand Up @@ -98,3 +101,45 @@ def download_data_profile(
datacard = cast(DataCard, registry.load_card(uid=uid))
load_path = Path(datacard.uri / SaveName.DATA_PROFILE.value).with_suffix(Suffix.HTML.value)
return download_file(request, str(load_path))


@router.post("/data/card", name="data_card", response_model=DataCardMetadata)
def get_data_card(request: Request, payload: CardRequest) -> DataCardMetadata:
    """Load a data card and return its metadata

    Args:
        request:
            Incoming request. The data registry is read from
            ``request.app.state.registries.data``.
        payload:
            Card lookup fields (name, repository, version, uid) that are
            forwarded to the registry.

    Returns:
        `DataCardMetadata` for the loaded card. `data_splits` and
        `feature_map` are JSON strings and, like `sql_logic`, are only
        populated when the card interface is a `DataInterface`.
    """

    registry: CardRegistry = request.app.state.registries.data

    card = cast(
        DataCard,
        registry.load_card(
            name=payload.name,
            repository=payload.repository,
            version=payload.version,
            uid=payload.uid,
        ),
    )

    data_splits = None
    sql_logic = None
    feature_map = None

    if isinstance(card.interface, DataInterface):
        # if data_splits are not None, convert to json string
        if card.interface.data_splits is not None:
            splits = [data_split.model_dump() for data_split in card.interface.data_splits]
            data_splits = json.dumps(splits, indent=4)

        # if feature_map is not None, convert to json string
        if card.metadata.feature_map is not None:
            features = {key: val.model_dump() for key, val in card.metadata.feature_map.items()}
            feature_map = json.dumps(features, indent=4)

        sql_logic = card.interface.sql_logic

    return DataCardMetadata(
        name=card.name,
        repository=card.repository,
        contact=card.contact,
        version=card.version,
        uid=card.uid,
        interface_type=card.metadata.interface_type,
        data_splits=data_splits,
        feature_map=feature_map,
        # DataCardMetadata.sql_logic is a non-optional dict: passing None
        # (e.g. when the interface is not a DataInterface) fails validation,
        # so fall back to an empty mapping.
        sql_logic=sql_logic or {},
    )
13 changes: 11 additions & 2 deletions opsml/app/routes/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import io
import json
import tempfile
import zipfile as zp
from pathlib import Path
from typing import Dict
from typing import Any, Dict, List, Optional

import streaming_form_data
from fastapi import APIRouter, Depends, HTTPException, Request, status
Expand All @@ -15,6 +16,7 @@
from streaming_form_data import StreamingFormDataParser
from streaming_form_data.validators import MaxSizeValidator

from opsml import CardRegistry
from opsml.app.core.dependencies import (
reverse_swap_opsml_root,
swap_opsml_root,
Expand All @@ -23,23 +25,30 @@
from opsml.app.routes.pydantic_models import (
DeleteFileResponse,
FileExistsResponse,
FileViewResponse,
ListFileInfoResponse,
ListFileResponse,
ReadMeRequest,
)
from opsml.app.routes.utils import (
ExternalFileTarget,
MaxBodySizeException,
MaxBodySizeValidator,
calculate_file_size,
)
from opsml.helpers.logging import ArtifactLogger
from opsml.settings.config import config
from opsml.storage.client import StorageClientBase
from opsml.types import RegistryTableNames
from opsml.types.extra import PresignableTypes

# Module-level logger obtained from the project's ArtifactLogger
logger = ArtifactLogger.get_logger()


# Size limits, expressed in bytes
MAX_FILE_SIZE = 1024 * 1024 * 1024 * 50  # = 50GB
MAX_VIEWSIZE = 1024 * 1024 * 2  # = 2MB
# File limit plus 1024 extra bytes
# NOTE(review): presumably headroom for non-file parts of the request body -- confirm
MAX_REQUEST_BODY_SIZE = MAX_FILE_SIZE + 1024

# NOTE(review): unit is not visible here; presumably seconds -- confirm against usage
PRESIGN_DEFAULT_EXPIRATION = 60
router = APIRouter()


Expand Down
51 changes: 51 additions & 0 deletions opsml/app/routes/pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,38 @@ class ListFileResponse(BaseModel):
files: List[str]


class FileInfo(BaseModel):
    """Metadata describing a single file entry.

    NOTE(review): several field names (islink, mode, uid, gid, mtime, ino,
    nlink) look like the keys of a filesystem "detail" listing -- confirm
    against the code that builds this model.
    """

    uri: str
    name: str
    # Declared as a string rather than an integer byte count
    # NOTE(review): presumably a human readable size -- confirm
    size: str
    type: str
    created: float
    islink: bool
    mode: int
    uid: int
    gid: int
    mtime: float
    ino: int
    nlink: int
    # Defaults to False when not provided
    viewable: bool = False
    # Optional; defaults to None when not provided
    suffix: Optional[str] = None


class ViewContent(BaseModel):
    """Optional content of a file together with an optional view type.

    Both fields default to None.
    """

    content: Optional[str] = None
    view_type: Optional[str] = None


class FileViewResponse(BaseModel):
    """Pairs a file's `FileInfo` metadata with its `ViewContent`."""

    file_info: FileInfo
    content: ViewContent


class ListFileInfoResponse(BaseModel):
    """A list of `FileInfo` entries plus a single `mtime` value.

    NOTE(review): which path `mtime` refers to is not visible here -- confirm
    against the route that builds this response.
    """

    files: List[FileInfo]
    mtime: float


class DeleteFileResponse(BaseModel):
deleted: bool

Expand Down Expand Up @@ -253,6 +285,18 @@ class CompareMetricResponse(BaseModel):
report: Dict[str, List[BattleReport]]


class DataCardMetadata(BaseModel):
    """Metadata returned for a data card (response model of the `data_card` route)."""

    name: str
    version: str
    repository: str
    contact: str
    uid: str
    interface_type: str
    # JSON-encoded string of the interface data splits; None when not available
    data_splits: Optional[str] = None
    # Not optional: must be a dict, defaults to an empty one
    sql_logic: Dict[str, str] = {}
    # JSON-encoded string of the card feature map; None when not available
    feature_map: Optional[str] = None


def form_body(cls: Any) -> Any:
args = []
params = cls.__signature__.parameters
Expand Down Expand Up @@ -403,3 +447,10 @@ class MetricRequest(BaseModel):

class MetricResponse(BaseModel):
    """Response model wrapping a `Metrics` value."""

    metrics: Metrics


class ReadMeRequest(BaseModel):
    """Request body with four required string fields.

    NOTE(review): presumably carries README `content` for the card identified
    by `name`, `repository` and `registry_type` -- confirm against the route
    that consumes it.
    """

    name: str
    repository: str
    registry_type: str
    content: str
3 changes: 2 additions & 1 deletion opsml/app/routes/route_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,11 +575,12 @@ def load_graphs(self, runcard: RunCard) -> Dict[str, Any]:
return loaded_graphs

paths = client.storage_client.ls(graph_path)
paths = cast(List[Path], paths)
logger.debug("Found {} graphs in {}", paths, graph_path)
if paths:
with tempfile.TemporaryDirectory() as tmp_dir:
for path in paths:
rpath = graph_path / Path(path).name
rpath = graph_path / path.name
lpath = Path(tmp_dir) / rpath.name
client.storage_client.get(rpath, lpath)
graph: Dict[str, Any] = joblib.load(lpath)
Expand Down
18 changes: 18 additions & 0 deletions opsml/app/routes/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,3 +351,21 @@ def write_records_to_csv(
media_type="text/csv",
headers={"Content-Disposition": "filename=audit_file.csv"},
)


def calculate_file_size(size: int) -> str:
    """Formats a byte count as a human readable string

    Values below 1024 are reported as whole bytes. Larger values are scaled
    by powers of 1024 and rendered with two decimals, using GB as the largest
    unit.

    Args:
        size:
            File size in bytes
    Returns:
        Human readable file size
    """
    if size < 1024:
        return f"{size} B"

    # (power of 1024, unit label) for the bounded units; GB is the fallback
    for power, label in ((1, "KB"), (2, "MB")):
        if size < 1024 ** (power + 1):
            return f"{size / 1024 ** power:.2f} {label}"

    return f"{size / 1024 ** 3:.2f} GB"
19 changes: 14 additions & 5 deletions opsml/storage/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import warnings
from functools import cached_property
from pathlib import Path
from typing import Any, BinaryIO, Iterator, List, Optional, Protocol, cast
from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Protocol, Union, cast

from fsspec.implementations.local import LocalFileSystem

Expand Down Expand Up @@ -58,8 +58,10 @@ class _FileSystemProtocol(Protocol):
def get(self, lpath: str, rpath: str, recursive: bool) -> None:
"""Copies file(s) from remote path (rpath) to local path (lpath)"""

def ls(self, path: str) -> List[str]: # pylint: disable=invalid-name
"""Lists files"""
def ls(  # pylint: disable=invalid-name
    self, path: str, detail: bool = False
) -> Union[List[str], List[Dict[str, Any]]]:
    # Lists files under `path`. Per the return annotation the result is either
    # a list of strings or a list of dicts.
    # NOTE(review): presumably `detail=True` selects the list-of-dicts form -- confirm
    pass

def find(self, path: str) -> List[str]:
"""Recursively list all files excluding directories"""
Expand Down Expand Up @@ -112,8 +114,15 @@ def get(self, rpath: Path, lpath: Path) -> None:

self.client.get(rpath=abs_rpath, lpath=abs_lpath, recursive=recursive)

def ls(self, path: Path) -> List[Path]:
return [Path(p) for p in self.client.ls(str(path))]
def ls(self, path: Path, detail: bool = False) -> Union[List[Path], List[Dict[str, Any]]]:
    """Lists the entries under `path` using the underlying client

    Args:
        path:
            Path to list
        detail:
            Forwarded to the client. When True the client result is returned
            unchanged (typed as a list of dicts); otherwise each entry is
            wrapped in a `Path`.
    Returns:
        List of `Path` objects, or list of dicts when `detail` is True
    """
    listing = self.client.ls(str(path), detail=detail)

    if not detail:
        return [Path(entry) for entry in cast(List[str], listing)]

    return cast(List[Dict[str, Any]], listing)

def find(self, path: Path) -> List[Path]:
return [Path(p) for p in self.client.find(str(path))]
Expand Down
17 changes: 17 additions & 0 deletions opsml/types/extra.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,23 @@ def from_str(name: str) -> "GraphStyle":
raise ValueError(f"GraphStyle {name} not found")


class PresignableTypes(str, Enum):
    """File suffixes, each including the leading dot.

    NOTE(review): presumably the suffixes for which a presigned link may be
    generated -- confirm against the code that consumes this enum.
    """

    JPEG = ".jpeg"
    JPG = ".jpg"
    PNG = ".png"
    PDF = ".pdf"
    MD = ".md"
    TEXT = ".txt"
    CSV = ".csv"
    JSON = ".json"
    TIFF = ".tiff"
    GIF = ".gif"
    MP4 = ".mp4"
    PY = ".py"
    YML = ".yml"
    YAML = ".yaml"


class UserScope(BaseModel):
read: bool = True
write: bool = False
Expand Down
4 changes: 3 additions & 1 deletion opsml/types/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ class StorageClientProtocol(Protocol):
def get(self, rpath: Path, lpath: Path) -> None:
"""Copies file(s) from remote path (rpath) to local path (lpath)"""

def ls(self, path: Path) -> List[Path]: # pylint: disable=invalid-name
def ls(  # pylint: disable=invalid-name
    self, path: Path, detail: bool = False
) -> Union[List[Path], List[Dict[str, Any]]]:
    """Lists files in directory (not recursive)

    Per the return annotation the result is either a list of paths or a
    list of dicts.
    NOTE(review): presumably `detail=True` selects the list-of-dicts form -- confirm
    """

def find(self, path: Path) -> List[Path]:
Expand Down
Loading
Loading