Skip to content

Commit

Permalink
Merge pull request #16 from demml/test-ci
Browse files Browse the repository at this point in the history
Update README.md
  • Loading branch information
thorrester authored May 7, 2024
2 parents fddc717 + 81433a5 commit 270c4c5
Show file tree
Hide file tree
Showing 10 changed files with 425 additions and 14 deletions.
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,4 @@ Thanks goes to these phenomenal [projects and people](./ATTRIBUTIONS.md) for cre

<a href="https://github.com/demml/opsml/graphs/contributors">
<img src="https://contrib.rocks/image?repo=shipt/opsml" />
</a>



</a>
45 changes: 45 additions & 0 deletions opsml/app/routes/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import json
from pathlib import Path
from typing import Optional, cast

from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse, RedirectResponse, StreamingResponse
from fastapi.templating import Jinja2Templates

from opsml import DataInterface
from opsml.app.routes.files import download_artifacts_ui, download_file
from opsml.app.routes.pydantic_models import CardRequest, DataCardMetadata
from opsml.app.routes.route_helpers import DataRouteHelper
from opsml.app.routes.utils import error_to_500
from opsml.cards.data import DataCard
Expand Down Expand Up @@ -98,3 +101,45 @@ def download_data_profile(
datacard = cast(DataCard, registry.load_card(uid=uid))
load_path = Path(datacard.uri / SaveName.DATA_PROFILE.value).with_suffix(Suffix.HTML.value)
return download_file(request, str(load_path))


@router.post("/data/card", name="data_card", response_model=DataCardMetadata)
def get_data_card(request: Request, payload: CardRequest) -> DataCardMetadata:
    """Load a data card and return the metadata shown for it.

    Args:
        request:
            Request object. The data registry is read from
            `request.app.state.registries.data`.
        payload:
            `CardRequest` whose `name`, `repository`, `version` and `uid`
            are forwarded to `registry.load_card`.

    Returns:
        `DataCardMetadata`. `data_splits` and `feature_map` are JSON strings
        (indent=4) or None; they, and `sql_logic`, are only populated when the
        card's interface is a `DataInterface`.
    """

    registry: CardRegistry = request.app.state.registries.data

    # cast mirrors how download_data_profile narrows the loaded card
    card = cast(
        DataCard,
        registry.load_card(
            name=payload.name,
            repository=payload.repository,
            version=payload.version,
            uid=payload.uid,
        ),
    )

    data_splits: Optional[str] = None
    feature_map: Optional[str] = None
    # DataCardMetadata.sql_logic is a non-optional Dict[str, str]. Passing None
    # explicitly fails model validation, so default to an empty mapping for
    # cards whose interface is not a DataInterface.
    sql_logic: dict = {}

    interface = card.interface
    if isinstance(interface, DataInterface):
        # if data_splits are not None, convert to json string
        if interface.data_splits is not None:
            splits = [data_split.model_dump() for data_split in interface.data_splits]
            data_splits = json.dumps(splits, indent=4)

        if card.metadata.feature_map is not None:
            features = {key: val.model_dump() for key, val in card.metadata.feature_map.items()}
            feature_map = json.dumps(features, indent=4)

        # guard against an unset sql_logic for the same validation reason as above
        sql_logic = interface.sql_logic or {}

    return DataCardMetadata(
        name=card.name,
        repository=card.repository,
        contact=card.contact,
        version=card.version,
        uid=card.uid,
        interface_type=card.metadata.interface_type,
        data_splits=data_splits,
        feature_map=feature_map,
        sql_logic=sql_logic,
    )
164 changes: 162 additions & 2 deletions opsml/app/routes/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import io
import json
import tempfile
import zipfile as zp
from pathlib import Path
from typing import Dict
from typing import Any, Dict, List, Optional

import streaming_form_data
from fastapi import APIRouter, Depends, HTTPException, Request, status
Expand All @@ -15,6 +16,7 @@
from streaming_form_data import StreamingFormDataParser
from streaming_form_data.validators import MaxSizeValidator

from opsml import CardRegistry
from opsml.app.core.dependencies import (
reverse_swap_opsml_root,
swap_opsml_root,
Expand All @@ -23,23 +25,30 @@
from opsml.app.routes.pydantic_models import (
DeleteFileResponse,
FileExistsResponse,
FileViewResponse,
ListFileInfoResponse,
ListFileResponse,
ReadMeRequest,
)
from opsml.app.routes.utils import (
ExternalFileTarget,
MaxBodySizeException,
MaxBodySizeValidator,
calculate_file_size,
)
from opsml.helpers.logging import ArtifactLogger
from opsml.settings.config import config
from opsml.storage.client import StorageClientBase
from opsml.types import RegistryTableNames
from opsml.types.extra import PresignableTypes

logger = ArtifactLogger.get_logger()


MAX_FILE_SIZE = 1024 * 1024 * 1024 * 50  # = 50GB
# Size threshold (bytes) below which get_file_to_view returns a text file's
# content inline instead of a presigned URL.
MAX_VIEWSIZE = 1024 * 1024 * 2  # = 2MB
# Largest file plus 1024 bytes; presumably headroom for multipart form overhead — TODO confirm
MAX_REQUEST_BODY_SIZE = MAX_FILE_SIZE + 1024

# Passed as `expiration` to storage_client.generate_presigned_url.
# NOTE(review): unit is presumably seconds — confirm against the storage client.
PRESIGN_DEFAULT_EXPIRATION = 60
router = APIRouter()


Expand Down Expand Up @@ -278,3 +287,154 @@ def delete_files(request: Request, path: str) -> DeleteFileResponse:
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"There was an error deleting files. {error}",
) from error


@router.get("/files/list/info", name="list_files_info")
def list_files_info(request: Request, path: str, subdir: Optional[str] = None) -> ListFileInfoResponse:
    """Lists files under a path together with per-file info

    Args:
        request:
            request object
        path:
            path to read
        subdir:
            optional subdirectory of `path` to read

    Returns:
        `ListFileInfoResponse` holding one info record per file and the
        largest `mtime` across them (0.0 when nothing is listed)

    Raises:
        HTTPException (500) if the response cannot be built
    """
    storage_path = Path(path)

    if subdir:
        storage_path = storage_path / subdir

    swapped_path = swap_opsml_root(request, storage_path)
    storage_client: StorageClientBase = request.app.state.storage_client

    files: List[Dict[str, Any]] = storage_client.ls(swapped_path, True)

    mtimes = []
    for file_ in files:
        # conversion of timestamp is done on client side to take timezone into account
        # (scaled by 1000; presumably seconds -> milliseconds for the UI — TODO confirm)
        mtime = file_["mtime"] * 1000
        uri = Path(file_["name"])
        file_["uri"] = str(reverse_swap_opsml_root(request, uri))
        file_["name"] = uri.name
        file_["size"] = calculate_file_size(file_["size"])
        file_["mtime"] = mtime
        mtimes.append(mtime)

    try:
        return ListFileInfoResponse(
            files=files,
            # an empty listing is valid; without a default, max() raises
            # ValueError and the route would answer 500 for an empty directory
            mtime=max(mtimes, default=0.0),
        )

    except Exception as error:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"There was an error listing files. {error}",
        ) from error


@router.get("/files/view", name="presign_uri")
def get_file_to_view(request: Request, path: str) -> FileViewResponse:
    """Returns file info plus what is needed to display a single file

    For suffixes in `PresignableTypes`:
      - files smaller than `MAX_VIEWSIZE` with a plain-text suffix
        (.txt, .log, .json, .csv, .py, .md) are fetched, decoded as UTF-8 and
        returned as `content` with `view_type` "code" (JSON is re-indented);
      - otherwise `view_type` is "iframe" and the info `uri` is replaced by a
        presigned URL.
    For any other suffix the view content is left empty.

    NOTE(review): indentation was not available when this was documented; the
    nesting below (the `else` pairing with the size/suffix check, and the
    presign call living inside that `else`) is assumed — confirm.

    Args:
        request:
            request object
        path:
            path to file

    Returns:
        `FileViewResponse` with the file info and the view content

    Raises:
        HTTPException (500) on any error while reading info, fetching the file
        or presigning
    """

    swapped_path = swap_opsml_root(request, Path(path))
    storage_client: StorageClientBase = request.app.state.storage_client
    storage_root: str = request.app.state.storage_root
    view_meta: Dict[str, str] = {}
    try:
        file_info = storage_client.client.info(path=swapped_path)
        # keep the raw byte count for the threshold check; the info dict gets the readable string
        size = file_info["size"]
        file_info["size"] = calculate_file_size(size)
        file_info["name"] = swapped_path.name
        file_info["mtime"] = file_info["mtime"] * 1000
        # default uri is the caller-supplied path; overwritten below when presigning
        file_info["uri"] = path
        file_info["suffix"] = swapped_path.suffix

        if swapped_path.suffix in list(PresignableTypes):
            if size < MAX_VIEWSIZE and swapped_path.suffix in [".txt", ".log", ".json", ".csv", ".py", ".md"]:
                # download the file to a temp dir and load it as a string
                with tempfile.TemporaryDirectory() as tmpdirname:
                    lpath = Path(tmpdirname) / swapped_path.name
                    storage_client.get(swapped_path, lpath)

                    # file_ is rebound from the file handle to the decoded text
                    with lpath.open("rb") as file_:
                        file_ = file_.read().decode("utf-8")

                    if swapped_path.suffix == ".json":
                        # round-trip through json to pretty-print
                        view_meta["content"] = json.dumps(json.loads(file_), indent=4)  # type: ignore

                    else:
                        view_meta["content"] = file_

                    view_meta["view_type"] = "code"

            else:
                view_meta["view_type"] = "iframe"

                # get remote path relative to storage root
                file_info["uri"] = storage_client.generate_presigned_url(
                    path=swapped_path.relative_to(storage_root),
                    expiration=PRESIGN_DEFAULT_EXPIRATION,
                )

        return FileViewResponse(file_info=file_info, content=view_meta)

    except Exception as error:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"There was an error generating the presigned uri. {error}",
        ) from error


@router.post("/files/readme", name="create_readme")
async def create_readme(
    request: Request,
    payload: ReadMeRequest,
) -> bool:
    """UI route that creates a readme file

    The readme is only written when at least one card exists in the requested
    registry for the given name and repository. It is stored as `README.md`
    under `<storage uri>/<registry table>/<repository>/<name>/`.

    Args:
        request:
            request object
        payload:
            `ReadMeRequest` with the card name, repository, registry type and
            the readme content

    Returns:
        True if the readme was saved; False if no matching card exists or any
        error occurred (errors are logged, not raised)
    """

    try:
        # check name and repo exist before saving
        storage_client: StorageClientBase = request.app.state.storage_client
        registry: CardRegistry = getattr(request.app.state.registries, payload.registry_type)

        cards = registry.list_cards(name=payload.name, repository=payload.repository)

        if not cards:
            logger.warning("No cards found for name {} and repository {}", payload.name, payload.repository)
            return False

        # save payload.content to readme in temp file
        with tempfile.TemporaryDirectory() as tmpdirname:
            lpath = Path(tmpdirname) / "README.md"
            # explicit UTF-8 so the stored bytes do not depend on the server's
            # locale encoding (markdown routinely contains non-ASCII text)
            lpath.write_text(payload.content, encoding="utf-8")

            rpath = (
                Path(config.opsml_storage_uri)
                / RegistryTableNames.from_str(payload.registry_type).value
                / payload.repository
                / payload.name
                / lpath.name
            )
            # save to storage (must happen while the temp dir still exists)
            storage_client.put(lpath, rpath)

        logger.info("Readme file created for {} in {}", payload.name, rpath)

        return True

    except Exception as error:  # pylint: disable=broad-except
        logger.error("Error creating readme file {}", error)
        return False
51 changes: 51 additions & 0 deletions opsml/app/routes/pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,38 @@ class ListFileResponse(BaseModel):
files: List[str]


class FileInfo(BaseModel):
    """Info record for a single stored file, as returned by the file list/view routes."""

    # path or URL for the file; the view route may replace it with a presigned URL
    uri: str
    # base name of the file
    name: str
    # human readable size string (the routes fill it via calculate_file_size)
    size: str
    type: str
    # NOTE(review): created/islink/mode/uid/gid/ino/nlink are required and look like
    # the keys of a local-filesystem info dict — confirm other storage backends supply them
    created: float
    islink: bool
    mode: int
    uid: int
    gid: int
    # the routes multiply the storage mtime by 1000 before populating this
    mtime: float
    ino: int
    nlink: int
    viewable: bool = False
    # file suffix; set by the view route, left None by the list route
    suffix: Optional[str] = None


class ViewContent(BaseModel):
    """How a file should be displayed; both fields stay None when nothing can be shown."""

    # decoded file text, only set when the file is returned inline
    content: Optional[str] = None
    # the view route sets this to "code" (inline text) or "iframe" (presigned URL)
    view_type: Optional[str] = None


class FileViewResponse(BaseModel):
    """Response of the file view route: file info plus its view content."""

    file_info: FileInfo
    content: ViewContent


class ListFileInfoResponse(BaseModel):
    """Response of the file info listing route."""

    # one record per listed file
    files: List[FileInfo]
    # largest mtime among `files`, as computed by the listing route
    mtime: float


class DeleteFileResponse(BaseModel):
deleted: bool

Expand Down Expand Up @@ -253,6 +285,18 @@ class CompareMetricResponse(BaseModel):
report: Dict[str, List[BattleReport]]


class DataCardMetadata(BaseModel):
    """Metadata returned for a data card by the `/data/card` route."""

    name: str
    version: str
    repository: str
    contact: str
    uid: str
    interface_type: str
    # JSON string of the interface's data splits, or None when there are none
    data_splits: Optional[str] = None
    # not Optional: omit the field to get the empty default, an explicit None fails validation
    sql_logic: Dict[str, str] = {}
    # JSON string of the card's feature map, or None when there is none
    feature_map: Optional[str] = None


def form_body(cls: Any) -> Any:
args = []
params = cls.__signature__.parameters
Expand Down Expand Up @@ -403,3 +447,10 @@ class MetricRequest(BaseModel):

class MetricResponse(BaseModel):
metrics: Metrics


class ReadMeRequest(BaseModel):
    """Request body for the readme creation route."""

    # card name and repository the readme belongs to; a matching card must exist
    name: str
    repository: str
    # registry to look the card up in; used as an attribute name on the app's
    # registries and resolved via RegistryTableNames.from_str
    registry_type: str
    # readme text written to README.md
    content: str
3 changes: 2 additions & 1 deletion opsml/app/routes/route_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,11 +575,12 @@ def load_graphs(self, runcard: RunCard) -> Dict[str, Any]:
return loaded_graphs

paths = client.storage_client.ls(graph_path)
paths = cast(List[Path], paths)
logger.debug("Found {} graphs in {}", paths, graph_path)
if paths:
with tempfile.TemporaryDirectory() as tmp_dir:
for path in paths:
rpath = graph_path / Path(path).name
rpath = graph_path / path.name
lpath = Path(tmp_dir) / rpath.name
client.storage_client.get(rpath, lpath)
graph: Dict[str, Any] = joblib.load(lpath)
Expand Down
18 changes: 18 additions & 0 deletions opsml/app/routes/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,3 +351,21 @@ def write_records_to_csv(
media_type="text/csv",
headers={"Content-Disposition": "filename=audit_file.csv"},
)


def calculate_file_size(size: int) -> str:
    """Formats a byte count as a human readable string

    Sizes below 1024 are reported as whole bytes; larger sizes are scaled to
    KB, MB or GB (powers of 1024) with two decimals. GB is the largest unit.

    Args:
        size:
            File size in bytes

    Returns:
        Human readable file size, e.g. "512 B", "1.50 KB", "2.00 GB"
    """
    kibi = 1024

    # bytes are printed unscaled and without decimals
    if size < kibi:
        return f"{size} B"

    # each entry: (power of 1024 used as divisor, unit label)
    for power, label in ((1, "KB"), (2, "MB")):
        upper_bound = kibi ** (power + 1)
        if size < upper_bound:
            scaled = size / kibi**power
            return f"{scaled:.2f} {label}"

    # everything from 1024**3 upwards is expressed in GB
    gigabytes = size / kibi**3
    return f"{gigabytes:.2f} GB"
Loading

0 comments on commit 270c4c5

Please sign in to comment.