Skip to content

Commit

Permalink
Added tests. Fixed memory view issue.
Browse files Browse the repository at this point in the history
  • Loading branch information
khustup committed Mar 29, 2022
1 parent c60bec8 commit c7a39a1
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 60 deletions.
42 changes: 42 additions & 0 deletions hub/visualizer/tests/test_visualizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import numpy as np
import requests
from hub.visualizer.visualizer import visualizer
from hub.tests.dataset_fixtures import *


@pytest.mark.parametrize(
    "ds_generator",
    [
        "local_ds_generator",
        "s3_ds_generator",
        "gcs_ds_generator",
        "hub_cloud_ds_generator",
    ],
    indirect=True,
)
def test_local_server(ds_generator):
    """Check that the visualizer's local HTTP server exposes a dataset's files.

    A dataset with a single JPEG-compressed image is registered with the
    visualizer, then its dataset meta, tensor meta and chunk index are
    fetched over HTTP. A request for a key that does not exist must
    answer 404.
    """
    dataset = ds_generator()
    dataset.create_tensor("images", htype="image", sample_compression="jpg")
    sample = np.random.randint(0, 255, size=(400, 400, 3), dtype="uint8")
    dataset.images.append(sample)

    dataset_id = visualizer.add(dataset)
    base_url = f"http://localhost:{visualizer.port}/{dataset_id}/"

    def fetch(relative_path):
        # Every request in this test is a plain GET below the dataset's URL.
        return requests.request("GET", base_url + relative_path)

    # Dataset-level metadata lists the tensor created above.
    meta_response = fetch("dataset_meta.json")
    assert meta_response.status_code == 206
    dataset_meta = meta_response.json()
    assert "tensors" in dataset_meta
    assert "images" in dataset_meta["tensors"]

    # Tensor-level metadata; the "jpg" compression is reported as "jpeg".
    tensor_response = fetch("images/tensor_meta.json")
    assert tensor_response.status_code == 206
    tensor_meta = tensor_response.json()
    assert tensor_meta["sample_compression"] == "jpeg"
    assert tensor_meta["length"] == 1
    assert tensor_meta["htype"] == "image"
    assert tensor_meta["dtype"] == "uint8"

    # Binary payloads are served as well.
    index_response = fetch("images/chunks_index/unsharded")
    assert index_response.status_code == 206
    assert len(index_response.content) == 14

    # Unknown keys are reported as missing.
    missing_response = fetch("not_exists/not_exists")
    assert missing_response.status_code == 404
132 changes: 72 additions & 60 deletions hub/visualizer/visualizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Union
from typing import Dict, Optional, Union
import uuid
from flask import Flask, request, Response # type: ignore
from hub.core.dataset import Dataset
Expand All @@ -11,19 +11,16 @@

from IPython.display import IFrame, display # type: ignore

visualizer = None

_PORT: Optional[int] = None
_SERVER_THREAD: Optional[threading.Thread] = None
_APP = Flask("dataset_visualizer")

log = logging.getLogger("werkzeug")
log.setLevel(logging.ERROR)


def _run_app():
def _run_app(port: int):
try:
_APP.run(host="0.0.0.0", port=_PORT, threaded=True)
_APP.run(host="0.0.0.0", port=port, threaded=True)
except Exception:
pass

Expand All @@ -36,52 +33,14 @@ def after_request(response):
return response


@_APP.route("/<path:path>")
def access_data(path):
try:
paths = path.split("/", 1)
range_header = request.headers.get("Range", None)
start, end = 0, None
ds: Dataset = visualizer.get(paths[0])
storage: StorageProvider = ds.storage
if request.method == "HEAD":
if paths[1] in storage.keys:
return Response("OK", 200)
else:
return Response("", 404)
if range_header:
match = re.search(r"(\d+)-(\d*)", range_header)
groups = match.groups()

if groups[0]:
start = int(groups[0])
if groups[1]:
end = int(groups[1]) + 1

c = storage.get_bytes(paths[1], start, end)
resp = Response(
c,
206,
content_type="application/octet-stream",
)
resp.headers.add("Connection", "keep-alive")
resp.headers.add("Accept-Ranges", "bytes")
resp.headers.add("Content-Range", "bytes {0}-{1}".format(start, end))
return resp

except Exception as e:
return Response(
"Not Found",
404,
content_type="application/octet-stream",
)


class _Visualizer:
"""
Visualizer class to manage visualization of the datasets.
"""

_port: Optional[int] = None
_datasets: Dict = {}

def __init__(self):
self.start_server()
self._datasets = {}
Expand All @@ -94,6 +53,10 @@ def add(self, ds: Dataset) -> str:
def get(self, id: str) -> Dataset:
return self._datasets[id]

@property
def port(self):
return self._port

def get_free_port(self):
with socketserver.TCPServer(("localhost", 0), None) as s:
return s.server_address[1]
Expand All @@ -104,14 +67,17 @@ def is_server_running(self) -> bool:
return False

def start_server(self):
global _PORT
global _SERVER_THREAD
if self.is_server_running():
return
_PORT = self.get_free_port()
_SERVER_THREAD = threading.Thread(target=_run_app, daemon=True)
self._port = self.get_free_port()

def run_app():
_run_app(port=self.port)

_SERVER_THREAD = threading.Thread(target=run_app, daemon=True)
_SERVER_THREAD.start()
return f"http://localhost:{_PORT}/"
return f"http://localhost:{self.port}/"

def stop_server(self):
global _SERVER_THREAD
Expand All @@ -120,6 +86,9 @@ def stop_server(self):
terminate_thread(_SERVER_THREAD)
_SERVER_THREAD = None

def __del__(self):
self.stop_server()


visualizer = _Visualizer()

Expand All @@ -134,14 +103,57 @@ def visualize(
Args:
ds: dataset The dataset to visualize.
width: Union[int, str, None] Optional width of the visualizer canvas.
height: Union[int, str, None] Optional height of the visualizer canvas.
"""
global visualizer
if visualizer:
id = visualizer.add(ds)
url = f"http://localhost:{_PORT}/{id}/"
iframe = IFrame(
f"https://app.dev.activeloop.ai/visualizer/hub?url={url}",
width=width or "100%",
height=height or 900,
id = visualizer.add(ds)
url = f"http://localhost:{visualizer.port}/{id}/"
iframe = IFrame(
f"https://app.dev.activeloop.ai/visualizer/hub?url={url}",
width=width or "100%",
height=height or 900,
)
display(iframe)


@_APP.route("/<path:path>")
def access_data(path):
    """Serve bytes from the storage of a dataset registered with the visualizer.

    The URL path has the form ``<dataset id>/<storage key>``. The dataset is
    looked up through ``visualizer.get`` and the key is read from its
    ``storage``.

    * ``HEAD`` answers 200 when the key is present in ``storage.keys`` and
      404 otherwise.
    * Any other request returns the requested bytes with status 206. An
      optional ``Range: bytes=<first>-<last>`` header selects a slice; a
      missing or unparsable header selects everything from offset 0.

    Any exception (unknown dataset id, missing key, malformed path, ...) is
    reported to the client as a 404 response.
    """
    try:
        # A path without "/" fails to unpack and lands in the 404 handler.
        dataset_id, storage_key = path.split("/", 1)
        ds: Dataset = visualizer.get(dataset_id)
        storage: StorageProvider = ds.storage

        if request.method == "HEAD":
            if storage_key in storage.keys:
                return Response("OK", 200)
            return Response("", 404)

        start, end = 0, None
        range_header = request.headers.get("Range", None)
        if range_header:
            match = re.search(r"(\d+)-(\d*)", range_header)
            # An unparsable Range header is ignored instead of raising on
            # ``None.groups()`` and turning into a 404.
            if match:
                first, last = match.groups()
                start = int(first)
                if last:
                    # HTTP ranges are inclusive; get_bytes is called with an
                    # exclusive end offset.
                    end = int(last) + 1

        body = storage.get_bytes(storage_key, start, end)
        if isinstance(body, memoryview):
            # Hand Flask plain bytes rather than a memoryview.
            body = body.tobytes()

        resp = Response(
            body,
            206,
            content_type="application/octet-stream",
        )
        resp.headers.add("Connection", "keep-alive")
        resp.headers.add("Accept-Ranges", "bytes")
        if body:
            # Content-Range carries the inclusive offsets of the bytes that
            # were actually sent; the total size is not known here, so the
            # complete length is given as "*".
            last_byte = start + len(body) - 1
            resp.headers.add(
                "Content-Range", "bytes {0}-{1}/*".format(start, last_byte)
            )
        return resp

    except Exception:
        return Response(
            "Not Found",
            404,
            content_type="application/octet-stream",
        )
display(iframe)

0 comments on commit c7a39a1

Please sign in to comment.