Skip to content

Commit

Permalink
Removing backend workflow (#1204)
Browse files Browse the repository at this point in the history
  • Loading branch information
manasaV3 committed Aug 8, 2023
1 parent 49f11e2 commit 20d80c0
Show file tree
Hide file tree
Showing 10 changed files with 15 additions and 393 deletions.
7 changes: 0 additions & 7 deletions backend/api/app.py
Expand Up @@ -13,7 +13,6 @@
from api.model import (
get_index,
get_excluded_plugins,
update_cache,
move_artifact_to_s3,
get_manifest,
)
Expand Down Expand Up @@ -72,12 +71,6 @@ def plugin_index() -> Response:
return jsonify(get_index())


@app.route("/update", methods=["POST"])
def update() -> Response:
    """POST /update: rebuild the plugin caches, replying once the refresh is done."""
    update_cache()
    response = app.make_response(("Complete", 204))
    return response


@app.route("/plugins")
def plugins() -> Response:
    """GET /plugins: return the latest plugins as reported by the plugin model."""
    latest = plugin_model.get_latest_by_visibility()
    return jsonify(latest)
Expand Down
215 changes: 4 additions & 211 deletions backend/api/model.py
@@ -1,65 +1,16 @@
from concurrent import futures
from datetime import datetime
import json
import os
from typing import Tuple, Dict, List, Callable, Any
from typing import Dict, List, Any
from zipfile import ZipFile
from io import BytesIO
from collections import defaultdict
from api.models import (
install_activity,
plugin as plugin_model,
plugin_blocked,
plugin_metadata as plugin_metadata_model,
category as category_model
)
from utils.github import get_github_metadata, get_artifact
from utils.pypi import query_pypi, get_plugin_pypi_metadata
from api.s3 import get_cache, cache
from utils.utils import (
render_description,
send_alert,
get_attribute,
get_category_mapping,
parse_manifest
)
from utils.datadog import report_metrics
import boto3
import logging

logger = logging.getLogger(__name__)
index_subset = {'name', 'summary', 'description_text', 'description_content_type',
'authors', 'license', 'python_version', 'operating_system',
'release_date', 'version', 'first_released',
'development_status', 'category', 'display_name', 'plugin_types', 'reader_file_extensions',
'writer_file_extensions', 'writer_save_layers', 'npe2', 'error_message', 'code_repository',
'total_installs', }


def get_public_plugins() -> Dict[str, str]:
    """
    Get the dictionary of public plugins and versions.

    :return: dict of public plugins and their versions
    """
    public_plugins = plugin_model.get_latest_by_visibility()
    return public_plugins


def discover_manifest(plugin: str, version: str = None):
    """
    Invoke the plugins lambda to generate a manifest and write it to cache.

    :param plugin: name of the plugin to process
    :param version: version of the plugin manifest
    """
    payload = json.dumps({'plugin': plugin, 'version': version})
    # This lambda invocation will call
    # `napari-hub/plugins/get_plugin_manifest/generate_manifest`.
    boto3.client('lambda').invoke(
        FunctionName=os.environ.get('PLUGINS_LAMBDA_NAME'),
        InvocationType='Event',
        Payload=payload,
    )
from utils.github import get_artifact
from api.s3 import cache
from utils.utils import get_attribute


def get_manifest(name: str, version: str = None) -> dict:
Expand Down Expand Up @@ -102,16 +53,6 @@ def get_index() -> List[Dict[str, Any]]:
return plugins


def slice_metadata_to_index_columns(plugins_metadata: List[dict],
                                    subset=None) -> List[dict]:
    """
    Slice plugin metadata down to only the indexing-related columns.

    :param plugins_metadata: list of per-plugin metadata dictionaries
    :param subset: optional collection of keys to keep; defaults to the
        module-level ``index_subset`` (backward compatible with the old,
        hard-coded behavior)
    :return: list of metadata dicts restricted to the requested keys
    """
    keep = index_subset if subset is None else subset
    return [{k: plugin_metadata[k] for k in keep if k in plugin_metadata}
            for plugin_metadata in plugins_metadata]


def get_excluded_plugins() -> Dict[str, str]:
"""
Get the excluded plugins.
Expand All @@ -123,154 +64,6 @@ def get_excluded_plugins() -> Dict[str, str]:
}


def build_manifest_metadata(plugin: str, version: str) -> Tuple[str, dict]:
    """
    Fetch the manifest for (plugin, version) and parse it into metadata.

    :param plugin: name of the plugin
    :param version: version of the plugin manifest
    :return: (plugin name, parsed metadata) tuple
    """
    manifest = get_manifest(plugin, version)
    if 'error' not in manifest:
        return plugin, parse_manifest(manifest)
    if 'Manifest not yet processed' in manifest['error']:
        # Kick off the plugins lambda so the manifest lands in the cache
        # for a later run; fall through to default values for now.
        discover_manifest(plugin, version)
    # Any error case yields the default (empty-manifest) metadata.
    return plugin, parse_manifest()


def build_plugin_metadata(plugin: str, version: str) -> Tuple[str, dict]:
    """
    Build plugin metadata from multiple sources, reuse cached ones if available.

    :param plugin: name of the plugin
    :param version: version of the plugin
    :return: (plugin name, aggregated metadata dict) tuple
    """
    # Serve the per-version cache entry when one exists.
    cached_plugin = get_cache(f'cache/{plugin}/{version}.json')
    if cached_plugin:
        return plugin, cached_plugin
    metadata = get_plugin_pypi_metadata(plugin, version=version)
    if not metadata:
        # No pypi metadata: return the falsy value as-is (nothing is cached).
        return plugin, metadata
    # Enrich with github metadata when the project links a github repository.
    github_repo_url = metadata.get('code_repository')
    if github_repo_url and github_repo_url.startswith("https://github.com/"):
        metadata = {**metadata, **get_github_metadata(github_repo_url)}
    if 'description' in metadata:
        metadata['description_text'] = render_description(metadata.get('description'))
    if 'labels' in metadata:
        # Map raw ontology label terms onto category dimensions; 'labels' is
        # replaced by 'category'/'category_hierarchy' in the final metadata.
        category_mappings = category_model.get_all_categories(metadata['labels']['ontology'])
        categories = defaultdict(list)
        category_hierarchy = defaultdict(list)
        for category in metadata['labels']['terms']:
            mapped_category = get_category_mapping(category, category_mappings)
            for match in mapped_category:
                # De-duplicate labels per dimension.
                if match['label'] not in categories[match['dimension']]:
                    categories[match['dimension']].append(match['label'])
                # Root of the hierarchy is normalized to the mapped label.
                match['hierarchy'][0] = match['label']
                category_hierarchy[match['dimension']].append(match['hierarchy'])
        metadata['category'] = categories
        metadata['category_hierarchy'] = category_hierarchy
        del metadata['labels']
    # Persist the aggregated result so later runs hit the cache above.
    cache(metadata, f'cache/{plugin}/{version}.json')
    return plugin, metadata


def generate_index(plugins_metadata: Dict[str, Any]):
    """
    Attach total install counts to each plugin, then slice the result down to
    the index columns only.

    :param plugins_metadata: mapping of plugin name -> metadata dict
    :return: list of metadata dicts restricted to index columns
    """
    install_totals = install_activity.get_total_installs_by_plugins()
    for metadata in plugins_metadata.values():
        plugin_name = metadata.get('name')
        metadata['total_installs'] = install_totals.get(plugin_name, 0)
    return slice_metadata_to_index_columns(list(plugins_metadata.values()))


def update_cache():
    """
    Update existing caches to reflect new/updated plugins. Files updated:
    - excluded_plugins.json (overwrite)
    - cache/public-plugins.json (overwrite)
    - cache/hidden-plugins.json (overwrite)
    - cache/index.json (overwrite)
    - cache/{plugin}/{version}.json (skip if exists)
    """
    # Plugin name -> version mapping from pypi.
    plugins = query_pypi()
    # Build pypi/github metadata and manifest metadata concurrently, then
    # merge the manifest fields into the plugin metadata.
    plugins_metadata = get_plugin_metadata_async(plugins, build_plugin_metadata)
    manifest_metadata = get_plugin_metadata_async(plugins, build_manifest_metadata)
    for plugin in plugins:
        plugins_metadata[plugin].update(manifest_metadata[plugin])
    excluded_plugins = get_updated_plugin_exclusion(plugins_metadata)
    # Bucket each plugin by visibility; excluded plugins keep their exclusion
    # status, everything else defaults to 'public'. Statuses other than
    # public/hidden (e.g. blocked/invalid) are dropped from the buckets.
    visibility_plugins = {"public": {}, "hidden": {}}
    for plugin, version in plugins.items():
        if plugin in excluded_plugins:
            visibility = excluded_plugins[plugin]
        else:
            visibility = plugins_metadata[plugin].get('visibility', 'public')
        if visibility in visibility_plugins:
            visibility_plugins[visibility][plugin] = version

    # Excluded plugins are removed from the metadata before indexing.
    for plugin, _ in excluded_plugins.items():
        if plugin in plugins_metadata:
            del (plugins_metadata[plugin])

    if visibility_plugins['public']:
        # Only overwrite the caches when the query produced public plugins;
        # per the alert below, an empty result is treated as a pypi failure.
        cache(excluded_plugins, 'excluded_plugins.json')
        cache(visibility_plugins['public'], 'cache/public-plugins.json')
        cache(visibility_plugins['hidden'], 'cache/hidden-plugins.json')
        cache(generate_index(plugins_metadata), 'cache/index.json')
        report_metrics('napari_hub.plugins.count', len(visibility_plugins['public']), ['visibility:public'])
        report_metrics('napari_hub.plugins.count', len(visibility_plugins['hidden']), ['visibility:hidden'])
        report_metrics('napari_hub.plugins.excluded', len(excluded_plugins))
        logger.info("plugin update successful")
    else:
        send_alert(f"({datetime.now()})Actions Required! Failed to query pypi for "
                   f"napari plugin packages, switching to backup analysis dump")


def get_updated_plugin_exclusion(plugins_metadata):
    """
    Update plugin visibility information with latest metadata.
    Override existing visibility information if existing entry is not 'blocked'
    (disabled by hub admin).
    public: fully visible (default)
    hidden: plugin page exists, but doesn't show up in search listings
    disabled: no plugin page created, does not show up in search listings
    blocked: no plugin page created, does not show up in search listings
    :param plugins_metadata: plugin metadata containing visibility information
    :return: updated exclusion list
    """
    excluded = get_excluded_plugins()
    for name, meta in plugins_metadata.items():
        if not meta:
            # Empty metadata means the plugin could not be resolved at all.
            excluded[name] = 'invalid'
        if 'visibility' not in meta:
            continue
        visibility = meta['visibility']
        if name in excluded:
            if excluded[name] == "blocked":
                continue  # admin blocks always win over metadata
            if visibility == 'public':
                del excluded[name]
            else:
                excluded[name] = visibility
        elif visibility != 'public':
            excluded[name] = visibility
    return excluded


def get_plugin_metadata_async(plugins: Dict[str, str], metadata_builder: Callable) -> dict:
    """
    Query plugin metadata concurrently.

    :param plugins: mapping of plugin name -> version to query
    :param metadata_builder: callable taking (name, version) and returning a
        (name, metadata) tuple
    :return: mapping of plugin name -> metadata produced by metadata_builder
    """
    plugins_metadata = {}
    with futures.ThreadPoolExecutor(max_workers=32) as executor:
        plugin_futures = [executor.submit(metadata_builder, name, version)
                          for name, version in plugins.items()]
        for future in futures.as_completed(plugin_futures):
            # Fix: call result() once and unpack, instead of the original's
            # two separate future.result() calls per completed future.
            name, metadata = future.result()
            plugins_metadata[name] = metadata
    return plugins_metadata


def move_artifact_to_s3(payload, client):
"""
move preview page build artifact zip to public s3.
Expand Down
25 changes: 1 addition & 24 deletions backend/api/s3.py
Expand Up @@ -3,15 +3,12 @@
import mimetypes
import os
import os.path
import time
from datetime import datetime
from typing import Union, IO, List, Dict
from typing import Union, IO

import boto3
from botocore.client import Config
from botocore.exceptions import ClientError
from utils.utils import send_alert
from utils.time import print_perf_duration

# Environment variable set through ecs stack terraform module
bucket = os.environ.get('BUCKET')
Expand All @@ -21,23 +18,6 @@
s3_client = boto3.client("s3", endpoint_url=endpoint_url, config=Config(max_pool_connections=50))


def get_cache(key: str) -> Union[Dict, List, None]:
    """
    Get the cached json file or manifest file for a given key if exists,
    None otherwise.

    :param key: key to the cache to get
    :return: file content for the key if exists, None otherwise
    """
    start = time.perf_counter()
    try:
        body = s3_client.get_object(
            Bucket=bucket, Key=os.path.join(bucket_path, key))['Body'].read()
    except ClientError:
        # Treat any S3 client failure as a cache miss.
        print(f"Not cached: {key}")
        return None
    result = json.loads(body)
    print_perf_duration(start, f"get_cache({key})")
    return result


def cache(content: Union[dict, list, IO[bytes]], key: str, mime: str = None):
"""
Cache the given content to the key location.
Expand Down Expand Up @@ -65,6 +45,3 @@ def cache(content: Union[dict, list, IO[bytes]], key: str, mime: str = None):
Key=os.path.join(bucket_path, key), ExtraArgs=extra_args)


def _get_complete_path(path):
    # Prefix the key with the configured bucket path so callers can pass
    # bucket-relative keys.
    return os.path.join(bucket_path, path)

2 changes: 0 additions & 2 deletions backend/requirements.txt
Expand Up @@ -5,8 +5,6 @@ Flask-GitHubApp==0.3.0
gunicorn==20.1.0
pyyaml==6.0
requests==2.28.0
Markdown==3.4.1
bs4==0.0.1
cffconvert==2.0.0
GitPython==3.1.30
setuptools==65.5.1
Expand Down
8 changes: 4 additions & 4 deletions backend/utils/_tests/test_github.py
@@ -1,9 +1,9 @@
import unittest
from unittest.mock import patch
import requests
from backend.utils.github import get_citation_author, get_github_metadata

from utils.github import get_github_repo_url, get_license, get_citations
from utils.github import (
get_github_repo_url, get_license, get_citations, get_citation_author,
get_github_metadata
)
from utils.test_utils import (
FakeResponse, license_response, no_license_response, citation_string,
config_yaml, config_yaml_authors_result, citations_authors_result,
Expand Down
16 changes: 3 additions & 13 deletions backend/utils/_tests/test_pypi.py
Expand Up @@ -2,26 +2,16 @@
from unittest.mock import patch

from requests import HTTPError
from backend.utils.pypi import format_plugin

from utils.pypi import query_pypi, get_plugin_pypi_metadata
from utils.pypi import get_plugin_pypi_metadata
from utils.test_utils import (
FakeResponse, plugin, plugin_list,
split_comma_correct_result, split_comma_plugin,
FakeResponse, plugin,split_comma_correct_result, split_comma_plugin,
split_and_correct_result, split_and_plugin, split_ampersand_correct_result,
split_ampersand_plugin, empty_split_plugin, empty_split_correct_result
)

class TestPypi(unittest.TestCase):

@patch(
'requests.get', return_value=FakeResponse(data=plugin_list)
)
def test_query_pypi(self, mock_get):
result = query_pypi()
assert len(result) == 2
assert result['package1'] == "0.2.7"
assert result['package2'] == "0.1.0"
class TestPypi(unittest.TestCase):

@patch(
'requests.get', return_value=FakeResponse(data=plugin)
Expand Down
12 changes: 0 additions & 12 deletions backend/utils/datadog.py

This file was deleted.

0 comments on commit 20d80c0

Please sign in to comment.