Skip to content

Commit

Permalink
merge git-import to dev
Browse files Browse the repository at this point in the history
  • Loading branch information
giannisdoukas committed Jun 6, 2020
2 parents 86c1d09 + 7680063 commit 95e2d06
Show file tree
Hide file tree
Showing 14 changed files with 499 additions and 34 deletions.
7 changes: 5 additions & 2 deletions cwlkernel/CWLKernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@
from ruamel import yaml
from ruamel.yaml import YAML

from cwlkernel.CWLBuilder import CWLSnippetBuilder
from cwlkernel.CWLLogger import CWLLogger
from .CWLBuilder import CWLSnippetBuilder
from .CWLExecuteConfigurator import CWLExecuteConfigurator
from .CWLLogger import CWLLogger
from .CoreExecutor import CoreExecutor
from .IOManager import IOFileManager
from .cwlrepository.CWLComponent import WorkflowComponentFactory, CWLWorkflow
from .cwlrepository.cwlrepository import WorkflowRepository
from .git.CWLGitResolver import CWLGitResolver

logger = logging.Logger('CWLKernel')

Expand Down Expand Up @@ -52,6 +53,8 @@ def __init__(self, **kwargs):
Path(os.sep.join([conf.CWLKERNEL_BOOT_DIRECTORY, self.ident, 'repo'])))
self._snippet_builder = CWLSnippetBuilder()
self._workflow_composer: Optional[CWLWorkflow] = None
self._github_resolver: CWLGitResolver = CWLGitResolver(
Path(os.sep.join([conf.CWLKERNEL_BOOT_DIRECTORY, self.ident, 'git'])))

@staticmethod
def register_magic(magic: Callable):
Expand Down
28 changes: 16 additions & 12 deletions cwlkernel/cwlrepository/CWLComponent.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import uuid
from abc import ABC, abstractmethod
from copy import deepcopy
from io import StringIO
Expand All @@ -7,21 +8,22 @@


class WorkflowComponent(ABC):
_id: str

def __init__(self, id: str, component: Optional[Dict]):
self._id = id
self._id: str = id
if component is not None:
if isinstance(component['inputs'], Dict):
component['inputs'] = self._convert_inputs_from_dict_to_list(component['inputs'])
if isinstance(component['outputs'], Dict):
component['outputs'] = self._convert_inputs_from_dict_to_list(component['outputs'])

@property
def id(self):
def id(self) -> str:
return self._id

@abstractmethod
def to_yaml(self, nested=False) -> str:
def to_yaml(self) -> str:
pass

@abstractmethod
Expand Down Expand Up @@ -61,7 +63,7 @@ def command_line_tool(self) -> Dict:
def command_line_tool(self, command_line_tool: Dict):
self._command_line_tool = command_line_tool

def to_yaml(self, nested=False) -> str:
def to_yaml(self) -> str:
yaml_text = StringIO()
yaml.dump(self.command_line_tool, yaml_text)
return yaml_text.getvalue()
Expand All @@ -85,8 +87,8 @@ def _packed_steps(self) -> Dict:

class CWLWorkflow(WorkflowComponent):

def __init__(self, id: str, workflow: Optional[Dict] = None) -> None:
super().__init__(id, workflow)
def __init__(self, workflow_id: str, workflow: Optional[Dict] = None) -> None:
super().__init__(workflow_id, workflow)
if workflow is None:
self._inputs: List[Dict] = []
self._outputs: List[Dict] = []
Expand All @@ -96,14 +98,17 @@ def __init__(self, id: str, workflow: Optional[Dict] = None) -> None:
self._inputs: List[Dict] = deepcopy(workflow['inputs'])
self._outputs: List[Dict] = deepcopy(workflow['outputs'])
self._steps: Dict = deepcopy(workflow['steps'])
self._requirements: Dict = deepcopy(workflow['requirements'])
self._requirements = {}
if 'requirements' in workflow:
self._requirements: Dict = deepcopy(workflow['requirements'])

@property
def steps(self):
steps = {}
for step in self._steps:
steps[step] = deepcopy(self._steps[step])
steps[step]['run'] = f"{steps[step]['run']._id}.cwl"
if not isinstance(steps[step]['run'], str):
steps[step]['run'] = f"{steps[step]['run']._id}.cwl"
return deepcopy(steps)

"""
Expand All @@ -125,8 +130,7 @@ def add_input(self, workflow_input: Dict, step_id: str, in_step_id: str):
self._inputs.append(workflow_input)
self._steps[step_id]['in'][in_step_id] = workflow_input['id']

def to_yaml(self, nested=False) -> str:
# TODO: remove nested argument
def to_yaml(self) -> str:
yaml_text = StringIO()
result = self.to_dict()
yaml.dump(result, yaml_text)
Expand Down Expand Up @@ -162,11 +166,11 @@ def outputs(self) -> List[Dict]:
return deepcopy(self._outputs)


class WorkflowComponentFactory():
class WorkflowComponentFactory:
def get_workflow_component(self, yaml_string: str) -> WorkflowComponent:
component = yaml.load(StringIO(yaml_string), yaml.SafeLoader)
if 'id' not in component:
raise ValueError("cwl must contains an id")
component['id'] = str(uuid.uuid4())
if component['class'] == 'CommandLineTool':
return CWLTool(component['id'], component)
elif component['class'] == 'Workflow':
Expand Down
55 changes: 55 additions & 0 deletions cwlkernel/git/CWLGitResolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import base64
import os.path
import posixpath
from collections import deque
from io import BytesIO
from pathlib import Path
from typing import List, Tuple, Dict
from urllib.parse import urlparse

import requests
import ruamel.yaml as yaml
from requests.compat import urljoin


class CWLGitResolver:
    """Fetch a CWL document, and the files its steps run, from a GitHub repository.

    Files are downloaded through the GitHub contents API and mirrored locally
    under ``<local_directory>/<owner>/<repo>/<path inside the repository>``.
    """

    def __init__(self, local_directory: Path):
        """Store the local mirror root and create it when it does not exist.

        :param local_directory: directory under which fetched files are written
        """
        self._local_root_directory = local_directory
        # parents=True so that a missing intermediate directory does not make
        # construction fail.
        self._local_root_directory.mkdir(parents=True, exist_ok=True)

    def resolve(self, github_url: str) -> List[str]:
        """Download the document behind ``github_url`` and everything it references.

        The URL path is split on ``/`` and read positionally as
        ``/<owner>/<repo>/<anything>/<branch>/<path...>`` (the shape of a
        ``https://github.com/<owner>/<repo>/blob/<branch>/<path>`` link).
        Each fetched document's steps are scanned for string ``run`` entries,
        which are treated as file references relative to that document.

        :param github_url: link to the root CWL document
        :return: local file names of all fetched documents, root first, then in
            breadth-first discovery order, each file exactly once
        :raises RuntimeError: when the GitHub API answers with a non-200 status
        :raises ValueError: when a step references a path outside the repository
        """
        github_path = urlparse(github_url).path.split('/')
        git_owner = github_path[1]
        git_repo = github_path[2]
        git_branch = github_path[4]
        git_path = '/'.join(github_path[5:])

        workflow_files: List[str] = []
        # Track visited *repository* paths. Local file names live in a
        # different namespace, so comparing against them would never detect a
        # revisit and circular references would be fetched forever.
        seen = {git_path}
        pending = deque([git_path])
        while pending:
            current_path = pending.popleft()
            workflow_filename, workflow = self._resolve_file(current_path, git_owner, git_repo, git_branch)
            workflow_files.append(workflow_filename)
            for run in self._referenced_files(workflow):
                referenced_path = self._resolve_reference(current_path, run)
                if referenced_path not in seen:
                    seen.add(referenced_path)
                    pending.append(referenced_path)
        return workflow_files

    @staticmethod
    def _referenced_files(workflow) -> List[str]:
        """Return the string ``run`` entries found in the document's steps."""
        if not isinstance(workflow, dict):
            return []
        steps = workflow.get('steps') or []
        # ``steps`` may be a map keyed by step id or a list of step objects.
        step_entries = steps.values() if isinstance(steps, dict) else steps
        return [
            step['run']
            for step in step_entries
            if isinstance(step, dict) and isinstance(step.get('run'), str)
        ]

    @staticmethod
    def _resolve_reference(current_path: str, run: str) -> str:
        """Turn a ``run`` reference into a normalized repository path.

        The reference is interpreted relative to the directory of the document
        that contains it (``current_path``).

        :raises ValueError: when the result is absolute or climbs above the
            repository root, which would also escape the local mirror directory
        """
        base_directory = posixpath.dirname(current_path)
        referenced_path = posixpath.normpath(posixpath.join(base_directory, run))
        escapes_root = referenced_path == '..' or referenced_path.startswith('../')
        if escapes_root or posixpath.isabs(referenced_path):
            raise ValueError(
                f"Step reference '{run}' in '{current_path}' points outside of the repository")
        return referenced_path

    def _resolve_file(self, path: str, git_owner: str, git_repo: str, git_branch: str) -> Tuple[str, Dict]:
        """Fetch one file through the GitHub contents API and mirror it locally.

        :param path: path of the file inside the repository
        :param git_owner: repository owner
        :param git_repo: repository name
        :param git_branch: ref passed to the API as ``?ref=``
        :return: tuple of (local file name, parsed YAML document)
        :raises RuntimeError: when the API answers with a non-200 status
        """
        url = urljoin(f"https://api.github.com/repos/{git_owner}/{git_repo}/contents/",
                      f"{path}?ref={git_branch}")
        # NOTE(review): requests applies no timeout unless one is passed;
        # consider adding one here.
        github_response = requests.get(url)
        if github_response.status_code != 200:
            raise RuntimeError(
                f"Error on github api call for: {url}: {github_response.status_code}: {github_response.text}")
        payload = github_response.json()
        # The document comes from a remote repository, so it is parsed with the
        # safe loader: plain YAML only, no arbitrary object construction.
        workflow = yaml.load(BytesIO(base64.b64decode(payload['content'])), yaml.SafeLoader)
        workflow_filename = os.path.join(str(self._local_root_directory), git_owner, git_repo, path)
        Path(os.path.dirname(workflow_filename)).mkdir(exist_ok=True, parents=True)
        with open(workflow_filename, 'w') as f:
            yaml.dump(workflow, f)
        return workflow_filename, workflow
Empty file added cwlkernel/git/__init__.py
Empty file.
23 changes: 22 additions & 1 deletion cwlkernel/kernel_magics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from ruamel.yaml import YAML

from cwlkernel.cwlrepository.CWLComponent import CWLWorkflow, WorkflowComponent
from .CWLKernel import CWLKernel
from .cwlrepository.CWLComponent import CWLWorkflow, WorkflowComponent, WorkflowComponentFactory


@CWLKernel.register_magic
Expand Down Expand Up @@ -215,3 +215,24 @@ def data(kernel: CWLKernel, *args):
}
}
)


@CWLKernel.register_magic
def githubImport(kernel: CWLKernel, url: str):
    """Magic command: import CWL files from GitHub and register them as tools.

    Every local file returned by the kernel's GitHub resolver for ``url`` is
    read, turned into a workflow component, registered in the kernel's
    workflow repository and announced on the stdout stream.

    :param kernel: kernel instance the magic runs in
    :param url: GitHub URL handed to the resolver
    """
    component_factory = WorkflowComponentFactory()
    resolved_files = kernel._github_resolver.resolve(url)
    for local_path in resolved_files:
        with open(local_path) as cwl_stream:
            component = component_factory.get_workflow_component(cwl_stream.read())
        kernel._workflow_repository.register_tool(component)
        message = f"tool '{component.id}' registered\n"
        kernel.send_response(kernel.iopub_socket, 'stream', {'name': 'stdout', 'text': message})


@CWLKernel.register_magic
def viewTool(kernel: CWLKernel, workflow_id: str):
    """Magic command: show the registered component with the given id as JSON.

    Looks ``workflow_id`` up in the kernel's workflow repository; sends the
    component's dict form as a JSON response, or an error response when the
    lookup returns ``None``.

    :param kernel: kernel instance the magic runs in
    :param workflow_id: id of the component to display
    """
    registered = kernel._workflow_repository.__repo__.get_by_id(workflow_id)
    if registered is None:
        kernel._send_error_response(f"Tool '{workflow_id}' is not registered")
        return
    kernel._send_json_response(registered.to_dict())
103 changes: 103 additions & 0 deletions examples/githubImport.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Github Import"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tool 'threesteps' registered\n",
"tool 'head' registered\n",
"tool 'grep' registered\n"
]
}
],
"source": [
"% githubImport https://github.com/giannisdoukas/CWLJNIKernel/blob/dev/tests/cwl/3stepWorkflow.cwl"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Add data in memory"
]
},
{
"data": {
"application/json": {
"outputfile": {
"basename": "grepoutput.out",
"checksum": "sha1$b0358a00b0d42af80bef65e80097e39ea0aa2790",
"class": "File",
"http://commonwl.org/cwltool#generation": 0,
"id": "outputfile",
"location": "file:///private/tmp/CWLKERNEL_DATA/21a791a2-b2a2-4d75-abfb-20cd5ea2b35b/runtime_data/grepoutput.out",
"nameext": ".out",
"nameroot": "grepoutput",
"size": 70
},
"outputfile2": {
"basename": "grepoutput.out",
"checksum": "sha1$66285c44ede6dbac842fda1bf455b339648249f3",
"class": "File",
"http://commonwl.org/cwltool#generation": 0,
"id": "outputfile2",
"location": "file:///private/tmp/CWLKERNEL_DATA/21a791a2-b2a2-4d75-abfb-20cd5ea2b35b/runtime_data/grepoutput.out_2",
"nameext": ".out",
"nameroot": "grepoutput",
"size": 96
}
},
"text/plain": [
"{\"outputfile\": {\"location\": \"file:///private/tmp/CWLKERNEL_DATA/21a791a2-b2a2-4d75-abfb-20cd5ea2b35b/runtime_data/grepoutput.out\", \"basename\": \"grepoutput.out\", \"nameroot\": \"grepoutput\", \"nameext\": \".out\", \"class\": \"File\", \"checksum\": \"sha1$b0358a00b0d42af80bef65e80097e39ea0aa2790\", \"size\": 70, \"http://commonwl.org/cwltool#generation\": 0, \"id\": \"outputfile\"}, \"outputfile2\": {\"location\": \"file:///private/tmp/CWLKERNEL_DATA/21a791a2-b2a2-4d75-abfb-20cd5ea2b35b/runtime_data/grepoutput.out_2\", \"basename\": \"grepoutput.out\", \"nameroot\": \"grepoutput\", \"nameext\": \".out\", \"class\": \"File\", \"checksum\": \"sha1$66285c44ede6dbac842fda1bf455b339648249f3\", \"size\": 96, \"http://commonwl.org/cwltool#generation\": 0, \"id\": \"outputfile2\"}}"
]
},
"metadata": {
"application/json": {
"expanded": false,
"root": "root"
}
},
"output_type": "display_data"
}
],
"source": [
"% execute threesteps\n",
"inputfile:\n",
" class: File\n",
" location: /Users/dks/Workspaces/CWLKernel/tests/cwl/3stepWorkflow.cwl\n",
"query: id"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Common Workflow Languages",
"language": "cwl",
"name": "cwlkernel"
},
"language_info": {
"file_extension": ".cwl",
"mimetype": "text/x-cwl",
"name": "yaml"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ traitlets==4.3.3
uritools==3.0.0
PyYAML==5.3.1
pandas==1.0.4
notebook==6.0.3
notebook==6.0.3
requests==2.23.0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_version(rel_path):
setup(
name=name,
version=get_version(f"{name}/__init__.py"),
packages=['cwlkernel', 'cwlkernel.cwlrepository'],
packages=['cwlkernel', 'cwlkernel.cwlrepository', 'cwlkernel.git'],
url='https://github.com/giannisdoukas/CWLJNIKernel',
author='Yannis Doukas',
author_email='giannisdoukas2311@gmail.com',
Expand Down
3 changes: 2 additions & 1 deletion test-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ coverage==5.1
coveralls==1.11.1
pycodestyle==2.5.0
jupyterlab==2.1.3
matplotlib==3.2.1
matplotlib==3.2.1
mockito==1.2.1
5 changes: 5 additions & 0 deletions tests/cwl/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/grepoutput.out
/grepoutput.out_2
/*.txt
/*.tar
/*.out
Loading

0 comments on commit 95e2d06

Please sign in to comment.