diff --git a/README.md b/README.md
index 6ac76c76..de04a821 100644
--- a/README.md
+++ b/README.md
@@ -3,12 +3,12 @@
![Python](https://img.shields.io/badge/python-3.9-blue.svg)
![Python](https://img.shields.io/badge/python-3.10-blue.svg)
[![PyPI version](https://badge.fury.io/py/biocypher.svg)](https://badge.fury.io/py/biocypher)
-[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
-![Docs build](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yml/badge.svg)
-[![Downloads](https://static.pepy.tech/badge/biocypher)](https://pepy.tech/project/biocypher)
-[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
+[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
+![Docs build](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yml/badge.svg)
+[![Downloads](https://static.pepy.tech/badge/biocypher)](https://pepy.tech/project/biocypher)
+[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](http://makeapullrequest.com)
-[![Powered by the Bioregistry](https://img.shields.io/static/v1?label=Powered%20by&message=Bioregistry&color=BA274A&style=flat&logo=image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACgAAAAoCAYAAACM/rhtAAAACXBIWXMAAAEnAAABJwGNvPDMAAAAGXRFWHRTb2Z0d2FyZQB3d3cuaW5rc2NhcGUub3Jnm+48GgAACi9JREFUWIWtmXl41MUZxz/z291sstmQO9mQG0ISwHBtOOSwgpUQhApWgUfEowKigKI81actypaqFbWPVkGFFKU0Vgs+YgvhEAoqEUESrnDlEEhCbkLYJtlkk9399Y/N/rKbzQXt96+Zed+Z9/t7Z+adeecnuA1s5yFVSGrLOAf2qTiEEYlUZKIAfYdKE7KoBLkQSc4XgkPfXxz/owmT41ZtiVtR3j94eqxQq5aDeASIvkVb12RBtt0mb5xZsvfa/5XgnqTMcI3Eq7IQjwM+7jJJo8YvNhK/qDBUOl8A7JZWWqqu01Jeg6Pd1nW4NuBjjax6eWrRruv/M8EDqTMflmXeB0Jcbb6RIRhmTCJ0ymgC0wYjadTd9nW0tWMu+In63NNU7c3FWtvgJpXrZVlakVGU8/ltEcwzGjU3miI/ABa72vwTB5K45AEi7x2PUEl9fZsHZLuDmgPHuLJpJ82lle6iTSH6mpXp+fnt/Sa4yzhbp22yfwFkgnMaBy17kPhFmQh1997qLxztNkq35XB505fINtf0iz1WvfTQ7Pxdlj4Jdnjuny5yvpEhjHh7FQOGD/YyZi4owS86HJ+QQMDpJaBf3jUXlHD21+8q0y4LDppV/vfNO7+jzV3Pa6SOac0E8I8fSPonpm7JAVR+eRhzwU/Ofj+e49tpT/HdtGXcyLvQJ8HAtCTGfmJCF2dwfpTMz4NszX/uqqdyr+xPyVwoEK+C03PGrDX4GkJ7NBJ+txH/hCgAit7cRlNxOY62dmzmZgwzJvZJUh2gI/xnRmoOHsfe3AqQ/kho0qXs+pLzLh3FgwdT54YKxLsAQq0mbf1zHuTsltZejemHJSrlgGGDPGTXc09zdM5qTi59jZbKOg+Zb1QYI95+XokEQogPDifPDnPJFQ8uCkl8FyGmACQtn4dhxp3KINX7jnHi0ZeJnT8dla8Plbu+48zzfyJ08kh8ggIACB4zlIAhsURm3EnML6eB6Fzep1a+SUt5DS2VddTs+4GQccPRhgV1kowIQRaChhMXAPxkIev/Vl+8R/HgnqTMmI4gjH/iQOIXZSqdzQUlXDB9RPyi+1DrdVx67WMursvCkDERXYxB0ROSIOKecURMG+tBzkXAhbYbZk6teNPLkwmPzUIX71wuMiw+MHx2nEJQrWIFHSdE4pIHlFDisLZxYe1HhIwfTtLK+RSu30rVnlxGvrOapOcW9DsW3vH6CgKS4zxIXlz3Fw8dSaMmcfEcV9XHYbc/DSCZMEkgFoJzY0TeO17pVL7jANbaBoauWUJlTi4VOw+T9sazBKYl0ZB/qV/kALThQRi3vOJB0lpzw0vPMONOtOHOqRcyi7bzkEqanJo3HogBMGROUrziaGundGsOsQsyUPn6UPx2NvELZxIybhinn3uLyx9uVwaW7XbqjxdQmr2X0uy93Dh+Dtlu9zCu9vdj1PsvEWwcii7OwJAXFnoRFCoVhoxJrmr0gOQWo9qBfaorXodOHq0o1x8roN3cSMyC6ZT942uQBIlL53Jl804sV6oY9/fXAGg4WcjFdZuxlFV7GNPFRzFs7VKCRiV7ejJrTa/eDr1rFKXZOQCocEyTgHQAyUdD4B2d4cF8pohg4zC0YUFU7z5C9Jy7sVvbKPtsH6GT0tCGBtFwspBTz/zRixyApbSKk8te5+aZ4l4JdUVQWpIScmQhjGocUjJCRhcTieSjURQTF89FtttpuVaLpaya8Knp1B
3OQ5Zlag/nU//9cmScS6EnONrauWjazIQv3kCoVD3quUPS+uAXHU7z1SpATpEQchSA78AwD0WVnxa1XkdjURlCJRGQHMfN/EuEjk9jyr4NRN47Hltjc58Gm0sraTjZ/w3l5BLuKkZJdFzT1f5+3Sq3NZjRDNAjaX1orb2BX2wEmkA9fvGGbvW7Q+OlUu+2wlIqdx+h3dzkJVPrda5iQJ93p+DRqcQ/PhsAw8xJ6AfHdkhuIVvoEribLl/jxKOv4Gi34T8omgnb1yOk7sdTA01AiK3J6yoGgP+gaPwHOdOP6LlTlXb3mNYXAlI8da9/e0pJBZovV2BrakYzQK/I3bg0SsiiCqClqs/0wAPB6UOVo6k3+CdEETwm1aPtP+dLlLJPSKAHOYDWCoVLlYTkKAKcCU4vO7IrhErFsLVLPXZ+V0haDcN+v8xjB9strdQfPavUA0ckefRxWNuwVNS6rBRKQB44r+Lmc5f7TRAgaFQyYzb9Dv/4gd18ASQ8/gsC0zwJNJVcw97aeWmOcDtaAW6eLXZLBchTC8EhWXbW6o+cInhMipetuu9OUvTWNnwNodzx+krlvAQIGjmECV+spyH/Ak3F5QDok+OoPXicip2HiJiWTuH6rQx6eh7BxlT0STH4xUbSUl6Df/xAIqaO9bBVn3taKUuy/ZAwYZImpvx4FYjVRgQzOec9r1vK0TmrldMiIDkO45ZXegxLLrRW13P0/heQHQ4CUhIYvfElNIHOtWaztNJ4qZQBqfFKLg3OMz135rNY624ClB0tHJcomTA5ZMGnANbaBmoOHPMy5hvZebNuLCoj71frXIN0i9pDJzj24IsIlUTCo7NI3/KyQg5ArfMleEyKBzmA6r1HO8eV+dSEySEB2G3yRpwZP1c2f+n1GjB07RIlcwNoKi7j3G839EhQF2cg6fmHmbznPRKevJ/GorIedV1wtLVzJesrV9WqQtoIHRfWjreSjwGar1ZRui3Ho7PfwHBGb3jRg6S1roGeoIuNJGBIPKV/zSF31irOrn4HXAu9B1zduhtLecelQxZZ9xTtrgC342Df8IwQyaYqBMKEWo0xaw1BI4d4DNJSWcfF32fRWnuD5NWPEDZ5lIe8NDuHq1v+ha2xGdkho4szYJg1hbj501EH6OgJ5oIS8hf/oWPm5HqNrE51vdt4nC/7k+9bIIT8GYA2Ipixn5jwjQrrZsju0XT5GubTRfiEBqFPisUvOrzPPi0VdeQ9YcJ63bWmxbzphTk7XHKvA/DrlJkfAU+Bcy2N+fA3vZK0WVoxny4idOKIfn+IO7lTz7zRObWCjdMv7VnhruOV9dws9F8u4CsAS1k1J54wYS4o6arWaaS8hvLP998yuZtnisl7wuROLkdjsKzqqtfL45FjB8gzwZnIJy6dS8Jjs3p8ausvHG3tXN26mytZO5W8Rcjsbg1Qze/X45ELHY9I7wHLXG26+CgSl8zFkDGh3zdkF2S7nep9PzhzmnK3FEGwUWOwrJr6zTdeL529EnRhf3LmfCHEBkBZiNrwIAwZkwi9a5Qzh9D6dNvXYW3jZkEJ9UdOOYPwdY/gXgdiufuGuC2C4Hy3kWXrOhmeBLQeA6jV6GLC8Y0KR613Hn+2phZaK69jqah1P/hdsCKLLIfGtnbG+f3eyfHtEHTh38mzom2SY4WQWQjE9tnBE+XIZKuQNrqCcH9wSwRdMGGSJiTnpatwTJOFMIKcgvPVX/kNIcM1gSgC8iTZfii3aEL+7fyG+C+6O8izl1GE5gAAAABJRU5ErkJggg==)](https://github.com/biopragmatics/bioregistry)
+[![Powered by the Bioregistry](https://img.shields.io/static/v1?label=Powered%20by&message=Bioregistry&color=BA274A&style=flat&logo=image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACgAAAAoCAYAAACM/rhtAAAACXBIWXMAAAEnAAABJwGNvPDMAAAAGXRFWHRTb2Z0d2FyZQB3d3cuaW5rc2NhcGUub3Jnm+48GgAACi9JREFUWIWtmXl41MUZxz/z291sstmQO9mQG0ISwHBtOOSwgpUQhApWgUfEowKigKI81actypaqFbWPVkGFFKU0Vgs+YgvhEAoqEUESrnDlEEhCbkLYJtlkk9399Y/N/rKbzQXt96+Zed+Z9/t7Z+adeecnuA1s5yFVSGrLOAf2qTiEEYlUZKIAfYdKE7KoBLkQSc4XgkPfXxz/owmT41ZtiVtR3j94eqxQq5aDeASIvkVb12RBtt0mb5xZsvfa/5XgnqTMcI3Eq7IQjwM+7jJJo8YvNhK/qDBUOl8A7JZWWqqu01Jeg6Pd1nW4NuBjjax6eWrRruv/M8EDqTMflmXeB0Jcbb6RIRhmTCJ0ymgC0wYjadTd9nW0tWMu+In63NNU7c3FWtvgJpXrZVlakVGU8/ltEcwzGjU3miI/ABa72vwTB5K45AEi7x2PUEl9fZsHZLuDmgPHuLJpJ82lle6iTSH6mpXp+fnt/Sa4yzhbp22yfwFkgnMaBy17kPhFmQh1997qLxztNkq35XB505fINtf0iz1WvfTQ7Pxdlj4Jdnjuny5yvpEhjHh7FQOGD/YyZi4owS86HJ+QQMDpJaBf3jUXlHD21+8q0y4LDppV/vfNO7+jzV3Pa6SOac0E8I8fSPonpm7JAVR+eRhzwU/Ofj+e49tpT/HdtGXcyLvQJ8HAtCTGfmJCF2dwfpTMz4NszX/uqqdyr+xPyVwoEK+C03PGrDX4GkJ7NBJ+txH/hCgAit7cRlNxOY62dmzmZgwzJvZJUh2gI/xnRmoOHsfe3AqQ/kho0qXs+pLzLh3FgwdT54YKxLsAQq0mbf1zHuTsltZejemHJSrlgGGDPGTXc09zdM5qTi59jZbKOg+Zb1QYI95+XokEQogPDifPDnPJFQ8uCkl8FyGmACQtn4dhxp3KINX7jnHi0ZeJnT8dla8Plbu+48zzfyJ08kh8ggIACB4zlIAhsURm3EnML6eB6Fzep1a+SUt5DS2VddTs+4GQccPRhgV1kowIQRaChhMXAPxkIev/Vl+8R/HgnqTMmI4gjH/iQOIXZSqdzQUlXDB9RPyi+1DrdVx67WMursvCkDERXYxB0ROSIOKecURMG+tBzkXAhbYbZk6teNPLkwmPzUIX71wuMiw+MHx2nEJQrWIFHSdE4pIHlFDisLZxYe1HhIwfTtLK+RSu30rVnlxGvrOapOcW9DsW3vH6CgKS4zxIXlz3Fw8dSaMmcfEcV9XHYbc/DSCZMEkgFoJzY0TeO17pVL7jANbaBoauWUJlTi4VOw+T9sazBKYl0ZB/qV/kALThQRi3vOJB0lpzw0vPMONOtOHOqRcyi7bzkEqanJo3HogBMGROUrziaGundGsOsQsyUPn6UPx2NvELZxIybhinn3uLyx9uVwaW7XbqjxdQmr2X0uy93Dh+Dtlu9zCu9vdj1PsvEWwcii7OwJAXFnoRFCoVhoxJrmr0gOQWo9qBfaorXodOHq0o1x8roN3cSMyC6ZT942uQBIlL53Jl804sV6oY9/fXAGg4WcjFdZuxlFV7GNPFRzFs7VKCRiV7ejJrTa/eDr1rFKXZOQCocEyTgHQAyUdD4B2d4cF8pohg4zC0YUFU7z5C9Jy7sVvbKPtsH6GT0tCGBtFwspBTz/zRixyApbSKk8te5+aZ4l4JdUVQWpIScmQhjGocUjJCRhcTieSjURQTF89FtttpuVaLpaya8Knp1B
3OQ5Zlag/nU//9cmScS6EnONrauWjazIQv3kCoVD3quUPS+uAXHU7z1SpATpEQchSA78AwD0WVnxa1XkdjURlCJRGQHMfN/EuEjk9jyr4NRN47Hltjc58Gm0sraTjZ/w3l5BLuKkZJdFzT1f5+3Sq3NZjRDNAjaX1orb2BX2wEmkA9fvGGbvW7Q+OlUu+2wlIqdx+h3dzkJVPrda5iQJ93p+DRqcQ/PhsAw8xJ6AfHdkhuIVvoEribLl/jxKOv4Gi34T8omgnb1yOk7sdTA01AiK3J6yoGgP+gaPwHOdOP6LlTlXb3mNYXAlI8da9/e0pJBZovV2BrakYzQK/I3bg0SsiiCqClqs/0wAPB6UOVo6k3+CdEETwm1aPtP+dLlLJPSKAHOYDWCoVLlYTkKAKcCU4vO7IrhErFsLVLPXZ+V0haDcN+v8xjB9strdQfPavUA0ckefRxWNuwVNS6rBRKQB44r+Lmc5f7TRAgaFQyYzb9Dv/4gd18ASQ8/gsC0zwJNJVcw97aeWmOcDtaAW6eLXZLBchTC8EhWXbW6o+cInhMipetuu9OUvTWNnwNodzx+krlvAQIGjmECV+spyH/Ak3F5QDok+OoPXicip2HiJiWTuH6rQx6eh7BxlT0STH4xUbSUl6Df/xAIqaO9bBVn3taKUuy/ZAwYZImpvx4FYjVRgQzOec9r1vK0TmrldMiIDkO45ZXegxLLrRW13P0/heQHQ4CUhIYvfElNIHOtWaztNJ4qZQBqfFKLg3OMz135rNY624ClB0tHJcomTA5ZMGnANbaBmoOHPMy5hvZebNuLCoj71frXIN0i9pDJzj24IsIlUTCo7NI3/KyQg5ArfMleEyKBzmA6r1HO8eV+dSEySEB2G3yRpwZP1c2f+n1GjB07RIlcwNoKi7j3G839EhQF2cg6fmHmbznPRKevJ/GorIedV1wtLVzJesrV9WqQtoIHRfWjreSjwGar1ZRui3Ho7PfwHBGb3jRg6S1roGeoIuNJGBIPKV/zSF31irOrn4HXAu9B1zduhtLecelQxZZ9xTtrgC342Df8IwQyaYqBMKEWo0xaw1BI4d4DNJSWcfF32fRWnuD5NWPEDZ5lIe8NDuHq1v+ha2xGdkho4szYJg1hbj501EH6OgJ5oIS8hf/oWPm5HqNrE51vdt4nC/7k+9bIIT8GYA2Ipixn5jwjQrrZsju0XT5GubTRfiEBqFPisUvOrzPPi0VdeQ9YcJ63bWmxbzphTk7XHKvA/DrlJkfAU+Bcy2N+fA3vZK0WVoxny4idOKIfn+IO7lTz7zRObWCjdMv7VnhruOV9dws9F8u4CsAS1k1J54wYS4o6arWaaS8hvLP998yuZtnisl7wuROLkdjsKzqqtfL45FjB8gzwZnIJy6dS8Jjs3p8ausvHG3tXN26mytZO5W8Rcjsbg1Qze/X45ELHY9I7wHLXG26+CgSl8zFkDGh3zdkF2S7nep9PzhzmnK3FEGwUWOwrJr6zTdeL529EnRhf3LmfCHEBkBZiNrwIAwZkwi9a5Qzh9D6dNvXYW3jZkEJ9UdOOYPwdY/gXgdiufuGuC2C4Hy3kWXrOhmeBLQeA6jV6GLC8Y0KR613Hn+2phZaK69jqah1P/hdsCKLLIfGtnbG+f3eyfHtEHTh38mzom2SY4WQWQjE9tnBE+XIZKuQNrqCcH9wSwRdMGGSJiTnpatwTJOFMIKcgvPVX/kNIcM1gSgC8iTZfii3aEL+7fyG+C+6O8izl1GE5gAAAABJRU5ErkJggg==)](https://github.com/biopragmatics/bioregistry)
## ❓ Description
Knowledge graphs (KGs) are an [approach to knowledge
@@ -60,8 +60,8 @@ please join our community at https://biocypher.zulipchat.com!
> This disclaimer was adapted from the [Pooch](https://github.com/fatiando/pooch) project.
## ✍️ Citation
-The BioCypher paper has been peer-reviewed in
-[Nature Biotechnology](https://www.nature.com/articles/s41587-023-01848-y).
+The BioCypher paper has been peer-reviewed in
+[Nature Biotechnology](https://www.nature.com/articles/s41587-023-01848-y).
Before, it was available as a preprint at https://arxiv.org/abs/2212.13543.
## Acknowledgements
diff --git a/biocypher/__init__.py b/biocypher/__init__.py
index 52c067da..6222ea76 100644
--- a/biocypher/__init__.py
+++ b/biocypher/__init__.py
@@ -13,14 +13,14 @@
"""
__all__ = [
- '__version__',
- '__author__',
- 'module_data',
- 'config',
- 'logfile',
- 'log',
- 'Driver',
- 'BioCypher',
+ "__version__",
+ "__author__",
+ "module_data",
+ "config",
+ "logfile",
+ "log",
+ "Driver",
+ "BioCypher",
]
from ._core import BioCypher
@@ -30,11 +30,10 @@
class Driver(BioCypher):
-
# initialise parent class but log a warning
def __init__(self, *args, **kwargs):
logger.warning(
- 'The class `Driver` is deprecated and will be removed in a future '
- 'release. Please use `BioCypher` instead.'
+ "The class `Driver` is deprecated and will be removed in a future "
+ "release. Please use `BioCypher` instead."
)
super().__init__(*args, **kwargs)
diff --git a/biocypher/_config/__init__.py b/biocypher/_config/__init__.py
index 584a30a5..3d421c1e 100644
--- a/biocypher/_config/__init__.py
+++ b/biocypher/_config/__init__.py
@@ -23,10 +23,10 @@
import yaml
import appdirs
-__all__ = ['module_data', 'module_data_path', 'read_config', 'config', 'reset']
+__all__ = ["module_data", "module_data_path", "read_config", "config", "reset"]
-_USER_CONFIG_DIR = appdirs.user_config_dir('biocypher', 'saezlab')
-_USER_CONFIG_FILE = os.path.join(_USER_CONFIG_DIR, 'conf.yaml')
+_USER_CONFIG_DIR = appdirs.user_config_dir("biocypher", "saezlab")
+_USER_CONFIG_FILE = os.path.join(_USER_CONFIG_DIR, "conf.yaml")
class MyLoader(yaml.SafeLoader):
@@ -34,18 +34,18 @@ def construct_scalar(self, node):
# Check if the scalar contains double quotes and an escape sequence
value = super().construct_scalar(node)
q = bool(node.style == '"')
- b = bool('\\' in value.encode('unicode_escape').decode('utf-8'))
+ b = bool("\\" in value.encode("unicode_escape").decode("utf-8"))
if q and b:
warnings.warn(
(
- 'Double quotes detected in YAML configuration scalar: '
+ "Double quotes detected in YAML configuration scalar: "
f"{value.encode('unicode_escape')}. "
- 'These allow escape sequences and may cause problems, for '
+ "These allow escape sequences and may cause problems, for "
"instance with the Neo4j admin import files (e.g. '\\t'). "
- 'Make sure you wanted to do this, and use single quotes '
- 'whenever possible.'
+ "Make sure you wanted to do this, and use single quotes "
+ "whenever possible."
),
- category=UserWarning
+ category=UserWarning,
)
return value
@@ -57,7 +57,7 @@ def module_data_path(name: str) -> str:
here = os.path.dirname(os.path.abspath(__file__))
- return os.path.join(here, f'{name}.yaml')
+ return os.path.join(here, f"{name}.yaml")
def module_data(name: str) -> Any:
@@ -71,11 +71,8 @@ def module_data(name: str) -> Any:
def _read_yaml(path: str) -> Optional[dict]:
-
if os.path.exists(path):
-
- with open(path, 'r') as fp:
-
+ with open(path, "r") as fp:
return yaml.load(fp.read(), Loader=MyLoader)
@@ -89,18 +86,22 @@ def read_config() -> dict:
TODO explain path configuration
"""
- defaults = module_data('biocypher_config')
+ defaults = module_data("biocypher_config")
user = _read_yaml(_USER_CONFIG_FILE) or {}
# TODO account for .yml?
- local = _read_yaml('biocypher_config.yaml'
- ) or _read_yaml('config/biocypher_config.yaml') or {}
+ local = (
+ _read_yaml("biocypher_config.yaml")
+ or _read_yaml("config/biocypher_config.yaml")
+ or {}
+ )
for key in defaults:
-
- value = local[key] if key in local else user[key] if key in user else None
+ value = (
+ local[key] if key in local else user[key] if key in user else None
+ )
if value is not None:
- if type(defaults[key]) == str: # first level config (like title)
+ if type(defaults[key]) == str: # first level config (like title)
defaults[key] = value
else:
defaults[key].update(value)
@@ -114,20 +115,17 @@ def config(*args, **kwargs) -> Optional[Any]:
"""
if args and kwargs:
-
raise ValueError(
- 'Setting and getting values in the same call is not allowed.',
+ "Setting and getting values in the same call is not allowed.",
)
if args:
-
- result = tuple(globals()['_config'].get(key, None) for key in args)
+ result = tuple(globals()["_config"].get(key, None) for key in args)
return result[0] if len(result) == 1 else result
for key, value in kwargs.items():
-
- globals()['_config'][key].update(value)
+ globals()["_config"][key].update(value)
def reset():
@@ -135,7 +133,7 @@ def reset():
Reload configuration from the config files.
"""
- globals()['_config'] = read_config()
+ globals()["_config"] = read_config()
reset()
diff --git a/biocypher/_config/biocypher_config.yaml b/biocypher/_config/biocypher_config.yaml
index 8fae981f..a31167be 100644
--- a/biocypher/_config/biocypher_config.yaml
+++ b/biocypher/_config/biocypher_config.yaml
@@ -109,5 +109,3 @@ postgresql:
delimiter: '\t'
# import_call_bin_prefix: '' # path to "psql"
# import_call_file_prefix: '/path/to/files'
-
-
\ No newline at end of file
diff --git a/biocypher/_connect.py b/biocypher/_connect.py
index 88f3b3aa..3e2a2a93 100644
--- a/biocypher/_connect.py
+++ b/biocypher/_connect.py
@@ -13,7 +13,7 @@
"""
from ._logger import logger
-logger.debug(f'Loading module {__name__}.')
+logger.debug(f"Loading module {__name__}.")
from typing import Optional
from collections.abc import Iterable
@@ -27,10 +27,10 @@
from ._ontology import Ontology
from ._translate import Translator
-__all__ = ['_Neo4jDriver']
+__all__ = ["_Neo4jDriver"]
-class _Neo4jDriver():
+class _Neo4jDriver:
"""
Manages a BioCypher connection to a Neo4j database using the
``neo4j_utils.Driver`` class.
@@ -58,6 +58,7 @@ class _Neo4jDriver():
translator (Translator): The translator to use for mapping.
"""
+
def __init__(
self,
database_name: str,
@@ -71,7 +72,6 @@ def __init__(
fetch_size: int = 1000,
increment_version: bool = True,
):
-
self._ontology = ontology
self._translator = translator
@@ -89,23 +89,18 @@ def __init__(
# check for biocypher config in connected graph
if wipe:
-
self.init_db()
if increment_version:
-
# set new current version node
self._update_meta_graph()
def _update_meta_graph(self):
-
- logger.info('Updating Neo4j meta graph.')
+ logger.info("Updating Neo4j meta graph.")
# find current version node
db_version = self._driver.query(
- 'MATCH (v:BioCypher) '
- 'WHERE NOT (v)-[:PRECEDES]->() '
- 'RETURN v',
+ "MATCH (v:BioCypher) " "WHERE NOT (v)-[:PRECEDES]->() " "RETURN v",
)
# add version node
self.add_biocypher_nodes(self._ontology)
@@ -113,11 +108,11 @@ def _update_meta_graph(self):
# connect version node to previous
if db_version[0]:
previous = db_version[0][0]
- previous_id = previous['v']['id']
+ previous_id = previous["v"]["id"]
e_meta = BioCypherEdge(
previous_id,
- self._ontology.get_dict().get('node_id'),
- 'PRECEDES',
+ self._ontology.get_dict().get("node_id"),
+ "PRECEDES",
)
self.add_biocypher_edges(e_meta)
@@ -132,7 +127,7 @@ def init_db(self):
need of the database
"""
- logger.info('Initialising database.')
+ logger.info("Initialising database.")
self._create_constraints()
def _create_constraints(self):
@@ -144,17 +139,16 @@ def _create_constraints(self):
constraints on the id of all entities represented as nodes.
"""
- logger.info('Creating constraints for node types in config.')
+ logger.info("Creating constraints for node types in config.")
# get structure
for leaf in self._ontology.extended_schema.items():
label = _misc.sentencecase_to_pascalcase(leaf[0])
- if leaf[1]['represented_as'] == 'node':
-
+ if leaf[1]["represented_as"] == "node":
s = (
- f'CREATE CONSTRAINT `{label}_id` '
- f'IF NOT EXISTS ON (n:`{label}`) '
- 'ASSERT n.id IS UNIQUE'
+ f"CREATE CONSTRAINT `{label}_id` "
+ f"IF NOT EXISTS ON (n:`{label}`) "
+ "ASSERT n.id IS UNIQUE"
)
self._driver.query(s)
@@ -246,38 +240,36 @@ def add_biocypher_nodes(
"""
try:
-
nodes = _misc.to_list(nodes)
entities = [node.get_dict() for node in nodes]
except AttributeError:
-
- msg = 'Nodes must have a `get_dict` method.'
+ msg = "Nodes must have a `get_dict` method."
logger.error(msg)
raise ValueError(msg)
- logger.info(f'Merging {len(entities)} nodes.')
+ logger.info(f"Merging {len(entities)} nodes.")
entity_query = (
- 'UNWIND $entities AS ent '
- 'CALL apoc.merge.node([ent.node_label], '
- '{id: ent.node_id}, ent.properties, ent.properties) '
- 'YIELD node '
- 'RETURN node'
+ "UNWIND $entities AS ent "
+ "CALL apoc.merge.node([ent.node_label], "
+ "{id: ent.node_id}, ent.properties, ent.properties) "
+ "YIELD node "
+ "RETURN node"
)
- method = 'explain' if explain else 'profile' if profile else 'query'
+ method = "explain" if explain else "profile" if profile else "query"
result = getattr(self._driver, method)(
entity_query,
parameters={
- 'entities': entities,
+ "entities": entities,
},
)
- logger.info('Finished merging nodes.')
+ logger.info("Finished merging nodes.")
return result
@@ -326,28 +318,23 @@ def add_biocypher_edges(
rels = []
try:
-
for e in edges:
-
- if hasattr(e, 'get_node'):
-
+ if hasattr(e, "get_node"):
nodes.append(e.get_node())
rels.append(e.get_source_edge().get_dict())
rels.append(e.get_target_edge().get_dict())
else:
-
rels.append(e.get_dict())
except AttributeError:
-
- msg = 'Edges and nodes must have a `get_dict` method.'
+ msg = "Edges and nodes must have a `get_dict` method."
logger.error(msg)
raise ValueError(msg)
self.add_biocypher_nodes(nodes)
- logger.info(f'Merging {len(rels)} edges.')
+ logger.info(f"Merging {len(rels)} edges.")
# cypher query
@@ -355,41 +342,40 @@ def add_biocypher_edges(
# properties on match and on create;
# TODO add node labels?
node_query = (
- 'UNWIND $rels AS r '
- 'MERGE (src {id: r.source_id}) '
- 'MERGE (tar {id: r.target_id}) '
+ "UNWIND $rels AS r "
+ "MERGE (src {id: r.source_id}) "
+ "MERGE (tar {id: r.target_id}) "
)
- self._driver.query(node_query, parameters={'rels': rels})
+ self._driver.query(node_query, parameters={"rels": rels})
edge_query = (
- 'UNWIND $rels AS r '
- 'MATCH (src {id: r.source_id}) '
- 'MATCH (tar {id: r.target_id}) '
- 'WITH src, tar, r '
- 'CALL apoc.merge.relationship'
- '(src, r.relationship_label, NULL, '
- 'r.properties, tar, r.properties) '
- 'YIELD rel '
- 'RETURN rel'
+ "UNWIND $rels AS r "
+ "MATCH (src {id: r.source_id}) "
+ "MATCH (tar {id: r.target_id}) "
+ "WITH src, tar, r "
+ "CALL apoc.merge.relationship"
+ "(src, r.relationship_label, NULL, "
+ "r.properties, tar, r.properties) "
+ "YIELD rel "
+ "RETURN rel"
)
- method = 'explain' if explain else 'profile' if profile else 'query'
+ method = "explain" if explain else "profile" if profile else "query"
- result = getattr(self._driver,
- method)(edge_query, parameters={
- 'rels': rels
- })
+ result = getattr(self._driver, method)(
+ edge_query, parameters={"rels": rels}
+ )
- logger.info('Finished merging edges.')
+ logger.info("Finished merging edges.")
return result
def get_driver(
dbms: str,
- translator: 'Translator',
- ontology: 'Ontology',
+ translator: "Translator",
+ ontology: "Ontology",
):
"""
Function to return the writer class.
@@ -400,14 +386,14 @@ def get_driver(
dbms_config = _config(dbms)
- if dbms == 'neo4j':
+ if dbms == "neo4j":
return _Neo4jDriver(
- database_name=dbms_config['database_name'],
- wipe=dbms_config['wipe'],
- uri=dbms_config['uri'],
- user=dbms_config['user'],
- password=dbms_config['password'],
- multi_db=dbms_config['multi_db'],
+ database_name=dbms_config["database_name"],
+ wipe=dbms_config["wipe"],
+ uri=dbms_config["uri"],
+ user=dbms_config["user"],
+ password=dbms_config["password"],
+ multi_db=dbms_config["multi_db"],
ontology=ontology,
translator=translator,
)
diff --git a/biocypher/_core.py b/biocypher/_core.py
index a6096eb0..2cf6f796 100644
--- a/biocypher/_core.py
+++ b/biocypher/_core.py
@@ -12,34 +12,36 @@
BioCypher core module. Interfaces with the user and distributes tasks to
submodules.
"""
-from typing import Dict, List, Optional
+from typing import Optional
+
from more_itertools import peekable
+
import pandas as pd
from ._logger import logger
-logger.debug(f'Loading module {__name__}.')
+logger.debug(f"Loading module {__name__}.")
from ._write import get_writer
-from ._pandas import Pandas
from ._config import config as _config
from ._config import update_from_file as _file_update
from ._create import BioCypherEdge, BioCypherNode
+from ._pandas import Pandas
from ._connect import get_driver
from ._mapping import OntologyMapping
from ._ontology import Ontology
from ._translate import Translator
from ._deduplicate import Deduplicator
-__all__ = ['BioCypher']
+__all__ = ["BioCypher"]
-SUPPORTED_DBMS = ['neo4j', 'postgresql']
+SUPPORTED_DBMS = ["neo4j", "postgresql"]
REQUIRED_CONFIG = [
- 'dbms',
- 'offline',
- 'strict_mode',
- 'head_ontology',
+ "dbms",
+ "offline",
+ "strict_mode",
+ "head_ontology",
]
@@ -75,6 +77,7 @@ class BioCypher:
provided, the default value 'biocypher-out' will be used.
"""
+
def __init__(
self,
dbms: str = None,
@@ -88,65 +91,64 @@ def __init__(
# legacy params
db_name: str = None,
):
-
# Update configuration if custom path is provided
if biocypher_config_path:
_file_update(biocypher_config_path)
if db_name:
logger.warning(
- 'The parameter `db_name` is deprecated. Please set the '
- '`database_name` setting in the `biocypher_config.yaml` file '
- 'instead.'
+ "The parameter `db_name` is deprecated. Please set the "
+ "`database_name` setting in the `biocypher_config.yaml` file "
+ "instead."
)
- _config(**{db_name: {'database_name': db_name}})
+ _config(**{db_name: {"database_name": db_name}})
# Load configuration
- self.base_config = _config('biocypher')
+ self.base_config = _config("biocypher")
# Check for required configuration
for key in REQUIRED_CONFIG:
if key not in self.base_config:
- raise ValueError(f'Configuration key {key} is required.')
+ raise ValueError(f"Configuration key {key} is required.")
# Set configuration - mandatory
- self._dbms = dbms or self.base_config['dbms']
+ self._dbms = dbms or self.base_config["dbms"]
if offline is None:
- self._offline = self.base_config['offline']
+ self._offline = self.base_config["offline"]
else:
self._offline = offline
if strict_mode is None:
- self._strict_mode = self.base_config['strict_mode']
+ self._strict_mode = self.base_config["strict_mode"]
else:
self._strict_mode = strict_mode
self._schema_config_path = schema_config_path or self.base_config.get(
- 'schema_config_path'
+ "schema_config_path"
)
if not self._schema_config_path:
raise ValueError(
- 'BioCypher requires a schema configuration; please provide a '
- 'path to the schema configuration YAML file via '
- '`biocypher_config.yaml` or `BioCypher` class parameter.'
+ "BioCypher requires a schema configuration; please provide a "
+ "path to the schema configuration YAML file via "
+ "`biocypher_config.yaml` or `BioCypher` class parameter."
)
- self._head_ontology = head_ontology or self.base_config['head_ontology']
+ self._head_ontology = head_ontology or self.base_config["head_ontology"]
# Set configuration - optional
self._output_directory = output_directory or self.base_config.get(
- 'output_directory'
+ "output_directory"
)
self._tail_ontologies = tail_ontologies or self.base_config.get(
- 'tail_ontologies'
+ "tail_ontologies"
)
if self._dbms not in SUPPORTED_DBMS:
raise ValueError(
- f'DBMS {self._dbms} not supported. '
- f'Please select from {SUPPORTED_DBMS}.'
+ f"DBMS {self._dbms} not supported. "
+ f"Please select from {SUPPORTED_DBMS}."
)
# Initialize
@@ -156,7 +158,7 @@ def __init__(
self._ontology = None
self._writer = None
self._pd = None
-
+
def _get_deduplicator(self) -> Deduplicator:
"""
Create deduplicator if not exists and return.
@@ -222,7 +224,7 @@ def _get_writer(self):
strict_mode=self._strict_mode,
)
else:
- raise NotImplementedError('Cannot get writer in online mode.')
+ raise NotImplementedError("Cannot get writer in online mode.")
def _get_driver(self):
"""
@@ -237,12 +239,12 @@ def _get_driver(self):
deduplicator=self._get_deduplicator(),
)
else:
- raise NotImplementedError('Cannot get driver in offline mode.')
+ raise NotImplementedError("Cannot get driver in offline mode.")
def write_nodes(self, nodes, batch_size: int = int(1e6)) -> bool:
"""
Write nodes to database. Either takes an iterable of tuples (if given,
- translates to ``BioCypherNode`` objects) or an iterable of
+ translates to ``BioCypherNode`` objects) or an iterable of
``BioCypherNode`` objects.
Args:
@@ -287,7 +289,7 @@ def write_edges(self, edges, batch_size: int = int(1e6)) -> bool:
# write edge files
return self._writer.write_edges(tedges, batch_size=batch_size)
- def to_df(self) -> List[pd.DataFrame]:
+ def to_df(self) -> list[pd.DataFrame]:
"""
Convert entities to a pandas DataFrame for each entity type and return
a list.
@@ -303,9 +305,8 @@ def to_df(self) -> List[pd.DataFrame]:
raise ValueError(
"No pandas instance found. Please call `add()` first."
)
-
+
return self._pd.dfs
-
def add(self, entities):
"""
@@ -323,7 +324,9 @@ def add(self, entities):
entities = peekable(entities)
- if isinstance(entities.peek(), BioCypherNode) or isinstance(entities.peek(), BioCypherEdge):
+ if isinstance(entities.peek(), BioCypherNode) or isinstance(
+ entities.peek(), BioCypherEdge
+ ):
tentities = entities
elif len(entities.peek()) < 4:
tentities = self._translator.translate_nodes(entities)
@@ -367,11 +370,11 @@ def merge_edges(self, edges) -> bool:
Merge edges into database. Either takes an iterable of tuples (if given,
translates to ``BioCypherEdge`` objects) or an iterable of
``BioCypherEdge`` objects.
-
+
Args:
- edges (iterable): An iterable of edges to merge into the database.
+ edges (iterable): An iterable of edges to merge into the database.
- Returns:
+ Returns:
bool: True if successful.
"""
@@ -388,7 +391,7 @@ def merge_edges(self, edges) -> bool:
# OVERVIEW AND CONVENIENCE METHODS ###
- def log_missing_input_labels(self) -> Optional[Dict[str, List[str]]]:
+ def log_missing_input_labels(self) -> Optional[dict[str, list[str]]]:
"""
Get the set of input labels encountered without an entry in the
@@ -405,19 +408,19 @@ def log_missing_input_labels(self) -> Optional[Dict[str, List[str]]]:
if mt:
msg = (
- 'Input entities not accounted for due to them not being '
- 'present in the `schema_config.yaml` configuration file '
- '(this is not necessarily a problem, if you did not intend '
- 'to include them in the database; see the log for details): \n'
+ "Input entities not accounted for due to them not being "
+ "present in the `schema_config.yaml` configuration file "
+ "(this is not necessarily a problem, if you did not intend "
+ "to include them in the database; see the log for details): \n"
)
for k, v in mt.items():
- msg += f' {k}: {v} \n'
+ msg += f" {k}: {v} \n"
logger.info(msg)
return mt
else:
- logger.info('No missing labels in input.')
+ logger.info("No missing labels in input.")
return None
def log_duplicates(self) -> None:
@@ -429,46 +432,44 @@ def log_duplicates(self) -> None:
dn = self._deduplicator.get_duplicate_nodes()
if dn:
-
ntypes = dn[0]
nids = dn[1]
- msg = ('Duplicate node types encountered (IDs in log): \n')
+ msg = "Duplicate node types encountered (IDs in log): \n"
for typ in ntypes:
- msg += f' {typ}\n'
+ msg += f" {typ}\n"
logger.info(msg)
- idmsg = ('Duplicate node IDs encountered: \n')
+ idmsg = "Duplicate node IDs encountered: \n"
for _id in nids:
- idmsg += f' {_id}\n'
+ idmsg += f" {_id}\n"
logger.debug(idmsg)
else:
- logger.info('No duplicate nodes in input.')
+ logger.info("No duplicate nodes in input.")
de = self._deduplicator.get_duplicate_edges()
if de:
-
etypes = de[0]
eids = de[1]
- msg = ('Duplicate edge types encountered (IDs in log): \n')
+ msg = "Duplicate edge types encountered (IDs in log): \n"
for typ in etypes:
- msg += f' {typ}\n'
+ msg += f" {typ}\n"
logger.info(msg)
- idmsg = ('Duplicate edge IDs encountered: \n')
+ idmsg = "Duplicate edge IDs encountered: \n"
for _id in eids:
- idmsg += f' {_id}\n'
+ idmsg += f" {_id}\n"
logger.debug(idmsg)
else:
- logger.info('No duplicate edges in input.')
+ logger.info("No duplicate edges in input.")
def show_ontology_structure(self, **kwargs) -> None:
"""
@@ -498,7 +499,7 @@ def write_import_call(self) -> None:
if not self._offline:
raise NotImplementedError(
- 'Cannot write import call in online mode.'
+ "Cannot write import call in online mode."
)
self._writer.write_import_call()
@@ -520,7 +521,7 @@ def translate_term(self, term: str) -> str:
self.start_ontology()
return self._translator.translate_term(term)
-
+
def summary(self) -> None:
"""
Wrapper for showing ontology structure and logging duplicates and
diff --git a/biocypher/_create.py b/biocypher/_create.py
index ca33e21b..0e6b7c00 100644
--- a/biocypher/_create.py
+++ b/biocypher/_create.py
@@ -13,16 +13,16 @@
"""
from ._logger import logger
-logger.debug(f'Loading module {__name__}.')
+logger.debug(f"Loading module {__name__}.")
from typing import Union
from dataclasses import field, dataclass
import os
__all__ = [
- 'BioCypherEdge',
- 'BioCypherNode',
- 'BioCypherRelAsNode',
+ "BioCypherEdge",
+ "BioCypherNode",
+ "BioCypherRelAsNode",
]
@@ -53,7 +53,7 @@ class BioCypherNode:
node_id: str
node_label: str
- preferred_id: str = 'id'
+ preferred_id: str = "id"
properties: dict = field(default_factory=dict)
def __post_init__(self):
@@ -64,47 +64,50 @@ def __post_init__(self):
Replace unwanted characters in properties.
"""
- self.properties['id'] = self.node_id
- self.properties['preferred_id'] = self.preferred_id or None
+ self.properties["id"] = self.node_id
+ self.properties["preferred_id"] = self.preferred_id or None
# TODO actually make None possible here; as is, "id" is the default in
# the dataclass as well as in the configuration file
- if ':TYPE' in self.properties.keys():
+ if ":TYPE" in self.properties.keys():
logger.warning(
"Keyword ':TYPE' is reserved for Neo4j. "
- 'Removing from properties.',
+ "Removing from properties.",
# "Renaming to 'type'."
)
# self.properties["type"] = self.properties[":TYPE"]
- del self.properties[':TYPE']
+ del self.properties[":TYPE"]
for k, v in self.properties.items():
if isinstance(v, str):
self.properties[k] = (
v.replace(
os.linesep,
- ' ',
- ).replace(
- '\n',
- ' ',
- ).replace(
- '\r',
- ' ',
+ " ",
+ )
+ .replace(
+ "\n",
+ " ",
+ )
+ .replace(
+ "\r",
+ " ",
)
)
elif isinstance(v, list):
- self.properties[k] = (
- [
- val.replace(
- os.linesep,
- ' ',
- ).replace(
- '\n',
- ' ',
- ).replace('\r', ' ') for val in v
- ]
- )
+ self.properties[k] = [
+ val.replace(
+ os.linesep,
+ " ",
+ )
+ .replace(
+ "\n",
+ " ",
+ )
+ .replace("\r", " ")
+ for val in v
+ ]
def get_id(self) -> str:
"""
@@ -123,7 +126,7 @@ def get_label(self) -> str:
str: node_label
"""
return self.node_label
-
+
def get_type(self) -> str:
"""
Returns primary node label.
@@ -161,9 +164,9 @@ def get_dict(self) -> dict:
properties as second-level dict.
"""
return {
- 'node_id': self.node_id,
- 'node_label': self.node_label,
- 'properties': self.properties,
+ "node_id": self.node_id,
+ "node_label": self.node_label,
+ "properties": self.properties,
}
@@ -204,30 +207,30 @@ def __post_init__(self):
Check for reserved keywords.
"""
- if ':TYPE' in self.properties.keys():
+ if ":TYPE" in self.properties.keys():
logger.debug(
"Keyword ':TYPE' is reserved for Neo4j. "
- 'Removing from properties.',
+ "Removing from properties.",
# "Renaming to 'type'."
)
# self.properties["type"] = self.properties[":TYPE"]
- del self.properties[':TYPE']
- elif 'id' in self.properties.keys():
+ del self.properties[":TYPE"]
+ elif "id" in self.properties.keys():
logger.debug(
"Keyword 'id' is reserved for Neo4j. "
- 'Removing from properties.',
+ "Removing from properties.",
# "Renaming to 'type'."
)
# self.properties["type"] = self.properties[":TYPE"]
- del self.properties['id']
- elif '_ID' in self.properties.keys():
+ del self.properties["id"]
+ elif "_ID" in self.properties.keys():
logger.debug(
"Keyword '_ID' is reserved for Postgres. "
- 'Removing from properties.',
+ "Removing from properties.",
# "Renaming to 'type'."
)
# self.properties["type"] = self.properties[":TYPE"]
- del self.properties['_ID']
+ del self.properties["_ID"]
def get_id(self) -> Union[str, None]:
"""
@@ -295,11 +298,11 @@ def get_dict(self) -> dict:
dict.
"""
return {
- 'relationship_id': self.relationship_id or None,
- 'source_id': self.source_id,
- 'target_id': self.target_id,
- 'relationship_label': self.relationship_label,
- 'properties': self.properties,
+ "relationship_id": self.relationship_id or None,
+ "source_id": self.source_id,
+ "target_id": self.target_id,
+ "relationship_label": self.relationship_label,
+ "properties": self.properties,
}
@@ -331,20 +334,20 @@ class BioCypherRelAsNode:
def __post_init__(self):
if not isinstance(self.node, BioCypherNode):
raise TypeError(
- f'BioCypherRelAsNode.node must be a BioCypherNode, '
- f'not {type(self.node)}.',
+ f"BioCypherRelAsNode.node must be a BioCypherNode, "
+ f"not {type(self.node)}.",
)
if not isinstance(self.source_edge, BioCypherEdge):
raise TypeError(
- f'BioCypherRelAsNode.source_edge must be a BioCypherEdge, '
- f'not {type(self.source_edge)}.',
+ f"BioCypherRelAsNode.source_edge must be a BioCypherEdge, "
+ f"not {type(self.source_edge)}.",
)
if not isinstance(self.target_edge, BioCypherEdge):
raise TypeError(
- f'BioCypherRelAsNode.target_edge must be a BioCypherEdge, '
- f'not {type(self.target_edge)}.',
+ f"BioCypherRelAsNode.target_edge must be a BioCypherEdge, "
+ f"not {type(self.target_edge)}.",
)
def get_node(self) -> BioCypherNode:
diff --git a/biocypher/_deduplicate.py b/biocypher/_deduplicate.py
index e1cd5c69..5ac79abb 100644
--- a/biocypher/_deduplicate.py
+++ b/biocypher/_deduplicate.py
@@ -1,9 +1,10 @@
from ._logger import logger
-logger.debug(f'Loading module {__name__}.')
+logger.debug(f"Loading module {__name__}.")
from ._create import BioCypherEdge, BioCypherNode
+
class Deduplicator:
"""
Singleton class responsible of deduplicating BioCypher inputs. Maintains
@@ -18,13 +19,13 @@ class Deduplicator:
"""
def __init__(self):
- self.seen_node_ids = set()
- self.duplicate_node_ids = set()
- self.duplicate_node_types = set()
+ self.seen_node_ids = set()
+ self.duplicate_node_ids = set()
+ self.duplicate_node_types = set()
- self.seen_edges = {}
- self.duplicate_edge_ids = set()
- self.duplicate_edge_types = set()
+ self.seen_edges = {}
+ self.duplicate_edge_ids = set()
+ self.duplicate_edge_types = set()
def node_seen(self, node: BioCypherNode) -> bool:
"""
@@ -39,13 +40,15 @@ def node_seen(self, node: BioCypherNode) -> bool:
if node.get_id() in self.seen_node_ids:
self.duplicate_node_ids.add(node.get_id())
if node.get_label() not in self.duplicate_node_types:
- logger.warning(f"Duplicate node type {node.get_label()} found. ")
+ logger.warning(
+ f"Duplicate node type {node.get_label()} found. "
+ )
self.duplicate_node_types.add(node.get_label())
return True
-
+
self.seen_node_ids.add(node.get_id())
return False
-
+
def edge_seen(self, edge: BioCypherEdge) -> bool:
"""
Adds an edge to the instance and checks if it has been seen before.
@@ -71,10 +74,10 @@ def edge_seen(self, edge: BioCypherEdge) -> bool:
logger.warning(f"Duplicate edge type {edge.get_type()} found. ")
self.duplicate_edge_types.add(edge.get_type())
return True
-
+
self.seen_edges[edge.get_type()].add(_id)
return False
-
+
def get_duplicate_nodes(self):
"""
Function to return a list of duplicate nodes.
@@ -99,4 +102,4 @@ def get_duplicate_edges(self):
if self.duplicate_edge_types:
return (self.duplicate_edge_types, self.duplicate_edge_ids)
else:
- return None
\ No newline at end of file
+ return None
diff --git a/biocypher/_logger.py b/biocypher/_logger.py
index bb09a825..c936a44f 100644
--- a/biocypher/_logger.py
+++ b/biocypher/_logger.py
@@ -12,7 +12,7 @@
Configuration of the module logger.
"""
-__all__ = ['get_logger', 'log', 'logfile']
+__all__ = ["get_logger", "log", "logfile"]
from datetime import datetime
import os
@@ -23,7 +23,7 @@
from biocypher._metadata import __version__
-def get_logger(name: str = 'biocypher') -> logging.Logger:
+def get_logger(name: str = "biocypher") -> logging.Logger:
"""
Access the module logger, create a new one if does not exist yet.
@@ -45,7 +45,6 @@ def get_logger(name: str = 'biocypher') -> logging.Logger:
"""
if not logging.getLogger(name).hasHandlers():
-
# create logger
logger = logging.getLogger(name)
logger.setLevel(logging.DEBUG)
@@ -53,18 +52,19 @@ def get_logger(name: str = 'biocypher') -> logging.Logger:
# formatting
file_formatter = logging.Formatter(
- '%(asctime)s\t%(levelname)s\tmodule:%(module)s\n%(message)s',
+ "%(asctime)s\t%(levelname)s\tmodule:%(module)s\n%(message)s",
)
- stdout_formatter = logging.Formatter('%(levelname)s -- %(message)s')
+ stdout_formatter = logging.Formatter("%(levelname)s -- %(message)s")
# file name and creation
now = datetime.now()
- date_time = now.strftime('%Y%m%d-%H%M%S')
+ date_time = now.strftime("%Y%m%d-%H%M%S")
- logdir = _config.config('biocypher'
- ).get('log_directory') or 'biocypher-log'
+ logdir = (
+ _config.config("biocypher").get("log_directory") or "biocypher-log"
+ )
os.makedirs(logdir, exist_ok=True)
- logfile = os.path.join(logdir, f'biocypher-{date_time}.log')
+ logfile = os.path.join(logdir, f"biocypher-{date_time}.log")
# handlers
# stream handler
@@ -75,7 +75,7 @@ def get_logger(name: str = 'biocypher') -> logging.Logger:
# file handler
file_handler = logging.FileHandler(logfile)
- if _config.config('biocypher').get('debug'):
+ if _config.config("biocypher").get("debug"):
file_handler.setLevel(logging.DEBUG)
else:
file_handler.setLevel(logging.INFO)
@@ -87,8 +87,8 @@ def get_logger(name: str = 'biocypher') -> logging.Logger:
logger.addHandler(stdout_handler)
# startup message
- logger.info(f'This is BioCypher v{__version__}.')
- logger.info(f'Logging into `{logfile}`.')
+ logger.info(f"This is BioCypher v{__version__}.")
+ logger.info(f"Logging into `{logfile}`.")
return logging.getLogger(name)
@@ -107,7 +107,6 @@ def log():
"""
with open(logfile()) as fp:
-
pydoc.pager(fp.read())
diff --git a/biocypher/_mapping.py b/biocypher/_mapping.py
index 7a242bfe..1269b28a 100644
--- a/biocypher/_mapping.py
+++ b/biocypher/_mapping.py
@@ -14,7 +14,7 @@
"""
from ._logger import logger
-logger.debug(f'Loading module {__name__}.')
+logger.debug(f"Loading module {__name__}.")
from typing import Optional
from urllib.request import urlopen
@@ -29,8 +29,8 @@ class OntologyMapping:
"""
Class to store the ontology mapping and extensions.
"""
- def __init__(self, config_file: str = None):
+ def __init__(self, config_file: str = None):
self.schema = self._read_config(config_file)
self.extended_schema = self._extend_schema()
@@ -40,21 +40,16 @@ def _read_config(self, config_file: str = None):
Read the configuration file and store the ontology mapping and extensions.
"""
if config_file is None:
-
- schema_config = _config.module_data('schema_config')
+ schema_config = _config.module_data("schema_config")
# load yaml file from web
- elif config_file.startswith('http'):
-
+ elif config_file.startswith("http"):
with urlopen(config_file) as f:
-
schema_config = yaml.safe_load(f)
# get graph state from config (assume file is local)
else:
-
- with open(config_file, 'r') as f:
-
+ with open(config_file, "r") as f:
schema_config = yaml.safe_load(f)
return schema_config
@@ -78,30 +73,28 @@ def _extend_schema(self, d: Optional[dict] = None) -> dict:
# first pass: get parent leaves with direct representation in ontology
for k, v in d.items():
-
# k is not an entity
- if 'represented_as' not in v:
+ if "represented_as" not in v:
continue
# preferred_id optional: if not provided, use `id`
- if not v.get('preferred_id'):
- v['preferred_id'] = 'id'
+ if not v.get("preferred_id"):
+ v["preferred_id"] = "id"
# k is an entity that is present in the ontology
- if 'is_a' not in v:
+ if "is_a" not in v:
extended_schema[k] = v
# second pass: "vertical" inheritance
d = self._vertical_property_inheritance(d)
for k, v in d.items():
- if 'is_a' in v:
-
+ if "is_a" in v:
# prevent loops
- if k == v['is_a']:
+ if k == v["is_a"]:
logger.warning(
- f'Loop detected in ontology mapping: {k} -> {v}. '
- 'Removing item. Please fix the inheritance if you want '
- 'to use this item.'
+ f"Loop detected in ontology mapping: {k} -> {v}. "
+ "Removing item. Please fix the inheritance if you want "
+ "to use this item."
)
continue
@@ -112,16 +105,15 @@ def _extend_schema(self, d: Optional[dict] = None) -> dict:
mi_leaves = {}
ms_leaves = {}
for k, v in d.items():
-
# k is not an entity
- if 'represented_as' not in v:
+ if "represented_as" not in v:
continue
- if isinstance(v.get('preferred_id'), list):
+ if isinstance(v.get("preferred_id"), list):
mi_leaves = self._horizontal_inheritance_pid(k, v)
extended_schema.update(mi_leaves)
- elif isinstance(v.get('source'), list):
+ elif isinstance(v.get("source"), list):
ms_leaves = self._horizontal_inheritance_source(k, v)
extended_schema.update(ms_leaves)
@@ -132,40 +124,38 @@ def _vertical_property_inheritance(self, d):
Inherit properties from parents to children and update `d` accordingly.
"""
for k, v in d.items():
-
# k is not an entity
- if 'represented_as' not in v:
+ if "represented_as" not in v:
continue
# k is an entity that is present in the ontology
- if 'is_a' not in v:
+ if "is_a" not in v:
continue
# "vertical" inheritance: inherit properties from parent
- if v.get('inherit_properties', False):
-
+ if v.get("inherit_properties", False):
# get direct ancestor
- if isinstance(v['is_a'], list):
- parent = v['is_a'][0]
+ if isinstance(v["is_a"], list):
+ parent = v["is_a"][0]
else:
- parent = v['is_a']
+ parent = v["is_a"]
# ensure child has properties and exclude_properties
- if 'properties' not in v:
- v['properties'] = {}
- if 'exclude_properties' not in v:
- v['exclude_properties'] = {}
+ if "properties" not in v:
+ v["properties"] = {}
+ if "exclude_properties" not in v:
+ v["exclude_properties"] = {}
# update properties of child
- parent_props = self.schema[parent].get('properties', {})
+ parent_props = self.schema[parent].get("properties", {})
if parent_props:
- v['properties'].update(parent_props)
+ v["properties"].update(parent_props)
parent_excl_props = self.schema[parent].get(
- 'exclude_properties', {}
+ "exclude_properties", {}
)
if parent_excl_props:
- v['exclude_properties'].update(parent_excl_props)
+ v["exclude_properties"].update(parent_excl_props)
# update schema (d)
d[k] = v
@@ -182,9 +172,9 @@ def _horizontal_inheritance_pid(self, key, value):
leaves = {}
- preferred_id = value['preferred_id']
- input_label = value.get('input_label') or value['label_in_input']
- represented_as = value['represented_as']
+ preferred_id = value["preferred_id"]
+ input_label = value.get("input_label") or value["label_in_input"]
+ represented_as = value["represented_as"]
# adjust lengths
max_l = max(
@@ -208,40 +198,38 @@ def _horizontal_inheritance_pid(self, key, value):
reps = represented_as
for pid, lab, rep in zip(pids, input_label, reps):
-
- skey = pid + '.' + key
+ skey = pid + "." + key
svalue = {
- 'preferred_id': pid,
- 'input_label': lab,
- 'represented_as': rep,
+ "preferred_id": pid,
+ "input_label": lab,
+ "represented_as": rep,
# mark as virtual
- 'virtual': True,
+ "virtual": True,
}
# inherit is_a if exists
- if 'is_a' in value.keys():
-
+ if "is_a" in value.keys():
# treat as multiple inheritance
- if isinstance(value['is_a'], list):
- v = list(value['is_a'])
+ if isinstance(value["is_a"], list):
+ v = list(value["is_a"])
v.insert(0, key)
- svalue['is_a'] = v
+ svalue["is_a"] = v
else:
- svalue['is_a'] = [key, value['is_a']]
+ svalue["is_a"] = [key, value["is_a"]]
else:
# set parent as is_a
- svalue['is_a'] = key
+ svalue["is_a"] = key
# inherit everything except core attributes
for k, v in value.items():
if k not in [
- 'is_a',
- 'preferred_id',
- 'input_label',
- 'label_in_input',
- 'represented_as',
+ "is_a",
+ "preferred_id",
+ "input_label",
+ "label_in_input",
+ "represented_as",
]:
svalue[k] = v
@@ -259,9 +247,9 @@ def _horizontal_inheritance_source(self, key, value):
leaves = {}
- source = value['source']
- input_label = value.get('input_label') or value['label_in_input']
- represented_as = value['represented_as']
+ source = value["source"]
+ input_label = value.get("input_label") or value["label_in_input"]
+ represented_as = value["represented_as"]
# adjust lengths
src_l = len(source)
@@ -279,40 +267,38 @@ def _horizontal_inheritance_source(self, key, value):
reps = represented_as
for src, lab, rep in zip(source, labels, reps):
-
- skey = src + '.' + key
+ skey = src + "." + key
svalue = {
- 'source': src,
- 'input_label': lab,
- 'represented_as': rep,
+ "source": src,
+ "input_label": lab,
+ "represented_as": rep,
# mark as virtual
- 'virtual': True,
+ "virtual": True,
}
# inherit is_a if exists
- if 'is_a' in value.keys():
-
+ if "is_a" in value.keys():
# treat as multiple inheritance
- if isinstance(value['is_a'], list):
- v = list(value['is_a'])
+ if isinstance(value["is_a"], list):
+ v = list(value["is_a"])
v.insert(0, key)
- svalue['is_a'] = v
+ svalue["is_a"] = v
else:
- svalue['is_a'] = [key, value['is_a']]
+ svalue["is_a"] = [key, value["is_a"]]
else:
# set parent as is_a
- svalue['is_a'] = key
+ svalue["is_a"] = key
# inherit everything except core attributes
for k, v in value.items():
if k not in [
- 'is_a',
- 'source',
- 'input_label',
- 'label_in_input',
- 'represented_as',
+ "is_a",
+ "source",
+ "input_label",
+ "label_in_input",
+ "represented_as",
]:
svalue[k] = v
diff --git a/biocypher/_metadata.py b/biocypher/_metadata.py
index e8cac084..cbc1426c 100644
--- a/biocypher/_metadata.py
+++ b/biocypher/_metadata.py
@@ -11,7 +11,7 @@
Package metadata (version, authors, etc).
"""
-__all__ = ['get_metadata']
+__all__ = ["get_metadata"]
import os
import pathlib
@@ -19,7 +19,7 @@
import toml
-_VERSION = '0.5.17'
+_VERSION = "0.5.17"
def get_metadata():
@@ -31,46 +31,41 @@ def get_metadata():
"""
here = pathlib.Path(__file__).parent
- pyproj_toml = 'pyproject.toml'
+ pyproj_toml = "pyproject.toml"
meta = {}
for project_dir in (here, here.parent):
-
toml_path = str(project_dir.joinpath(pyproj_toml).absolute())
if os.path.exists(toml_path):
-
pyproject = toml.load(toml_path)
meta = {
- 'name': pyproject['tool']['poetry']['name'],
- 'version': pyproject['tool']['poetry']['version'],
- 'author': pyproject['tool']['poetry']['authors'],
- 'license': pyproject['tool']['poetry']['license'],
- 'full_metadata': pyproject,
+ "name": pyproject["tool"]["poetry"]["name"],
+ "version": pyproject["tool"]["poetry"]["version"],
+ "author": pyproject["tool"]["poetry"]["authors"],
+ "license": pyproject["tool"]["poetry"]["license"],
+ "full_metadata": pyproject,
}
break
if not meta:
-
try:
-
meta = {
k.lower(): v
for k, v in importlib.metadata.metadata(here.name).items()
}
except importlib.metadata.PackageNotFoundError:
-
pass
- meta['version'] = meta.get('version', None) or _VERSION
+ meta["version"] = meta.get("version", None) or _VERSION
return meta
metadata = get_metadata()
-__version__ = metadata.get('version', None)
-__author__ = metadata.get('author', None)
-__license__ = metadata.get('license', None)
+__version__ = metadata.get("version", None)
+__author__ = metadata.get("author", None)
+__license__ = metadata.get("license", None)
diff --git a/biocypher/_misc.py b/biocypher/_misc.py
index 82fc0b40..b516a048 100644
--- a/biocypher/_misc.py
+++ b/biocypher/_misc.py
@@ -13,7 +13,7 @@
"""
from ._logger import logger
-logger.debug(f'Loading module {__name__}.')
+logger.debug(f"Loading module {__name__}.")
from typing import (
Any,
@@ -31,7 +31,7 @@
import networkx as nx
import stringcase
-__all__ = ['LIST_LIKE', 'SIMPLE_TYPES', 'ensure_iterable', 'to_list']
+__all__ = ["LIST_LIKE", "SIMPLE_TYPES", "ensure_iterable", "to_list"]
SIMPLE_TYPES = (
bytes,
@@ -60,11 +60,9 @@ def to_list(value: Any) -> list:
"""
if isinstance(value, LIST_LIKE):
-
value = list(value)
else:
-
value = [value]
return value
@@ -75,7 +73,7 @@ def ensure_iterable(value: Any) -> Iterable:
Returns iterables, except strings, wraps simple types into tuple.
"""
- return value if isinstance(value, LIST_LIKE) else (value, )
+ return value if isinstance(value, LIST_LIKE) else (value,)
def create_tree_visualisation(inheritance_tree: Union[dict, nx.Graph]) -> str:
@@ -84,7 +82,6 @@ def create_tree_visualisation(inheritance_tree: Union[dict, nx.Graph]) -> str:
"""
if isinstance(inheritance_tree, nx.Graph):
-
inheritance_tree = nx.to_dict_of_lists(inheritance_tree)
# unlist values
inheritance_tree = {k: v[0] for k, v in inheritance_tree.items() if v}
@@ -95,56 +92,48 @@ def create_tree_visualisation(inheritance_tree: Union[dict, nx.Graph]) -> str:
root = list(parents - classes)
if len(root) > 1:
-
- if 'entity' in root:
-
- root = 'entity' # default: good standard? TODO
+ if "entity" in root:
+ root = "entity" # default: good standard? TODO
else:
-
raise ValueError(
- 'Inheritance tree cannot have more than one root node. '
- f'Found {len(root)}: {root}.'
+ "Inheritance tree cannot have more than one root node. "
+ f"Found {len(root)}: {root}."
)
else:
-
root = root[0]
if not root:
# find key whose value is None
- root = list(inheritance_tree.keys())[list(inheritance_tree.values()
- ).index(None)]
+ root = list(inheritance_tree.keys())[
+ list(inheritance_tree.values()).index(None)
+ ]
tree = Tree()
tree.create_node(root, root)
while classes:
-
for child in classes:
-
parent = inheritance_tree[child]
if parent in tree.nodes.keys() or parent == root:
-
tree.create_node(child, child, parent=parent)
for node in tree.nodes.keys():
-
if node in classes:
-
classes.remove(node)
return tree
# string conversion, adapted from Biolink Model Toolkit
-lowercase_pattern = re.compile(r'[a-zA-Z]*[a-z][a-zA-Z]*')
-underscore_pattern = re.compile(r'(?<!^)(?=[A-Z][a-z])')
+lowercase_pattern = re.compile(r"[a-zA-Z]*[a-z][a-zA-Z]*")
+underscore_pattern = re.compile(r"(?<!^)(?=[A-Z][a-z])")
-def from_pascal(s: str, sep: str = ' ') -> str:
+def from_pascal(s: str, sep: str = " ") -> str:
underscored = underscore_pattern.sub(sep, s)
lowercased = lowercase_pattern.sub(
lambda match: match.group(0).lower(),
@@ -163,7 +152,7 @@ def pascalcase_to_sentencecase(s: str) -> str:
Returns:
string in sentence case form
"""
- return from_pascal(s, sep=' ')
+ return from_pascal(s, sep=" ")
def snakecase_to_sentencecase(s: str) -> str:
@@ -202,7 +191,7 @@ def sentencecase_to_pascalcase(s: str) -> str:
Returns:
string in PascalCase form
"""
- return re.sub(r'(?:^| )([a-zA-Z])', lambda match: match.group(1).upper(), s)
+ return re.sub(r"(?:^| )([a-zA-Z])", lambda match: match.group(1).upper(), s)
def to_lower_sentence_case(s: str) -> str:
@@ -216,9 +205,9 @@ def to_lower_sentence_case(s: str) -> str:
Returns:
string in lower sentence case form
"""
- if '_' in s:
+ if "_" in s:
return snakecase_to_sentencecase(s)
- elif ' ' in s:
+ elif " " in s:
return s.lower()
elif s[0].isupper():
return pascalcase_to_sentencecase(s)
diff --git a/biocypher/_ontology.py b/biocypher/_ontology.py
index 0738d7b9..e22e4465 100644
--- a/biocypher/_ontology.py
+++ b/biocypher/_ontology.py
@@ -17,7 +17,7 @@
from ._logger import logger
-logger.debug(f'Loading module {__name__}.')
+logger.debug(f"Loading module {__name__}.")
from typing import Optional
from datetime import datetime
@@ -40,6 +40,7 @@ class OntologyAdapter:
labels are formatted in lower sentence case. In some cases, this means that
we replace underscores with spaces.
"""
+
def __init__(
self,
ontology_file: str,
@@ -63,7 +64,7 @@ def __init__(
node in the head ontology that should be used to join to the
root node of the tail ontology. Defaults to None.
- merge_nodes (bool): If True, head and tail join nodes will be
+ merge_nodes (bool): If True, head and tail join nodes will be
merged, using the label of the head join node. If False, the
tail join node will be attached as a child of the head join
node.
@@ -76,7 +77,7 @@ def __init__(
be removed. Defaults to True.
"""
- logger.info(f'Instantiating OntologyAdapter class for {ontology_file}.')
+ logger.info(f"Instantiating OntologyAdapter class for {ontology_file}.")
self._ontology_file = ontology_file
self._root_label = root_label
@@ -93,7 +94,6 @@ def __init__(
)
def _rdf_to_nx(self, g, root_label, switch_id_and_label=True):
-
# Loop through all labels in the ontology
for s, _, o in g.triples((None, rdflib.RDFS.label, None)):
# If the label is the root label, set the root node to the subject of the label
@@ -102,7 +102,7 @@ def _rdf_to_nx(self, g, root_label, switch_id_and_label=True):
break
else:
raise ValueError(
- f'Could not find root node with label {root_label}'
+ f"Could not find root node with label {root_label}"
)
# Create a directed graph to represent the ontology as a tree
@@ -110,7 +110,6 @@ def _rdf_to_nx(self, g, root_label, switch_id_and_label=True):
# Define a recursive function to add subclasses to the graph
def add_subclasses(node):
-
# Only add nodes that have a label
if (node, rdflib.RDFS.label, None) not in g:
return
@@ -119,25 +118,23 @@ def add_subclasses(node):
if nx_id not in G:
G.add_node(nx_id)
- G.nodes[nx_id]['label'] = nx_label
+ G.nodes[nx_id]["label"] = nx_label
# Recursively add all subclasses of the node to the graph
for s, _, o in g.triples((None, rdflib.RDFS.subClassOf, node)):
-
# Only add nodes that have a label
if (s, rdflib.RDFS.label, None) not in g:
continue
s_id, s_label = _get_nx_id_and_label(s)
G.add_node(s_id)
- G.nodes[s_id]['label'] = s_label
+ G.nodes[s_id]["label"] = s_label
G.add_edge(s_id, nx_id)
add_subclasses(s)
add_parents(s)
def add_parents(node):
-
# Only add nodes that have a label
if (node, rdflib.RDFS.label, None) not in g:
return
@@ -146,7 +143,6 @@ def add_parents(node):
# Recursively add all parents of the node to the graph
for s, _, o in g.triples((node, rdflib.RDFS.subClassOf, None)):
-
# Only add nodes that have a label
if (o, rdflib.RDFS.label, None) not in g:
continue
@@ -158,15 +154,16 @@ def add_parents(node):
continue
G.add_node(o_id)
- G.nodes[o_id]['label'] = o_label
+ G.nodes[o_id]["label"] = o_label
G.add_edge(nx_id, o_id)
add_parents(o)
def _get_nx_id_and_label(node):
node_id_str = self._remove_prefix(str(node))
- node_label_str = str(g.value(node,
- rdflib.RDFS.label)).replace('_', ' ')
+ node_label_str = str(g.value(node, rdflib.RDFS.label)).replace(
+ "_", " "
+ )
node_label_str = _misc.to_lower_sentence_case(node_label_str)
nx_id = node_label_str if switch_id_and_label else node_id_str
@@ -185,7 +182,7 @@ def _remove_prefix(self, uri: str) -> str:
everything before the last separator.
"""
if self._remove_prefixes:
- return uri.rsplit('#', 1)[-1].rsplit('/', 1)[-1]
+ return uri.rsplit("#", 1)[-1].rsplit("/", 1)[-1]
else:
return uri
@@ -202,17 +199,17 @@ def _get_format(self, ontology_file):
"""
Get the format of the ontology file.
"""
- if ontology_file.endswith('.owl'):
- return 'application/rdf+xml'
- elif ontology_file.endswith('.obo'):
- raise NotImplementedError('OBO format not yet supported')
- elif ontology_file.endswith('.rdf'):
- return 'application/rdf+xml'
- elif ontology_file.endswith('.ttl'):
- return 'ttl'
+ if ontology_file.endswith(".owl"):
+ return "application/rdf+xml"
+ elif ontology_file.endswith(".obo"):
+ raise NotImplementedError("OBO format not yet supported")
+ elif ontology_file.endswith(".rdf"):
+ return "application/rdf+xml"
+ elif ontology_file.endswith(".ttl"):
+ return "ttl"
else:
raise ValueError(
- f'Could not determine format of ontology file {ontology_file}'
+ f"Could not determine format of ontology file {ontology_file}"
)
def get_nx_graph(self):
@@ -254,10 +251,11 @@ class Ontology:
while an arbitrary number of other resources can become "tail" ontologies at
arbitrary fusion points inside the "head" ontology.
"""
+
def __init__(
self,
head_ontology: dict,
- ontology_mapping: 'OntologyMapping',
+ ontology_mapping: "OntologyMapping",
tail_ontologies: Optional[dict] = None,
):
"""
@@ -311,21 +309,21 @@ def _load_ontologies(self) -> None:
instance variable (head) or a dictionary (tail).
"""
- logger.info('Loading ontologies...')
+ logger.info("Loading ontologies...")
self._head_ontology = OntologyAdapter(
- self._head_ontology_meta['url'],
- self._head_ontology_meta['root_node'],
+ self._head_ontology_meta["url"],
+ self._head_ontology_meta["root_node"],
)
if self._tail_ontology_meta:
self._tail_ontologies = {}
for key, value in self._tail_ontology_meta.items():
self._tail_ontologies[key] = OntologyAdapter(
- ontology_file = value['url'],
- root_label = value['tail_join_node'],
- head_join_node = value['head_join_node'],
- merge_nodes = value.get('merge_nodes', True),
+ ontology_file=value["url"],
+ root_label=value["tail_join_node"],
+ head_join_node=value["head_join_node"],
+ merge_nodes=value.get("merge_nodes", True),
)
def _assert_join_node(self, adapter: OntologyAdapter) -> None:
@@ -342,10 +340,9 @@ def _assert_join_node(self, adapter: OntologyAdapter) -> None:
head_join_node = adapter.get_head_join_node()
if head_join_node not in self._head_ontology.get_nx_graph().nodes:
-
raise ValueError(
- f'Head join node {head_join_node} not found in '
- f'head ontology.'
+ f"Head join node {head_join_node} not found in "
+ f"head ontology."
)
def _join_ontologies(self, adapter: OntologyAdapter) -> None:
@@ -383,11 +380,9 @@ def _join_ontologies(self, adapter: OntologyAdapter) -> None:
# as parent of tail join node
tail_ontology_subtree.add_node(
head_join_node,
- **self._head_ontology.get_nx_graph().nodes[head_join_node]
- )
- tail_ontology_subtree.add_edge(
- tail_join_node, head_join_node
+ **self._head_ontology.get_nx_graph().nodes[head_join_node],
)
+ tail_ontology_subtree.add_edge(tail_join_node, head_join_node)
# else rename tail join node to match head join node if necessary
elif not tail_join_node == head_join_node:
@@ -409,46 +404,43 @@ def _extend_ontology(self) -> None:
self._nx_graph = self._head_ontology.get_nx_graph().copy()
for key, value in self.extended_schema.items():
-
- if not value.get('is_a'):
-
- if self._nx_graph.has_node(value.get('synonym_for')):
-
+ if not value.get("is_a"):
+ if self._nx_graph.has_node(value.get("synonym_for")):
continue
-
+
if not self._nx_graph.has_node(key):
-
raise ValueError(
- f'Node {key} not found in ontology, but also has no '
- 'inheritance definition. Please check your schema for '
- 'spelling errors or a missing `is_a` definition.'
+ f"Node {key} not found in ontology, but also has no "
+ "inheritance definition. Please check your schema for "
+ "spelling errors or a missing `is_a` definition."
)
-
+
continue
- parents = _misc.to_list(value.get('is_a'))
+ parents = _misc.to_list(value.get("is_a"))
child = key
while parents:
parent = parents.pop(0)
if parent not in self._nx_graph.nodes:
-
self._nx_graph.add_node(parent)
self._nx_graph.nodes[parent][
- 'label'] = _misc.sentencecase_to_pascalcase(parent)
+ "label"
+ ] = _misc.sentencecase_to_pascalcase(parent)
# mark parent as user extension
- self._nx_graph.nodes[parent]['user_extension'] = True
+ self._nx_graph.nodes[parent]["user_extension"] = True
self._extended_nodes.add(parent)
if child not in self._nx_graph.nodes:
self._nx_graph.add_node(child)
self._nx_graph.nodes[child][
- 'label'] = _misc.sentencecase_to_pascalcase(child)
+ "label"
+ ] = _misc.sentencecase_to_pascalcase(child)
# mark child as user extension
- self._nx_graph.nodes[child]['user_extension'] = True
+ self._nx_graph.nodes[child]["user_extension"] = True
self._extended_nodes.add(child)
self._nx_graph.add_edge(child, parent)
@@ -463,29 +455,28 @@ def _connect_biolink_classes(self) -> None:
if not self._nx_graph:
self._nx_graph = self._head_ontology.get_nx_graph().copy()
- if 'entity' not in self._nx_graph.nodes:
+ if "entity" not in self._nx_graph.nodes:
return
# biolink classes that are disjoint from entity
disjoint_classes = [
- 'frequency qualifier mixin',
- 'chemical entity to entity association mixin',
- 'ontology class',
- 'relationship quantifier',
- 'physical essence or occurrent',
- 'gene or gene product',
- 'subject of investigation',
+ "frequency qualifier mixin",
+ "chemical entity to entity association mixin",
+ "ontology class",
+ "relationship quantifier",
+ "physical essence or occurrent",
+ "gene or gene product",
+ "subject of investigation",
]
for node in disjoint_classes:
-
if not self._nx_graph.nodes.get(node):
-
self._nx_graph.add_node(node)
self._nx_graph.nodes[node][
- 'label'] = _misc.sentencecase_to_pascalcase(node)
+ "label"
+ ] = _misc.sentencecase_to_pascalcase(node)
- self._nx_graph.add_edge(node, 'entity')
+ self._nx_graph.add_edge(node, "entity")
def _add_properties(self) -> None:
"""
@@ -495,21 +486,18 @@ def _add_properties(self) -> None:
"""
for key, value in self.extended_schema.items():
-
if key in self._nx_graph.nodes:
-
self._nx_graph.nodes[key].update(value)
- if value.get('synonym_for'):
-
+ if value.get("synonym_for"):
# change node label to synonym
- if value['synonym_for'] not in self._nx_graph.nodes:
+ if value["synonym_for"] not in self._nx_graph.nodes:
raise ValueError(
f'Node {value["synonym_for"]} not found in ontology.'
)
self._nx_graph = nx.relabel_nodes(
- self._nx_graph, {value['synonym_for']: key}
+ self._nx_graph, {value["synonym_for"]: key}
)
def get_ancestors(self, node_label: str) -> list:
@@ -541,18 +529,17 @@ def show_ontology_structure(self, to_disk: str = None, full: bool = False):
"""
if not self._nx_graph:
- raise ValueError('Ontology not loaded.')
+ raise ValueError("Ontology not loaded.")
if not self._tail_ontologies:
- msg = f'Showing ontology structure based on {self._head_ontology._ontology_file}'
+ msg = f"Showing ontology structure based on {self._head_ontology._ontology_file}"
else:
- msg = f'Showing ontology structure based on {len(self._tail_ontology_meta)+1} ontologies: '
+ msg = f"Showing ontology structure based on {len(self._tail_ontology_meta)+1} ontologies: "
print(msg)
if not full:
-
# set of leaves and their intermediate parents up to the root
filter_nodes = set(self.extended_schema.keys())
@@ -563,19 +550,17 @@ def show_ontology_structure(self, to_disk: str = None, full: bool = False):
G = self._nx_graph.subgraph(filter_nodes)
else:
-
G = self._nx_graph
if not to_disk:
-
# create tree
tree = _misc.create_tree_visualisation(G)
# add synonym information
for node in self.extended_schema:
- if self.extended_schema[node].get('synonym_for'):
+ if self.extended_schema[node].get("synonym_for"):
tree.nodes[node].tag = (
- f'{node} = '
+ f"{node} = "
f"{self.extended_schema[node].get('synonym_for')}"
)
@@ -584,26 +569,24 @@ def show_ontology_structure(self, to_disk: str = None, full: bool = False):
return tree
else:
-
# convert lists/dicts to strings for vis only
for node in G.nodes:
-
# rename node and use former id as label
- label = G.nodes[node].get('label')
+ label = G.nodes[node].get("label")
if not label:
label = node
G = nx.relabel_nodes(G, {node: label})
- G.nodes[label]['label'] = node
+ G.nodes[label]["label"] = node
for attrib in G.nodes[label]:
if type(G.nodes[label][attrib]) in [list, dict]:
G.nodes[label][attrib] = str(G.nodes[label][attrib])
- path = os.path.join(to_disk, 'ontology_structure.graphml')
+ path = os.path.join(to_disk, "ontology_structure.graphml")
- logger.info(f'Writing ontology structure to {path}.')
+ logger.info(f"Writing ontology structure to {path}.")
nx.write_graphml(G, path)
@@ -616,10 +599,10 @@ def get_dict(self) -> dict:
"""
d = {
- 'node_id': self._get_current_id(),
- 'node_label': 'BioCypher',
- 'properties': {
- 'schema': 'self.extended_schema',
+ "node_id": self._get_current_id(),
+ "node_label": "BioCypher",
+ "properties": {
+ "schema": "self.extended_schema",
},
}
@@ -635,5 +618,4 @@ def _get_current_id(self):
"""
now = datetime.now()
- return now.strftime('v%Y%m%d-%H%M%S')
-
\ No newline at end of file
+ return now.strftime("v%Y%m%d-%H%M%S")
diff --git a/biocypher/_pandas.py b/biocypher/_pandas.py
index 44821d91..24898b4a 100644
--- a/biocypher/_pandas.py
+++ b/biocypher/_pandas.py
@@ -1,5 +1,7 @@
import pandas as pd
-from ._create import BioCypherNode, BioCypherEdge
+
+from ._create import BioCypherEdge, BioCypherNode
+
class Pandas:
def __init__(self, ontology, translator, deduplicator):
@@ -16,9 +18,13 @@ def _separate_entity_types(self, entities):
"""
lists = {}
for entity in entities:
- if not isinstance(entity, BioCypherNode) and not isinstance(entity, BioCypherEdge):
- raise TypeError(f"Expected a BioCypherNode or BioCypherEdge, got {type(entity)}.")
-
+ if not isinstance(entity, BioCypherNode) and not isinstance(
+ entity, BioCypherEdge
+ ):
+ raise TypeError(
+ f"Expected a BioCypherNode or BioCypherEdge, got {type(entity)}."
+ )
+
if isinstance(entity, BioCypherNode):
seen = self.deduplicator.node_seen(entity)
elif isinstance(entity, BioCypherEdge):
@@ -26,7 +32,7 @@ def _separate_entity_types(self, entities):
if seen:
continue
-
+
_type = entity.get_label()
if not _type in lists:
lists[_type] = []
@@ -45,10 +51,14 @@ def add_tables(self, entities):
self._add_entity_df(_type, _entities)
def _add_entity_df(self, _type, _entities):
- df = pd.DataFrame(pd.json_normalize([node.get_dict() for node in _entities]))
- #replace "properties." with "" in column names
+ df = pd.DataFrame(
+ pd.json_normalize([node.get_dict() for node in _entities])
+ )
+ # replace "properties." with "" in column names
df.columns = [col.replace("properties.", "") for col in df.columns]
if _type not in self.dfs:
self.dfs[_type] = df
else:
- self.dfs[_type] = pd.concat([self.dfs[_type], df], ignore_index=True)
+ self.dfs[_type] = pd.concat(
+ [self.dfs[_type], df], ignore_index=True
+ )
diff --git a/biocypher/_translate.py b/biocypher/_translate.py
index 3b3bee29..663d11ab 100644
--- a/biocypher/_translate.py
+++ b/biocypher/_translate.py
@@ -14,7 +14,7 @@
"""
from ._logger import logger
-logger.debug(f'Loading module {__name__}.')
+logger.debug(f"Loading module {__name__}.")
from typing import Any, Union, Optional
from collections.abc import Iterable, Generator
@@ -25,7 +25,7 @@
from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
from ._mapping import OntologyMapping
-__all__ = ['BiolinkAdapter', 'Translator']
+__all__ = ["BiolinkAdapter", "Translator"]
class Translator:
@@ -40,8 +40,9 @@ class Translator:
Provides utility functions for translating between input and output labels
and cypher queries.
"""
+
def __init__(
- self, ontology_mapping: 'OntologyMapping', strict_mode: bool = False
+ self, ontology_mapping: "OntologyMapping", strict_mode: bool = False
):
"""
Args:
@@ -85,30 +86,28 @@ def translate_nodes(
"""
- self._log_begin_translate(id_type_prop_tuples, 'nodes')
+ self._log_begin_translate(id_type_prop_tuples, "nodes")
for _id, _type, _props in id_type_prop_tuples:
-
# check for strict mode requirements
- required_props = ['source', 'licence', 'version']
+ required_props = ["source", "licence", "version"]
if self.strict_mode:
# rename 'license' to 'licence' in _props
- if _props.get('license'):
- _props['licence'] = _props.pop('license')
+ if _props.get("license"):
+ _props["licence"] = _props.pop("license")
for prop in required_props:
if prop not in _props:
raise ValueError(
- f'Property `{prop}` missing from node {_id}. '
- 'Strict mode is enabled, so this is not allowed.'
+ f"Property `{prop}` missing from node {_id}. "
+ "Strict mode is enabled, so this is not allowed."
)
# find the node in leaves that represents biolink node type
_ontology_class = self._get_ontology_mapping(_type)
if _ontology_class:
-
# filter properties for those specified in schema_config if any
_filtered_props = self._filter_props(_ontology_class, _props)
@@ -123,10 +122,9 @@ def translate_nodes(
)
else:
-
self._record_no_type(_type, _id)
- self._log_finish_translate('nodes')
+ self._log_finish_translate("nodes")
def _get_preferred_id(self, _bl_type: str) -> str:
"""
@@ -134,8 +132,9 @@ def _get_preferred_id(self, _bl_type: str) -> str:
"""
return (
- self.extended_schema[_bl_type]['preferred_id'] if 'preferred_id'
- in self.extended_schema.get(_bl_type, {}) else 'id'
+ self.extended_schema[_bl_type]["preferred_id"]
+ if "preferred_id" in self.extended_schema.get(_bl_type, {})
+ else "id"
)
def _filter_props(self, bl_type: str, props: dict) -> dict:
@@ -143,27 +142,22 @@ def _filter_props(self, bl_type: str, props: dict) -> dict:
Filters properties for those specified in schema_config if any.
"""
- filter_props = self.extended_schema[bl_type].get('properties', {})
+ filter_props = self.extended_schema[bl_type].get("properties", {})
# strict mode: add required properties (only if there is a whitelist)
if self.strict_mode and filter_props:
filter_props.update(
- {
- 'source': 'str',
- 'licence': 'str',
- 'version': 'str'
- },
+ {"source": "str", "licence": "str", "version": "str"},
)
exclude_props = self.extended_schema[bl_type].get(
- 'exclude_properties', []
+ "exclude_properties", []
)
if isinstance(exclude_props, str):
exclude_props = [exclude_props]
if filter_props and exclude_props:
-
filtered_props = {
k: v
for k, v in props.items()
@@ -171,21 +165,16 @@ def _filter_props(self, bl_type: str, props: dict) -> dict:
}
elif filter_props:
-
filtered_props = {
- k: v
- for k, v in props.items() if k in filter_props.keys()
+ k: v for k, v in props.items() if k in filter_props.keys()
}
elif exclude_props:
-
filtered_props = {
- k: v
- for k, v in props.items() if k not in exclude_props
+ k: v for k, v in props.items() if k not in exclude_props
}
else:
-
return props
missing_props = [
@@ -193,7 +182,6 @@ def _filter_props(self, bl_type: str, props: dict) -> dict:
]
# add missing properties with default values
for k in missing_props:
-
filtered_props[k] = None
return filtered_props
@@ -218,7 +206,7 @@ def translate_edges(
Can optionally possess its own ID.
"""
- self._log_begin_translate(id_src_tar_type_prop_tuples, 'edges')
+ self._log_begin_translate(id_src_tar_type_prop_tuples, "edges")
# legacy: deal with 4-tuples (no edge id)
# TODO remove for performance reasons once safe
@@ -230,18 +218,17 @@ def translate_edges(
]
for _id, _src, _tar, _type, _props in id_src_tar_type_prop_tuples:
-
# check for strict mode requirements
if self.strict_mode:
- if not 'source' in _props:
+ if not "source" in _props:
raise ValueError(
- f'Edge {_id if _id else (_src, _tar)} does not have a `source` property.',
- ' This is required in strict mode.',
+ f"Edge {_id if _id else (_src, _tar)} does not have a `source` property.",
+ " This is required in strict mode.",
)
- if not 'licence' in _props:
+ if not "licence" in _props:
raise ValueError(
- f'Edge {_id if _id else (_src, _tar)} does not have a `licence` property.',
- ' This is required in strict mode.',
+ f"Edge {_id if _id else (_src, _tar)} does not have a `licence` property.",
+ " This is required in strict mode.",
)
# match the input label (_type) to
@@ -249,14 +236,12 @@ def translate_edges(
bl_type = self._get_ontology_mapping(_type)
if bl_type:
-
# filter properties for those specified in schema_config if any
_filtered_props = self._filter_props(bl_type, _props)
- rep = self.extended_schema[bl_type]['represented_as']
-
- if rep == 'node':
+ rep = self.extended_schema[bl_type]["represented_as"]
+ if rep == "node":
if _id:
# if it brings its own ID, use it
node_id = _id
@@ -264,8 +249,11 @@ def translate_edges(
else:
# source target concat
node_id = (
- str(_src) + '_' + str(_tar) + '_' +
- '_'.join(str(v) for v in _filtered_props.values())
+ str(_src)
+ + "_"
+ + str(_tar)
+ + "_"
+ + "_".join(str(v) for v in _filtered_props.values())
)
n = BioCypherNode(
@@ -277,21 +265,18 @@ def translate_edges(
# directionality check TODO generalise to account for
# different descriptions of directionality or find a
# more consistent solution for indicating directionality
- if _filtered_props.get('directed') == True:
-
- l1 = 'IS_SOURCE_OF'
- l2 = 'IS_TARGET_OF'
+ if _filtered_props.get("directed") == True:
+ l1 = "IS_SOURCE_OF"
+ l2 = "IS_TARGET_OF"
elif _filtered_props.get(
- 'src_role',
- ) and _filtered_props.get('tar_role'):
-
- l1 = _filtered_props.get('src_role')
- l2 = _filtered_props.get('tar_role')
+ "src_role",
+ ) and _filtered_props.get("tar_role"):
+ l1 = _filtered_props.get("src_role")
+ l2 = _filtered_props.get("tar_role")
else:
-
- l1 = l2 = 'IS_PART_OF'
+ l1 = l2 = "IS_PART_OF"
e_s = BioCypherEdge(
source_id=_src,
@@ -310,13 +295,11 @@ def translate_edges(
yield BioCypherRelAsNode(n, e_s, e_t)
else:
-
edge_label = self.extended_schema[bl_type].get(
- 'label_as_edge'
+ "label_as_edge"
)
if edge_label is None:
-
edge_label = bl_type
yield BioCypherEdge(
@@ -328,10 +311,9 @@ def translate_edges(
)
else:
-
self._record_no_type(_type, (_src, _tar))
- self._log_finish_translate('edges')
+ self._log_finish_translate("edges")
def _record_no_type(self, _type: Any, what: Any) -> None:
"""
@@ -339,14 +321,12 @@ def _record_no_type(self, _type: Any, what: Any) -> None:
schema_config.
"""
- logger.debug(f'No Biolink type defined for `{_type}`: {what}')
+ logger.debug(f"No Biolink type defined for `{_type}`: {what}")
if self.notype.get(_type, None):
-
self.notype[_type] += 1
else:
-
self.notype[_type] = 1
def get_missing_biolink_types(self) -> dict:
@@ -359,15 +339,13 @@ def get_missing_biolink_types(self) -> dict:
@staticmethod
def _log_begin_translate(_input: Iterable, what: str):
+ n = f"{len(_input)} " if hasattr(_input, "__len__") else ""
- n = f'{len(_input)} ' if hasattr(_input, '__len__') else ''
-
- logger.debug(f'Translating {n}{what} to BioCypher')
+ logger.debug(f"Translating {n}{what} to BioCypher")
@staticmethod
def _log_finish_translate(what: str):
-
- logger.debug(f'Finished translating {what} to BioCypher.')
+ logger.debug(f"Finished translating {what} to BioCypher.")
def _update_ontology_types(self):
"""
@@ -379,24 +357,19 @@ def _update_ontology_types(self):
self._ontology_mapping = {}
for key, value in self.extended_schema.items():
-
- labels = value.get('input_label') or value.get('label_in_input')
+ labels = value.get("input_label") or value.get("label_in_input")
if isinstance(labels, str):
-
self._ontology_mapping[labels] = key
elif isinstance(labels, list):
-
for label in labels:
self._ontology_mapping[label] = key
- if value.get('label_as_edge'):
-
- self._add_translation_mappings(labels, value['label_as_edge'])
+ if value.get("label_as_edge"):
+ self._add_translation_mappings(labels, value["label_as_edge"])
else:
-
self._add_translation_mappings(labels, key)
def _get_ontology_mapping(self, label: str) -> Optional[str]:
@@ -433,7 +406,7 @@ def translate(self, query):
Translate a cypher query. Only translates labels as of now.
"""
for key in self.mappings:
- query = query.replace(':' + key, ':' + self.mappings[key])
+ query = query.replace(":" + key, ":" + self.mappings[key])
return query
def reverse_translate(self, query):
@@ -442,23 +415,22 @@ def reverse_translate(self, query):
now.
"""
for key in self.reverse_mappings:
-
- a = ':' + key + ')'
- b = ':' + key + ']'
+ a = ":" + key + ")"
+ b = ":" + key + "]"
# TODO this conditional probably does not cover all cases
if a in query or b in query:
if isinstance(self.reverse_mappings[key], list):
raise NotImplementedError(
- 'Reverse translation of multiple inputs not '
- 'implemented yet. Many-to-one mappings are '
- 'not reversible. '
- f'({key} -> {self.reverse_mappings[key]})',
+ "Reverse translation of multiple inputs not "
+ "implemented yet. Many-to-one mappings are "
+ "not reversible. "
+ f"({key} -> {self.reverse_mappings[key]})",
)
else:
query = query.replace(
a,
- ':' + self.reverse_mappings[key] + ')',
- ).replace(b, ':' + self.reverse_mappings[key] + ']')
+ ":" + self.reverse_mappings[key] + ")",
+ ).replace(b, ":" + self.reverse_mappings[key] + "]")
return query
def _add_translation_mappings(self, original_name, biocypher_name):
@@ -479,12 +451,17 @@ def _add_translation_mappings(self, original_name, biocypher_name):
if isinstance(biocypher_name, list):
for bn in biocypher_name:
- self.reverse_mappings[self.name_sentence_to_pascal(bn, )
- ] = original_name
+ self.reverse_mappings[
+ self.name_sentence_to_pascal(
+ bn,
+ )
+ ] = original_name
else:
- self.reverse_mappings[self.name_sentence_to_pascal(
- biocypher_name,
- )] = original_name
+ self.reverse_mappings[
+ self.name_sentence_to_pascal(
+ biocypher_name,
+ )
+ ] = original_name
@staticmethod
def name_sentence_to_pascal(name: str) -> str:
@@ -492,9 +469,9 @@ def name_sentence_to_pascal(name: str) -> str:
Converts a name in sentence case to pascal case.
"""
# split on dots if dot is present
- if '.' in name:
- return '.'.join(
- [_misc.sentencecase_to_pascalcase(n) for n in name.split('.')],
+ if "." in name:
+ return ".".join(
+ [_misc.sentencecase_to_pascalcase(n) for n in name.split(".")],
)
else:
return _misc.sentencecase_to_pascalcase(name)
diff --git a/biocypher/_write.py b/biocypher/_write.py
index efbd60fb..233b69f9 100644
--- a/biocypher/_write.py
+++ b/biocypher/_write.py
@@ -17,7 +17,7 @@
from ._logger import logger
-logger.debug(f'Loading module {__name__}.')
+logger.debug(f"Loading module {__name__}.")
from abc import ABC, abstractmethod
from types import GeneratorType
@@ -31,10 +31,9 @@
from ._config import config as _config
from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
-__all__ = ['get_writer']
+__all__ = ["get_writer"]
if TYPE_CHECKING:
-
from ._ontology import Ontology
from ._translate import Translator
from ._deduplicate import Deduplicator
@@ -92,7 +91,7 @@ class _BatchWriter(ABC):
Path prefix for the admin import call binary.
import_call_file_prefix:
- Path prefix for the data files (headers and parts) in the import
+ Path prefix for the data files (headers and parts) in the import
call.
wipe:
@@ -108,6 +107,7 @@ class _BatchWriter(ABC):
skip_duplicate_nodes:
Whether to skip duplicate nodes. (Specific to Neo4j.)
"""
+
@abstractmethod
def _get_default_import_call_bin_prefix(self):
"""
@@ -193,14 +193,14 @@ def _get_import_script_name(self) -> str:
def __init__(
self,
- ontology: 'Ontology',
- translator: 'Translator',
- deduplicator: 'Deduplicator',
+ ontology: "Ontology",
+ translator: "Translator",
+ deduplicator: "Deduplicator",
delimiter: str,
- array_delimiter: str = ',',
+ array_delimiter: str = ",",
quote: str = '"',
output_directory: Optional[str] = None,
- db_name: str = 'neo4j',
+ db_name: str = "neo4j",
import_call_bin_prefix: Optional[str] = None,
import_call_file_prefix: Optional[str] = None,
wipe: bool = True,
@@ -209,7 +209,7 @@ def __init__(
skip_duplicate_nodes: bool = False,
db_user: str = None,
db_password: str = None,
- db_port: str = None
+ db_port: str = None,
):
self.db_name = db_name
self.db_user = db_user
@@ -225,7 +225,8 @@ def __init__(
self.skip_duplicate_nodes = skip_duplicate_nodes
if import_call_bin_prefix is None:
- self.import_call_bin_prefix = self._get_default_import_call_bin_prefix(
+ self.import_call_bin_prefix = (
+ self._get_default_import_call_bin_prefix()
)
else:
self.import_call_bin_prefix = import_call_bin_prefix
@@ -248,11 +249,11 @@ def __init__(
if os.path.exists(self.outdir):
logger.warning(
- f'Output directory `{self.outdir}` already exists. '
- 'If this is not planned, file consistency may be compromised.'
+ f"Output directory `{self.outdir}` already exists. "
+ "If this is not planned, file consistency may be compromised."
)
else:
- logger.info(f'Creating output directory `{self.outdir}`.')
+ logger.info(f"Creating output directory `{self.outdir}`.")
os.makedirs(self.outdir)
self.parts = {} # dict to store the paths of part files for each label
@@ -268,7 +269,6 @@ def outdir(self):
return self._outdir
-
@property
def import_call_file_prefix(self):
"""
@@ -286,12 +286,10 @@ def _process_delimiter(self, delimiter: str) -> str:
representation (e.g. tab for '\t').
"""
- if delimiter == '\\t':
-
- return '\t', '\\t'
+ if delimiter == "\\t":
+ return "\t", "\\t"
else:
-
return delimiter, delimiter
def write_nodes(self, nodes, batch_size: int = int(1e6)):
@@ -310,12 +308,12 @@ def write_nodes(self, nodes, batch_size: int = int(1e6)):
# write node data
passed = self._write_node_data(nodes, batch_size)
if not passed:
- logger.error('Error while writing node data.')
+ logger.error("Error while writing node data.")
return False
# pass property data to header writer per node type written
passed = self._write_node_headers()
if not passed:
- logger.error('Error while writing node headers.')
+ logger.error("Error while writing node headers.")
return False
return True
@@ -348,7 +346,9 @@ def write_edges(
e.get_source_edge(),
e.get_target_edge(),
],
- ) if isinstance(e, BioCypherRelAsNode) else (None, [e])
+ )
+ if isinstance(e, BioCypherRelAsNode)
+ else (None, [e])
for e in edges
)
)
@@ -368,17 +368,17 @@ def write_edges(
# is this a problem? if the generator or list is empty, we
# don't write anything.
logger.debug(
- 'No edges to write, possibly due to no matched Biolink classes.',
+ "No edges to write, possibly due to no matched Biolink classes.",
)
pass
if not passed:
- logger.error('Error while writing edge data.')
+ logger.error("Error while writing edge data.")
return False
# pass property data to header writer per edge type written
passed = self._write_edge_headers()
if not passed:
- logger.error('Error while writing edge headers.')
+ logger.error("Error while writing edge headers.")
return False
return True
@@ -401,7 +401,7 @@ def _write_node_data(self, nodes, batch_size):
"""
if isinstance(nodes, GeneratorType) or isinstance(nodes, peekable):
- logger.debug('Writing node CSV from generator.')
+ logger.debug("Writing node CSV from generator.")
bins = defaultdict(list) # dict to store a list for each
# label that is passed in
@@ -424,7 +424,7 @@ def _write_node_data(self, nodes, batch_size):
# check for non-id
if not _id:
- logger.warning(f'Node {label} has no id; skipping.')
+ logger.warning(f"Node {label} has no id; skipping.")
continue
if not label in bins.keys():
@@ -434,20 +434,22 @@ def _write_node_data(self, nodes, batch_size):
bin_l[label] = 1
# get properties from config if present
- cprops = self.extended_schema.get(label).get('properties', )
+ cprops = self.extended_schema.get(label).get(
+ "properties",
+ )
if cprops:
d = dict(cprops)
# add id and preferred id to properties; these are
# created in node creation (`_create.BioCypherNode`)
- d['id'] = 'str'
- d['preferred_id'] = 'str'
+ d["id"] = "str"
+ d["preferred_id"] = "str"
# add strict mode properties
if self.strict_mode:
- d['source'] = 'str'
- d['version'] = 'str'
- d['licence'] = 'str'
+ d["source"] = "str"
+ d["version"] = "str"
+ d["licence"] = "str"
else:
d = dict(node.get_properties())
@@ -531,7 +533,7 @@ def _write_node_data(self, nodes, batch_size):
return True
else:
if type(nodes) is not list:
- logger.error('Nodes must be passed as list or generator.')
+ logger.error("Nodes must be passed as list or generator.")
return False
else:
@@ -563,14 +565,13 @@ def _write_single_node_list_to_file(
bool: The return value. True for success, False otherwise.
"""
if not all(isinstance(n, BioCypherNode) for n in node_list):
- logger.error('Nodes must be passed as type BioCypherNode.')
+ logger.error("Nodes must be passed as type BioCypherNode.")
return False
# from list of nodes to list of strings
lines = []
for n in node_list:
-
# check for deviations in properties
# node properties
n_props = n.get_properties()
@@ -584,46 +585,45 @@ def _write_single_node_list_to_file(
oprop1 = set(ref_props).difference(n_keys)
oprop2 = set(n_keys).difference(ref_props)
logger.error(
- f'At least one node of the class {n.get_label()} '
- f'has more or fewer properties than another. '
- f'Offending node: {onode!r}, offending property: '
- f'{max([oprop1, oprop2])}. '
- f'All reference properties: {ref_props}, '
- f'All node properties: {n_keys}.',
+ f"At least one node of the class {n.get_label()} "
+ f"has more or fewer properties than another. "
+ f"Offending node: {onode!r}, offending property: "
+ f"{max([oprop1, oprop2])}. "
+ f"All reference properties: {ref_props}, "
+ f"All node properties: {n_keys}.",
)
return False
line = [n.get_id()]
if ref_props:
-
plist = []
# make all into strings, put actual strings in quotes
for k, v in prop_dict.items():
p = n_props.get(k)
if p is None: # TODO make field empty instead of ""?
- plist.append('')
+ plist.append("")
elif v in [
- 'int',
- 'integer',
- 'long',
- 'float',
- 'double',
- 'dbl',
- 'bool',
- 'boolean',
+ "int",
+ "integer",
+ "long",
+ "float",
+ "double",
+ "dbl",
+ "bool",
+ "boolean",
]:
plist.append(str(p))
else:
if isinstance(p, list):
plist.append(self._write_array_string(p))
else:
- plist.append(f'{self.quote}{str(p)}{self.quote}')
+ plist.append(f"{self.quote}{str(p)}{self.quote}")
line.append(self.delim.join(plist))
line.append(labels)
- lines.append(self.delim.join(line) + '\n')
+ lines.append(self.delim.join(line) + "\n")
# avoid writing empty files
if lines:
@@ -653,7 +653,7 @@ def _write_edge_data(self, edges, batch_size):
"""
if isinstance(edges, GeneratorType):
- logger.debug('Writing edge CSV from generator.')
+ logger.debug("Writing edge CSV from generator.")
bins = defaultdict(list) # dict to store a list for each
# label that is passed in
@@ -671,8 +671,8 @@ def _write_edge_data(self, edges, batch_size):
if not (edge.get_source_id() and edge.get_target_id()):
logger.error(
- 'Edge must have source and target node. '
- f'Caused by: {edge}',
+ "Edge must have source and target node. "
+ f"Caused by: {edge}",
)
continue
@@ -691,23 +691,23 @@ def _write_edge_data(self, edges, batch_size):
cprops = None
if label in self.extended_schema:
cprops = self.extended_schema.get(label).get(
- 'properties',
+ "properties",
)
else:
# try via "label_as_edge"
for k, v in self.extended_schema.items():
if isinstance(v, dict):
- if v.get('label_as_edge') == label:
- cprops = v.get('properties')
+ if v.get("label_as_edge") == label:
+ cprops = v.get("properties")
break
if cprops:
d = cprops
# add strict mode properties
if self.strict_mode:
- d['source'] = 'str'
- d['version'] = 'str'
- d['licence'] = 'str'
+ d["source"] = "str"
+ d["version"] = "str"
+ d["licence"] = "str"
else:
d = dict(edge.get_properties())
@@ -746,7 +746,6 @@ def _write_edge_data(self, edges, batch_size):
# after generator depleted, write remainder of bins
for label, nl in bins.items():
-
passed = self._write_single_edge_list_to_file(
nl,
label,
@@ -768,7 +767,7 @@ def _write_edge_data(self, edges, batch_size):
return True
else:
if type(edges) is not list:
- logger.error('Edges must be passed as list or generator.')
+ logger.error("Edges must be passed as list or generator.")
return False
else:
@@ -800,8 +799,7 @@ def _write_single_edge_list_to_file(
"""
if not all(isinstance(n, BioCypherEdge) for n in edge_list):
-
- logger.error('Edges must be passed as type BioCypherEdge.')
+ logger.error("Edges must be passed as type BioCypherEdge.")
return False
# from list of edges to list of strings
@@ -815,16 +813,16 @@ def _write_single_edge_list_to_file(
# compare list order invariant
if not set(ref_props) == set(e_keys):
- oedge = f'{e.get_source_id()}-{e.get_target_id()}'
+ oedge = f"{e.get_source_id()}-{e.get_target_id()}"
oprop1 = set(ref_props).difference(e_keys)
oprop2 = set(e_keys).difference(ref_props)
logger.error(
- f'At least one edge of the class {e.get_label()} '
- f'has more or fewer properties than another. '
- f'Offending edge: {oedge!r}, offending property: '
- f'{max([oprop1, oprop2])}. '
- f'All reference properties: {ref_props}, '
- f'All edge properties: {e_keys}.',
+ f"At least one edge of the class {e.get_label()} "
+ f"has more or fewer properties than another. "
+ f"Offending edge: {oedge!r}, offending property: "
+ f"{max([oprop1, oprop2])}. "
+ f"All reference properties: {ref_props}, "
+ f"All edge properties: {e_keys}.",
)
return False
@@ -833,16 +831,16 @@ def _write_single_edge_list_to_file(
for k, v in prop_dict.items():
p = e_props.get(k)
if p is None: # TODO make field empty instead of ""?
- plist.append('')
+ plist.append("")
elif v in [
- 'int',
- 'integer',
- 'long',
- 'float',
- 'double',
- 'dbl',
- 'bool',
- 'boolean',
+ "int",
+ "integer",
+ "long",
+ "float",
+ "double",
+ "dbl",
+ "bool",
+ "boolean",
]:
plist.append(str(p))
else:
@@ -850,7 +848,7 @@ def _write_single_edge_list_to_file(
plist.append(self._write_array_string(p))
else:
plist.append(self.quote + str(p) + self.quote)
-
+
entries = [e.get_source_id()]
skip_id = False
@@ -861,29 +859,34 @@ def _write_single_edge_list_to_file(
elif not self.extended_schema.get(label):
# find label in schema by label_as_edge
for k, v in self.extended_schema.items():
- if v.get('label_as_edge') == label:
+ if v.get("label_as_edge") == label:
schema_label = k
break
else:
schema_label = label
if schema_label:
- if self.extended_schema.get(schema_label).get('use_id') == False:
+ if (
+ self.extended_schema.get(schema_label).get("use_id")
+ == False
+ ):
skip_id = True
if not skip_id:
- entries.append(e.get_id() or '')
+ entries.append(e.get_id() or "")
if ref_props:
entries.append(self.delim.join(plist))
entries.append(e.get_target_id())
- entries.append(self.translator.name_sentence_to_pascal(
- e.get_label(),
- ))
+ entries.append(
+ self.translator.name_sentence_to_pascal(
+ e.get_label(),
+ )
+ )
lines.append(
- self.delim.join(entries) + '\n',
+ self.delim.join(entries) + "\n",
)
# avoid writing empty files
@@ -911,39 +914,34 @@ def _write_next_part(self, label: str, lines: list):
# list files in self.outdir
files = glob.glob(
- os.path.join(self.outdir, f'{label_pascal}-part*.csv')
+ os.path.join(self.outdir, f"{label_pascal}-part*.csv")
)
# find file with highest part number
if not files:
-
next_part = 0
else:
-
next_part = (
max(
[
- int(
- f.split('.')[-2].split('-')[-1].replace('part', '')
- ) for f in files
+ int(f.split(".")[-2].split("-")[-1].replace("part", ""))
+ for f in files
],
- ) + 1
+ )
+ + 1
)
# write to file
padded_part = str(next_part).zfill(3)
logger.info(
- f'Writing {len(lines)} entries to {label_pascal}-part{padded_part}.csv',
+ f"Writing {len(lines)} entries to {label_pascal}-part{padded_part}.csv",
)
# store name only in case import_call_file_prefix is set
- part = f'{label_pascal}-part{padded_part}.csv'
- file_path = os.path.join(
- self.outdir, part
- )
-
- with open(file_path, 'w', encoding='utf-8') as f:
+ part = f"{label_pascal}-part{padded_part}.csv"
+ file_path = os.path.join(self.outdir, part)
+ with open(file_path, "w", encoding="utf-8") as f:
# concatenate with delimiter
f.writelines(lines)
@@ -975,10 +973,9 @@ def write_import_call(self) -> bool:
"""
file_path = os.path.join(self.outdir, self._get_import_script_name())
- logger.info(f'Writing {self.db_name} import call to `{file_path}`.')
-
- with open(file_path, 'w', encoding='utf-8') as f:
+ logger.info(f"Writing {self.db_name} import call to `{file_path}`.")
+ with open(file_path, "w", encoding="utf-8") as f:
f.write(self._construct_import_call())
return True
@@ -1000,6 +997,7 @@ class _Neo4jBatchWriter(_BatchWriter):
- _construct_import_call
- _write_array_string
"""
+
def _get_default_import_call_bin_prefix(self):
"""
Method to provide the default string for the import call bin prefix.
@@ -1007,7 +1005,7 @@ def _get_default_import_call_bin_prefix(self):
Returns:
str: The default location for the neo4j admin import location
"""
- return 'bin/'
+ return "bin/"
def _write_array_string(self, string_list):
"""
@@ -1021,7 +1019,7 @@ def _write_array_string(self, string_list):
str: The string representation of an array for the neo4j admin import
"""
string = self.adelim.join(string_list)
- return f'{self.quote}{string}{self.quote}'
+ return f"{self.quote}{string}{self.quote}"
def _write_node_headers(self):
"""
@@ -1035,56 +1033,55 @@ def _write_node_headers(self):
# load headers from data parse
if not self.node_property_dict:
logger.error(
- 'Header information not found. Was the data parsed first?',
+ "Header information not found. Was the data parsed first?",
)
return False
for label, props in self.node_property_dict.items():
-
- _id = ':ID'
+ _id = ":ID"
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(label)
- header = f'{pascal_label}-header.csv'
+ header = f"{pascal_label}-header.csv"
header_path = os.path.join(
self.outdir,
header,
)
- parts = f'{pascal_label}-part.*'
+ parts = f"{pascal_label}-part.*"
# check if file already exists
if os.path.exists(header_path):
logger.warning(
- f'Header file `{header_path}` already exists. Overwriting.',
+ f"Header file `{header_path}` already exists. Overwriting.",
)
# concatenate key:value in props
props_list = []
for k, v in props.items():
- if v in ['int', 'long', 'integer']:
- props_list.append(f'{k}:long')
- elif v in ['int[]', 'long[]', 'integer[]']:
- props_list.append(f'{k}:long[]')
- elif v in ['float', 'double', 'dbl']:
- props_list.append(f'{k}:double')
- elif v in ['float[]', 'double[]']:
- props_list.append(f'{k}:double[]')
- elif v in ['bool', 'boolean']:
+ if v in ["int", "long", "integer"]:
+ props_list.append(f"{k}:long")
+ elif v in ["int[]", "long[]", "integer[]"]:
+ props_list.append(f"{k}:long[]")
+ elif v in ["float", "double", "dbl"]:
+ props_list.append(f"{k}:double")
+ elif v in ["float[]", "double[]"]:
+ props_list.append(f"{k}:double[]")
+ elif v in ["bool", "boolean"]:
# TODO Neo4j boolean support / spelling?
- props_list.append(f'{k}:boolean')
- elif v in ['bool[]', 'boolean[]']:
- props_list.append(f'{k}:boolean[]')
- elif v in ['str[]', 'string[]']:
- props_list.append(f'{k}:string[]')
+ props_list.append(f"{k}:boolean")
+ elif v in ["bool[]", "boolean[]"]:
+ props_list.append(f"{k}:boolean[]")
+ elif v in ["str[]", "string[]"]:
+ props_list.append(f"{k}:string[]")
else:
- props_list.append(f'{k}')
+ props_list.append(f"{k}")
# create list of lists and flatten
- out_list = [[_id], props_list, [':LABEL']]
+ out_list = [[_id], props_list, [":LABEL"]]
out_list = [val for sublist in out_list for val in sublist]
- with open(header_path, 'w', encoding='utf-8') as f:
+ with open(header_path, "w", encoding="utf-8") as f:
# concatenate with delimiter
row = self.delim.join(out_list)
f.write(row)
@@ -1099,7 +1096,9 @@ def _write_node_headers(self):
self.import_call_file_prefix,
parts,
)
- self.import_call_nodes.add((import_call_header_path, import_call_parts_path))
+ self.import_call_nodes.add(
+ (import_call_header_path, import_call_parts_path)
+ )
return True
@@ -1115,51 +1114,50 @@ def _write_edge_headers(self):
# load headers from data parse
if not self.edge_property_dict:
logger.error(
- 'Header information not found. Was the data parsed first?',
+ "Header information not found. Was the data parsed first?",
)
return False
for label, props in self.edge_property_dict.items():
-
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(label)
# paths
- header = f'{pascal_label}-header.csv'
+ header = f"{pascal_label}-header.csv"
header_path = os.path.join(
self.outdir,
header,
)
- parts = f'{pascal_label}-part.*'
+ parts = f"{pascal_label}-part.*"
# check for file exists
if os.path.exists(header_path):
logger.warning(
- f'File {header_path} already exists. Overwriting.'
+ f"File {header_path} already exists. Overwriting."
)
# concatenate key:value in props
props_list = []
for k, v in props.items():
- if v in ['int', 'long', 'integer']:
- props_list.append(f'{k}:long')
- elif v in ['int[]', 'long[]', 'integer[]']:
- props_list.append(f'{k}:long[]')
- elif v in ['float', 'double']:
- props_list.append(f'{k}:double')
- elif v in ['float[]', 'double[]']:
- props_list.append(f'{k}:double[]')
+ if v in ["int", "long", "integer"]:
+ props_list.append(f"{k}:long")
+ elif v in ["int[]", "long[]", "integer[]"]:
+ props_list.append(f"{k}:long[]")
+ elif v in ["float", "double"]:
+ props_list.append(f"{k}:double")
+ elif v in ["float[]", "double[]"]:
+ props_list.append(f"{k}:double[]")
elif v in [
- 'bool',
- 'boolean',
+ "bool",
+ "boolean",
]: # TODO does Neo4j support bool?
- props_list.append(f'{k}:boolean')
- elif v in ['bool[]', 'boolean[]']:
- props_list.append(f'{k}:boolean[]')
- elif v in ['str[]', 'string[]']:
- props_list.append(f'{k}:string[]')
+ props_list.append(f"{k}:boolean")
+ elif v in ["bool[]", "boolean[]"]:
+ props_list.append(f"{k}:boolean[]")
+ elif v in ["str[]", "string[]"]:
+ props_list.append(f"{k}:string[]")
else:
- props_list.append(f'{k}')
+ props_list.append(f"{k}")
skip_id = False
schema_label = None
@@ -1169,25 +1167,28 @@ def _write_edge_headers(self):
elif not self.extended_schema.get(label):
# find label in schema by label_as_edge
for k, v in self.extended_schema.items():
- if v.get('label_as_edge') == label:
+ if v.get("label_as_edge") == label:
schema_label = k
break
else:
schema_label = label
- out_list = [':START_ID']
+ out_list = [":START_ID"]
if schema_label:
- if self.extended_schema.get(schema_label).get('use_id') == False:
+ if (
+ self.extended_schema.get(schema_label).get("use_id")
+ == False
+ ):
skip_id = True
if not skip_id:
- out_list.append('id')
+ out_list.append("id")
out_list.extend(props_list)
- out_list.extend([':END_ID', ':TYPE'])
+ out_list.extend([":END_ID", ":TYPE"])
- with open(header_path, 'w', encoding='utf-8') as f:
+ with open(header_path, "w", encoding="utf-8") as f:
# concatenate with delimiter
row = self.delim.join(out_list)
f.write(row)
@@ -1202,7 +1203,9 @@ def _write_edge_headers(self):
self.import_call_file_prefix,
parts,
)
- self.import_call_edges.add((import_call_header_path, import_call_parts_path))
+ self.import_call_edges.add(
+ (import_call_header_path, import_call_parts_path)
+ )
return True
@@ -1213,7 +1216,7 @@ def _get_import_script_name(self) -> str:
Returns:
str: The name of the import script (ending in .sh)
"""
- return 'neo4j-admin-import-call.sh'
+ return "neo4j-admin-import-call.sh"
def _construct_import_call(self) -> str:
"""
@@ -1226,8 +1229,8 @@ def _construct_import_call(self) -> str:
str: a bash command for neo4j-admin import
"""
import_call = (
- f'{self.import_call_bin_prefix}neo4j-admin import '
- f'--database={self.db_name} '
+ f"{self.import_call_bin_prefix}neo4j-admin import "
+ f"--database={self.db_name} "
f'--delimiter="{self.escaped_delim}" '
f'--array-delimiter="{self.escaped_adelim}" '
)
@@ -1238,11 +1241,11 @@ def _construct_import_call(self) -> str:
import_call += f"--quote='{self.quote}' "
if self.wipe:
- import_call += f'--force=true '
+ import_call += f"--force=true "
if self.skip_bad_relationships:
- import_call += '--skip-bad-relationships=true '
+ import_call += "--skip-bad-relationships=true "
if self.skip_duplicate_nodes:
- import_call += '--skip-duplicate-nodes=true '
+ import_call += "--skip-duplicate-nodes=true "
# append node import calls
for header_path, parts_path in self.import_call_nodes:
@@ -1261,6 +1264,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
specified by ArangoDB for the use of "arangoimport". Output files are
similar to Neo4j, but with a different header format.
"""
+
def _get_default_import_call_bin_prefix(self):
"""
Method to provide the default string for the import call bin prefix.
@@ -1268,7 +1272,7 @@ def _get_default_import_call_bin_prefix(self):
Returns:
str: The default location for the neo4j admin import location
"""
- return ''
+ return ""
def _get_import_script_name(self) -> str:
"""
@@ -1277,7 +1281,7 @@ def _get_import_script_name(self) -> str:
Returns:
str: The name of the import script (ending in .sh)
"""
- return 'arangodb-import-call.sh'
+ return "arangodb-import-call.sh"
def _write_node_headers(self):
"""
@@ -1291,19 +1295,19 @@ def _write_node_headers(self):
# load headers from data parse
if not self.node_property_dict:
logger.error(
- 'Header information not found. Was the data parsed first?',
+ "Header information not found. Was the data parsed first?",
)
return False
for label, props in self.node_property_dict.items():
# create header CSV with ID, properties, labels
- _id = '_key'
+ _id = "_key"
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(label)
- header = f'{pascal_label}-header.csv'
+ header = f"{pascal_label}-header.csv"
header_path = os.path.join(
self.outdir,
header,
@@ -1312,28 +1316,27 @@ def _write_node_headers(self):
# check if file already exists
if os.path.exists(header_path):
logger.warning(
- f'File {header_path} already exists. Overwriting.'
+ f"File {header_path} already exists. Overwriting."
)
# concatenate key:value in props
props_list = []
for k in props.keys():
-
- props_list.append(f'{k}')
+ props_list.append(f"{k}")
# create list of lists and flatten
# removes need for empty check of property list
out_list = [[_id], props_list]
out_list = [val for sublist in out_list for val in sublist]
- with open(header_path, 'w', encoding='utf-8') as f:
+ with open(header_path, "w", encoding="utf-8") as f:
# concatenate with delimiter
row = self.delim.join(out_list)
f.write(row)
# add collection from schema config
collection = self.extended_schema[label].get(
- 'db_collection_name', None
+ "db_collection_name", None
)
# add file path to neo4 admin import statement
@@ -1341,14 +1344,12 @@ def _write_node_headers(self):
parts = self.parts.get(label, [])
if not parts:
-
raise ValueError(
- f'No parts found for node label {label}. '
- f'Check that the data was parsed first.',
+ f"No parts found for node label {label}. "
+ f"Check that the data was parsed first.",
)
for part in parts:
-
import_call_header_path = os.path.join(
self.import_call_file_prefix,
header,
@@ -1358,7 +1359,13 @@ def _write_node_headers(self):
part,
)
- self.import_call_nodes.add((import_call_header_path, import_call_parts_path, collection))
+ self.import_call_nodes.add(
+ (
+ import_call_header_path,
+ import_call_parts_path,
+ collection,
+ )
+ )
return True
@@ -1374,54 +1381,50 @@ def _write_edge_headers(self):
# load headers from data parse
if not self.edge_property_dict:
logger.error(
- 'Header information not found. Was the data parsed first?',
+ "Header information not found. Was the data parsed first?",
)
return False
for label, props in self.edge_property_dict.items():
-
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(label)
# paths
- header = f'{pascal_label}-header.csv'
+ header = f"{pascal_label}-header.csv"
header_path = os.path.join(
self.outdir,
header,
)
- parts = f'{pascal_label}-part.*'
+ parts = f"{pascal_label}-part.*"
# check for file exists
if os.path.exists(header_path):
logger.warning(
- f'Header file {header_path} already exists. Overwriting.'
+ f"Header file {header_path} already exists. Overwriting."
)
# concatenate key:value in props
props_list = []
for k in props.keys():
+ props_list.append(f"{k}")
- props_list.append(f'{k}')
+ out_list = ["_from", "_key", *props_list, "_to"]
- out_list = ['_from', '_key', *props_list, '_to']
-
- with open(header_path, 'w', encoding='utf-8') as f:
+ with open(header_path, "w", encoding="utf-8") as f:
# concatenate with delimiter
row = self.delim.join(out_list)
f.write(row)
# add collection from schema config
if not self.extended_schema.get(label):
-
for _, v in self.extended_schema.items():
- if v.get('label_as_edge') == label:
- collection = v.get('db_collection_name', None)
+ if v.get("label_as_edge") == label:
+ collection = v.get("db_collection_name", None)
break
else:
-
collection = self.extended_schema[label].get(
- 'db_collection_name', None
+ "db_collection_name", None
)
# add file path to neo4 admin import statement (import call path
@@ -1434,7 +1437,13 @@ def _write_edge_headers(self):
self.import_call_file_prefix,
parts,
)
- self.import_call_edges.add((header_import_call_path, parts_import_call_path, collection,))
+ self.import_call_edges.add(
+ (
+ header_import_call_path,
+ parts_import_call_path,
+ collection,
+ )
+ )
return True
@@ -1449,8 +1458,8 @@ def _construct_import_call(self) -> str:
str: a bash command for neo4j-admin import
"""
import_call = (
- f'{self.import_call_bin_prefix}arangoimp '
- f'--type csv '
+ f"{self.import_call_bin_prefix}arangoimp "
+ f"--type csv "
f'--separator="{self.escaped_delim}" '
)
@@ -1459,23 +1468,22 @@ def _construct_import_call(self) -> str:
else:
import_call += f"--quote='{self.quote}' "
- node_lines = ''
+ node_lines = ""
# node import calls: one line per node type
for header_path, parts_path, collection in self.import_call_nodes:
-
line = (
- f'{import_call} '
- f'--headers-file {header_path} '
- f'--file= {parts_path} '
+ f"{import_call} "
+ f"--headers-file {header_path} "
+ f"--file= {parts_path} "
)
if collection:
- line += f'--create-collection --collection {collection} '
+ line += f"--create-collection --collection {collection} "
- node_lines += f'{line}\n'
+ node_lines += f"{line}\n"
- edge_lines = ''
+ edge_lines = ""
# edge import calls: one line per edge type
for header_path, parts_path, collection in self.import_call_edges:
@@ -1502,15 +1510,15 @@ class _PostgreSQLBatchWriter(_BatchWriter):
"""
DATA_TYPE_LOOKUP = {
- 'str': 'VARCHAR', # VARCHAR needs limit
- 'int': 'INTEGER',
- 'long': 'BIGINT',
- 'float': 'NUMERIC',
- 'double': 'NUMERIC',
- 'dbl': 'NUMERIC',
- 'boolean': 'BOOLEAN',
- 'str[]': 'VARCHAR[]',
- 'string[]': 'VARCHAR[]'
+ "str": "VARCHAR", # VARCHAR needs limit
+ "int": "INTEGER",
+ "long": "BIGINT",
+ "float": "NUMERIC",
+ "double": "NUMERIC",
+ "dbl": "NUMERIC",
+ "boolean": "BOOLEAN",
+ "str[]": "VARCHAR[]",
+ "string[]": "VARCHAR[]",
}
def __init__(self, *args, **kwargs):
@@ -1524,7 +1532,7 @@ def _get_default_import_call_bin_prefix(self):
Returns:
str: The default location for the psql command
"""
- return ''
+ return ""
def _get_data_type(self, string) -> str:
try:
@@ -1533,7 +1541,7 @@ def _get_data_type(self, string) -> str:
logger.info(
'Could not determine data type {string}. Using default "VARCHAR"'
)
- return 'VARCHAR'
+ return "VARCHAR"
def _write_array_string(self, string_list) -> str:
"""
@@ -1546,7 +1554,7 @@ def _write_array_string(self, string_list) -> str:
Returns:
str: The string representation of an array for postgres COPY
"""
- string = ','.join(string_list)
+ string = ",".join(string_list)
string = f'"{{{string}}}"'
return string
@@ -1557,10 +1565,10 @@ def _get_import_script_name(self) -> str:
Returns:
str: The name of the import script (ending in .sh)
"""
- return f'{self.db_name}-import-call.sh'
+ return f"{self.db_name}-import-call.sh"
def _adjust_pascal_to_psql(self, string):
- string = string.replace('.', '_')
+ string = string.replace(".", "_")
string = string.lower()
return string
@@ -1576,7 +1584,7 @@ def _write_node_headers(self):
# load headers from data parse
if not self.node_property_dict:
logger.error(
- 'Header information not found. Was the data parsed first?',
+ "Header information not found. Was the data parsed first?",
)
return False
@@ -1586,7 +1594,7 @@ def _write_node_headers(self):
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(label)
- parts = f'{pascal_label}-part*.csv'
+ parts = f"{pascal_label}-part*.csv"
parts_paths = os.path.join(self.outdir, parts)
parts_paths = glob.glob(parts_paths)
parts_paths.sort()
@@ -1595,36 +1603,36 @@ def _write_node_headers(self):
pascal_label = self._adjust_pascal_to_psql(pascal_label)
table_create_command_path = os.path.join(
self.outdir,
- f'{pascal_label}-create_table.sql',
+ f"{pascal_label}-create_table.sql",
)
# check if file already exists
if os.path.exists(table_create_command_path):
logger.warning(
- f'File {table_create_command_path} already exists. Overwriting.',
+ f"File {table_create_command_path} already exists. Overwriting.",
)
# concatenate key:value in props
- columns = ['_ID VARCHAR']
+ columns = ["_ID VARCHAR"]
for col_name, col_type in props.items():
col_type = self._get_data_type(col_type)
col_name = self._adjust_pascal_to_psql(col_name)
- columns.append(f'{col_name} {col_type}')
- columns.append('_LABEL VARCHAR[]')
+ columns.append(f"{col_name} {col_type}")
+ columns.append("_LABEL VARCHAR[]")
- with open(table_create_command_path, 'w', encoding='utf-8') as f:
-
- command = ''
+ with open(table_create_command_path, "w", encoding="utf-8") as f:
+ command = ""
if self.wipe:
- command += f'DROP TABLE IF EXISTS {pascal_label};\n'
+ command += f"DROP TABLE IF EXISTS {pascal_label};\n"
# table creation requires comma separation
- command += f'CREATE TABLE {pascal_label}({",".join(columns)});\n'
+ command += (
+ f'CREATE TABLE {pascal_label}({",".join(columns)});\n'
+ )
f.write(command)
for parts_path in parts_paths:
-
- # if import_call_file_prefix is set, replace actual path
+ # if import_call_file_prefix is set, replace actual path
# with prefix
if self.import_call_file_prefix != self.outdir:
parts_path = parts_path.replace(
@@ -1633,7 +1641,7 @@ def _write_node_headers(self):
)
self._copy_from_csv_commands.add(
- f'\\copy {pascal_label} FROM \'{parts_path}\' DELIMITER E\'{self.delim}\' CSV;'
+ f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;"
)
# add file path to import statement
@@ -1661,16 +1669,15 @@ def _write_edge_headers(self):
# load headers from data parse
if not self.edge_property_dict:
logger.error(
- 'Header information not found. Was the data parsed first?',
+ "Header information not found. Was the data parsed first?",
)
return False
for label, props in self.edge_property_dict.items():
-
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(label)
- parts_paths = os.path.join(self.outdir, f'{pascal_label}-part*.csv')
+ parts_paths = os.path.join(self.outdir, f"{pascal_label}-part*.csv")
parts_paths = glob.glob(parts_paths)
parts_paths.sort()
@@ -1678,13 +1685,13 @@ def _write_edge_headers(self):
pascal_label = self._adjust_pascal_to_psql(pascal_label)
table_create_command_path = os.path.join(
self.outdir,
- f'{pascal_label}-create_table.sql',
+ f"{pascal_label}-create_table.sql",
)
# check for file exists
if os.path.exists(table_create_command_path):
logger.warning(
- f'File {table_create_command_path} already exists. Overwriting.',
+ f"File {table_create_command_path} already exists. Overwriting.",
)
# concatenate key:value in props
@@ -1692,7 +1699,7 @@ def _write_edge_headers(self):
for col_name, col_type in props.items():
col_type = self._get_data_type(col_type)
col_name = self._adjust_pascal_to_psql(col_name)
- if col_name == '_ID':
+ if col_name == "_ID":
# should ideally never happen
raise ValueError(
"Column name '_ID' is reserved for internal use, "
@@ -1700,26 +1707,30 @@ def _write_edge_headers(self):
"different name for your column."
)
- columns.append(f'{col_name} {col_type}')
+ columns.append(f"{col_name} {col_type}")
# create list of lists and flatten
# removes need for empty check of property list
out_list = [
- '_START_ID VARCHAR', '_ID VARCHAR', *columns, '_END_ID VARCHAR',
- '_TYPE VARCHAR'
+ "_START_ID VARCHAR",
+ "_ID VARCHAR",
+ *columns,
+ "_END_ID VARCHAR",
+ "_TYPE VARCHAR",
]
- with open(table_create_command_path, 'w', encoding='utf-8') as f:
- command = ''
+ with open(table_create_command_path, "w", encoding="utf-8") as f:
+ command = ""
if self.wipe:
- command += f'DROP TABLE IF EXISTS {pascal_label};\n'
+ command += f"DROP TABLE IF EXISTS {pascal_label};\n"
# table creation requires comma separation
- command += f'CREATE TABLE {pascal_label}({",".join(out_list)});\n'
+ command += (
+ f'CREATE TABLE {pascal_label}({",".join(out_list)});\n'
+ )
f.write(command)
for parts_path in parts_paths:
-
# if import_call_file_prefix is set, replace actual path
# with prefix
if self.import_call_file_prefix != self.outdir:
@@ -1729,7 +1740,7 @@ def _write_edge_headers(self):
)
self._copy_from_csv_commands.add(
- f'\\copy {pascal_label} FROM \'{parts_path}\' DELIMITER E\'{self.delim}\' CSV;'
+ f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;"
)
# add file path to import statement
@@ -1740,7 +1751,7 @@ def _write_edge_headers(self):
self.outdir,
self.import_call_file_prefix,
)
-
+
self.import_call_edges.add(table_create_command_path)
return True
@@ -1755,59 +1766,62 @@ def _construct_import_call(self) -> str:
Returns:
str: a bash command for postgresql import
"""
- import_call = ''
+ import_call = ""
# create tables
# At this point, csv files of nodes and edges do not require differentiation
for import_file_path in [
- *self.import_call_nodes, *self.import_call_edges
+ *self.import_call_nodes,
+ *self.import_call_edges,
]:
import_call += f'echo "Setup {import_file_path}..."\n'
if {self.db_password}:
# set password variable inline
- import_call += f'PGPASSWORD={self.db_password} '
- import_call += f'{self.import_call_bin_prefix}psql -f {import_file_path}'
- import_call += f' --dbname {self.db_name}'
- import_call += f' --port {self.db_port}'
- import_call += f' --user {self.db_user}'
+ import_call += f"PGPASSWORD={self.db_password} "
+ import_call += (
+ f"{self.import_call_bin_prefix}psql -f {import_file_path}"
+ )
+ import_call += f" --dbname {self.db_name}"
+ import_call += f" --port {self.db_port}"
+ import_call += f" --user {self.db_user}"
import_call += '\necho "Done!"\n'
- import_call += '\n'
+ import_call += "\n"
# copy data to tables
for command in self._copy_from_csv_commands:
- table_part = command.split(' ')[3]
+ table_part = command.split(" ")[3]
import_call += f'echo "Importing {table_part}..."\n'
if {self.db_password}:
# set password variable inline
- import_call += f'PGPASSWORD={self.db_password} '
+ import_call += f"PGPASSWORD={self.db_password} "
import_call += f'{self.import_call_bin_prefix}psql -c "{command}"'
- import_call += f' --dbname {self.db_name}'
- import_call += f' --port {self.db_port}'
- import_call += f' --user {self.db_user}'
+ import_call += f" --dbname {self.db_name}"
+ import_call += f" --port {self.db_port}"
+ import_call += f" --user {self.db_user}"
import_call += '\necho "Done!"\n'
- import_call += '\n'
+ import_call += "\n"
return import_call
DBMS_TO_CLASS = {
- 'neo': _Neo4jBatchWriter,
- 'neo4j': _Neo4jBatchWriter,
- 'Neo4j': _Neo4jBatchWriter,
- 'postgres': _PostgreSQLBatchWriter,
- 'postgresql': _PostgreSQLBatchWriter,
- 'PostgreSQL': _PostgreSQLBatchWriter,
- 'arango': _ArangoDBBatchWriter,
- 'arangodb': _ArangoDBBatchWriter,
- 'ArangoDB': _ArangoDBBatchWriter,
+ "neo": _Neo4jBatchWriter,
+ "neo4j": _Neo4jBatchWriter,
+ "Neo4j": _Neo4jBatchWriter,
+ "postgres": _PostgreSQLBatchWriter,
+ "postgresql": _PostgreSQLBatchWriter,
+ "PostgreSQL": _PostgreSQLBatchWriter,
+ "arango": _ArangoDBBatchWriter,
+ "arangodb": _ArangoDBBatchWriter,
+ "ArangoDB": _ArangoDBBatchWriter,
}
def get_writer(
dbms: str,
- translator: 'Translator',
- ontology: 'Ontology',
- deduplicator: 'Deduplicator',
+ translator: "Translator",
+ ontology: "Ontology",
+ deduplicator: "Deduplicator",
output_directory: str,
strict_mode: bool,
):
@@ -1835,34 +1849,36 @@ def get_writer(
dbms_config = _config(dbms)
- timestamp = lambda: datetime.now().strftime('%Y%m%d%H%M%S')
- outdir = output_directory or os.path.join('biocypher-out', timestamp())
+ timestamp = lambda: datetime.now().strftime("%Y%m%d%H%M%S")
+ outdir = output_directory or os.path.join("biocypher-out", timestamp())
outdir = os.path.abspath(outdir)
writer = DBMS_TO_CLASS[dbms]
if not writer:
- raise ValueError(f'Unknown dbms: {dbms}')
+ raise ValueError(f"Unknown dbms: {dbms}")
if writer is not None:
return writer(
ontology=ontology,
translator=translator,
deduplicator=deduplicator,
- delimiter=dbms_config.get('delimiter'),
- array_delimiter=dbms_config.get('array_delimiter'),
- quote=dbms_config.get('quote_character'),
+ delimiter=dbms_config.get("delimiter"),
+ array_delimiter=dbms_config.get("array_delimiter"),
+ quote=dbms_config.get("quote_character"),
output_directory=outdir,
- db_name=dbms_config.get('database_name'),
- import_call_bin_prefix=dbms_config.get('import_call_bin_prefix'),
- import_call_file_prefix=dbms_config.get('import_call_file_prefix'),
- wipe=dbms_config.get('wipe'),
+ db_name=dbms_config.get("database_name"),
+ import_call_bin_prefix=dbms_config.get("import_call_bin_prefix"),
+ import_call_file_prefix=dbms_config.get("import_call_file_prefix"),
+ wipe=dbms_config.get("wipe"),
strict_mode=strict_mode,
- skip_bad_relationships=dbms_config.get('skip_bad_relationships'
- ), # neo4j
- skip_duplicate_nodes=dbms_config.get('skip_duplicate_nodes'
- ), # neo4j
- db_user=dbms_config.get('user'), # psql
- db_password=dbms_config.get('password'), # psql
- db_port=dbms_config.get('port'), # psql
+ skip_bad_relationships=dbms_config.get(
+ "skip_bad_relationships"
+ ), # neo4j
+ skip_duplicate_nodes=dbms_config.get(
+ "skip_duplicate_nodes"
+ ), # neo4j
+ db_user=dbms_config.get("user"), # psql
+ db_password=dbms_config.get("password"), # psql
+ db_port=dbms_config.get("port"), # psql
)
diff --git a/docs/adapters.md b/docs/adapters.md
index 4cf3c6b9..20feaee7 100644
--- a/docs/adapters.md
+++ b/docs/adapters.md
@@ -30,7 +30,7 @@ tutorial.
::::
The project view is built from issues in the [BioCypher GitHub repository](
-https://github.com/biocypher/biocypher/issues), which carry ``Fields`` (a
+https://github.com/biocypher/biocypher/issues), which carry ``Fields`` (a
GitHub Projects-specific attribute) to describe their category and features. In
detail, these are as follows:
@@ -118,4 +118,4 @@ RETURN n
```
For more information on how to use the graph, please refer to the [Neo4j
-documentation](https://neo4j.com/docs/).
\ No newline at end of file
+documentation](https://neo4j.com/docs/).
diff --git a/docs/conf.py b/docs/conf.py
index 5c85a849..d51ea793 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -22,61 +22,60 @@
# -- Project information -----------------------------------------------------
-project = 'BioCypher'
+project = "BioCypher"
version = biocypher.__version__
-author = ', '.join(biocypher.__author__)
-copyright = f'2021-{datetime.now():%Y}, BioCypher developers'
+author = ", ".join(biocypher.__author__)
+copyright = f"2021-{datetime.now():%Y}, BioCypher developers"
# -- General configuration ---------------------------------------------------
# TOC only in sidebar
-master_doc = 'contents'
+master_doc = "contents"
html_sidebars = {
- '**':
- [
- 'globaltoc.html',
- 'relations.html',
- 'sourcelink.html',
- 'searchbox.html',
- ],
+ "**": [
+ "globaltoc.html",
+ "relations.html",
+ "sourcelink.html",
+ "searchbox.html",
+ ],
}
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
- 'sphinx.ext.autodoc',
- 'sphinx.ext.napoleon',
- 'sphinx.ext.todo', # not for output but to remove warnings
- 'sphinxext.opengraph',
- 'myst_parser', # markdown support
- 'sphinx_rtd_theme',
- 'sphinx_design',
+ "sphinx.ext.autodoc",
+ "sphinx.ext.napoleon",
+ "sphinx.ext.todo", # not for output but to remove warnings
+ "sphinxext.opengraph",
+ "myst_parser", # markdown support
+ "sphinx_rtd_theme",
+ "sphinx_design",
]
-myst_enable_extensions = ['colon_fence']
+myst_enable_extensions = ["colon_fence"]
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'biocypher-log/']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "biocypher-log/"]
# -- Autodoc configuration ---------------------------------------------------
-autodoc_mock_imports = ['bmt', 'neo4j-utils']
+autodoc_mock_imports = ["bmt", "neo4j-utils"]
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
-html_title = 'BioCypher'
-html_theme = 'sphinx_rtd_theme'
+html_title = "BioCypher"
+html_theme = "sphinx_rtd_theme"
html_theme_options = {
- 'navigation_depth': 2,
- 'collapse_navigation': True,
+ "navigation_depth": 2,
+ "collapse_navigation": True,
}
# Add any paths that contain custom static files (such as style sheets) here,
@@ -86,8 +85,8 @@
# -- OpenGraph configuration -------------------------------------------------
-ogp_site_url = 'https://biocypher.org'
-ogp_image = 'https://biocypher.org/_images/biocypher-open-graph.png'
+ogp_site_url = "https://biocypher.org"
+ogp_image = "https://biocypher.org/_images/biocypher-open-graph.png"
ogp_custom_meta_tags = [
'',
'',
@@ -95,4 +94,4 @@
'',
'',
]
-ogp_enable_meta_description = True
\ No newline at end of file
+ogp_enable_meta_description = True
diff --git a/docs/index.rst b/docs/index.rst
index 4ee91a38..2480dd53 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -114,4 +114,4 @@ information.
:link: https://github.com/biocypher/biochatter
:text-align: center
- :octicon:`mark-github;3em` :octicon:`repo;3em`
\ No newline at end of file
+ :octicon:`mark-github;3em` :octicon:`repo;3em`
diff --git a/docs/r-bioc.md b/docs/r-bioc.md
index 74f4ecd3..383b4559 100644
--- a/docs/r-bioc.md
+++ b/docs/r-bioc.md
@@ -2,4 +2,4 @@
We are working on a Bioconductor package to make BioCypher functionality
available to the R community. The current work in progess is available in [this
repository](https://vjcitn.github.io/biocBiocypher/index.html). If you are
-interested in contributing or using the package, please get in touch!
\ No newline at end of file
+interested in contributing or using the package, please get in touch!
diff --git a/docs/tutorial-adapter.md b/docs/tutorial-adapter.md
index ff8ba903..567b9f04 100644
--- a/docs/tutorial-adapter.md
+++ b/docs/tutorial-adapter.md
@@ -43,7 +43,7 @@ There are currently two 'flavours' of adapters. The first is simpler and used in
workflows that are similar to harmonisation scripts, where the BioCypher
interface is instantiated in the same script as the adapter(s). In the second,
the BioCypher interface is contained in the adapter class, which makes for a
-more complex architecture, but allows for more involved workflows. In
+more complex architecture, but allows for more involved workflows. In
pseudo-code, the two approaches look like this:
```{code-block} python
@@ -109,7 +109,7 @@ Graph](https://github.com/IGVF-DACC/igvf-catalog/tree/main/data) and the
[Clinical Knowledge
Graph migration](https://github.com/biocypher/clinical-knowledge-graph).
-```{note}
+```{note}
While there are differences in implementation details, both approaches are
largely functionally equivalent. At the current time, there is no clear
diff --git a/docs/tutorial.md b/docs/tutorial.md
index 7effe22f..345e1cd8 100644
--- a/docs/tutorial.md
+++ b/docs/tutorial.md
@@ -567,4 +567,4 @@ protein protein interaction:
represented_as: edge
use_id: false
# ...
-```
\ No newline at end of file
+```
diff --git a/docs/user-experiences.md b/docs/user-experiences.md
index 5a76d14f..44d5e961 100644
--- a/docs/user-experiences.md
+++ b/docs/user-experiences.md
@@ -9,7 +9,7 @@ repositories ("storage") and (2) project-specific knowledge graph creation
## A Knowledge Graph for Impact of Genomic Variation on Function (IGVF)
-:::{card} Impact of Genomic Variation on Function (IGVF)
+:::{card} Impact of Genomic Variation on Function (IGVF)
:link: https://www.igvf.org/
The Impact of Genomic Variation on Function (IGVF) project aims to provide a
@@ -28,7 +28,7 @@ creating a user-facing API (and eventually UI) that will access this graph.
BioCypher, which acts as an intermediary between Biolink and graph databases (we
are using ArangoDB) has been instrumental in helping us design the schema and
move our project forward. Specifically, it provides a framework we can use to
-parse the dozens of data files and formats into a Biolink-inspired schema.
+parse the dozens of data files and formats into a Biolink-inspired schema.
— Ben Hitz, Director of Genomics Data Resources, Project Manager ENCODE,
Stanford University
@@ -37,10 +37,10 @@ Stanford University
The BioCypher pipeline used to build the knowledge graph uses several adapters
for genetics data sources; an overview is available in our
-[meta-graph](metagraph) and on the [GitHub Components
+[meta-graph](metagraph) and on the [GitHub Components
Board](https://github.com/orgs/biocypher/projects/3) (pipelines column). The
pipeline boasts a Docker Compose workflow that builds the graph and the API
-(using [tRPC](https://trpc.io/)), and is available on
+(using [tRPC](https://trpc.io/)), and is available on
[GitHub](https://github.com/IGVF-DACC/igvf-catalog).
## Drug Repurposing with CROssBAR
@@ -72,7 +72,7 @@ multiple genes/proteins, compounds/drugs, diseases, phenotypes, pathways, or any
combination of those, this procedure gets extremely complicated, requiring an
average of 64 NoSQL queries to construct one single user-specific KG. The total
number of lines of code required for this procedure alone is around 8000.
-This task could have been achieved significantly faster and more efficiently
+This task could have been achieved significantly faster and more efficiently
if we had had BioCypher five years ago.
— Tunca Doğan, Department of Computer Engineering and Artificial Intelligence
@@ -84,7 +84,7 @@ Institute (EMBL-EBI)
Using BioCypher, CROssBAR v2 will be a flexible property graph database
comprised of single input adapters for each data source. As above, you can see
-its current state in the [meta-graph](metagraph) and on the [GitHub Components
+its current state in the [meta-graph](metagraph) and on the [GitHub Components
Board](https://github.com/orgs/biocypher/projects/3) (pipelines column).
## Builing a Knowledge Graph for Contextualised Metabolic-Enzymatic Interactions
@@ -124,4 +124,4 @@ The BioCypher pipeline used to build the knowledge graph uses several adapters,
some of which overlap with the CROssBAR project, which helps synergising
maintenance efforts. An overview is available in our
[meta-graph](metagraph) and on the [GitHub Components
-Board](https://github.com/orgs/biocypher/projects/3) (pipelines column).
\ No newline at end of file
+Board](https://github.com/orgs/biocypher/projects/3) (pipelines column).
diff --git a/test/conftest.py b/test/conftest.py
index 0a5abf48..c23a77fb 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -14,8 +14,8 @@
_ArangoDBBatchWriter,
_PostgreSQLBatchWriter,
)
-from biocypher._pandas import Pandas
from biocypher._create import BioCypherEdge, BioCypherNode
+from biocypher._pandas import Pandas
from biocypher._connect import _Neo4jDriver
from biocypher._mapping import OntologyMapping
from biocypher._ontology import Ontology, OntologyAdapter
@@ -25,29 +25,26 @@
# CLI option parser
def pytest_addoption(parser):
-
options = (
# neo4j
- ('database_name', 'The Neo4j database to be used for tests.'),
- ('user', 'Tests access Neo4j as this user.'),
- ('password', 'Password to access Neo4j.'),
- ('uri', 'URI of the Neo4j server.'),
-
+ ("database_name", "The Neo4j database to be used for tests."),
+ ("user", "Tests access Neo4j as this user."),
+ ("password", "Password to access Neo4j."),
+ ("uri", "URI of the Neo4j server."),
# postgresl
(
- 'database_name_postgresql',
- 'The PostgreSQL database to be used for tests. Defaults to "postgresql-biocypher-test-TG2C7GsdNw".'
+ "database_name_postgresql",
+ 'The PostgreSQL database to be used for tests. Defaults to "postgresql-biocypher-test-TG2C7GsdNw".',
),
- ('user_postgresql', 'Tests access PostgreSQL as this user.'),
- ('password_postgresql', 'Password to access PostgreSQL.'),
- ('port_postgresql', 'Port of the PostgreSQL server.'),
+ ("user_postgresql", "Tests access PostgreSQL as this user."),
+ ("password_postgresql", "Password to access PostgreSQL."),
+ ("port_postgresql", "Port of the PostgreSQL server."),
)
for name, help_ in options:
-
parser.addoption(
- f'--{name}',
- action='store',
+ f"--{name}",
+ action="store",
default=None,
help=help_,
)
@@ -56,33 +53,33 @@ def pytest_addoption(parser):
# temporary output paths
def get_random_string(length):
letters = string.ascii_lowercase
- return ''.join(random.choice(letters) for _ in range(length))
+ return "".join(random.choice(letters) for _ in range(length))
# biocypher node generator
-@pytest.fixture(scope='function')
+@pytest.fixture(scope="function")
def _get_nodes(l: int) -> list:
nodes = []
for i in range(l):
bnp = BioCypherNode(
- node_id=f'p{i+1}',
- node_label='protein',
- preferred_id='uniprot',
+ node_id=f"p{i+1}",
+ node_label="protein",
+ preferred_id="uniprot",
properties={
- 'score': 4 / (i + 1),
- 'name': 'StringProperty1',
- 'taxon': 9606,
- 'genes': ['gene1', 'gene2'],
+ "score": 4 / (i + 1),
+ "name": "StringProperty1",
+ "taxon": 9606,
+ "genes": ["gene1", "gene2"],
},
)
nodes.append(bnp)
bnm = BioCypherNode(
- node_id=f'm{i+1}',
- node_label='microRNA',
- preferred_id='mirbase',
+ node_id=f"m{i+1}",
+ node_label="microRNA",
+ preferred_id="mirbase",
properties={
- 'name': 'StringProperty1',
- 'taxon': 9606,
+ "name": "StringProperty1",
+ "taxon": 9606,
},
)
nodes.append(bnm)
@@ -91,31 +88,31 @@ def _get_nodes(l: int) -> list:
# biocypher edge generator
-@pytest.fixture(scope='function')
+@pytest.fixture(scope="function")
def _get_edges(l):
edges = []
for i in range(l):
e1 = BioCypherEdge(
- relationship_id=f'prel{i}',
- source_id=f'p{i}',
- target_id=f'p{i + 1}',
- relationship_label='PERTURBED_IN_DISEASE',
+ relationship_id=f"prel{i}",
+ source_id=f"p{i}",
+ target_id=f"p{i + 1}",
+ relationship_label="PERTURBED_IN_DISEASE",
properties={
- 'residue': 'T253',
- 'level': 4,
+ "residue": "T253",
+ "level": 4,
},
# we suppose the verb-form relationship label is created by
# translation functionality in translate.py
)
edges.append(e1)
e2 = BioCypherEdge(
- relationship_id=f'mrel{i}',
- source_id=f'm{i}',
- target_id=f'p{i + 1}',
- relationship_label='Is_Mutated_In',
+ relationship_id=f"mrel{i}",
+ source_id=f"m{i}",
+ target_id=f"p{i + 1}",
+ relationship_label="Is_Mutated_In",
properties={
- 'site': '3-UTR',
- 'confidence': 1,
+ "site": "3-UTR",
+ "confidence": 1,
},
# we suppose the verb-form relationship label is created by
# translation functionality in translate.py
@@ -123,95 +120,95 @@ def _get_edges(l):
edges.append(e2)
return edges
-@pytest.fixture(scope='function')
+
+@pytest.fixture(scope="function")
def deduplicator():
return Deduplicator()
-@pytest.fixture(scope='module')
+
+@pytest.fixture(scope="module")
def ontology_mapping():
return OntologyMapping(
- config_file='biocypher/_config/test_schema_config.yaml'
+ config_file="biocypher/_config/test_schema_config.yaml"
)
-@pytest.fixture(scope='module')
+
+@pytest.fixture(scope="module")
def extended_ontology_mapping():
return OntologyMapping(
- config_file='biocypher/_config/test_schema_config_extended.yaml'
+ config_file="biocypher/_config/test_schema_config_extended.yaml"
)
-@pytest.fixture(scope='module')
+
+@pytest.fixture(scope="module")
def disconnected_mapping():
return OntologyMapping(
- config_file='biocypher/_config/test_schema_config_disconnected.yaml'
+ config_file="biocypher/_config/test_schema_config_disconnected.yaml"
)
-@pytest.fixture(scope='module')
+
+@pytest.fixture(scope="module")
def translator(extended_ontology_mapping):
return Translator(extended_ontology_mapping)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def biolink_adapter():
return OntologyAdapter(
- 'https://github.com/biolink/biolink-model/raw/v3.2.1/biolink-model.owl.ttl',
- 'entity'
+ "https://github.com/biolink/biolink-model/raw/v3.2.1/biolink-model.owl.ttl",
+ "entity",
)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def so_adapter():
- return OntologyAdapter('test/so.owl', 'sequence_variant')
+ return OntologyAdapter("test/so.owl", "sequence_variant")
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def go_adapter():
- return OntologyAdapter('test/go.owl', 'molecular_function')
+ return OntologyAdapter("test/go.owl", "molecular_function")
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def mondo_adapter():
- return OntologyAdapter('test/mondo.owl', 'disease')
+ return OntologyAdapter("test/mondo.owl", "disease")
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def hybrid_ontology(extended_ontology_mapping):
return Ontology(
head_ontology={
- 'url':
- 'https://github.com/biolink/biolink-model/raw/v3.2.1/biolink-model.owl.ttl',
- 'root_node':
- 'entity',
+ "url": "https://github.com/biolink/biolink-model/raw/v3.2.1/biolink-model.owl.ttl",
+ "root_node": "entity",
},
ontology_mapping=extended_ontology_mapping,
tail_ontologies={
- 'so':
- {
- 'url': 'test/so.owl',
- 'head_join_node': 'sequence variant',
- 'tail_join_node': 'sequence_variant',
- },
- 'mondo':
- {
- 'url': 'test/mondo.owl',
- 'head_join_node': 'disease',
- 'tail_join_node': 'human disease',
- 'merge_nodes': False,
- }
+ "so": {
+ "url": "test/so.owl",
+ "head_join_node": "sequence variant",
+ "tail_join_node": "sequence_variant",
+ },
+ "mondo": {
+ "url": "test/mondo.owl",
+ "head_join_node": "disease",
+ "tail_join_node": "human disease",
+ "merge_nodes": False,
+ },
},
)
# neo4j batch writer fixtures
-@pytest.fixture(scope='function')
+@pytest.fixture(scope="function")
def bw(hybrid_ontology, translator, deduplicator, tmp_path):
-
bw = _Neo4jBatchWriter(
ontology=hybrid_ontology,
translator=translator,
deduplicator=deduplicator,
output_directory=tmp_path,
- delimiter=';',
- array_delimiter='|',
+ delimiter=";",
+ array_delimiter="|",
quote="'",
)
@@ -224,16 +221,15 @@ def bw(hybrid_ontology, translator, deduplicator, tmp_path):
# neo4j batch writer fixtures
-@pytest.fixture(scope='function')
+@pytest.fixture(scope="function")
def bw_tab(hybrid_ontology, translator, deduplicator, tmp_path):
-
bw_tab = _Neo4jBatchWriter(
ontology=hybrid_ontology,
translator=translator,
deduplicator=deduplicator,
output_directory=tmp_path,
- delimiter='\\t',
- array_delimiter='|',
+ delimiter="\\t",
+ array_delimiter="|",
quote="'",
)
@@ -245,16 +241,15 @@ def bw_tab(hybrid_ontology, translator, deduplicator, tmp_path):
os.rmdir(tmp_path)
-@pytest.fixture(scope='function')
+@pytest.fixture(scope="function")
def bw_strict(hybrid_ontology, translator, deduplicator, tmp_path):
-
bw = _Neo4jBatchWriter(
ontology=hybrid_ontology,
translator=translator,
deduplicator=deduplicator,
output_directory=tmp_path,
- delimiter=';',
- array_delimiter='|',
+ delimiter=";",
+ array_delimiter="|",
quote="'",
strict_mode=True,
)
@@ -268,36 +263,31 @@ def bw_strict(hybrid_ontology, translator, deduplicator, tmp_path):
# core instance fixture
-@pytest.fixture(name='core', scope='function')
+@pytest.fixture(name="core", scope="function")
def create_core(request, tmp_path):
-
# TODO why does the integration test use a different path than this fixture?
- marker = request.node.get_closest_marker('inject_core_args')
+ marker = request.node.get_closest_marker("inject_core_args")
marker_args = {}
# check if marker has attribute param
- if marker and hasattr(marker, 'param'):
-
+ if marker and hasattr(marker, "param"):
marker_args = marker.param
- if not marker_args and 'CORE' in globals():
-
- c = globals()['CORE']
+ if not marker_args and "CORE" in globals():
+ c = globals()["CORE"]
else:
-
core_args = {
- 'schema_config_path': 'biocypher/_config/test_schema_config.yaml',
- 'output_directory': tmp_path,
+ "schema_config_path": "biocypher/_config/test_schema_config.yaml",
+ "output_directory": tmp_path,
}
core_args.update(marker_args)
c = BioCypher(**core_args)
if not marker_args:
-
- globals()['CORE'] = c
+ globals()["CORE"] = c
c._deduplicator = Deduplicator()
# seems to reuse deduplicator from previous test, unsure why
@@ -309,7 +299,8 @@ def create_core(request, tmp_path):
os.remove(os.path.join(tmp_path, f))
os.rmdir(tmp_path)
-@pytest.fixture(scope='function')
+
+@pytest.fixture(scope="function")
def _pd(deduplicator):
return Pandas(
ontology=None,
@@ -317,22 +308,21 @@ def _pd(deduplicator):
deduplicator=deduplicator,
)
+
# neo4j parameters
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
def neo4j_param(request):
-
keys = (
- 'database_name',
- 'user',
- 'password',
- 'uri',
+ "database_name",
+ "user",
+ "password",
+ "uri",
)
- param = bcy_config('neo4j')
+ param = bcy_config("neo4j")
cli = {
- key: request.config.getoption(f'--{key}') or param[key]
- for key in keys
+ key: request.config.getoption(f"--{key}") or param[key] for key in keys
}
return cli
@@ -341,118 +331,104 @@ def neo4j_param(request):
# skip test if neo4j is offline
@pytest.fixture(autouse=True)
def skip_if_offline_neo4j(request, neo4j_param, translator, hybrid_ontology):
-
- marker = request.node.get_closest_marker('requires_neo4j')
+ marker = request.node.get_closest_marker("requires_neo4j")
if marker:
-
try:
-
marker_args = {}
# check if marker has attribute param
- if marker and hasattr(marker, 'param'):
-
+ if marker and hasattr(marker, "param"):
marker_args = marker.param
driver_args = {
- 'wipe': True,
- 'multi_db': True,
- 'translator': translator,
- 'ontology': hybrid_ontology,
+ "wipe": True,
+ "multi_db": True,
+ "translator": translator,
+ "ontology": hybrid_ontology,
}
driver_args.update(marker_args)
driver_args.update(neo4j_param)
- driver_args['database_name'] = 'test'
+ driver_args["database_name"] = "test"
_Neo4jDriver(**driver_args)
except ServiceUnavailable as e:
-
- pytest.skip(f'Neo4j is offline: {e}')
+ pytest.skip(f"Neo4j is offline: {e}")
# neo4j driver fixture
-@pytest.fixture(name='driver', scope='function')
+@pytest.fixture(name="driver", scope="function")
def create_driver(request, neo4j_param, translator, hybrid_ontology):
-
marker = None # request.node.get_closest_marker('inject_driver_args')
marker_args = {}
# check if marker has attribute param
- if marker and hasattr(marker, 'param'):
-
+ if marker and hasattr(marker, "param"):
marker_args = marker.param
- if not marker_args and 'DRIVER' in globals():
-
- d = globals()['DRIVER']
+ if not marker_args and "DRIVER" in globals():
+ d = globals()["DRIVER"]
else:
-
driver_args = {
- 'wipe': True,
- 'multi_db': True,
- 'translator': translator,
- 'ontology': hybrid_ontology,
+ "wipe": True,
+ "multi_db": True,
+ "translator": translator,
+ "ontology": hybrid_ontology,
}
driver_args.update(marker_args)
driver_args.update(neo4j_param)
- driver_args['database_name'] = 'test'
+ driver_args["database_name"] = "test"
d = _Neo4jDriver(**driver_args)
if not marker_args:
-
- globals()['DRIVER'] = d
+ globals()["DRIVER"] = d
yield d
# teardown
- d._driver.query('MATCH (n:Test)'
- 'DETACH DELETE n')
- d._driver.query('MATCH (n:Int1)'
- 'DETACH DELETE n')
- d._driver.query('MATCH (n:Int2)'
- 'DETACH DELETE n')
+ d._driver.query("MATCH (n:Test)" "DETACH DELETE n")
+ d._driver.query("MATCH (n:Int1)" "DETACH DELETE n")
+ d._driver.query("MATCH (n:Int2)" "DETACH DELETE n")
# to deal with merging on non-existing nodes
# see test_add_single_biocypher_edge_missing_nodes()
- d._driver.query("MATCH (n2) WHERE n2.id = 'src'"
- 'DETACH DELETE n2')
- d._driver.query("MATCH (n3) WHERE n3.id = 'tar'"
- 'DETACH DELETE n3')
+ d._driver.query("MATCH (n2) WHERE n2.id = 'src'" "DETACH DELETE n2")
+ d._driver.query("MATCH (n3) WHERE n3.id = 'tar'" "DETACH DELETE n3")
d._driver.close()
### postgresql ###
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
def postgresql_param(request):
-
keys = (
- 'user_postgresql',
- 'password_postgresql',
- 'port_postgresql',
+ "user_postgresql",
+ "password_postgresql",
+ "port_postgresql",
)
# get fallback parameters from biocypher config
- param = bcy_config('postgresql')
+ param = bcy_config("postgresql")
cli = {}
for key in keys:
# remove '_postgresql' suffix
key_short = key[:-11]
# change into format of input parameters
- cli[f'db_{key_short}'] = request.config.getoption(f'--{key}'
- ) or param[key_short]
+ cli[f"db_{key_short}"] = (
+ request.config.getoption(f"--{key}") or param[key_short]
+ )
# hardcoded string for test-db name. test-db will be created for testing and droped after testing.
# Do not take db_name from config to avoid accidental testing on the production database
- cli['db_name'] = request.config.getoption(
- '--database_name_postgresql'
- ) or 'postgresql-biocypher-test-TG2C7GsdNw'
+ cli["db_name"] = (
+ request.config.getoption("--database_name_postgresql")
+ or "postgresql-biocypher-test-TG2C7GsdNw"
+ )
return cli
@@ -460,36 +436,38 @@ def postgresql_param(request):
# skip test if postgresql is offline
@pytest.fixture(autouse=True)
def skip_if_offline_postgresql(request, postgresql_param):
-
- marker = request.node.get_closest_marker('requires_postgresql')
+ marker = request.node.get_closest_marker("requires_postgresql")
if marker:
-
params = postgresql_param
- user, port, password = params['db_user'], params['db_port'], params[
- 'db_password']
+ user, port, password = (
+ params["db_user"],
+ params["db_port"],
+ params["db_password"],
+ )
# an empty command, just to test if connection is possible
- command = f'PGPASSWORD={password} psql -c \'\' --port {port} --user {user}'
+ command = (
+ f"PGPASSWORD={password} psql -c '' --port {port} --user {user}"
+ )
process = subprocess.run(command, shell=True)
# returncode is 0 when success
if process.returncode != 0:
- pytest.skip('Requires psql and connection to Postgresql server.')
+ pytest.skip("Requires psql and connection to Postgresql server.")
-@pytest.fixture(scope='function')
+@pytest.fixture(scope="function")
def bw_comma_postgresql(
postgresql_param, hybrid_ontology, translator, deduplicator, tmp_path
):
-
bw_comma = _PostgreSQLBatchWriter(
ontology=hybrid_ontology,
translator=translator,
deduplicator=deduplicator,
output_directory=tmp_path,
- delimiter=',',
- **postgresql_param
+ delimiter=",",
+ **postgresql_param,
)
yield bw_comma
@@ -500,16 +478,17 @@ def bw_comma_postgresql(
os.rmdir(tmp_path)
-@pytest.fixture(scope='function')
-def bw_tab_postgresql(postgresql_param, hybrid_ontology, translator, deduplicator, tmp_path):
-
+@pytest.fixture(scope="function")
+def bw_tab_postgresql(
+ postgresql_param, hybrid_ontology, translator, deduplicator, tmp_path
+):
bw_tab = _PostgreSQLBatchWriter(
ontology=hybrid_ontology,
translator=translator,
deduplicator=deduplicator,
output_directory=tmp_path,
- delimiter='\\t',
- **postgresql_param
+ delimiter="\\t",
+ **postgresql_param,
)
yield bw_tab
@@ -520,32 +499,35 @@ def bw_tab_postgresql(postgresql_param, hybrid_ontology, translator, deduplicato
os.rmdir(tmp_path)
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
def create_database_postgres(postgresql_param):
params = postgresql_param
- dbname, user, port, password = params['db_name'], params['db_user'], params[
- 'db_port'], params['db_password']
+ dbname, user, port, password = (
+ params["db_name"],
+ params["db_user"],
+ params["db_port"],
+ params["db_password"],
+ )
# create the database
- command = f'PGPASSWORD={password} psql -c \'CREATE DATABASE "{dbname}";\' --port {port} --user {user}'
+ command = f"PGPASSWORD={password} psql -c 'CREATE DATABASE \"{dbname}\";' --port {port} --user {user}"
process = subprocess.run(command, shell=True)
yield dbname, user, port, password, process.returncode == 0 # 0 if success
# teardown
- command = f'PGPASSWORD={password} psql -c \'DROP DATABASE "{dbname}";\' --port {port} --user {user}'
+ command = f"PGPASSWORD={password} psql -c 'DROP DATABASE \"{dbname}\";' --port {port} --user {user}"
process = subprocess.run(command, shell=True)
-@pytest.fixture(scope='function')
+@pytest.fixture(scope="function")
def bw_arango(hybrid_ontology, translator, deduplicator, tmp_path):
-
bw_arango = _ArangoDBBatchWriter(
ontology=hybrid_ontology,
translator=translator,
deduplicator=deduplicator,
output_directory=tmp_path,
- delimiter=',',
+ delimiter=",",
)
yield bw_arango
diff --git a/test/profile_performance.py b/test/profile_performance.py
index bee843cd..a4921484 100644
--- a/test/profile_performance.py
+++ b/test/profile_performance.py
@@ -11,15 +11,15 @@
from biocypher._connect import _Neo4jDriver
__all__ = [
- 'create_network_by_gen',
- 'create_network_by_list',
- 'create_networks',
- 'delete_test_network',
- 'explain_neo4j',
- 'profile_neo4j',
- 'remove_constraint',
- 'setup_constraint',
- 'visualise_benchmark',
+ "create_network_by_gen",
+ "create_network_by_list",
+ "create_networks",
+ "delete_test_network",
+ "explain_neo4j",
+ "profile_neo4j",
+ "remove_constraint",
+ "setup_constraint",
+ "visualise_benchmark",
]
@@ -28,14 +28,14 @@ def create_network_by_gen(num_nodes, num_edges, profile=False, explain=False):
def node_gen(num_nodes):
for i in range(num_nodes):
- yield BioCypherNode(i, 'test')
+ yield BioCypherNode(i, "test")
def edge_gen(num_edges):
for _ in range(num_edges):
src = random.randint(1, num_nodes)
tar = random.randint(1, num_nodes)
- yield BioCypherEdge(src, tar, 'test')
+ yield BioCypherEdge(src, tar, "test")
node_profile, np_printout = d.add_biocypher_nodes(
node_gen(num_nodes),
@@ -82,7 +82,7 @@ def create_network_by_list(num_nodes, num_edges):
def node_list(num_nodes):
ls = []
for i in range(num_nodes):
- ls.append(BioCypherNode(i, 'test'))
+ ls.append(BioCypherNode(i, "test"))
return ls
@@ -91,7 +91,7 @@ def edge_list(num_edges):
for _ in range(num_edges):
src = random.randint(1, num_nodes)
tar = random.randint(1, num_nodes)
- ls.append(BioCypherEdge(src, tar, 'test'))
+ ls.append(BioCypherEdge(src, tar, "test"))
return ls
@@ -104,23 +104,23 @@ def edge_list(num_edges):
def setup_constraint():
d = _Neo4jDriver(increment_version=False)
d.query(
- 'CREATE CONSTRAINT test_id '
- 'IF NOT EXISTS ON (n:test) '
- 'ASSERT n.id IS UNIQUE ',
+ "CREATE CONSTRAINT test_id "
+ "IF NOT EXISTS ON (n:test) "
+ "ASSERT n.id IS UNIQUE ",
)
d.close()
def remove_constraint():
d = _Neo4jDriver(increment_version=False)
- d.query('DROP CONSTRAINT test_id')
+ d.query("DROP CONSTRAINT test_id")
d.close()
def delete_test_network():
d = _Neo4jDriver(increment_version=False)
- d.query('MATCH (n)-[:test]-() DETACH DELETE n')
- d.query('MATCH (n:test) DETACH DELETE n')
+ d.query("MATCH (n)-[:test]-() DETACH DELETE n")
+ d.query("MATCH (n:test) DETACH DELETE n")
d.close()
@@ -140,9 +140,9 @@ def create_networks():
)
delete_test_network()
- res.update({'lis%s' % n: lis, 'lism%s' % n: lism})
+ res.update({"lis%s" % n: lis, "lism%s" % n: lism})
- with open('benchmark.pickle', 'wb') as f:
+ with open("benchmark.pickle", "wb") as f:
pickle.dump(res, f)
print(res)
@@ -153,57 +153,55 @@ def visualise_benchmark():
import matplotlib.pyplot as plt
- with open('benchmark.pickle', 'rb') as f:
+ with open("benchmark.pickle", "rb") as f:
res = pickle.load(f)
- x = [key for key in res.keys() if 'lism' in key]
- x = [int(e.replace('lism', '')) for e in x]
- lis = [value for key, value in res.items() if 'lism' not in key]
- lism = [value for key, value in res.items() if 'lism' in key]
+ x = [key for key in res.keys() if "lism" in key]
+ x = [int(e.replace("lism", "")) for e in x]
+ lis = [value for key, value in res.items() if "lism" not in key]
+ lism = [value for key, value in res.items() if "lism" in key]
- plt.plot(x, lis, marker='o', label='List')
- plt.plot(x, lism, marker='o', label='List (modified)')
- plt.xlabel('Network size (nodes)')
- plt.ylabel('Time (s)')
+ plt.plot(x, lis, marker="o", label="List")
+ plt.plot(x, lism, marker="o", label="List (modified)")
+ plt.xlabel("Network size (nodes)")
+ plt.ylabel("Time (s)")
plt.legend()
plt.show()
def profile_neo4j(num_nodes, num_edges):
-
np, ep, epm = create_network_by_gen(num_nodes, num_edges, profile=True)
- print('')
- print(f'{bcolors.HEADER}### NODE PROFILE ###{bcolors.ENDC}')
+ print("")
+ print(f"{bcolors.HEADER}### NODE PROFILE ###{bcolors.ENDC}")
for p in np[1]:
print(p)
- print('')
- print(f'{bcolors.HEADER}### EDGE PROFILE ###{bcolors.ENDC}')
+ print("")
+ print(f"{bcolors.HEADER}### EDGE PROFILE ###{bcolors.ENDC}")
for p in ep[1]:
print(p)
- print('')
- print(f'{bcolors.HEADER}### MODIFIED EDGE PROFILE ###{bcolors.ENDC}')
+ print("")
+ print(f"{bcolors.HEADER}### MODIFIED EDGE PROFILE ###{bcolors.ENDC}")
for p in epm[1]:
print(p)
def explain_neo4j(num_nodes, num_edges):
-
np, ep, epm = create_network_by_gen(num_nodes, num_edges, explain=True)
- print('')
- print(f'{bcolors.HEADER}### NODE PROFILE ###{bcolors.ENDC}')
+ print("")
+ print(f"{bcolors.HEADER}### NODE PROFILE ###{bcolors.ENDC}")
for p in np[1]:
print(p)
- print('')
- print(f'{bcolors.HEADER}### EDGE PROFILE ###{bcolors.ENDC}')
+ print("")
+ print(f"{bcolors.HEADER}### EDGE PROFILE ###{bcolors.ENDC}")
for p in ep[1]:
print(p)
- print('')
- print(f'{bcolors.HEADER}### MODIFIED EDGE PROFILE ###{bcolors.ENDC}')
+ print("")
+ print(f"{bcolors.HEADER}### MODIFIED EDGE PROFILE ###{bcolors.ENDC}")
for p in epm[1]:
print(p)
-if __name__ == '__main__':
+if __name__ == "__main__":
# profile python performance with cProfile
python_prof = False
# run network creation (needed for python profiling)
@@ -233,7 +231,7 @@ def explain_neo4j(num_nodes, num_edges):
ps = pstats.Stats(profile, stream=s).sort_stats(sortby)
ps.print_stats()
# print(s.getvalue())
- filename = 'create_network.prof'
+ filename = "create_network.prof"
ps.dump_stats(filename)
if viz:
diff --git a/test/rdflib_playground.py b/test/rdflib_playground.py
index 6dca50c0..44ae877b 100644
--- a/test/rdflib_playground.py
+++ b/test/rdflib_playground.py
@@ -3,10 +3,9 @@
def ontology_to_tree(ontology_path, root_label, switch_id_and_label=True):
-
# Load the ontology into an rdflib Graph
g = rdflib.Graph()
- g.parse(ontology_path, format='ttl')
+ g.parse(ontology_path, format="ttl")
# Loop through all labels in the ontology
for s, _, o in g.triples((None, rdflib.RDFS.label, None)):
@@ -15,14 +14,13 @@ def ontology_to_tree(ontology_path, root_label, switch_id_and_label=True):
root = s
break
else:
- raise ValueError(f'Could not find root node with label {root_label}')
+ raise ValueError(f"Could not find root node with label {root_label}")
# Create a directed graph to represent the ontology as a tree
G = nx.DiGraph()
# Define a recursive function to add subclasses to the graph
def add_subclasses(node):
-
# Only add nodes that have a label
if (node, rdflib.RDFS.label, None) not in g:
return
@@ -31,25 +29,23 @@ def add_subclasses(node):
if nx_id not in G:
G.add_node(nx_id)
- G.nodes[nx_id]['label'] = nx_label
+ G.nodes[nx_id]["label"] = nx_label
# Recursively add all subclasses of the node to the graph
for s, _, o in g.triples((None, rdflib.RDFS.subClassOf, node)):
-
# Only add nodes that have a label
if (s, rdflib.RDFS.label, None) not in g:
continue
s_id, s_label = _get_nx_id_and_label(s)
G.add_node(s_id)
- G.nodes[s_id]['label'] = s_label
+ G.nodes[s_id]["label"] = s_label
G.add_edge(s_id, nx_id)
add_subclasses(s)
add_parents(s)
def add_parents(node):
-
# Only add nodes that have a label
if (node, rdflib.RDFS.label, None) not in g:
return
@@ -58,7 +54,6 @@ def add_parents(node):
# Recursively add all parents of the node to the graph
for s, _, o in g.triples((node, rdflib.RDFS.subClassOf, None)):
-
# Only add nodes that have a label
if (o, rdflib.RDFS.label, None) not in g:
continue
@@ -70,7 +65,7 @@ def add_parents(node):
continue
G.add_node(o_id)
- G.nodes[o_id]['label'] = o_label
+ G.nodes[o_id]["label"] = o_label
G.add_edge(nx_id, o_id)
add_parents(o)
@@ -95,15 +90,15 @@ def remove_prefix(uri: str) -> str:
separator between the prefix and the local name. The prefix is
everything before the last separator.
"""
- return uri.rsplit('#', 1)[-1].rsplit('/', 1)[-1]
+ return uri.rsplit("#", 1)[-1].rsplit("/", 1)[-1]
-if __name__ == '__main__':
- path = 'test/so.owl'
- url = 'https://raw.githubusercontent.com/biolink/biolink-model/v3.2.1/biolink-model.owl.ttl'
- root_label = 'entity'
+if __name__ == "__main__":
+ path = "test/so.owl"
+ url = "https://raw.githubusercontent.com/biolink/biolink-model/v3.2.1/biolink-model.owl.ttl"
+ root_label = "entity"
G = ontology_to_tree(url, root_label, switch_id_and_label=True)
# depth first search: ancestors of the "protein" node
- ancestors = nx.dfs_preorder_nodes(G, 'macromolecular complex')
+ ancestors = nx.dfs_preorder_nodes(G, "macromolecular complex")
print(list(ancestors))
diff --git a/test/test_config.py b/test/test_config.py
index e26ae474..e2611f43 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -4,12 +4,11 @@
def test_read_yaml():
- schema_config = _read_yaml('biocypher/_config/test_schema_config.yaml')
+ schema_config = _read_yaml("biocypher/_config/test_schema_config.yaml")
- assert 'protein' in schema_config
+ assert "protein" in schema_config
def test_for_special_characters():
-
with pytest.warns(UserWarning):
- _read_yaml('biocypher/_config/test_config.yaml')
+ _read_yaml("biocypher/_config/test_config.yaml")
diff --git a/test/test_core.py b/test/test_core.py
index ac227a96..0ba455a0 100644
--- a/test/test_core.py
+++ b/test/test_core.py
@@ -1,7 +1,8 @@
import pytest
+
def test_biocypher(core):
- assert core._dbms == 'neo4j'
+ assert core._dbms == "neo4j"
assert core._offline == True
assert core._strict_mode == False
@@ -11,12 +12,13 @@ def test_log_missing_types(core, translator):
core._translator.notype = {}
assert core.log_missing_input_labels() == None
- core._translator.notype = {'a': 1, 'b': 2}
+ core._translator.notype = {"a": 1, "b": 2}
mt = core.log_missing_input_labels()
- assert mt.get('a') == 1 and mt.get('b') == 2
+ assert mt.get("a") == 1 and mt.get("b") == 2
+
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_log_duplicates(core, deduplicator, _get_nodes):
core._deduplicator = deduplicator
nodes = _get_nodes + _get_nodes
@@ -26,6 +28,7 @@ def test_log_duplicates(core, deduplicator, _get_nodes):
assert True
+
# def test_access_translate(driver):
# driver.start_ontology()
diff --git a/test/test_create.py b/test/test_create.py
index ce425c88..abedc0f8 100644
--- a/test/test_create.py
+++ b/test/test_create.py
@@ -12,7 +12,7 @@ def test_node(node):
assert isinstance(node.get_properties(), dict)
assert isinstance(node.get_dict(), dict)
- assert 'id' in node.get_properties().keys()
+ assert "id" in node.get_properties().keys()
@given(st.builds(BioCypherEdge))
@@ -34,4 +34,4 @@ def test_rel_as_node(rel_as_node):
def test_rel_as_node_invalid_node():
with pytest.raises(TypeError):
- BioCypherRelAsNode('str', 1, 2.5122)
+ BioCypherRelAsNode("str", 1, 2.5122)
diff --git a/test/test_deduplicate.py b/test/test_deduplicate.py
index e82c0e99..1efa122e 100644
--- a/test/test_deduplicate.py
+++ b/test/test_deduplicate.py
@@ -1,45 +1,47 @@
import pytest
-from biocypher._create import BioCypherNode, BioCypherEdge
+
+from biocypher._create import BioCypherEdge, BioCypherNode
from biocypher._deduplicate import Deduplicator
-@pytest.mark.parametrize('l', [4], scope='module')
+
+@pytest.mark.parametrize("l", [4], scope="module")
def test_duplicate_nodes(_get_nodes):
dedup = Deduplicator()
nodes = _get_nodes
nodes.append(
BioCypherNode(
- node_id='p1',
- node_label='protein',
+ node_id="p1",
+ node_label="protein",
properties={
- 'name': 'StringProperty1',
- 'score': 4.32,
- 'taxon': 9606,
- 'genes': ['gene1', 'gene2']
- }
+ "name": "StringProperty1",
+ "score": 4.32,
+ "taxon": 9606,
+ "genes": ["gene1", "gene2"],
+ },
)
)
for node in nodes:
dedup.node_seen(node)
- assert 'protein' in dedup.duplicate_node_types
- assert 'p1' in dedup.duplicate_node_ids
+ assert "protein" in dedup.duplicate_node_types
+ assert "p1" in dedup.duplicate_node_ids
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_get_duplicate_nodes(_get_nodes):
dedup = Deduplicator()
nodes = _get_nodes
nodes.append(
BioCypherNode(
- node_id='p1',
- node_label='protein',
+ node_id="p1",
+ node_label="protein",
properties={
- 'name': 'StringProperty1',
- 'score': 4.32,
- 'taxon': 9606,
- 'genes': ['gene1', 'gene2']
- }
+ "name": "StringProperty1",
+ "score": 4.32,
+ "taxon": 9606,
+ "genes": ["gene1", "gene2"],
+ },
)
)
@@ -50,24 +52,25 @@ def test_get_duplicate_nodes(_get_nodes):
types = d[0]
ids = d[1]
- assert 'protein' in types
- assert 'p1' in ids
+ assert "protein" in types
+ assert "p1" in ids
-@pytest.mark.parametrize('l', [4], scope='module')
+
+@pytest.mark.parametrize("l", [4], scope="module")
def test_duplicate_edges(_get_edges):
dedup = Deduplicator()
edges = _get_edges
edges.append(
BioCypherEdge(
- relationship_id='mrel2',
- source_id='m2',
- target_id='p3',
- relationship_label='Is_Mutated_In',
+ relationship_id="mrel2",
+ source_id="m2",
+ target_id="p3",
+ relationship_label="Is_Mutated_In",
properties={
- 'score': 4.32,
- 'taxon': 9606,
- 'genes': ['gene1', 'gene2']
- }
+ "score": 4.32,
+ "taxon": 9606,
+ "genes": ["gene1", "gene2"],
+ },
)
)
# this will fail if we go beyond concatenation of ids
@@ -75,24 +78,25 @@ def test_duplicate_edges(_get_edges):
for edge in edges:
dedup.edge_seen(edge)
- assert 'Is_Mutated_In' in dedup.duplicate_edge_types
- assert ('mrel2') in dedup.duplicate_edge_ids
+ assert "Is_Mutated_In" in dedup.duplicate_edge_types
+ assert ("mrel2") in dedup.duplicate_edge_ids
+
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_get_duplicate_edges(_get_edges):
dedup = Deduplicator()
edges = _get_edges
edges.append(
BioCypherEdge(
- relationship_id='mrel2',
- source_id='m2',
- target_id='p3',
- relationship_label='Is_Mutated_In',
+ relationship_id="mrel2",
+ source_id="m2",
+ target_id="p3",
+ relationship_label="Is_Mutated_In",
properties={
- 'score': 4.32,
- 'taxon': 9606,
- 'genes': ['gene1', 'gene2']
- }
+ "score": 4.32,
+ "taxon": 9606,
+ "genes": ["gene1", "gene2"],
+ },
)
)
# this will fail if we go beyond concatenation of ids
@@ -104,5 +108,5 @@ def test_get_duplicate_edges(_get_edges):
types = d[0]
ids = d[1]
- assert 'Is_Mutated_In' in types
- assert ('mrel2') in ids
\ No newline at end of file
+ assert "Is_Mutated_In" in types
+ assert ("mrel2") in ids
diff --git a/test/test_driver.py b/test/test_driver.py
index 82a0ee1f..2273ed3d 100644
--- a/test/test_driver.py
+++ b/test/test_driver.py
@@ -7,13 +7,11 @@
@pytest.mark.requires_neo4j
def test_create_driver(driver):
-
assert isinstance(driver, _Neo4jDriver)
@pytest.mark.requires_neo4j
def test_connect_to_db(driver):
-
assert isinstance(driver._driver.driver, neo4j.Neo4jDriver)
@@ -24,28 +22,29 @@ def test_increment_version(driver):
driver._driver.query(query)
driver._update_meta_graph()
- r, summary = driver._driver.query('MATCH (n:BioCypher) '
- 'RETURN n', )
+ r, summary = driver._driver.query(
+ "MATCH (n:BioCypher) " "RETURN n",
+ )
assert len(r) == 2
@pytest.mark.requires_neo4j
def test_explain(driver):
- query = 'MATCH (n) WITH n LIMIT 25 MATCH (n)--(m)--(f) RETURN n, m, f'
+ query = "MATCH (n) WITH n LIMIT 25 MATCH (n)--(m)--(f) RETURN n, m, f"
e = driver._driver.explain(query)
t = e[0]
- assert 'args' in t and 'identifiers' in t
+ assert "args" in t and "identifiers" in t
@pytest.mark.requires_neo4j
def test_profile(driver):
- query = 'MATCH (n) RETURN n LIMIT 100'
+ query = "MATCH (n) RETURN n LIMIT 100"
p = driver._driver.profile(query)
t = p[0]
- assert 'args' in t and 'identifiers' in t
+ assert "args" in t and "identifiers" in t
@pytest.mark.requires_neo4j
@@ -56,34 +55,30 @@ def test_add_invalid_biocypher_node(driver):
driver.add_biocypher_nodes(1)
with pytest.raises(ValueError):
- driver.add_biocypher_nodes('String')
+ driver.add_biocypher_nodes("String")
@pytest.mark.requires_neo4j
def test_add_single_biocypher_node(driver):
# neo4j database needs to be running!
- n = BioCypherNode(node_id='test_id1', node_label='Test')
+ n = BioCypherNode(node_id="test_id1", node_label="Test")
driver.add_biocypher_nodes(n)
r, summary = driver._driver.query(
- 'MATCH (n:Test) '
- 'WITH n, n.id AS id '
- 'RETURN id ',
+ "MATCH (n:Test) " "WITH n, n.id AS id " "RETURN id ",
)
- assert r[0]['id'] == 'test_id1'
+ assert r[0]["id"] == "test_id1"
@pytest.mark.requires_neo4j
def test_add_biocypher_node_list(driver):
# neo4j database needs to be running!
- n1 = BioCypherNode(node_id='test_id1', node_label='Test')
- n2 = BioCypherNode(node_id='test_id2', node_label='Test')
+ n1 = BioCypherNode(node_id="test_id1", node_label="Test")
+ n2 = BioCypherNode(node_id="test_id2", node_label="Test")
driver.add_biocypher_nodes([n1, n2])
r, summary = driver._driver.query(
- 'MATCH (n:Test) '
- 'WITH n, n.id AS id '
- 'RETURN id ',
+ "MATCH (n:Test) " "WITH n, n.id AS id " "RETURN id ",
)
- assert set([r[0]['id'], r[1]['id']]) == set(['test_id1', 'test_id2'])
+ assert set([r[0]["id"], r[1]["id"]]) == set(["test_id1", "test_id2"])
@pytest.mark.requires_neo4j
@@ -94,42 +89,42 @@ def gen(nodes):
for g in nodes:
yield BioCypherNode(g[0], g[1])
- g = gen([('test_id1', 'Test'), ('test_id2', 'Test')])
+ g = gen([("test_id1", "Test"), ("test_id2", "Test")])
driver.add_biocypher_nodes(g)
r, summary = driver._driver.query(
- 'MATCH (n:Test) '
- 'WITH n, n.id AS id '
- 'RETURN id ',
+ "MATCH (n:Test) " "WITH n, n.id AS id " "RETURN id ",
)
- ids = [n['id'] for n in r]
+ ids = [n["id"] for n in r]
- assert 'test_id1' in ids
- assert 'test_id2' in ids
+ assert "test_id1" in ids
+ assert "test_id2" in ids
@pytest.mark.requires_neo4j
def test_add_specific_id_node(driver):
- n = BioCypherNode(node_id='CHAT', node_label='Gene', preferred_id='hgnc')
+ n = BioCypherNode(node_id="CHAT", node_label="Gene", preferred_id="hgnc")
driver.add_biocypher_nodes(n)
- r, summary = driver._driver.query('MATCH (n:Gene) '
- 'RETURN n', )
+ r, summary = driver._driver.query(
+ "MATCH (n:Gene) " "RETURN n",
+ )
- assert r[0]['n'].get('id') == 'CHAT'
- assert r[0]['n'].get('preferred_id') == 'hgnc'
+ assert r[0]["n"].get("id") == "CHAT"
+ assert r[0]["n"].get("preferred_id") == "hgnc"
@pytest.mark.requires_neo4j
def test_add_generic_id_node(driver):
- n = BioCypherNode(node_id='CHAT', node_label='Gene', preferred_id='HGNC')
+ n = BioCypherNode(node_id="CHAT", node_label="Gene", preferred_id="HGNC")
driver.add_biocypher_nodes(n)
- r, summary = driver._driver.query('MATCH (n:Gene) '
- 'RETURN n', )
+ r, summary = driver._driver.query(
+ "MATCH (n:Gene) " "RETURN n",
+ )
- assert r[0]['n'].get('id') is not None
+ assert r[0]["n"].get("id") is not None
@pytest.mark.requires_neo4j
@@ -142,20 +137,21 @@ def test_add_invalid_biocypher_edge(driver):
@pytest.mark.requires_neo4j
def test_add_single_biocypher_edge_explicit_node_creation(driver):
# neo4j database needs to be running!
- n1 = BioCypherNode('src', 'Test')
- n2 = BioCypherNode('tar', 'Test')
+ n1 = BioCypherNode("src", "Test")
+ n2 = BioCypherNode("tar", "Test")
driver.add_biocypher_nodes([n1, n2])
- e = BioCypherEdge('src', 'tar', 'Test')
+ e = BioCypherEdge("src", "tar", "Test")
driver.add_biocypher_edges(e)
r, summary = driver._driver.query(
- 'MATCH (n1)-[r:Test]->(n2) '
- 'WITH n1, n2, n1.id AS id1, n2.id AS id2, type(r) AS label '
- 'RETURN id1, id2, label',
+ "MATCH (n1)-[r:Test]->(n2) "
+ "WITH n1, n2, n1.id AS id1, n2.id AS id2, type(r) AS label "
+ "RETURN id1, id2, label",
)
assert (
- r[0]['id1'] == 'src' and r[0]['id2'] == 'tar' and
- r[0]['label'] == 'Test'
+ r[0]["id1"] == "src"
+ and r[0]["id2"] == "tar"
+ and r[0]["label"] == "Test"
)
@@ -165,50 +161,53 @@ def test_add_single_biocypher_edge_missing_nodes(driver):
# merging on non-existing nodes creates them without labels; what is
# the desired behaviour here? do we only want to MATCH?
- e = BioCypherEdge('src', 'tar', 'Test')
+ e = BioCypherEdge("src", "tar", "Test")
driver.add_biocypher_edges(e)
r, summary = driver._driver.query(
- 'MATCH (n1)-[r:Test]->(n2) '
- 'WITH n1, n2, n1.id AS id1, n2.id AS id2, type(r) AS label '
- 'RETURN id1, id2, label',
+ "MATCH (n1)-[r:Test]->(n2) "
+ "WITH n1, n2, n1.id AS id1, n2.id AS id2, type(r) AS label "
+ "RETURN id1, id2, label",
)
assert (
- r[0]['id1'] == 'src' and r[0]['id2'] == 'tar' and
- r[0]['label'] == 'Test'
+ r[0]["id1"] == "src"
+ and r[0]["id2"] == "tar"
+ and r[0]["label"] == "Test"
)
@pytest.mark.requires_neo4j
def test_add_biocypher_edge_list(driver):
# neo4j database needs to be running!
- n1 = BioCypherNode('src', 'Test')
- n2 = BioCypherNode('tar1', 'Test')
- n3 = BioCypherNode('tar2', 'Test')
+ n1 = BioCypherNode("src", "Test")
+ n2 = BioCypherNode("tar1", "Test")
+ n3 = BioCypherNode("tar2", "Test")
driver.add_biocypher_nodes([n1, n2, n3])
# edge list
- e1 = BioCypherEdge('src', 'tar1', 'Test1')
- e2 = BioCypherEdge('src', 'tar2', 'Test2')
+ e1 = BioCypherEdge("src", "tar1", "Test1")
+ e2 = BioCypherEdge("src", "tar2", "Test2")
driver.add_biocypher_edges([e1, e2])
r, summary = driver._driver.query(
- 'MATCH (n3)<-[r2:Test2]-(n1)-[r1:Test1]->(n2) '
- 'WITH n1, n2, n3, n1.id AS id1, n2.id AS id2, n3.id AS id3, '
- 'type(r1) AS label1, type(r2) AS label2 '
- 'RETURN id1, id2, id3, label1, label2',
+ "MATCH (n3)<-[r2:Test2]-(n1)-[r1:Test1]->(n2) "
+ "WITH n1, n2, n3, n1.id AS id1, n2.id AS id2, n3.id AS id3, "
+ "type(r1) AS label1, type(r2) AS label2 "
+ "RETURN id1, id2, id3, label1, label2",
)
assert (
- r[0]['id1'] == 'src' and r[0]['id2'] == 'tar1' and
- r[0]['id3'] == 'tar2' and r[0]['label1'] == 'Test1' and
- r[0]['label2'] == 'Test2'
+ r[0]["id1"] == "src"
+ and r[0]["id2"] == "tar1"
+ and r[0]["id3"] == "tar2"
+ and r[0]["label1"] == "Test1"
+ and r[0]["label2"] == "Test2"
)
@pytest.mark.requires_neo4j
def test_add_biocypher_edge_generator(driver):
# neo4j database needs to be running!
- n1 = BioCypherNode('src', 'Test')
- n2 = BioCypherNode('tar1', 'Test')
- n3 = BioCypherNode('tar2', 'Test')
+ n1 = BioCypherNode("src", "Test")
+ n2 = BioCypherNode("tar1", "Test")
+ n3 = BioCypherNode("tar2", "Test")
driver.add_biocypher_nodes([n1, n2, n3])
# generator
@@ -221,64 +220,69 @@ def gen(edges):
)
# edge list
- e1 = BioCypherEdge('src', 'tar1', 'Test1')
- e2 = BioCypherEdge('src', 'tar2', 'Test2')
+ e1 = BioCypherEdge("src", "tar1", "Test1")
+ e2 = BioCypherEdge("src", "tar2", "Test2")
g = gen([e1, e2])
driver.add_biocypher_edges(g)
r, summary = driver._driver.query(
- 'MATCH (n3)<-[r2:Test2]-(n1)-[r1:Test1]->(n2) '
- 'WITH n1, n2, n3, n1.id AS id1, n2.id AS id2, n3.id AS id3, '
- 'type(r1) AS label1, type(r2) AS label2 '
- 'RETURN id1, id2, id3, label1, label2',
+ "MATCH (n3)<-[r2:Test2]-(n1)-[r1:Test1]->(n2) "
+ "WITH n1, n2, n3, n1.id AS id1, n2.id AS id2, n3.id AS id3, "
+ "type(r1) AS label1, type(r2) AS label2 "
+ "RETURN id1, id2, id3, label1, label2",
)
assert (
- r[0]['id1'] == 'src' and r[0]['id2'] == 'tar1' and
- r[0]['id3'] == 'tar2' and r[0]['label1'] == 'Test1' and
- r[0]['label2'] == 'Test2'
+ r[0]["id1"] == "src"
+ and r[0]["id2"] == "tar1"
+ and r[0]["id3"] == "tar2"
+ and r[0]["label1"] == "Test1"
+ and r[0]["label2"] == "Test2"
)
@pytest.mark.requires_neo4j
def test_add_biocypher_interaction_as_BioCypherRelAsNode_list(driver):
# neo4j database needs to be running!
- i1 = BioCypherNode('int1', 'Int1')
- i2 = BioCypherNode('int2', 'Int2')
+ i1 = BioCypherNode("int1", "Int1")
+ i2 = BioCypherNode("int2", "Int2")
driver.add_biocypher_nodes([i1, i2])
- e1 = BioCypherEdge('src', 'int1', 'is_source_of')
- e2 = BioCypherEdge('tar', 'int1', 'is_target_of')
- e3 = BioCypherEdge('src', 'int2', 'is_source_of')
- e4 = BioCypherEdge('tar', 'int2', 'is_target_of')
+ e1 = BioCypherEdge("src", "int1", "is_source_of")
+ e2 = BioCypherEdge("tar", "int1", "is_target_of")
+ e3 = BioCypherEdge("src", "int2", "is_source_of")
+ e4 = BioCypherEdge("tar", "int2", "is_target_of")
r1, r2 = BioCypherRelAsNode(i1, e1, e2), BioCypherRelAsNode(i2, e3, e4)
driver.add_biocypher_edges([r1, r2])
r, summary = driver._driver.query(
- 'MATCH (n2)-[e4:is_target_of]->(i2:Int2)<-[e3:is_source_of]-'
- '(n1)-[e1:is_source_of]->(i1:Int1)<-[e2:is_target_of]-(n2)'
- 'WITH n1, n2, i1, i2, n1.id AS id1, n2.id AS id2, '
- 'i1.id AS id3, i2.id AS id4, '
- 'type(e1) AS label1, type(e2) AS label2, '
- 'type(e3) AS label3, type(e4) AS label4 '
- 'RETURN id1, id2, id3, id4, label1, label2, label3, label4',
+ "MATCH (n2)-[e4:is_target_of]->(i2:Int2)<-[e3:is_source_of]-"
+ "(n1)-[e1:is_source_of]->(i1:Int1)<-[e2:is_target_of]-(n2)"
+ "WITH n1, n2, i1, i2, n1.id AS id1, n2.id AS id2, "
+ "i1.id AS id3, i2.id AS id4, "
+ "type(e1) AS label1, type(e2) AS label2, "
+ "type(e3) AS label3, type(e4) AS label4 "
+ "RETURN id1, id2, id3, id4, label1, label2, label3, label4",
)
assert (
- r[0]['id1'] == 'src' and r[0]['id2'] == 'tar' and
- r[0]['id3'] == 'int1' and r[0]['id4'] == 'int2' and
- r[0]['label1'] == 'is_source_of' and
- r[0]['label2'] == 'is_target_of' and
- r[0]['label3'] == 'is_source_of' and r[0]['label4'] == 'is_target_of'
+ r[0]["id1"] == "src"
+ and r[0]["id2"] == "tar"
+ and r[0]["id3"] == "int1"
+ and r[0]["id4"] == "int2"
+ and r[0]["label1"] == "is_source_of"
+ and r[0]["label2"] == "is_target_of"
+ and r[0]["label3"] == "is_source_of"
+ and r[0]["label4"] == "is_target_of"
)
@pytest.mark.requires_neo4j
def test_add_biocypher_interaction_as_BioCypherRelAsNode_generator(driver):
# neo4j database needs to be running!
- i1 = BioCypherNode('int1', 'Int1')
- i2 = BioCypherNode('int2', 'Int2')
+ i1 = BioCypherNode("int1", "Int1")
+ i2 = BioCypherNode("int2", "Int2")
driver.add_biocypher_nodes([i1, i2])
- e1 = BioCypherEdge('src', 'int1', 'is_source_of')
- e2 = BioCypherEdge('tar', 'int1', 'is_target_of')
- e3 = BioCypherEdge('src', 'int2', 'is_source_of')
- e4 = BioCypherEdge('tar', 'int2', 'is_target_of')
+ e1 = BioCypherEdge("src", "int1", "is_source_of")
+ e2 = BioCypherEdge("tar", "int1", "is_target_of")
+ e3 = BioCypherEdge("src", "int2", "is_source_of")
+ e4 = BioCypherEdge("tar", "int2", "is_target_of")
r1, r2 = BioCypherRelAsNode(i1, e1, e2), BioCypherRelAsNode(i2, e3, e4)
relasnode_list = [r1, r2]
@@ -288,40 +292,43 @@ def gen(lis):
driver.add_biocypher_edges(gen(relasnode_list))
r, summary = driver._driver.query(
- 'MATCH (n2)-[e4:is_target_of]->(i2:Int2)<-[e3:is_source_of]-'
- '(n1)-[e1:is_source_of]->(i1:Int1)<-[e2:is_target_of]-(n2)'
- 'WITH n1, n2, i1, i2, n1.id AS id1, n2.id AS id2, '
- 'i1.id AS id3, i2.id AS id4, '
- 'type(e1) AS label1, type(e2) AS label2, '
- 'type(e3) AS label3, type(e4) AS label4 '
- 'RETURN id1, id2, id3, id4, label1, label2, label3, label4',
+ "MATCH (n2)-[e4:is_target_of]->(i2:Int2)<-[e3:is_source_of]-"
+ "(n1)-[e1:is_source_of]->(i1:Int1)<-[e2:is_target_of]-(n2)"
+ "WITH n1, n2, i1, i2, n1.id AS id1, n2.id AS id2, "
+ "i1.id AS id3, i2.id AS id4, "
+ "type(e1) AS label1, type(e2) AS label2, "
+ "type(e3) AS label3, type(e4) AS label4 "
+ "RETURN id1, id2, id3, id4, label1, label2, label3, label4",
)
assert (
- r[0]['id1'] == 'src' and r[0]['id2'] == 'tar' and
- r[0]['id3'] == 'int1' and r[0]['id4'] == 'int2' and
- r[0]['label1'] == 'is_source_of' and
- r[0]['label2'] == 'is_target_of' and
- r[0]['label3'] == 'is_source_of' and r[0]['label4'] == 'is_target_of'
+ r[0]["id1"] == "src"
+ and r[0]["id2"] == "tar"
+ and r[0]["id3"] == "int1"
+ and r[0]["id4"] == "int2"
+ and r[0]["label1"] == "is_source_of"
+ and r[0]["label2"] == "is_target_of"
+ and r[0]["label3"] == "is_source_of"
+ and r[0]["label4"] == "is_target_of"
)
@pytest.mark.requires_neo4j
def test_pretty_profile(driver):
prof, printout = driver._driver.profile(
- 'UNWIND [1,2,3,4,5] as id '
- 'MERGE (n:Test {id: id}) '
- 'MERGE (x:Test {id: id + 1})',
+ "UNWIND [1,2,3,4,5] as id "
+ "MERGE (n:Test {id: id}) "
+ "MERGE (x:Test {id: id + 1})",
)
- assert 'args' in prof and 'ProduceResults' in printout[1]
+ assert "args" in prof and "ProduceResults" in printout[1]
@pytest.mark.requires_neo4j
def test_pretty_explain(driver):
plan, printout = driver._driver.explain(
- 'UNWIND [1,2,3,4,5] as id '
- 'MERGE (n:Test {id: id}) '
- 'MERGE (x:Test {id: id + 1})',
+ "UNWIND [1,2,3,4,5] as id "
+ "MERGE (n:Test {id: id}) "
+ "MERGE (x:Test {id: id + 1})",
)
- assert 'args' in plan and 'ProduceResults' in printout[0]
+ assert "args" in plan and "ProduceResults" in printout[0]
diff --git a/test/test_integration.py b/test/test_integration.py
index 23d6e701..ea038118 100644
--- a/test/test_integration.py
+++ b/test/test_integration.py
@@ -3,7 +3,7 @@
import pytest
-@pytest.mark.parametrize('l', [4], scope='function')
+@pytest.mark.parametrize("l", [4], scope="function")
def test_write_node_data_from_gen(core, _get_nodes):
nodes = _get_nodes
@@ -15,8 +15,8 @@ def node_gen(nodes):
path = core._output_directory
- p_csv = os.path.join(path, 'Protein-part000.csv')
- m_csv = os.path.join(path, 'MicroRNA-part000.csv')
+ p_csv = os.path.join(path, "Protein-part000.csv")
+ m_csv = os.path.join(path, "MicroRNA-part000.csv")
with open(p_csv) as f:
pr = f.read()
@@ -26,9 +26,9 @@ def node_gen(nodes):
assert passed
assert "p1;'StringProperty1';4.0;9606;'gene1|gene2';'p1';'uniprot'" in pr
- assert 'BiologicalEntity' in pr
+ assert "BiologicalEntity" in pr
assert "m1;'StringProperty1';9606;'m1';'mirbase'" in mi
- assert 'ChemicalEntity' in mi
+ assert "ChemicalEntity" in mi
def test_show_ontology_structure_kwargs(core):
diff --git a/test/test_mapping.py b/test/test_mapping.py
index 1169f74f..801ca017 100644
--- a/test/test_mapping.py
+++ b/test/test_mapping.py
@@ -2,27 +2,26 @@
def test_inheritance_loop(ontology_mapping):
+ assert "gene to variant association" in ontology_mapping.schema.keys()
- assert 'gene to variant association' in ontology_mapping.schema.keys()
-
- assert 'gene to variant association' not in ontology_mapping.extended_schema.keys(
+ assert (
+ "gene to variant association"
+ not in ontology_mapping.extended_schema.keys()
)
def test_virtual_leaves_node(ontology_mapping):
-
- assert 'wikipathways.pathway' in ontology_mapping.extended_schema
+ assert "wikipathways.pathway" in ontology_mapping.extended_schema
def test_getting_properties_via_config(ontology_mapping):
-
- assert 'name' in ontology_mapping.extended_schema['protein'].get(
- 'properties'
- ).keys()
+ assert (
+ "name"
+ in ontology_mapping.extended_schema["protein"].get("properties").keys()
+ )
def test_preferred_id_optional(ontology_mapping):
+ pti = ontology_mapping.extended_schema.get("post translational interaction")
- pti = ontology_mapping.extended_schema.get('post translational interaction')
-
- assert pti.get('preferred_id') == 'id'
+ assert pti.get("preferred_id") == "id"
diff --git a/test/test_misc.py b/test/test_misc.py
index ff6d7bc8..85e63e9d 100644
--- a/test/test_misc.py
+++ b/test/test_misc.py
@@ -4,52 +4,49 @@
from biocypher._misc import create_tree_visualisation
inheritance_tree = {
- 'B': 'A',
- 'C': 'A',
- 'D': 'B',
- 'E': 'B',
- 'F': 'C',
- 'G': 'C',
- 'H': 'E',
- 'I': 'G',
+ "B": "A",
+ "C": "A",
+ "D": "B",
+ "E": "B",
+ "F": "C",
+ "G": "C",
+ "H": "E",
+ "I": "G",
}
disjoint_tree = {
- 'B': 'A',
- 'C': 'A',
- 'D': 'B',
- 'F': 'E',
- 'G': 'E',
- 'H': 'F',
+ "B": "A",
+ "C": "A",
+ "D": "B",
+ "F": "E",
+ "G": "E",
+ "H": "F",
}
def test_tree_vis():
-
tree_vis = create_tree_visualisation(inheritance_tree)
assert tree_vis.DEPTH == 1
assert tree_vis.WIDTH == 2
- assert tree_vis.root == 'A'
+ assert tree_vis.root == "A"
def test_tree_vis_from_networkx():
-
G = nx.DiGraph(inheritance_tree)
tree_vis = create_tree_visualisation(G)
assert tree_vis.DEPTH == 1
assert tree_vis.WIDTH == 2
- assert tree_vis.root == 'A'
+ assert tree_vis.root == "A"
def test_disjoint_tree():
-
with pytest.raises(ValueError):
create_tree_visualisation(disjoint_tree)
-if __name__ == '__main__':
+if __name__ == "__main__":
# to look at it
print(create_tree_visualisation(nx.DiGraph(inheritance_tree)).show())
diff --git a/test/test_ontology.py b/test/test_ontology.py
index f5f3027d..a40d1d02 100644
--- a/test/test_ontology.py
+++ b/test/test_ontology.py
@@ -1,47 +1,48 @@
import os
-import networkx as nx
import pytest
+import networkx as nx
from biocypher._ontology import Ontology
def test_biolink_adapter(biolink_adapter):
- assert biolink_adapter.get_root_label() == 'entity'
+ assert biolink_adapter.get_root_label() == "entity"
assert biolink_adapter.get_nx_graph().number_of_nodes() > 100
- assert 'biological entity' in biolink_adapter.get_ancestors('gene')
- assert 'macromolecular machine mixin' in biolink_adapter.get_ancestors(
- 'macromolecular complex'
+ assert "biological entity" in biolink_adapter.get_ancestors("gene")
+ assert "macromolecular machine mixin" in biolink_adapter.get_ancestors(
+ "macromolecular complex"
)
def test_so_adapter(so_adapter):
- assert so_adapter.get_root_label() == 'sequence_variant'
+ assert so_adapter.get_root_label() == "sequence_variant"
# here without underscores
- assert 'sequence variant' in so_adapter.get_ancestors('lethal variant')
+ assert "sequence variant" in so_adapter.get_ancestors("lethal variant")
def test_go_adapter(go_adapter):
- assert go_adapter.get_root_label() == 'molecular_function'
+ assert go_adapter.get_root_label() == "molecular_function"
- assert 'molecular function' in go_adapter.get_ancestors(
- 'rna helicase activity'
+ assert "molecular function" in go_adapter.get_ancestors(
+ "rna helicase activity"
)
def test_mondo_adapter(mondo_adapter):
- assert mondo_adapter.get_root_label() == 'disease'
+ assert mondo_adapter.get_root_label() == "disease"
- assert 'human disease' in mondo_adapter.get_ancestors('cystic fibrosis')
+ assert "human disease" in mondo_adapter.get_ancestors("cystic fibrosis")
def test_ontology_functions(hybrid_ontology):
assert isinstance(hybrid_ontology, Ontology)
- first_tail_ontology = hybrid_ontology._tail_ontologies.get('so'
- ).get_nx_graph()
+ first_tail_ontology = hybrid_ontology._tail_ontologies.get(
+ "so"
+ ).get_nx_graph()
assert len(first_tail_ontology) == 6
assert nx.is_directed_acyclic_graph(first_tail_ontology)
@@ -61,51 +62,51 @@ def test_ontology_functions(hybrid_ontology):
assert hybrid_length - num_ext == combined_length - num_tail
dgpl_ancestors = list(
- hybrid_ontology.get_ancestors('decreased gene product level')
+ hybrid_ontology.get_ancestors("decreased gene product level")
)
- assert 'decreased gene product level' in dgpl_ancestors
- assert 'altered gene product level' in dgpl_ancestors
- assert 'functional effect variant' in dgpl_ancestors
- assert 'sequence variant' in dgpl_ancestors
- assert 'biological entity' in dgpl_ancestors
- assert 'named thing' in dgpl_ancestors
- assert 'entity' in dgpl_ancestors
- assert 'thing with taxon' in dgpl_ancestors
-
- lethal_var = hybrid_ontology._nx_graph.nodes['lethal variant']
- assert lethal_var['label'] == 'SO_0001773'
+ assert "decreased gene product level" in dgpl_ancestors
+ assert "altered gene product level" in dgpl_ancestors
+ assert "functional effect variant" in dgpl_ancestors
+ assert "sequence variant" in dgpl_ancestors
+ assert "biological entity" in dgpl_ancestors
+ assert "named thing" in dgpl_ancestors
+ assert "entity" in dgpl_ancestors
+ assert "thing with taxon" in dgpl_ancestors
+
+ lethal_var = hybrid_ontology._nx_graph.nodes["lethal variant"]
+ assert lethal_var["label"] == "SO_0001773"
# second tail ontology: here we don't merge the nodes, but attach 'human
# disease' as a child of 'disease'
- cf_ancestors = list(hybrid_ontology.get_ancestors('cystic fibrosis'))
- assert 'cystic fibrosis' in cf_ancestors
- assert 'autosomal recessive disease' in cf_ancestors
- assert 'autosomal genetic disease' in cf_ancestors
- assert 'hereditary disease' in cf_ancestors
- assert 'human disease' in cf_ancestors
- assert 'disease' in cf_ancestors
- assert 'disease or phenotypic feature' in cf_ancestors
- assert 'biological entity' in cf_ancestors
- assert 'entity' in cf_ancestors
+ cf_ancestors = list(hybrid_ontology.get_ancestors("cystic fibrosis"))
+ assert "cystic fibrosis" in cf_ancestors
+ assert "autosomal recessive disease" in cf_ancestors
+ assert "autosomal genetic disease" in cf_ancestors
+ assert "hereditary disease" in cf_ancestors
+ assert "human disease" in cf_ancestors
+ assert "disease" in cf_ancestors
+ assert "disease or phenotypic feature" in cf_ancestors
+ assert "biological entity" in cf_ancestors
+ assert "entity" in cf_ancestors
# mixins?
# user extensions
- dsdna_ancestors = list(hybrid_ontology.get_ancestors('dsDNA sequence'))
- assert 'chemical entity' in dsdna_ancestors
- assert 'association' in hybrid_ontology.get_ancestors(
- 'mutation to tissue association'
+ dsdna_ancestors = list(hybrid_ontology.get_ancestors("dsDNA sequence"))
+ assert "chemical entity" in dsdna_ancestors
+ assert "association" in hybrid_ontology.get_ancestors(
+ "mutation to tissue association"
)
# properties
- protein = hybrid_ontology._nx_graph.nodes['protein']
- assert protein['label'] == 'Protein'
- assert 'taxon' in protein['properties'].keys()
+ protein = hybrid_ontology._nx_graph.nodes["protein"]
+ assert protein["label"] == "Protein"
+ assert "taxon" in protein["properties"].keys()
# synonyms
- assert 'complex' in hybrid_ontology._nx_graph.nodes
- assert 'macromolecular complex' not in hybrid_ontology._nx_graph.nodes
+ assert "complex" in hybrid_ontology._nx_graph.nodes
+ assert "macromolecular complex" not in hybrid_ontology._nx_graph.nodes
def test_show_ontology(hybrid_ontology):
@@ -123,20 +124,18 @@ def test_show_full_ontology(hybrid_ontology):
def test_write_ontology(hybrid_ontology, tmp_path):
passed = hybrid_ontology.show_ontology_structure(to_disk=tmp_path)
- f = os.path.join(tmp_path, 'ontology_structure.graphml')
+ f = os.path.join(tmp_path, "ontology_structure.graphml")
assert passed
assert os.path.isfile(f)
def test_disconnected_exception(disconnected_mapping):
-
with pytest.raises(ValueError):
Ontology(
head_ontology={
- 'url': 'test/so.owl',
- 'root_node': 'sequence_variant',
+ "url": "test/so.owl",
+ "root_node": "sequence_variant",
},
ontology_mapping=disconnected_mapping,
)
-
diff --git a/test/test_pandas.py b/test/test_pandas.py
index 8d7329d0..9cd405d4 100644
--- a/test/test_pandas.py
+++ b/test/test_pandas.py
@@ -1,9 +1,11 @@
import pytest
+
def test_pandas(_pd):
assert _pd.dfs == {}
-@pytest.mark.parametrize('l', [4], scope='module')
+
+@pytest.mark.parametrize("l", [4], scope="module")
def test_nodes(_pd, _get_nodes):
_pd.add_tables(_get_nodes)
assert "protein" in _pd.dfs.keys()
@@ -14,7 +16,7 @@ def test_nodes(_pd, _get_nodes):
assert "m2" in _pd.dfs["microRNA"]["node_id"].values
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_nodes_gen(_pd, _get_nodes):
def node_gen():
for node in _get_nodes:
@@ -23,19 +25,22 @@ def node_gen():
_pd.add_tables(node_gen())
assert "protein" in _pd.dfs.keys()
-@pytest.mark.parametrize('l', [4], scope='module')
+
+@pytest.mark.parametrize("l", [4], scope="module")
def test_duplicates(_pd, _get_nodes):
nodes = _get_nodes + _get_nodes
_pd.add_tables(nodes)
assert len(_pd.dfs["protein"].node_id) == 4
-@pytest.mark.parametrize('l', [8], scope='module')
+
+@pytest.mark.parametrize("l", [8], scope="module")
def test_two_step_add(_pd, _get_nodes):
_pd.add_tables(_get_nodes[:4])
_pd.add_tables(_get_nodes[4:])
assert len(_pd.dfs["protein"].node_id) == 8
-@pytest.mark.parametrize('l', [4], scope='module')
+
+@pytest.mark.parametrize("l", [4], scope="module")
def test_edges(_pd, _get_edges):
_pd.add_tables(_get_edges)
assert "PERTURBED_IN_DISEASE" in _pd.dfs.keys()
@@ -46,11 +51,11 @@ def test_edges(_pd, _get_edges):
assert "p1" in _pd.dfs["Is_Mutated_In"]["target_id"].values
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_edges_gen(_pd, _get_edges):
def edge_gen():
for edge in _get_edges:
yield edge
_pd.add_tables(edge_gen())
- assert "PERTURBED_IN_DISEASE" in _pd.dfs.keys()
\ No newline at end of file
+ assert "PERTURBED_IN_DISEASE" in _pd.dfs.keys()
diff --git a/test/test_translate.py b/test/test_translate.py
index 558700a9..bf113158 100644
--- a/test/test_translate.py
+++ b/test/test_translate.py
@@ -6,68 +6,68 @@
def test_translate_nodes(translator):
id_type = [
(
- 'G9205',
- 'protein',
+ "G9205",
+ "protein",
{
- 'taxon': 9606,
+ "taxon": 9606,
},
),
(
- 'hsa-miR-132-3p',
- 'mirna',
+ "hsa-miR-132-3p",
+ "mirna",
{
- 'taxon': 9606,
+ "taxon": 9606,
},
),
(
- 'ASDB_OSBS',
- 'complex',
+ "ASDB_OSBS",
+ "complex",
{
- 'taxon': 9606,
+ "taxon": 9606,
},
),
- ('REACT:25520', 'reactome', {}),
- ('agpl:001524', 'agpl', {}),
+ ("REACT:25520", "reactome", {}),
+ ("agpl:001524", "agpl", {}),
]
t = translator.translate_nodes(id_type)
assert all(type(n) == BioCypherNode for n in t)
t = translator.translate_nodes(id_type)
- assert next(t).get_label() == 'protein'
- assert next(t).get_label() == 'microRNA'
- assert next(t).get_label() == 'complex'
- assert next(t).get_label() == 'reactome.pathway'
- assert next(t).get_label() == 'altered gene product level'
+ assert next(t).get_label() == "protein"
+ assert next(t).get_label() == "microRNA"
+ assert next(t).get_label() == "complex"
+ assert next(t).get_label() == "reactome.pathway"
+ assert next(t).get_label() == "altered gene product level"
def test_specific_and_generic_ids(translator):
id_type = [
(
- 'CHAT',
- 'hgnc',
+ "CHAT",
+ "hgnc",
{
- 'taxon': 9606,
+ "taxon": 9606,
},
),
- ('REACT:25520', 'reactome', {}),
+ ("REACT:25520", "reactome", {}),
]
t = list(translator.translate_nodes(id_type))
- assert t[0].get_id() == 'CHAT'
- assert t[0].get_properties().get('preferred_id') == 'hgnc'
- assert t[0].get_properties().get('id') == 'CHAT'
- assert t[1].get_id() == 'REACT:25520'
- assert t[1].get_properties().get('preferred_id') == 'reactome'
- assert t[1].get_properties().get('id') == 'REACT:25520'
+ assert t[0].get_id() == "CHAT"
+ assert t[0].get_properties().get("preferred_id") == "hgnc"
+ assert t[0].get_properties().get("id") == "CHAT"
+ assert t[1].get_id() == "REACT:25520"
+ assert t[1].get_properties().get("preferred_id") == "reactome"
+ assert t[1].get_properties().get("id") == "REACT:25520"
def test_translate_edges(translator):
# edge type association (defined in `schema_config.yaml`)
src_tar_type_edge = [
- ('G15258', 'MONDO1', 'gene_disease', {}),
- ('G15258', 'MONDO2', 'protein_disease', {}),
- ('G15258', 'G15242', 'phosphorylation', {}),
+ ("G15258", "MONDO1", "gene_disease", {}),
+ ("G15258", "MONDO2", "protein_disease", {}),
+ ("G15258", "G15242", "phosphorylation", {}),
]
def gen_edges():
@@ -76,34 +76,34 @@ def gen_edges():
t = translator.translate_edges(gen_edges())
assert type(next(t)) == BioCypherEdge
- assert next(t).get_label() == 'PERTURBED_IN_DISEASE'
- assert next(t).get_label() == 'phosphorylation'
+ assert next(t).get_label() == "PERTURBED_IN_DISEASE"
+ assert next(t).get_label() == "phosphorylation"
# node type association (defined in `schema_config.yaml`)
src_tar_type_node = [
(
- 'G21058',
- 'G50127',
- 'post_translational',
+ "G21058",
+ "G50127",
+ "post_translational",
{
- 'prop1': 'test',
+ "prop1": "test",
},
),
(
- 'G22418',
- 'G50123',
- 'post_translational',
+ "G22418",
+ "G50123",
+ "post_translational",
{
- 'directed': 'arbitrary_string',
+ "directed": "arbitrary_string",
},
),
(
- 'G15258',
- 'G16347',
- 'post_translational',
+ "G15258",
+ "G16347",
+ "post_translational",
{
- 'directed': True,
- 'effect': -1,
+ "directed": True,
+ "effect": -1,
},
),
]
@@ -114,16 +114,16 @@ def gen_edges():
n2 = t[1]
n3 = t[2]
- assert n1.get_source_edge().get_label() == 'IS_PART_OF'
- assert n2.get_source_edge().get_label() == 'IS_PART_OF'
- assert n3.get_target_edge().get_label() == 'IS_TARGET_OF'
+ assert n1.get_source_edge().get_label() == "IS_PART_OF"
+ assert n2.get_source_edge().get_label() == "IS_PART_OF"
+ assert n3.get_target_edge().get_label() == "IS_TARGET_OF"
assert (
- type(n1.get_node()) == BioCypherNode and
- type(n1.get_source_edge()) == BioCypherEdge and
- type(n1.get_target_edge()) == BioCypherEdge
+ type(n1.get_node()) == BioCypherNode
+ and type(n1.get_source_edge()) == BioCypherEdge
+ and type(n1.get_target_edge()) == BioCypherEdge
)
- assert n3.get_node().get_id() == 'G15258_G16347_True_-1'
- assert n3.get_source_edge().get_source_id() == 'G15258'
+ assert n3.get_node().get_id() == "G15258_G16347_True_-1"
+ assert n3.get_source_edge().get_source_id() == "G15258"
# def test_biolink_adapter(version_node, translator):
@@ -177,17 +177,17 @@ def test_merge_multiple_inputs_node(ontology_mapping, translator):
# define nodes
id_type = [
(
- 'CHAT',
- 'hgnc',
+ "CHAT",
+ "hgnc",
{
- 'taxon': 9606,
+ "taxon": 9606,
},
),
(
- 'CHRNA4',
- 'ensg',
+ "CHRNA4",
+ "ensg",
{
- 'taxon': 9606,
+ "taxon": 9606,
},
),
]
@@ -197,26 +197,27 @@ def test_merge_multiple_inputs_node(ontology_mapping, translator):
# check unique node type
assert not any(
- [s for s in ontology_mapping.extended_schema.keys() if '.gene' in s]
+ [s for s in ontology_mapping.extended_schema.keys() if ".gene" in s]
)
assert any(
- [s for s in ontology_mapping.extended_schema.keys() if '.pathway' in s]
+ [s for s in ontology_mapping.extended_schema.keys() if ".pathway" in s]
)
# check translator.translate_nodes for unique return type
assert all([type(n) == BioCypherNode for n in t])
- assert all([n.get_label() == 'gene' for n in t])
+ assert all([n.get_label() == "gene" for n in t])
+
def test_implicit_inheritance_node(translator):
id_type = [
(
- 'snrna1',
- 'intact_snrna',
+ "snrna1",
+ "intact_snrna",
{},
),
(
- 'snrna2',
- 'rnacentral_snrna',
+ "snrna2",
+ "rnacentral_snrna",
{},
),
]
@@ -224,8 +225,8 @@ def test_implicit_inheritance_node(translator):
t = list(translator.translate_nodes(id_type))
assert all([type(n) == BioCypherNode for n in t])
- assert t[0].get_label() == 'intact.snRNA sequence'
- assert t[1].get_label() == 'rnacentral.snRNA sequence'
+ assert t[0].get_label() == "intact.snRNA sequence"
+ assert t[1].get_label() == "rnacentral.snRNA sequence"
def test_merge_multiple_inputs_edge(ontology_mapping, translator):
@@ -237,19 +238,19 @@ def test_merge_multiple_inputs_edge(ontology_mapping, translator):
# define nodes
src_tar_type = [
(
- 'CHAT',
- 'AD',
- 'gene_disease',
+ "CHAT",
+ "AD",
+ "gene_disease",
{
- 'taxon': 9606,
+ "taxon": 9606,
},
),
(
- 'CHRNA4',
- 'AD',
- 'protein_disease',
+ "CHRNA4",
+ "AD",
+ "protein_disease",
{
- 'taxon': 9606,
+ "taxon": 9606,
},
),
]
@@ -258,100 +259,106 @@ def test_merge_multiple_inputs_edge(ontology_mapping, translator):
# check unique edge type
assert not any(
[
- s for s in ontology_mapping.extended_schema.keys()
- if '.gene to disease association' in s
+ s
+ for s in ontology_mapping.extended_schema.keys()
+ if ".gene to disease association" in s
],
)
assert any(
[
- s for s in ontology_mapping.extended_schema.keys()
- if '.sequence variant' in s
+ s
+ for s in ontology_mapping.extended_schema.keys()
+ if ".sequence variant" in s
],
)
# check translator.translate_nodes for unique return type
assert all([type(e) == BioCypherEdge for e in t])
- assert all([e.get_label() == 'PERTURBED_IN_DISEASE' for e in t])
+ assert all([e.get_label() == "PERTURBED_IN_DISEASE" for e in t])
+
def test_implicit_inheritance_edge(translator):
src_tar_type = [
(
- 'mut1',
- 'var1',
- 'gene1',
- 'VARIANT_FOUND_IN_GENE_Known_variant_Gene',
+ "mut1",
+ "var1",
+ "gene1",
+ "VARIANT_FOUND_IN_GENE_Known_variant_Gene",
{},
),
(
- 'mut2',
- 'var2',
- 'gene2',
- 'VARIANT_FOUND_IN_GENE_Somatic_mutation_Gene',
+ "mut2",
+ "var2",
+ "gene2",
+ "VARIANT_FOUND_IN_GENE_Somatic_mutation_Gene",
{},
),
]
t = list(translator.translate_edges(src_tar_type))
assert all([type(e) == BioCypherEdge for e in t])
- assert t[0].get_label() == 'known.sequence variant.variant to gene association'
- assert t[1].get_label() == 'somatic.sequence variant.variant to gene association'
+ assert (
+ t[0].get_label() == "known.sequence variant.variant to gene association"
+ )
+ assert (
+ t[1].get_label()
+ == "somatic.sequence variant.variant to gene association"
+ )
-def test_virtual_leaves_inherit_is_a(ontology_mapping):
- snrna = ontology_mapping.extended_schema.get('intact.snRNA sequence')
+def test_virtual_leaves_inherit_is_a(ontology_mapping):
+ snrna = ontology_mapping.extended_schema.get("intact.snRNA sequence")
- assert 'is_a' in snrna.keys()
- assert snrna['is_a'] == ['snRNA sequence', 'nucleic acid entity']
+ assert "is_a" in snrna.keys()
+ assert snrna["is_a"] == ["snRNA sequence", "nucleic acid entity"]
- dsdna = ontology_mapping.extended_schema.get('intact.dsDNA sequence')
+ dsdna = ontology_mapping.extended_schema.get("intact.dsDNA sequence")
- assert dsdna['is_a'] == [
- 'dsDNA sequence',
- 'DNA sequence',
- 'nucleic acid entity',
+ assert dsdna["is_a"] == [
+ "dsDNA sequence",
+ "DNA sequence",
+ "nucleic acid entity",
]
def test_virtual_leaves_inherit_properties(ontology_mapping):
+ snrna = ontology_mapping.extended_schema.get("intact.snRNA sequence")
- snrna = ontology_mapping.extended_schema.get('intact.snRNA sequence')
-
- assert 'properties' in snrna.keys()
- assert 'exclude_properties' in snrna.keys()
+ assert "properties" in snrna.keys()
+ assert "exclude_properties" in snrna.keys()
def test_inherit_properties(ontology_mapping):
+ dsdna = ontology_mapping.extended_schema.get("intact.dsDNA sequence")
- dsdna = ontology_mapping.extended_schema.get('intact.dsDNA sequence')
-
- assert 'properties' in dsdna.keys()
- assert 'sequence' in dsdna['properties']
+ assert "properties" in dsdna.keys()
+ assert "sequence" in dsdna["properties"]
def test_properties_from_config(translator):
id_type = [
(
- 'G49205',
- 'protein',
+ "G49205",
+ "protein",
{
- 'taxon': 9606,
- 'name': 'test',
+ "taxon": 9606,
+ "name": "test",
},
),
(
- 'G92035',
- 'protein',
+ "G92035",
+ "protein",
{
- 'taxon': 9606,
+ "taxon": 9606,
},
),
(
- 'G92205',
- 'protein',
+ "G92205",
+ "protein",
{
- 'taxon': 9606,
- 'name': 'test2',
- 'test': 'should_not_be_returned',
+ "taxon": 9606,
+ "name": "test2",
+ "test": "should_not_be_returned",
},
),
]
@@ -359,32 +366,32 @@ def test_properties_from_config(translator):
r = list(t)
assert (
- 'name' in r[0].get_properties().keys() and
- 'name' in r[1].get_properties().keys() and
- 'test' not in r[2].get_properties().keys()
+ "name" in r[0].get_properties().keys()
+ and "name" in r[1].get_properties().keys()
+ and "test" not in r[2].get_properties().keys()
)
src_tar_type = [
(
- 'G49205',
- 'AD',
- 'gene_gene',
+ "G49205",
+ "AD",
+ "gene_gene",
{
- 'directional': True,
- 'score': 0.5,
- 'id': 'should_not_be_returned',
+ "directional": True,
+ "score": 0.5,
+ "id": "should_not_be_returned",
},
),
(
- 'G92035',
- 'AD',
- 'gene_gene',
+ "G92035",
+ "AD",
+ "gene_gene",
{
- 'directional': False,
- 'curated': True,
- 'score': 0.5,
- 'test': 'should_not_be_returned',
- 'id': 'should_not_be_returned',
+ "directional": False,
+ "curated": True,
+ "score": 0.5,
+ "test": "should_not_be_returned",
+ "id": "should_not_be_returned",
},
),
]
@@ -393,32 +400,32 @@ def test_properties_from_config(translator):
r = list(t)
assert (
- 'directional' in r[0].get_properties().keys() and
- 'directional' in r[1].get_properties().keys() and
- 'curated' in r[1].get_properties().keys() and
- 'score' in r[0].get_properties().keys() and
- 'score' in r[1].get_properties().keys() and
- 'test' not in r[1].get_properties().keys() and
- 'id' not in r[0].get_properties().keys() and
- 'id' not in r[1].get_properties().keys()
+ "directional" in r[0].get_properties().keys()
+ and "directional" in r[1].get_properties().keys()
+ and "curated" in r[1].get_properties().keys()
+ and "score" in r[0].get_properties().keys()
+ and "score" in r[1].get_properties().keys()
+ and "test" not in r[1].get_properties().keys()
+ and "id" not in r[0].get_properties().keys()
+ and "id" not in r[1].get_properties().keys()
)
def test_exclude_properties(translator):
id_type = [
(
- 'CHAT',
- 'ensg',
+ "CHAT",
+ "ensg",
{
- 'taxon': 9606,
- 'accession': 'should_not_be_returned',
+ "taxon": 9606,
+ "accession": "should_not_be_returned",
},
),
(
- 'ACHE',
- 'ensg',
+ "ACHE",
+ "ensg",
{
- 'taxon': 9606,
+ "taxon": 9606,
},
),
]
@@ -426,29 +433,29 @@ def test_exclude_properties(translator):
r = list(t)
assert (
- 'taxon' in r[0].get_properties().keys() and
- 'taxon' in r[1].get_properties().keys() and
- 'accession' not in r[0].get_properties().keys()
+ "taxon" in r[0].get_properties().keys()
+ and "taxon" in r[1].get_properties().keys()
+ and "accession" not in r[0].get_properties().keys()
)
src_tar_type = [
(
- 'G49205',
- 'AD',
- 'gene_disease',
+ "G49205",
+ "AD",
+ "gene_disease",
{
- 'directional': True,
- 'score': 0.5,
+ "directional": True,
+ "score": 0.5,
},
),
(
- 'G92035',
- 'AD',
- 'gene_disease',
+ "G92035",
+ "AD",
+ "gene_disease",
{
- 'directional': False,
- 'score': 0.5,
- 'accession': 'should_not_be_returned',
+ "directional": False,
+ "score": 0.5,
+ "accession": "should_not_be_returned",
},
),
]
@@ -457,51 +464,51 @@ def test_exclude_properties(translator):
r = list(t)
assert (
- 'directional' in r[0].get_properties().keys() and
- 'directional' in r[1].get_properties().keys() and
- 'score' in r[0].get_properties().keys() and
- 'score' in r[1].get_properties().keys() and
- 'accession' not in r[1].get_properties().keys()
+ "directional" in r[0].get_properties().keys()
+ and "directional" in r[1].get_properties().keys()
+ and "score" in r[0].get_properties().keys()
+ and "score" in r[1].get_properties().keys()
+ and "accession" not in r[1].get_properties().keys()
)
# we need to load the adapter because the mappings are passed from the adapter
# to the translator
def test_translate_term(translator):
- assert translator.translate_term('hgnc') == 'Gene'
+ assert translator.translate_term("hgnc") == "Gene"
assert (
- translator.translate_term('protein_disease') == 'PERTURBED_IN_DISEASE'
+ translator.translate_term("protein_disease") == "PERTURBED_IN_DISEASE"
)
def test_reverse_translate_term(translator):
- assert 'hgnc' in translator.reverse_translate_term('Gene')
- assert 'protein_disease' in translator.reverse_translate_term(
- 'PERTURBED_IN_DISEASE',
+ assert "hgnc" in translator.reverse_translate_term("Gene")
+ assert "protein_disease" in translator.reverse_translate_term(
+ "PERTURBED_IN_DISEASE",
)
def test_translate_query(translator):
# we translate to PascalCase for cypher queries, not to internal
# sentence case
- query = 'MATCH (n:hgnc)-[r:gene_disease]->(d:Disease) RETURN n'
+ query = "MATCH (n:hgnc)-[r:gene_disease]->(d:Disease) RETURN n"
assert (
- translator.translate(query) ==
- 'MATCH (n:Gene)-[r:PERTURBED_IN_DISEASE]->(d:Disease) RETURN n'
+ translator.translate(query)
+ == "MATCH (n:Gene)-[r:PERTURBED_IN_DISEASE]->(d:Disease) RETURN n"
)
def test_reverse_translate_query(translator):
# TODO cannot use sentence case in this context. include sentence to
# pascal case and back in translation?
- query = 'MATCH (n:Known.SequenceVariant)-[r:Known.SequenceVariant.VariantToGeneAssociation]->(g:Gene) RETURN n'
+ query = "MATCH (n:Known.SequenceVariant)-[r:Known.SequenceVariant.VariantToGeneAssociation]->(g:Gene) RETURN n"
with pytest.raises(NotImplementedError):
translator.reverse_translate(query)
- query = 'MATCH (n:Known.SequenceVariant)-[r:Known.SequenceVariant.VariantToGeneAssociation]->(g:Protein) RETURN n'
+ query = "MATCH (n:Known.SequenceVariant)-[r:Known.SequenceVariant.VariantToGeneAssociation]->(g:Protein) RETURN n"
assert (
- translator.reverse_translate(query) ==
- 'MATCH (n:Known_variant)-[r:VARIANT_FOUND_IN_GENE_Known_variant_Gene]->(g:protein) RETURN n'
+ translator.reverse_translate(query)
+ == "MATCH (n:Known_variant)-[r:VARIANT_FOUND_IN_GENE_Known_variant_Gene]->(g:protein) RETURN n"
)
@@ -509,67 +516,64 @@ def test_log_missing_nodes(translator):
tn = translator.translate_nodes(
[
(
- 'G49205',
- 'missing_protein',
+ "G49205",
+ "missing_protein",
{
- 'taxon': 9606,
+ "taxon": 9606,
},
),
- ('G92035', 'missing_protein', {}),
- ('REACT:25520', 'missing_pathway', {}),
+ ("G92035", "missing_protein", {}),
+ ("REACT:25520", "missing_pathway", {}),
],
)
tn = list(tn)
m = translator.get_missing_biolink_types()
- assert m.get('missing_protein') == 2
- assert m.get('missing_pathway') == 1
+ assert m.get("missing_protein") == 2
+ assert m.get("missing_pathway") == 1
def test_strict_mode_error(translator):
translator.strict_mode = True
n1 = (
- 'n2', 'Test', {
- 'prop': 'val',
- 'source': 'test',
- 'licence': 'test',
- 'version': 'test'
- }
+ "n2",
+ "Test",
+ {"prop": "val", "source": "test", "licence": "test", "version": "test"},
)
assert list(translator.translate_nodes([n1])) is not None
# test 'license' instead of 'licence'
n2 = (
- 'n2', 'Test', {
- 'prop': 'val',
- 'source': 'test',
- 'license': 'test',
- 'version': 'test'
- }
+ "n2",
+ "Test",
+ {"prop": "val", "source": "test", "license": "test", "version": "test"},
)
assert list(translator.translate_nodes([n2])) is not None
- n3 = ('n1', 'Test', {'prop': 'val'})
+ n3 = ("n1", "Test", {"prop": "val"})
with pytest.raises(ValueError):
list(translator.translate_nodes([n1, n2, n3]))
e1 = (
- 'n1', 'n2', 'Test', {
- 'prop': 'val',
- 'source': 'test',
- 'licence': 'test',
- 'version': 'test',
- }
+ "n1",
+ "n2",
+ "Test",
+ {
+ "prop": "val",
+ "source": "test",
+ "licence": "test",
+ "version": "test",
+ },
)
assert list(translator.translate_edges([e1])) is not None
- e2 = ('n1', 'n2', 'Test', {'prop': 'val'})
+ e2 = ("n1", "n2", "Test", {"prop": "val"})
with pytest.raises(ValueError):
list(translator.translate_edges([e1, e2]))
@@ -579,16 +583,18 @@ def test_strict_mode_property_filter(translator):
translator.strict_mode = True
p1 = (
- 'p1', 'protein', {
- 'taxon': 9606,
- 'source': 'test',
- 'licence': 'test',
- 'version': 'test',
- }
+ "p1",
+ "protein",
+ {
+ "taxon": 9606,
+ "source": "test",
+ "licence": "test",
+ "version": "test",
+ },
)
l = list(translator.translate_nodes([p1]))
- assert 'source' in l[0].get_properties().keys()
- assert 'licence' in l[0].get_properties().keys()
- assert 'version' in l[0].get_properties().keys()
+ assert "source" in l[0].get_properties().keys()
+ assert "licence" in l[0].get_properties().keys()
+ assert "version" in l[0].get_properties().keys()
diff --git a/test/test_write_arango.py b/test/test_write_arango.py
index e7e0a38b..d3639b2b 100644
--- a/test/test_write_arango.py
+++ b/test/test_write_arango.py
@@ -3,7 +3,7 @@
import pytest
-@pytest.mark.parametrize('l', [4], scope='function')
+@pytest.mark.parametrize("l", [4], scope="function")
def test_arango_write_data_headers_import_call(
bw_arango,
_get_nodes,
@@ -25,19 +25,19 @@ def test_arango_write_data_headers_import_call(
tmp_path = bw_arango.outdir
- ph_csv = os.path.join(tmp_path, 'Protein-header.csv')
- pp_1_csv = os.path.join(tmp_path, 'Protein-part000.csv')
- pp_2_csv = os.path.join(tmp_path, 'Protein-part001.csv')
- mh_csv = os.path.join(tmp_path, 'MicroRNA-header.csv')
- mp_1_csv = os.path.join(tmp_path, 'MicroRNA-part000.csv')
- mp_2_csv = os.path.join(tmp_path, 'MicroRNA-part001.csv')
- dh_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-header.csv')
- dp_1_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-part000.csv')
- dp_2_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-part001.csv')
- muh_csv = os.path.join(tmp_path, 'Is_Mutated_In-header.csv')
- mup_1_csv = os.path.join(tmp_path, 'Is_Mutated_In-part000.csv')
- mup_2_csv = os.path.join(tmp_path, 'Is_Mutated_In-part001.csv')
- call_csv = os.path.join(tmp_path, 'arangodb-import-call.sh')
+ ph_csv = os.path.join(tmp_path, "Protein-header.csv")
+ pp_1_csv = os.path.join(tmp_path, "Protein-part000.csv")
+ pp_2_csv = os.path.join(tmp_path, "Protein-part001.csv")
+ mh_csv = os.path.join(tmp_path, "MicroRNA-header.csv")
+ mp_1_csv = os.path.join(tmp_path, "MicroRNA-part000.csv")
+ mp_2_csv = os.path.join(tmp_path, "MicroRNA-part001.csv")
+ dh_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-header.csv")
+ dp_1_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-part000.csv")
+ dp_2_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-part001.csv")
+ muh_csv = os.path.join(tmp_path, "Is_Mutated_In-header.csv")
+ mup_1_csv = os.path.join(tmp_path, "Is_Mutated_In-part000.csv")
+ mup_2_csv = os.path.join(tmp_path, "Is_Mutated_In-part001.csv")
+ call_csv = os.path.join(tmp_path, "arangodb-import-call.sh")
with open(ph_csv) as f:
ph = f.read()
@@ -66,23 +66,31 @@ def test_arango_write_data_headers_import_call(
with open(call_csv) as f:
call = f.read()
- assert ph == '_key,name,score,taxon,genes,id,preferred_id'
- assert mh == '_key,name,taxon,id,preferred_id'
- assert '_from' in dh
- assert '_key' in dh
- assert '_to' in dh
- assert '_from' in muh
- assert '_key' in muh
- assert '_to' in muh
- assert len(pp_1) == len(pp_2) == len(mp_1) == len(mp_2) == len(dp_1) == len(
- dp_2
- ) == len(mup_1) == len(mup_2) == 2
- assert 'arangoimp --type csv' in call
- assert '--collection proteins' in call
- assert 'MicroRNA-part' in call
+ assert ph == "_key,name,score,taxon,genes,id,preferred_id"
+ assert mh == "_key,name,taxon,id,preferred_id"
+ assert "_from" in dh
+ assert "_key" in dh
+ assert "_to" in dh
+ assert "_from" in muh
+ assert "_key" in muh
+ assert "_to" in muh
+ assert (
+ len(pp_1)
+ == len(pp_2)
+ == len(mp_1)
+ == len(mp_2)
+ == len(dp_1)
+ == len(dp_2)
+ == len(mup_1)
+ == len(mup_2)
+ == 2
+ )
+ assert "arangoimp --type csv" in call
+ assert "--collection proteins" in call
+ assert "MicroRNA-part" in call
# custom import call executable path
- bw_arango.import_call_bin_prefix = 'custom/path/to/'
+ bw_arango.import_call_bin_prefix = "custom/path/to/"
os.remove(call_csv)
bw_arango.write_import_call()
@@ -90,4 +98,4 @@ def test_arango_write_data_headers_import_call(
with open(call_csv) as f:
call = f.read()
- assert 'custom/path/to/arangoimp --type csv' in call
+ assert "custom/path/to/arangoimp --type csv" in call
diff --git a/test/test_write_neo4j.py b/test/test_write_neo4j.py
index c065e504..3092488d 100644
--- a/test/test_write_neo4j.py
+++ b/test/test_write_neo4j.py
@@ -8,12 +8,12 @@
def test_neo4j_writer_and_output_dir(bw):
-
tmp_path = bw.outdir
assert (
- os.path.isdir(tmp_path) and isinstance(bw, _Neo4jBatchWriter) and
- bw.delim == ';'
+ os.path.isdir(tmp_path)
+ and isinstance(bw, _Neo4jBatchWriter)
+ and bw.delim == ";"
)
@@ -22,25 +22,25 @@ def test_create_import_call(bw):
le = 4
for i in range(le):
n = BioCypherNode(
- f'i{i+1}',
- 'post translational interaction',
+ f"i{i+1}",
+ "post translational interaction",
)
e1 = BioCypherEdge(
- source_id=f'i{i+1}',
- target_id=f'p{i+1}',
- relationship_label='IS_SOURCE_OF',
+ source_id=f"i{i+1}",
+ target_id=f"p{i+1}",
+ relationship_label="IS_SOURCE_OF",
)
e2 = BioCypherEdge(
- source_id=f'i{i}',
- target_id=f'p{i+2}',
- relationship_label='IS_TARGET_OF',
+ source_id=f"i{i}",
+ target_id=f"p{i+2}",
+ relationship_label="IS_TARGET_OF",
)
mixed.append(BioCypherRelAsNode(n, e1, e2))
e3 = BioCypherEdge(
- source_id=f'p{i+1}',
- target_id=f'p{i+1}',
- relationship_label='PERTURBED_IN_DISEASE',
+ source_id=f"p{i+1}",
+ target_id=f"p{i+1}",
+ relationship_label="PERTURBED_IN_DISEASE",
)
mixed.append(e3)
@@ -56,13 +56,25 @@ def gen(lis):
assert passed
assert 'bin/neo4j-admin import --database=neo4j --delimiter=";" ' in call
assert '--array-delimiter="|" --quote="\'" --force=true ' in call
- assert f'--nodes="{tmp_path}/PostTranslationalInteraction-header.csv,{tmp_path}/PostTranslationalInteraction-part.*" ' in call
- assert f'--relationships="{tmp_path}/IS_SOURCE_OF-header.csv,{tmp_path}/IS_SOURCE_OF-part.*" ' in call
- assert f'--relationships="{tmp_path}/IS_TARGET_OF-header.csv,{tmp_path}/IS_TARGET_OF-part.*" ' in call
- assert f'--relationships="{tmp_path}/PERTURBED_IN_DISEASE-header.csv,{tmp_path}/PERTURBED_IN_DISEASE-part.*" ' in call
+ assert (
+ f'--nodes="{tmp_path}/PostTranslationalInteraction-header.csv,{tmp_path}/PostTranslationalInteraction-part.*" '
+ in call
+ )
+ assert (
+ f'--relationships="{tmp_path}/IS_SOURCE_OF-header.csv,{tmp_path}/IS_SOURCE_OF-part.*" '
+ in call
+ )
+ assert (
+ f'--relationships="{tmp_path}/IS_TARGET_OF-header.csv,{tmp_path}/IS_TARGET_OF-part.*" '
+ in call
+ )
+ assert (
+ f'--relationships="{tmp_path}/PERTURBED_IN_DISEASE-header.csv,{tmp_path}/PERTURBED_IN_DISEASE-part.*" '
+ in call
+ )
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_neo4j_write_node_data_headers_import_call(bw, _get_nodes):
# four proteins, four miRNAs
nodes = _get_nodes
@@ -75,9 +87,9 @@ def test_neo4j_write_node_data_headers_import_call(bw, _get_nodes):
tmp_path = bw.outdir
- p_csv = os.path.join(tmp_path, 'Protein-header.csv')
- m_csv = os.path.join(tmp_path, 'MicroRNA-header.csv')
- call = os.path.join(tmp_path, 'neo4j-admin-import-call.sh')
+ p_csv = os.path.join(tmp_path, "Protein-header.csv")
+ m_csv = os.path.join(tmp_path, "MicroRNA-header.csv")
+ call = os.path.join(tmp_path, "neo4j-admin-import-call.sh")
with open(p_csv) as f:
p = f.read()
@@ -86,20 +98,23 @@ def test_neo4j_write_node_data_headers_import_call(bw, _get_nodes):
with open(call) as f:
c = f.read()
- assert p == ':ID;name;score:double;taxon:long;genes:string[];id;preferred_id;:LABEL'
- assert m == ':ID;name;taxon:long;id;preferred_id;:LABEL'
- assert 'bin/neo4j-admin import' in c
- assert '--database=neo4j' in c
+ assert (
+ p
+ == ":ID;name;score:double;taxon:long;genes:string[];id;preferred_id;:LABEL"
+ )
+ assert m == ":ID;name;taxon:long;id;preferred_id;:LABEL"
+ assert "bin/neo4j-admin import" in c
+ assert "--database=neo4j" in c
assert '--delimiter=";"' in c
- assert '--force=true' in c
+ assert "--force=true" in c
assert '--nodes="' in c
- assert 'Protein-header.csv' in c
+ assert "Protein-header.csv" in c
assert 'Protein-part.*"' in c
- assert 'MicroRNA-header.csv' in c
+ assert "MicroRNA-header.csv" in c
assert 'MicroRNA-part.*"' in c
# custom import call executable path
- bw.import_call_bin_prefix = 'custom/path/'
+ bw.import_call_bin_prefix = "custom/path/"
os.remove(call)
bw.write_import_call()
@@ -107,7 +122,7 @@ def test_neo4j_write_node_data_headers_import_call(bw, _get_nodes):
with open(call) as f:
c = f.read()
- assert 'custom/path/neo4j-admin import' in c
+ assert "custom/path/neo4j-admin import" in c
# custom file prefix
# TODO
@@ -118,9 +133,9 @@ def test_write_hybrid_ontology_nodes(bw):
for i in range(4):
nodes.append(
BioCypherNode(
- node_id=f'agpl:000{i}',
- node_label='altered gene product level',
- properties={}
+ node_id=f"agpl:000{i}",
+ node_label="altered gene product level",
+ properties={},
)
)
@@ -130,8 +145,8 @@ def test_write_hybrid_ontology_nodes(bw):
tmp_path = bw.outdir
- h_csv = os.path.join(tmp_path, 'AlteredGeneProductLevel-header.csv')
- p_csv = os.path.join(tmp_path, 'AlteredGeneProductLevel-part000.csv')
+ h_csv = os.path.join(tmp_path, "AlteredGeneProductLevel-header.csv")
+ p_csv = os.path.join(tmp_path, "AlteredGeneProductLevel-part000.csv")
with open(h_csv) as f:
header = f.read()
@@ -139,23 +154,23 @@ def test_write_hybrid_ontology_nodes(bw):
with open(p_csv) as f:
part = f.read()
- assert header == ':ID;id;preferred_id;:LABEL'
+ assert header == ":ID;id;preferred_id;:LABEL"
assert "agpl:0000;'agpl:0000';'id'" in part
- assert 'AlteredGeneProductLevel' in part
- assert 'BiologicalEntity' in part
+ assert "AlteredGeneProductLevel" in part
+ assert "BiologicalEntity" in part
def test_property_types(bw):
nodes = []
for i in range(4):
bnp = BioCypherNode(
- node_id=f'p{i+1}',
- node_label='protein',
+ node_id=f"p{i+1}",
+ node_label="protein",
properties={
- 'score': 4 / (i + 1),
- 'name': 'StringProperty1',
- 'taxon': 9606,
- 'genes': ['gene1', 'gene2'],
+ "score": 4 / (i + 1),
+ "name": "StringProperty1",
+ "taxon": 9606,
+ "genes": ["gene1", "gene2"],
},
)
nodes.append(bnp)
@@ -164,8 +179,8 @@ def test_property_types(bw):
tmp_path = bw.outdir
- d_csv = os.path.join(tmp_path, 'Protein-part000.csv')
- h_csv = os.path.join(tmp_path, 'Protein-header.csv')
+ d_csv = os.path.join(tmp_path, "Protein-part000.csv")
+ h_csv = os.path.join(tmp_path, "Protein-header.csv")
with open(d_csv) as f:
data = f.read()
@@ -174,12 +189,15 @@ def test_property_types(bw):
header = f.read()
assert passed
- assert header == ':ID;name;score:double;taxon:long;genes:string[];id;preferred_id;:LABEL'
+ assert (
+ header
+ == ":ID;name;score:double;taxon:long;genes:string[];id;preferred_id;:LABEL"
+ )
assert "p1;'StringProperty1';4.0;9606;'gene1|gene2';'p1';'id'" in data
- assert 'BiologicalEntity' in data
+ assert "BiologicalEntity" in data
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_write_node_data_from_list(bw, _get_nodes):
nodes = _get_nodes
@@ -187,8 +205,8 @@ def test_write_node_data_from_list(bw, _get_nodes):
tmp_path = bw.outdir
- p_csv = os.path.join(tmp_path, 'Protein-part000.csv')
- m_csv = os.path.join(tmp_path, 'MicroRNA-part000.csv')
+ p_csv = os.path.join(tmp_path, "Protein-part000.csv")
+ m_csv = os.path.join(tmp_path, "MicroRNA-part000.csv")
with open(p_csv) as f:
pr = f.read()
@@ -198,12 +216,12 @@ def test_write_node_data_from_list(bw, _get_nodes):
assert passed
assert "p1;'StringProperty1';4.0;9606;'gene1|gene2';'p1';'uniprot'" in pr
- assert 'BiologicalEntity' in pr
+ assert "BiologicalEntity" in pr
assert "m1;'StringProperty1';9606;'m1';'mirbase'" in mi
- assert 'ChemicalEntity' in mi
+ assert "ChemicalEntity" in mi
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_write_node_data_from_gen(bw, _get_nodes):
nodes = _get_nodes
@@ -214,8 +232,8 @@ def node_gen(nodes):
tmp_path = bw.outdir
- p_csv = os.path.join(tmp_path, 'Protein-part000.csv')
- m_csv = os.path.join(tmp_path, 'MicroRNA-part000.csv')
+ p_csv = os.path.join(tmp_path, "Protein-part000.csv")
+ m_csv = os.path.join(tmp_path, "MicroRNA-part000.csv")
with open(p_csv) as f:
pr = f.read()
@@ -225,9 +243,9 @@ def node_gen(nodes):
assert passed
assert "p1;'StringProperty1';4.0;9606;'gene1|gene2';'p1';'uniprot'" in pr
- assert 'BiologicalEntity' in pr
+ assert "BiologicalEntity" in pr
assert "m1;'StringProperty1';9606;'m1';'mirbase'" in mi
- assert 'ChemicalEntity' in mi
+ assert "ChemicalEntity" in mi
def test_write_node_data_from_gen_no_props(bw):
@@ -235,19 +253,19 @@ def test_write_node_data_from_gen_no_props(bw):
le = 4
for i in range(le):
bnp = BioCypherNode(
- node_id=f'p{i+1}',
- node_label='protein',
+ node_id=f"p{i+1}",
+ node_label="protein",
properties={
- 'score': 4 / (i + 1),
- 'name': 'StringProperty1',
- 'taxon': 9606,
- 'genes': ['gene1', 'gene2'],
+ "score": 4 / (i + 1),
+ "name": "StringProperty1",
+ "taxon": 9606,
+ "genes": ["gene1", "gene2"],
},
)
nodes.append(bnp)
bnm = BioCypherNode(
- node_id=f'm{i+1}',
- node_label='microRNA',
+ node_id=f"m{i+1}",
+ node_label="microRNA",
)
nodes.append(bnm)
@@ -258,8 +276,8 @@ def node_gen(nodes):
tmp_path = bw.outdir
- p_csv = os.path.join(tmp_path, 'Protein-part000.csv')
- m_csv = os.path.join(tmp_path, 'microRNA-part000.csv')
+ p_csv = os.path.join(tmp_path, "Protein-part000.csv")
+ m_csv = os.path.join(tmp_path, "microRNA-part000.csv")
with open(p_csv) as f:
pr = f.read()
@@ -269,12 +287,12 @@ def node_gen(nodes):
assert passed
assert "p1;'StringProperty1';4.0;9606;'gene1|gene2';'p1';'id'" in pr
- assert 'BiologicalEntity' in pr
+ assert "BiologicalEntity" in pr
assert "m1;'m1';'id'" in mi
- assert 'ChemicalEntity' in mi
+ assert "ChemicalEntity" in mi
-@pytest.mark.parametrize('l', [int(1e4 + 4)], scope='module')
+@pytest.mark.parametrize("l", [int(1e4 + 4)], scope="module")
def test_write_node_data_from_large_gen(bw, _get_nodes):
nodes = _get_nodes
@@ -288,10 +306,10 @@ def node_gen(nodes):
tmp_path = bw.outdir
- p0_csv = os.path.join(tmp_path, 'Protein-part000.csv')
- m0_csv = os.path.join(tmp_path, 'MicroRNA-part000.csv')
- p1_csv = os.path.join(tmp_path, 'Protein-part001.csv')
- m1_csv = os.path.join(tmp_path, 'MicroRNA-part001.csv')
+ p0_csv = os.path.join(tmp_path, "Protein-part000.csv")
+ m0_csv = os.path.join(tmp_path, "MicroRNA-part000.csv")
+ p1_csv = os.path.join(tmp_path, "Protein-part001.csv")
+ m1_csv = os.path.join(tmp_path, "MicroRNA-part001.csv")
pr_lines = sum(1 for _ in open(p0_csv))
mi_lines = sum(1 for _ in open(m0_csv))
@@ -299,23 +317,26 @@ def node_gen(nodes):
mi_lines1 = sum(1 for _ in open(m1_csv))
assert (
- passed and pr_lines == 1e4 and mi_lines == 1e4 and pr_lines1 == 4 and
- mi_lines1 == 4
+ passed
+ and pr_lines == 1e4
+ and mi_lines == 1e4
+ and pr_lines1 == 4
+ and mi_lines1 == 4
)
-@pytest.mark.parametrize('l', [1], scope='module')
+@pytest.mark.parametrize("l", [1], scope="module")
def test_too_many_properties(bw, _get_nodes):
nodes = _get_nodes
bn1 = BioCypherNode(
- node_id='p0',
- node_label='protein',
+ node_id="p0",
+ node_label="protein",
properties={
- 'p1': 'StringProperty1',
- 'p2': 'StringProperty2',
- 'p3': 'StringProperty3',
- 'p4': 'StringProperty4',
+ "p1": "StringProperty1",
+ "p2": "StringProperty2",
+ "p3": "StringProperty3",
+ "p4": "StringProperty4",
},
)
nodes.append(bn1)
@@ -331,14 +352,14 @@ def node_gen(nodes):
assert not passed
-@pytest.mark.parametrize('l', [1], scope='module')
+@pytest.mark.parametrize("l", [1], scope="module")
def test_not_enough_properties(bw, _get_nodes):
nodes = _get_nodes
bn1 = BioCypherNode(
- node_id='p0',
- node_label='protein',
- properties={'p1': 'StringProperty1'},
+ node_id="p0",
+ node_label="protein",
+ properties={"p1": "StringProperty1"},
)
nodes.append(bn1)
@@ -352,7 +373,7 @@ def node_gen(nodes):
tmp_path = bw.outdir
- p0_csv = os.path.join(tmp_path, 'Protein-part000.csv')
+ p0_csv = os.path.join(tmp_path, "Protein-part000.csv")
assert not passed and not isfile(p0_csv)
@@ -363,31 +384,31 @@ def test_write_none_type_property_and_order_invariance(bw):
nodes = []
bnp1 = BioCypherNode(
- node_id=f'p1',
- node_label='protein',
+ node_id=f"p1",
+ node_label="protein",
properties={
- 'taxon': 9606,
- 'score': 1,
- 'name': None,
- 'genes': None,
+ "taxon": 9606,
+ "score": 1,
+ "name": None,
+ "genes": None,
},
)
bnp2 = BioCypherNode(
- node_id=f'p2',
- node_label='protein',
+ node_id=f"p2",
+ node_label="protein",
properties={
- 'name': None,
- 'genes': ['gene1', 'gene2'],
- 'score': 2,
- 'taxon': 9606,
+ "name": None,
+ "genes": ["gene1", "gene2"],
+ "score": 2,
+ "taxon": 9606,
},
)
bnm = BioCypherNode(
- node_id=f'm1',
- node_label='microRNA',
+ node_id=f"m1",
+ node_label="microRNA",
properties={
- 'name': None,
- 'taxon': 9606,
+ "name": None,
+ "taxon": 9606,
},
)
nodes.append(bnp1)
@@ -404,16 +425,16 @@ def node_gen(nodes):
tmp_path = bw.outdir
- p0_csv = os.path.join(tmp_path, 'Protein-part000.csv')
+ p0_csv = os.path.join(tmp_path, "Protein-part000.csv")
with open(p0_csv) as f:
p = f.read()
assert passed
assert "p1;;1;9606;;'p1';'id'" in p
- assert 'BiologicalEntity' in p
+ assert "BiologicalEntity" in p
-@pytest.mark.parametrize('l', [int(1e4)], scope='module')
+@pytest.mark.parametrize("l", [int(1e4)], scope="module")
def test_accidental_exact_batch_size(bw, _get_nodes):
nodes = _get_nodes
@@ -427,16 +448,16 @@ def node_gen(nodes):
tmp_path = bw.outdir
- p0_csv = os.path.join(tmp_path, 'Protein-part000.csv')
- m0_csv = os.path.join(tmp_path, 'MicroRNA-part000.csv')
- p1_csv = os.path.join(tmp_path, 'Protein-part001.csv')
- m1_csv = os.path.join(tmp_path, 'MicroRNA-part001.csv')
+ p0_csv = os.path.join(tmp_path, "Protein-part000.csv")
+ m0_csv = os.path.join(tmp_path, "MicroRNA-part000.csv")
+ p1_csv = os.path.join(tmp_path, "Protein-part001.csv")
+ m1_csv = os.path.join(tmp_path, "MicroRNA-part001.csv")
pr_lines = sum(1 for _ in open(p0_csv))
mi_lines = sum(1 for _ in open(m0_csv))
- ph_csv = os.path.join(tmp_path, 'Protein-header.csv')
- mh_csv = os.path.join(tmp_path, 'MicroRNA-header.csv')
+ ph_csv = os.path.join(tmp_path, "Protein-header.csv")
+ mh_csv = os.path.join(tmp_path, "MicroRNA-header.csv")
with open(ph_csv) as f:
p = f.read()
@@ -444,14 +465,18 @@ def node_gen(nodes):
m = f.read()
assert (
- passed and pr_lines == 1e4 and mi_lines == 1e4 and
- not isfile(p1_csv) and not isfile(m1_csv) and p ==
- ':ID;name;score:double;taxon:long;genes:string[];id;preferred_id;:LABEL'
- and m == ':ID;name;taxon:long;id;preferred_id;:LABEL'
+ passed
+ and pr_lines == 1e4
+ and mi_lines == 1e4
+ and not isfile(p1_csv)
+ and not isfile(m1_csv)
+ and p
+ == ":ID;name;score:double;taxon:long;genes:string[];id;preferred_id;:LABEL"
+ and m == ":ID;name;taxon:long;id;preferred_id;:LABEL"
)
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_write_edge_data_from_gen(bw, _get_edges):
edges = _get_edges
@@ -462,8 +487,8 @@ def edge_gen(edges):
tmp_path = bw.outdir
- pid_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-part000.csv')
- imi_csv = os.path.join(tmp_path, 'Is_Mutated_In-part000.csv')
+ pid_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-part000.csv")
+ imi_csv = os.path.join(tmp_path, "Is_Mutated_In-part000.csv")
with open(pid_csv) as f:
l = f.read()
@@ -483,7 +508,7 @@ def edge_gen(edges):
assert "4;" in l
assert "p2;" in l
assert "PERTURBED_IN_DISEASE" in l
- assert '\n' in l
+ assert "\n" in l
assert "m0;" in c
assert "mrel0;" in c
assert "'3-UTR';" in c
@@ -496,12 +521,11 @@ def edge_gen(edges):
assert "1;" in c
assert "p2;" in c
assert "Is_Mutated_In" in c
- assert '\n' in c
+ assert "\n" in c
-@pytest.mark.parametrize('l', [int(1e4 + 4)], scope='module')
+@pytest.mark.parametrize("l", [int(1e4 + 4)], scope="module")
def test_write_edge_data_from_large_gen(bw, _get_edges):
-
edges = _get_edges
def edge_gen(edges):
@@ -511,10 +535,10 @@ def edge_gen(edges):
tmp_path = bw.outdir
- apl0_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-part000.csv')
- ips0_csv = os.path.join(tmp_path, 'Is_Mutated_In-part000.csv')
- apl1_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-part001.csv')
- ips1_csv = os.path.join(tmp_path, 'Is_Mutated_In-part001.csv')
+ apl0_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-part000.csv")
+ ips0_csv = os.path.join(tmp_path, "Is_Mutated_In-part000.csv")
+ apl1_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-part001.csv")
+ ips1_csv = os.path.join(tmp_path, "Is_Mutated_In-part001.csv")
l_lines0 = sum(1 for _ in open(apl0_csv))
c_lines0 = sum(1 for _ in open(ips0_csv))
@@ -522,12 +546,15 @@ def edge_gen(edges):
c_lines1 = sum(1 for _ in open(ips1_csv))
assert (
- passed and l_lines0 == 1e4 and c_lines0 == 1e4 and l_lines1 == 4 and
- c_lines1 == 4
+ passed
+ and l_lines0 == 1e4
+ and c_lines0 == 1e4
+ and l_lines1 == 4
+ and c_lines1 == 4
)
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_write_edge_data_from_list(bw, _get_edges):
edges = _get_edges
@@ -535,8 +562,8 @@ def test_write_edge_data_from_list(bw, _get_edges):
tmp_path = bw.outdir
- apl_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-part000.csv')
- ips_csv = os.path.join(tmp_path, 'Is_Mutated_In-part000.csv')
+ apl_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-part000.csv")
+ ips_csv = os.path.join(tmp_path, "Is_Mutated_In-part000.csv")
with open(apl_csv) as f:
l = f.read()
@@ -547,7 +574,7 @@ def test_write_edge_data_from_list(bw, _get_edges):
assert "p0;" in l
assert "prel0;" in l
assert "'T253';" in l
- assert "4;" in l
+ assert "4;" in l
assert "p1;" in l
assert "PERTURBED_IN_DISEASE" in l
assert "\n" in l
@@ -559,9 +586,10 @@ def test_write_edge_data_from_list(bw, _get_edges):
assert "p1;" in c
assert "Is_Mutated_In" in c
assert "m1;" in c
- assert '\n' in c
-
-@pytest.mark.parametrize('l', [4], scope='module')
+ assert "\n" in c
+
+
+@pytest.mark.parametrize("l", [4], scope="module")
def test_write_edge_id_optional(bw, _get_edges):
edges = _get_edges
@@ -580,8 +608,8 @@ def test_write_edge_id_optional(bw, _get_edges):
tmp_path = bw.outdir
- pert_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-part000.csv')
- phos_csv = os.path.join(tmp_path, 'Phosphorylation-part000.csv')
+ pert_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-part000.csv")
+ phos_csv = os.path.join(tmp_path, "Phosphorylation-part000.csv")
with open(pert_csv) as f:
pertf = f.read()
@@ -591,8 +619,8 @@ def test_write_edge_id_optional(bw, _get_edges):
assert "prel0;" in pertf
assert "phos1;" not in phosf
- pert_header = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-header.csv')
- phos_header = os.path.join(tmp_path, 'Phosphorylation-header.csv')
+ pert_header = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-header.csv")
+ phos_header = os.path.join(tmp_path, "Phosphorylation-header.csv")
with open(pert_header) as f:
perth = f.read()
@@ -602,20 +630,21 @@ def test_write_edge_id_optional(bw, _get_edges):
assert "id;" in perth
assert "id;" not in phosh
+
def test_write_edge_data_from_list_no_props(bw):
le = 4
edges = []
for i in range(le):
e1 = BioCypherEdge(
- source_id=f'p{i}',
- target_id=f'p{i + 1}',
- relationship_label='PERTURBED_IN_DISEASE',
+ source_id=f"p{i}",
+ target_id=f"p{i + 1}",
+ relationship_label="PERTURBED_IN_DISEASE",
)
edges.append(e1)
e2 = BioCypherEdge(
- source_id=f'm{i}',
- target_id=f'p{i + 1}',
- relationship_label='Is_Mutated_In',
+ source_id=f"m{i}",
+ target_id=f"p{i + 1}",
+ relationship_label="Is_Mutated_In",
)
edges.append(e2)
@@ -623,8 +652,8 @@ def test_write_edge_data_from_list_no_props(bw):
tmp_path = bw.outdir
- ptl_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-part000.csv')
- pts_csv = os.path.join(tmp_path, 'Is_Mutated_In-part000.csv')
+ ptl_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-part000.csv")
+ pts_csv = os.path.join(tmp_path, "Is_Mutated_In-part000.csv")
with open(ptl_csv) as f:
l = f.read()
@@ -632,23 +661,23 @@ def test_write_edge_data_from_list_no_props(bw):
c = f.read()
assert passed
- assert 'p0;' in l
- assert 'p1;' in l
- assert 'PERTURBED_IN_DISEASE' in l
- assert 'p1;' in l
- assert 'p2;' in l
- assert 'PERTURBED_IN_DISEASE' in l
- assert '\n' in l
- assert 'm0;' in c
- assert 'p1;' in c
- assert 'Is_Mutated_In' in c
- assert 'm1;' in c
- assert 'p2;' in c
- assert 'Is_Mutated_In' in c
- assert '\n' in c
-
-
-@pytest.mark.parametrize('l', [8], scope='module')
+ assert "p0;" in l
+ assert "p1;" in l
+ assert "PERTURBED_IN_DISEASE" in l
+ assert "p1;" in l
+ assert "p2;" in l
+ assert "PERTURBED_IN_DISEASE" in l
+ assert "\n" in l
+ assert "m0;" in c
+ assert "p1;" in c
+ assert "Is_Mutated_In" in c
+ assert "m1;" in c
+ assert "p2;" in c
+ assert "Is_Mutated_In" in c
+ assert "\n" in c
+
+
+@pytest.mark.parametrize("l", [8], scope="module")
def test_write_edge_data_headers_import_call(bw, _get_nodes, _get_edges):
edges = _get_edges
@@ -673,9 +702,9 @@ def edge_gen2(edges):
tmp_path = bw.outdir
- ptl_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-header.csv')
- pts_csv = os.path.join(tmp_path, 'Is_Mutated_In-header.csv')
- call_csv = os.path.join(tmp_path, 'neo4j-admin-import-call.sh')
+ ptl_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-header.csv")
+ pts_csv = os.path.join(tmp_path, "Is_Mutated_In-header.csv")
+ call_csv = os.path.join(tmp_path, "neo4j-admin-import-call.sh")
with open(ptl_csv) as f:
l = f.read()
@@ -684,19 +713,19 @@ def edge_gen2(edges):
with open(call_csv) as f:
call = f.read()
- assert l == ':START_ID;id;residue;level:long;:END_ID;:TYPE'
- assert c == ':START_ID;id;site;confidence:long;:END_ID;:TYPE'
+ assert l == ":START_ID;id;residue;level:long;:END_ID;:TYPE"
+ assert c == ":START_ID;id;site;confidence:long;:END_ID;:TYPE"
- assert 'bin/neo4j-admin import' in call
- assert '--database=neo4j' in call
+ assert "bin/neo4j-admin import" in call
+ assert "--database=neo4j" in call
assert '--delimiter=";"' in call
- assert '--force=true' in call
+ assert "--force=true" in call
assert '--nodes="' in call
- assert 'PERTURBED_IN_DISEASE' in call
- assert 'Is_Mutated_In' in call
+ assert "PERTURBED_IN_DISEASE" in call
+ assert "Is_Mutated_In" in call
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_write_duplicate_edges(bw, _get_edges):
edges = _get_edges
edges.append(edges[0])
@@ -705,8 +734,8 @@ def test_write_duplicate_edges(bw, _get_edges):
tmp_path = bw.outdir
- ptl_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-part000.csv')
- pts_csv = os.path.join(tmp_path, 'Is_Mutated_In-part000.csv')
+ ptl_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-part000.csv")
+ pts_csv = os.path.join(tmp_path, "Is_Mutated_In-part000.csv")
l = sum(1 for _ in open(ptl_csv))
c = sum(1 for _ in open(pts_csv))
@@ -724,9 +753,9 @@ def gen(lis):
tmp_path = bw.outdir
- iso_csv = os.path.join(tmp_path, 'IS_SOURCE_OF-part000.csv')
- ito_csv = os.path.join(tmp_path, 'IS_TARGET_OF-part000.csv')
- pmi_csv = os.path.join(tmp_path, 'PostTranslationalInteraction-part000.csv')
+ iso_csv = os.path.join(tmp_path, "IS_SOURCE_OF-part000.csv")
+ ito_csv = os.path.join(tmp_path, "IS_TARGET_OF-part000.csv")
+ pmi_csv = os.path.join(tmp_path, "PostTranslationalInteraction-part000.csv")
with open(iso_csv) as f:
s = f.read()
@@ -736,39 +765,39 @@ def gen(lis):
p = f.read()
assert passed
- assert 'i1;' in s
- assert 'p1;' in s
- assert 'IS_SOURCE_OF' in s
- assert '\n' in s
- assert 'i0;' in t
- assert 'p2;' in t
- assert 'IS_TARGET_OF' in t
- assert '\n' in t
+ assert "i1;" in s
+ assert "p1;" in s
+ assert "IS_SOURCE_OF" in s
+ assert "\n" in s
+ assert "i0;" in t
+ assert "p2;" in t
+ assert "IS_TARGET_OF" in t
+ assert "\n" in t
assert "i1;True;-1;'i1';'id'" in p
- assert 'Association' in p
- assert '\n' in p
+ assert "Association" in p
+ assert "\n" in p
def _get_rel_as_nodes(l):
rels = []
for i in range(l):
n = BioCypherNode(
- node_id=f'i{i+1}',
- node_label='post translational interaction',
+ node_id=f"i{i+1}",
+ node_label="post translational interaction",
properties={
- 'directed': True,
- 'effect': -1,
+ "directed": True,
+ "effect": -1,
},
)
e1 = BioCypherEdge(
- source_id=f'i{i+1}',
- target_id=f'p{i+1}',
- relationship_label='IS_SOURCE_OF',
+ source_id=f"i{i+1}",
+ target_id=f"p{i+1}",
+ relationship_label="IS_SOURCE_OF",
)
e2 = BioCypherEdge(
- source_id=f'i{i}',
- target_id=f'p{i + 2}',
- relationship_label='IS_TARGET_OF',
+ source_id=f"i{i}",
+ target_id=f"p{i + 2}",
+ relationship_label="IS_TARGET_OF",
)
rels.append(BioCypherRelAsNode(n, e1, e2))
return rels
@@ -790,7 +819,7 @@ def gen2(lis):
tmp_path = bw.outdir
- iso_csv = os.path.join(tmp_path, 'IS_SOURCE_OF-part001.csv')
+ iso_csv = os.path.join(tmp_path, "IS_SOURCE_OF-part001.csv")
assert passed1 and passed2 and isfile(iso_csv)
@@ -800,25 +829,25 @@ def test_write_mixed_edges(bw):
le = 4
for i in range(le):
e3 = BioCypherEdge(
- source_id=f'p{i+1}',
- target_id=f'p{i+1}',
- relationship_label='PERTURBED_IN_DISEASE',
+ source_id=f"p{i+1}",
+ target_id=f"p{i+1}",
+ relationship_label="PERTURBED_IN_DISEASE",
)
mixed.append(e3)
n = BioCypherNode(
- f'i{i+1}',
- 'post translational interaction',
+ f"i{i+1}",
+ "post translational interaction",
)
e1 = BioCypherEdge(
- source_id=f'i{i+1}',
- target_id=f'p{i+1}',
- relationship_label='IS_SOURCE_OF',
+ source_id=f"i{i+1}",
+ target_id=f"p{i+1}",
+ relationship_label="IS_SOURCE_OF",
)
e2 = BioCypherEdge(
- source_id=f'i{i}',
- target_id=f'p{i+2}',
- relationship_label='IS_TARGET_OF',
+ source_id=f"i{i}",
+ target_id=f"p{i+2}",
+ relationship_label="IS_TARGET_OF",
)
mixed.append(BioCypherRelAsNode(n, e1, e2))
@@ -829,24 +858,26 @@ def gen(lis):
tmp_path = bw.outdir
- pmi_csv = os.path.join(tmp_path, 'PostTranslationalInteraction-header.csv')
- iso_csv = os.path.join(tmp_path, 'IS_SOURCE_OF-header.csv')
- ito_csv = os.path.join(tmp_path, 'IS_TARGET_OF-header.csv')
- ipt_csv = os.path.join(tmp_path, 'PERTURBED_IN_DISEASE-header.csv')
+ pmi_csv = os.path.join(tmp_path, "PostTranslationalInteraction-header.csv")
+ iso_csv = os.path.join(tmp_path, "IS_SOURCE_OF-header.csv")
+ ito_csv = os.path.join(tmp_path, "IS_TARGET_OF-header.csv")
+ ipt_csv = os.path.join(tmp_path, "PERTURBED_IN_DISEASE-header.csv")
assert (
- passed and os.path.isfile(pmi_csv) and os.path.isfile(iso_csv) and
- os.path.isfile(ito_csv) and os.path.isfile(ipt_csv)
+ passed
+ and os.path.isfile(pmi_csv)
+ and os.path.isfile(iso_csv)
+ and os.path.isfile(ito_csv)
+ and os.path.isfile(ipt_csv)
)
def test_duplicate_id(bw):
-
nodes = []
tmp_path = bw.outdir
- csv = os.path.join(tmp_path, 'Protein-part000.csv')
+ csv = os.path.join(tmp_path, "Protein-part000.csv")
# remove csv file in path
if os.path.exists(csv):
@@ -855,13 +886,13 @@ def test_duplicate_id(bw):
# four proteins, four miRNAs
for _ in range(2):
bnp = BioCypherNode(
- node_id=f'p1',
- node_label='protein',
+ node_id=f"p1",
+ node_label="protein",
properties={
- 'name': 'StringProperty1',
- 'score': 4.32,
- 'taxon': 9606,
- 'genes': ['gene1', 'gene2'],
+ "name": "StringProperty1",
+ "score": 4.32,
+ "taxon": 9606,
+ "genes": ["gene1", "gene2"],
},
)
nodes.append(bnp)
@@ -874,12 +905,11 @@ def test_duplicate_id(bw):
def test_write_synonym(bw):
-
nodes = []
tmp_path = bw.outdir
- csv = os.path.join(tmp_path, 'Complex-part000.csv')
+ csv = os.path.join(tmp_path, "Complex-part000.csv")
# remove csv file in path
if os.path.exists(csv):
@@ -887,12 +917,12 @@ def test_write_synonym(bw):
# four proteins, four miRNAs
for _ in range(4):
bnp = BioCypherNode(
- node_id=f'p{_+1}',
- node_label='complex',
+ node_id=f"p{_+1}",
+ node_label="complex",
properties={
- 'name': 'StringProperty1',
- 'score': 4.32,
- 'taxon': 9606,
+ "name": "StringProperty1",
+ "score": 4.32,
+ "taxon": 9606,
},
)
nodes.append(bnp)
@@ -904,21 +934,21 @@ def test_write_synonym(bw):
assert passed and os.path.exists(csv)
assert "p1;'StringProperty1';4.32;9606;'p1';'id'" in comp
- assert 'Complex' in comp
+ assert "Complex" in comp
-def test_write_strict(bw_strict):
+def test_write_strict(bw_strict):
n1 = BioCypherNode(
- node_id='p1',
- node_label='protein',
+ node_id="p1",
+ node_label="protein",
properties={
- 'name': 'StringProperty1',
- 'score': 4.32,
- 'taxon': 9606,
- 'genes': ['gene1', 'gene2'],
- 'source': 'source1',
- 'version': 'version1',
- 'licence': 'licence1',
+ "name": "StringProperty1",
+ "score": 4.32,
+ "taxon": 9606,
+ "genes": ["gene1", "gene2"],
+ "source": "source1",
+ "version": "version1",
+ "licence": "licence1",
},
)
@@ -928,30 +958,32 @@ def test_write_strict(bw_strict):
tmp_path = bw_strict.outdir
- csv = os.path.join(tmp_path, 'Protein-part000.csv')
+ csv = os.path.join(tmp_path, "Protein-part000.csv")
with open(csv) as f:
prot = f.read()
- assert "p1;'StringProperty1';4.32;9606;'gene1|gene2';'p1';'id';'source1';'version1';'licence1'" in prot
- assert 'BiologicalEntity' in prot
+ assert (
+ "p1;'StringProperty1';4.32;9606;'gene1|gene2';'p1';'id';'source1';'version1';'licence1'"
+ in prot
+ )
+ assert "BiologicalEntity" in prot
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_tab_delimiter(bw_tab, _get_nodes):
-
passed = bw_tab.write_nodes(_get_nodes)
assert passed
tmp_path = bw_tab.outdir
- header = os.path.join(tmp_path, 'Protein-header.csv')
+ header = os.path.join(tmp_path, "Protein-header.csv")
with open(header) as f:
prot = f.read()
- assert '\t' in prot
+ assert "\t" in prot
call = bw_tab._construct_import_call()
diff --git a/test/test_write_postgres.py b/test/test_write_postgres.py
index 6ffd1930..cbad24c7 100644
--- a/test/test_write_postgres.py
+++ b/test/test_write_postgres.py
@@ -4,7 +4,7 @@
import pytest
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_write_node_data_from_gen_comma_postgresql(
bw_comma_postgresql, _get_nodes
):
@@ -20,8 +20,8 @@ def node_gen(nodes):
tmp_path = bw_comma_postgresql.outdir
- p_csv = os.path.join(tmp_path, 'Protein-part000.csv')
- m_csv = os.path.join(tmp_path, 'MicroRNA-part000.csv')
+ p_csv = os.path.join(tmp_path, "Protein-part000.csv")
+ m_csv = os.path.join(tmp_path, "MicroRNA-part000.csv")
with open(p_csv) as f:
pr = f.read()
@@ -30,15 +30,15 @@ def node_gen(nodes):
mi = f.read()
assert 'p1,"StringProperty1",4.0,9606' in pr
- assert 'uniprot' in pr
- assert 'BiologicalEntity' in pr
- assert 'Polypeptide' in pr
- assert 'Protein' in pr
+ assert "uniprot" in pr
+ assert "BiologicalEntity" in pr
+ assert "Polypeptide" in pr
+ assert "Protein" in pr
assert 'm1,"StringProperty1",9606,"m1","mirbase"' in mi
- assert 'ChemicalEntity' in mi
+ assert "ChemicalEntity" in mi
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_write_node_data_from_gen_tab_postgresql(bw_tab_postgresql, _get_nodes):
nodes = _get_nodes
@@ -49,8 +49,8 @@ def node_gen(nodes):
tmp_path = bw_tab_postgresql.outdir
- p_csv = os.path.join(tmp_path, 'Protein-part000.csv')
- m_csv = os.path.join(tmp_path, 'MicroRNA-part000.csv')
+ p_csv = os.path.join(tmp_path, "Protein-part000.csv")
+ m_csv = os.path.join(tmp_path, "MicroRNA-part000.csv")
with open(p_csv) as f:
pr = f.read()
@@ -61,19 +61,25 @@ def node_gen(nodes):
assert passed
assert 'p1\t"StringProperty1"\t4.0\t9606\t' in pr
assert '\t"uniprot"\t' in pr
- assert 'BiologicalEntity' in pr
- assert 'Polypeptide' in pr
- assert 'Protein' in pr
+ assert "BiologicalEntity" in pr
+ assert "Polypeptide" in pr
+ assert "Protein" in pr
assert 'm1\t"StringProperty1"\t9606\t"m1"\t"mirbase"' in mi
- assert 'ChemicalEntity' in mi
+ assert "ChemicalEntity" in mi
@pytest.mark.requires_postgresql
-@pytest.mark.parametrize('l', [4], scope='module')
+@pytest.mark.parametrize("l", [4], scope="module")
def test_database_import_node_data_from_gen_comma_postgresql(
bw_comma_postgresql, _get_nodes, create_database_postgres
):
- dbname, user, port, password, create_database_success = create_database_postgres
+ (
+ dbname,
+ user,
+ port,
+ password,
+ create_database_success,
+ ) = create_database_postgres
assert create_database_success
nodes = _get_nodes
@@ -88,8 +94,10 @@ def node_gen(nodes):
# verify that all files have been created
assert set(os.listdir(tmp_path)) == set(
[
- 'protein-create_table.sql', 'Protein-part000.csv',
- 'microrna-create_table.sql', 'MicroRNA-part000.csv'
+ "protein-create_table.sql",
+ "Protein-part000.csv",
+ "microrna-create_table.sql",
+ "MicroRNA-part000.csv",
]
)
@@ -97,7 +105,8 @@ def node_gen(nodes):
# verify that import call has been created
import_scripts = [
name
- for name in os.listdir(tmp_path) if name.endswith('-import-call.sh')
+ for name in os.listdir(tmp_path)
+ if name.endswith("-import-call.sh")
]
assert len(import_scripts) == 1
@@ -112,32 +121,38 @@ def node_gen(nodes):
assert result.returncode == 0
# check data in the databases
- command = f'PGPASSWORD={password} psql -c \'SELECT COUNT(*) FROM protein;\' --dbname {dbname} --port {port} --user {user}'
+ command = f"PGPASSWORD={password} psql -c 'SELECT COUNT(*) FROM protein;' --dbname {dbname} --port {port} --user {user}"
result = subprocess.run(
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
# subprocess success
assert result.returncode == 0
# 4 entires in table
- assert '4' in result.stdout.decode()
+ assert "4" in result.stdout.decode()
# check data in the databases
- command = f'PGPASSWORD={password} psql -c \'SELECT COUNT(*) FROM microrna;\' --dbname {dbname} --port {port} --user {user}'
+ command = f"PGPASSWORD={password} psql -c 'SELECT COUNT(*) FROM microrna;' --dbname {dbname} --port {port} --user {user}"
result = subprocess.run(
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
# subprocess success
assert result.returncode == 0
# 4 entires in table
- assert '4' in result.stdout.decode()
+ assert "4" in result.stdout.decode()
@pytest.mark.requires_postgresql
-@pytest.mark.parametrize('l', [5], scope='module')
+@pytest.mark.parametrize("l", [5], scope="module")
def test_database_import_node_data_from_gen_tab_postgresql(
bw_tab_postgresql, _get_nodes, create_database_postgres
):
- dbname, user, port, password, create_database_success = create_database_postgres
+ (
+ dbname,
+ user,
+ port,
+ password,
+ create_database_success,
+ ) = create_database_postgres
assert create_database_success
nodes = _get_nodes
@@ -152,8 +167,10 @@ def node_gen(nodes):
# verify that all files have been created
assert set(os.listdir(tmp_path)) == set(
[
- 'protein-create_table.sql', 'Protein-part000.csv',
- 'microrna-create_table.sql', 'MicroRNA-part000.csv'
+ "protein-create_table.sql",
+ "Protein-part000.csv",
+ "microrna-create_table.sql",
+ "MicroRNA-part000.csv",
]
)
@@ -161,7 +178,8 @@ def node_gen(nodes):
# verify that import call has been created
import_scripts = [
name
- for name in os.listdir(tmp_path) if name.endswith('-import-call.sh')
+ for name in os.listdir(tmp_path)
+ if name.endswith("-import-call.sh")
]
assert len(import_scripts) == 1
@@ -176,32 +194,38 @@ def node_gen(nodes):
assert result.returncode == 0
# check data in the databases
- command = f'PGPASSWORD={password} psql -c \'SELECT COUNT(*) FROM protein;\' --dbname {dbname} --port {port} --user {user}'
+ command = f"PGPASSWORD={password} psql -c 'SELECT COUNT(*) FROM protein;' --dbname {dbname} --port {port} --user {user}"
result = subprocess.run(
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
# subprocess success
assert result.returncode == 0
# 5 entires in table
- assert '5' in result.stdout.decode()
+ assert "5" in result.stdout.decode()
# check data in the databases
- command = f'PGPASSWORD={password} psql -c \'SELECT COUNT(*) FROM microrna;\' --dbname {dbname} --port {port} --user {user}'
+ command = f"PGPASSWORD={password} psql -c 'SELECT COUNT(*) FROM microrna;' --dbname {dbname} --port {port} --user {user}"
result = subprocess.run(
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
# subprocess success
assert result.returncode == 0
# 5 entires in table
- assert '5' in result.stdout.decode()
+ assert "5" in result.stdout.decode()
@pytest.mark.requires_postgresql
-@pytest.mark.parametrize('l', [8], scope='module')
+@pytest.mark.parametrize("l", [8], scope="module")
def test_database_import_edge_data_from_gen_comma_postgresql(
bw_comma_postgresql, _get_nodes, create_database_postgres, _get_edges
):
- dbname, user, port, password, create_database_success = create_database_postgres
+ (
+ dbname,
+ user,
+ port,
+ password,
+ create_database_success,
+ ) = create_database_postgres
assert create_database_success
edges = _get_edges
@@ -227,7 +251,8 @@ def edge_gen2(edges):
# verify that import call has been created
import_scripts = [
name
- for name in os.listdir(tmp_path) if name.endswith('-import-call.sh')
+ for name in os.listdir(tmp_path)
+ if name.endswith("-import-call.sh")
]
assert len(import_scripts) == 1
@@ -237,9 +262,9 @@ def edge_gen2(edges):
commands = f.readlines()
assert len(commands) > 0
- assert str(tmp_path) in '\n'.join(commands)
- assert 'protein-create_table.sql' in '\n'.join(commands)
- assert '--user' in '\n'.join(commands)
+ assert str(tmp_path) in "\n".join(commands)
+ assert "protein-create_table.sql" in "\n".join(commands)
+ assert "--user" in "\n".join(commands)
for command in commands:
result = subprocess.run(command, shell=True)
@@ -247,31 +272,37 @@ def edge_gen2(edges):
assert result.returncode == 0
# check data in the databases
- command = f'PGPASSWORD={password} psql -c \'SELECT COUNT(*) FROM is_mutated_in;\' --dbname {dbname} --port {port} --user {user}'
+ command = f"PGPASSWORD={password} psql -c 'SELECT COUNT(*) FROM is_mutated_in;' --dbname {dbname} --port {port} --user {user}"
result = subprocess.run(
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
# subprocess success
assert result.returncode == 0
# 2 entries in table
- assert '8' in result.stdout.decode()
+ assert "8" in result.stdout.decode()
- command = f'PGPASSWORD={password} psql -c \'SELECT COUNT(*) FROM perturbed_in_disease;\' --dbname {dbname} --port {port} --user {user}'
+ command = f"PGPASSWORD={password} psql -c 'SELECT COUNT(*) FROM perturbed_in_disease;' --dbname {dbname} --port {port} --user {user}"
result = subprocess.run(
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
# subprocess success
assert result.returncode == 0
# 2 entries in table
- assert '8' in result.stdout.decode()
+ assert "8" in result.stdout.decode()
@pytest.mark.requires_postgresql
-@pytest.mark.parametrize('l', [8], scope='module')
+@pytest.mark.parametrize("l", [8], scope="module")
def test_database_import_edge_data_from_gen_tab_postgresql(
bw_tab_postgresql, _get_nodes, create_database_postgres, _get_edges
):
- dbname, user, port, password, create_database_success = create_database_postgres
+ (
+ dbname,
+ user,
+ port,
+ password,
+ create_database_success,
+ ) = create_database_postgres
assert create_database_success
edges = _get_edges
@@ -297,7 +328,8 @@ def edge_gen2(edges):
# verify that import call has been created
import_scripts = [
name
- for name in os.listdir(tmp_path) if name.endswith('-import-call.sh')
+ for name in os.listdir(tmp_path)
+ if name.endswith("-import-call.sh")
]
assert len(import_scripts) == 1
@@ -307,29 +339,29 @@ def edge_gen2(edges):
commands = f.readlines()
assert len(commands) > 1
- assert str(tmp_path) in '\n'.join(commands)
- assert 'protein-create_table.sql' in '\n'.join(commands)
- assert '--user' in '\n'.join(commands)
+ assert str(tmp_path) in "\n".join(commands)
+ assert "protein-create_table.sql" in "\n".join(commands)
+ assert "--user" in "\n".join(commands)
for command in commands:
result = subprocess.run(command, shell=True)
assert result.returncode == 0
# check data in the databases
- command = f'PGPASSWORD={password} psql -c \'SELECT COUNT(*) FROM is_mutated_in;\' --dbname {dbname} --port {port} --user {user}'
+ command = f"PGPASSWORD={password} psql -c 'SELECT COUNT(*) FROM is_mutated_in;' --dbname {dbname} --port {port} --user {user}"
result = subprocess.run(
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
# subprocess success
assert result.returncode == 0
# 2 entires in table
- assert '8' in result.stdout.decode()
+ assert "8" in result.stdout.decode()
- command = f'PGPASSWORD={password} psql -c \'SELECT COUNT(*) FROM perturbed_in_disease;\' --dbname {dbname} --port {port} --user {user}'
+ command = f"PGPASSWORD={password} psql -c 'SELECT COUNT(*) FROM perturbed_in_disease;' --dbname {dbname} --port {port} --user {user}"
result = subprocess.run(
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
# subprocess success
assert result.returncode == 0
# 2 entires in table
- assert '8' in result.stdout.decode()
+ assert "8" in result.stdout.decode()
diff --git a/tutorial/01__basic_import.py b/tutorial/01__basic_import.py
index 7bfdb0ec..861f7abe 100644
--- a/tutorial/01__basic_import.py
+++ b/tutorial/01__basic_import.py
@@ -17,8 +17,8 @@ def node_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/01_biocypher_config.yaml',
- schema_config_path='tutorial/01_schema_config.yaml',
+ biocypher_config_path="tutorial/01_biocypher_config.yaml",
+ schema_config_path="tutorial/01_schema_config.yaml",
)
# Run the import
bc.write_nodes(node_generator())
@@ -27,5 +27,5 @@ def node_generator():
bc.write_import_call()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/01__basic_import_pandas.py b/tutorial/01__basic_import_pandas.py
index b0b26a4a..e12c4cc9 100644
--- a/tutorial/01__basic_import_pandas.py
+++ b/tutorial/01__basic_import_pandas.py
@@ -17,12 +17,13 @@ def node_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/01_biocypher_config.yaml',
- schema_config_path='tutorial/01_schema_config.yaml',
+ biocypher_config_path="tutorial/01_biocypher_config.yaml",
+ schema_config_path="tutorial/01_schema_config.yaml",
)
# Run the import
bc.add(node_generator())
bc.to_df()
-if __name__ == '__main__':
+
+if __name__ == "__main__":
main()
diff --git a/tutorial/02__merge.py b/tutorial/02__merge.py
index 245be65f..7141f133 100644
--- a/tutorial/02__merge.py
+++ b/tutorial/02__merge.py
@@ -5,10 +5,12 @@
def main():
# Setup: create a list of proteins to be imported
proteins = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[Protein() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -22,8 +24,8 @@ def node_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/02_biocypher_config.yaml',
- schema_config_path='tutorial/02_schema_config.yaml',
+ biocypher_config_path="tutorial/02_biocypher_config.yaml",
+ schema_config_path="tutorial/02_schema_config.yaml",
)
# Run the import
bc.write_nodes(node_generator())
@@ -32,5 +34,5 @@ def node_generator():
bc.write_import_call()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/02__merge_pandas.py b/tutorial/02__merge_pandas.py
index a3583bb6..6f9aa126 100644
--- a/tutorial/02__merge_pandas.py
+++ b/tutorial/02__merge_pandas.py
@@ -5,10 +5,12 @@
def main():
# Setup: create a list of proteins to be imported
proteins = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[Protein() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -22,8 +24,8 @@ def node_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/02_biocypher_config.yaml',
- schema_config_path='tutorial/02_schema_config.yaml',
+ biocypher_config_path="tutorial/02_biocypher_config.yaml",
+ schema_config_path="tutorial/02_schema_config.yaml",
)
# Run the import
bc.add(node_generator())
@@ -31,5 +33,5 @@ def node_generator():
print(bc.to_df())
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/03__implicit_subclass.py b/tutorial/03__implicit_subclass.py
index 7ba277be..1c43269f 100644
--- a/tutorial/03__implicit_subclass.py
+++ b/tutorial/03__implicit_subclass.py
@@ -5,10 +5,12 @@
def main():
# Setup: create a list of proteins to be imported
proteins = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[Protein() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -22,8 +24,8 @@ def node_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/03_biocypher_config.yaml',
- schema_config_path='tutorial/03_schema_config.yaml',
+ biocypher_config_path="tutorial/03_biocypher_config.yaml",
+ schema_config_path="tutorial/03_schema_config.yaml",
)
# Run the import
bc.write_nodes(node_generator())
@@ -32,5 +34,5 @@ def node_generator():
bc.write_import_call()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/03__implicit_subclass_pandas.py b/tutorial/03__implicit_subclass_pandas.py
index 0f7be14c..709984a1 100644
--- a/tutorial/03__implicit_subclass_pandas.py
+++ b/tutorial/03__implicit_subclass_pandas.py
@@ -5,10 +5,12 @@
def main():
# Setup: create a list of proteins to be imported
proteins = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[Protein() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -22,8 +24,8 @@ def node_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/03_biocypher_config.yaml',
- schema_config_path='tutorial/03_schema_config.yaml',
+ biocypher_config_path="tutorial/03_biocypher_config.yaml",
+ schema_config_path="tutorial/03_schema_config.yaml",
)
# Run the import
bc.add(node_generator())
@@ -33,5 +35,5 @@ def node_generator():
print(df)
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/04__properties.py b/tutorial/04__properties.py
index 24a268be..9c3e683c 100644
--- a/tutorial/04__properties.py
+++ b/tutorial/04__properties.py
@@ -5,10 +5,12 @@
def main():
# Setup: create a list of proteins to be imported
proteins = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[RandomPropertyProtein() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -22,8 +24,8 @@ def node_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/04_biocypher_config.yaml',
- schema_config_path='tutorial/04_schema_config.yaml',
+ biocypher_config_path="tutorial/04_biocypher_config.yaml",
+ schema_config_path="tutorial/04_schema_config.yaml",
)
# Run the import
bc.write_nodes(node_generator())
@@ -32,5 +34,5 @@ def node_generator():
bc.write_import_call()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/04__properties_pandas.py b/tutorial/04__properties_pandas.py
index 052f1428..fc39e294 100644
--- a/tutorial/04__properties_pandas.py
+++ b/tutorial/04__properties_pandas.py
@@ -5,10 +5,12 @@
def main():
# Setup: create a list of proteins to be imported
proteins = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[RandomPropertyProtein() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -22,8 +24,8 @@ def node_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/04_biocypher_config.yaml',
- schema_config_path='tutorial/04_schema_config.yaml',
+ biocypher_config_path="tutorial/04_biocypher_config.yaml",
+ schema_config_path="tutorial/04_schema_config.yaml",
)
# Run the import
bc.add(node_generator())
@@ -33,5 +35,5 @@ def node_generator():
print(df)
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/05__property_inheritance.py b/tutorial/05__property_inheritance.py
index 4baadbf0..2f7577a9 100644
--- a/tutorial/05__property_inheritance.py
+++ b/tutorial/05__property_inheritance.py
@@ -9,11 +9,13 @@
def main():
# Setup: create a list of proteins to be imported
proteins = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[RandomPropertyProtein() for _ in range(10)],
[RandomPropertyProteinIsoform() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -27,8 +29,8 @@ def node_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/05_biocypher_config.yaml',
- schema_config_path='tutorial/05_schema_config.yaml',
+ biocypher_config_path="tutorial/05_biocypher_config.yaml",
+ schema_config_path="tutorial/05_schema_config.yaml",
)
# Run the import
bc.write_nodes(node_generator())
@@ -37,5 +39,5 @@ def node_generator():
bc.write_import_call()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/05__property_inheritance_pandas.py b/tutorial/05__property_inheritance_pandas.py
index 3e11f074..b0ccf2ec 100644
--- a/tutorial/05__property_inheritance_pandas.py
+++ b/tutorial/05__property_inheritance_pandas.py
@@ -9,11 +9,13 @@
def main():
# Setup: create a list of proteins to be imported
proteins = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[RandomPropertyProtein() for _ in range(10)],
[RandomPropertyProteinIsoform() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -27,8 +29,8 @@ def node_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/05_biocypher_config.yaml',
- schema_config_path='tutorial/05_schema_config.yaml',
+ biocypher_config_path="tutorial/05_biocypher_config.yaml",
+ schema_config_path="tutorial/05_schema_config.yaml",
)
# Run the import
bc.add(node_generator())
@@ -38,5 +40,5 @@ def node_generator():
print(df)
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/06__relationships.py b/tutorial/06__relationships.py
index 364377bc..768746ad 100644
--- a/tutorial/06__relationships.py
+++ b/tutorial/06__relationships.py
@@ -10,11 +10,13 @@
def main():
# Setup: create a list of proteins to be imported
proteins = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[RandomPropertyProtein() for _ in range(10)],
[RandomPropertyProteinIsoform() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -45,8 +47,8 @@ def edge_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/06_biocypher_config.yaml',
- schema_config_path='tutorial/06_schema_config.yaml',
+ biocypher_config_path="tutorial/06_biocypher_config.yaml",
+ schema_config_path="tutorial/06_schema_config.yaml",
)
# Run the import
bc.write_nodes(node_generator())
@@ -59,5 +61,5 @@ def edge_generator():
bc.show_ontology_structure()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/06__relationships_pandas.py b/tutorial/06__relationships_pandas.py
index b839fef1..b67a5b4d 100644
--- a/tutorial/06__relationships_pandas.py
+++ b/tutorial/06__relationships_pandas.py
@@ -10,11 +10,13 @@
def main():
# Setup: create a list of proteins to be imported
proteins = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[RandomPropertyProtein() for _ in range(10)],
[RandomPropertyProteinIsoform() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -45,16 +47,17 @@ def edge_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/06_biocypher_config.yaml',
- schema_config_path='tutorial/06_schema_config_pandas.yaml',
+ biocypher_config_path="tutorial/06_biocypher_config.yaml",
+ schema_config_path="tutorial/06_schema_config_pandas.yaml",
)
# Run the import
bc.add(node_generator())
bc.add(edge_generator())
-
+
for name, df in bc.to_df().items():
print(name)
print(df)
-if __name__ == '__main__':
+
+if __name__ == "__main__":
main()
diff --git a/tutorial/07__synonyms.py b/tutorial/07__synonyms.py
index a6fa7cff..c7af403e 100644
--- a/tutorial/07__synonyms.py
+++ b/tutorial/07__synonyms.py
@@ -11,12 +11,14 @@
def main():
# Setup: create a list of proteins to be imported
proteins_complexes = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[RandomPropertyProtein() for _ in range(10)],
[RandomPropertyProteinIsoform() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
[Complex() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -47,8 +49,8 @@ def edge_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/07_biocypher_config.yaml',
- schema_config_path='tutorial/07_schema_config.yaml',
+ biocypher_config_path="tutorial/07_biocypher_config.yaml",
+ schema_config_path="tutorial/07_schema_config.yaml",
)
# Run the import
bc.write_nodes(node_generator())
@@ -61,5 +63,5 @@ def edge_generator():
bc.summary()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/07__synonyms_pandas.py b/tutorial/07__synonyms_pandas.py
index 9b694514..95ce0a13 100644
--- a/tutorial/07__synonyms_pandas.py
+++ b/tutorial/07__synonyms_pandas.py
@@ -11,12 +11,14 @@
def main():
# Setup: create a list of proteins to be imported
proteins_complexes = [
- p for sublist in zip(
+ p
+ for sublist in zip(
[RandomPropertyProtein() for _ in range(10)],
[RandomPropertyProteinIsoform() for _ in range(10)],
[EntrezProtein() for _ in range(10)],
[Complex() for _ in range(10)],
- ) for p in sublist
+ )
+ for p in sublist
]
# Extract id, label, and property dictionary
@@ -47,8 +49,8 @@ def edge_generator():
# Create BioCypher driver
bc = BioCypher(
- biocypher_config_path='tutorial/07_biocypher_config.yaml',
- schema_config_path='tutorial/07_schema_config_pandas.yaml',
+ biocypher_config_path="tutorial/07_biocypher_config.yaml",
+ schema_config_path="tutorial/07_schema_config_pandas.yaml",
)
# Run the import
bc.add(node_generator())
@@ -62,5 +64,5 @@ def edge_generator():
bc.summary()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/tutorial/data_generator.py b/tutorial/data_generator.py
index 7f805252..27f18b48 100644
--- a/tutorial/data_generator.py
+++ b/tutorial/data_generator.py
@@ -6,14 +6,14 @@
import string
__all__ = [
- 'EntrezProtein',
- 'Interaction',
- 'InteractionGenerator',
- 'Node',
- 'Protein',
- 'ProteinProteinInteraction',
- 'RandomPropertyProtein',
- 'RandomPropertyProteinIsoform',
+ "EntrezProtein",
+ "Interaction",
+ "InteractionGenerator",
+ "Node",
+ "Protein",
+ "ProteinProteinInteraction",
+ "RandomPropertyProtein",
+ "RandomPropertyProteinIsoform",
]
@@ -21,6 +21,7 @@ class Node:
"""
Base class for nodes.
"""
+
def __init__(self):
self.id = None
self.label = None
@@ -49,9 +50,10 @@ class Protein(Node):
"""
Generates instances of proteins.
"""
+
def __init__(self):
self.id = self._generate_id()
- self.label = 'uniprot_protein'
+ self.label = "uniprot_protein"
self.properties = self._generate_properties()
def _generate_id(self):
@@ -62,7 +64,7 @@ def _generate_id(self):
nums = [random.choice(string.digits) for _ in range(3)]
# join alternating between lets and nums
- return ''.join([x for y in zip(lets, nums) for x in y])
+ return "".join([x for y in zip(lets, nums) for x in y])
def _generate_properties(self):
properties = {}
@@ -72,17 +74,17 @@ def _generate_properties(self):
# random int between 50 and 250
l = random.randint(50, 250)
- properties['sequence'] = ''.join(
- [random.choice('ACDEFGHIKLMNPQRSTVWY') for _ in range(l)],
+ properties["sequence"] = "".join(
+ [random.choice("ACDEFGHIKLMNPQRSTVWY") for _ in range(l)],
)
## random description
- properties['description'] = ' '.join(
+ properties["description"] = " ".join(
[random.choice(string.ascii_lowercase) for _ in range(10)],
)
## taxon
- properties['taxon'] = '9606'
+ properties["taxon"] = "9606"
return properties
@@ -91,9 +93,10 @@ class Complex(Node):
"""
Generates instances of complexes.
"""
+
def __init__(self):
self.id = self._generate_id()
- self.label = 'complex'
+ self.label = "complex"
self.properties = self._generate_properties()
def _generate_id(self):
@@ -109,12 +112,12 @@ def _generate_properties(self):
properties = {}
## random description
- properties['description'] = ' '.join(
+ properties["description"] = " ".join(
[random.choice(string.ascii_lowercase) for _ in range(10)],
)
## taxon
- properties['taxon'] = '9606'
+ properties["taxon"] = "9606"
return properties
@@ -123,6 +126,7 @@ class RandomPropertyProtein(Protein):
"""
Generates instances of proteins with random properties.
"""
+
def _generate_properties(self):
properties = {}
@@ -131,21 +135,21 @@ def _generate_properties(self):
# random int between 50 and 250
l = random.randint(50, 250)
- properties['sequence'] = ''.join(
- [random.choice('ACDEFGHIKLMNPQRSTVWY') for _ in range(l)],
+ properties["sequence"] = "".join(
+ [random.choice("ACDEFGHIKLMNPQRSTVWY") for _ in range(l)],
)
## random description
- properties['description'] = ' '.join(
+ properties["description"] = " ".join(
[random.choice(string.ascii_lowercase) for _ in range(10)],
)
## random taxon
- properties['taxon'] = str(random.randint(0, 10000))
+ properties["taxon"] = str(random.randint(0, 10000))
## randomly add 'mass'
if random.random() > 0.5:
- properties['mass'] = random.randint(0, 10000)
+ properties["mass"] = random.randint(0, 10000)
return properties
@@ -154,19 +158,21 @@ class RandomPropertyProteinIsoform(RandomPropertyProtein):
"""
Generates instances of protein isoforms with random properties.
"""
+
def __init__(self):
super().__init__()
- self.label = 'uniprot_isoform'
+ self.label = "uniprot_isoform"
class EntrezProtein(Protein):
"""
Generates instances of proteins with Entrez IDs.
"""
+
def __init__(self):
super().__init__()
self.id = self._generate_id()
- self.label = 'entrez_protein'
+ self.label = "entrez_protein"
def _generate_id(self):
"""
@@ -179,6 +185,7 @@ class Interaction:
"""
Base class for interactions.
"""
+
def __init__(self):
self.id = None
self.source_id = None
@@ -222,12 +229,13 @@ class ProteinProteinInteraction(Interaction):
Simulates interactions between proteins given a source and target protein
IDs. Occasionally generates an ID for the interaction itself.
"""
+
def __init__(self, source, target):
super().__init__()
self.id = self._generate_id()
self.source_id = source
self.target_id = target
- self.label = 'interacts_with'
+ self.label = "interacts_with"
self.properties = self._generate_properties()
def _generate_id(self):
@@ -237,18 +245,18 @@ def _generate_id(self):
if random.random() > 0.5:
return None
else:
- return 'intact' + str(random.randint(1, 1000000))
+ return "intact" + str(random.randint(1, 1000000))
def _generate_properties(self):
properties = {}
## randomly add 'source'
if random.random() > 0.5:
- properties['source'] = random.choice(['intact', 'signor'])
+ properties["source"] = random.choice(["intact", "signor"])
## randomly add 'method'
if random.random() > 0.5:
- properties['method'] = ' '.join(
+ properties["method"] = " ".join(
[random.choice(string.ascii_lowercase) for _ in range(10)],
)
@@ -260,6 +268,7 @@ class InteractionGenerator:
Simulates interactions given a list of potential interactors based on an
interaction probability or probability distribution.
"""
+
def __init__(self, interactors: list, interaction_probability: float):
self.interactors = interactors
self.interaction_probability = interaction_probability