From 28679f05940a79687bc01431019e3dbb8c8e4240 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Ma=C3=B1as?=
Date: Fri, 13 May 2022 13:20:48 +0200
Subject: [PATCH] docs: improved documentation for dev/local environments

---
 LOCAL_ENVS.md      | 87 ++++++++++++++++++++++++++++++++++++++++++++++
 README.md          |  7 ++--
 docs/index.md      |  1 +
 docs/local_envs.md |  2 ++
 4 files changed, 95 insertions(+), 2 deletions(-)
 create mode 100644 LOCAL_ENVS.md
 create mode 100755 docs/local_envs.md

diff --git a/LOCAL_ENVS.md b/LOCAL_ENVS.md
new file mode 100644
index 0000000..75f8cbd
--- /dev/null
+++ b/LOCAL_ENVS.md
@@ -0,0 +1,87 @@
+# Local environments
+
+When you work in a local environment, you expect all dependencies of DNARecords to be properly installed in
+that environment.
+
+**It is highly recommended to install DNARecords into a new, empty environment**, whichever environment
+manager you prefer.
+
+Here are a few ways to set up a local/dev environment.
+
+## Test script
+
+To test that the installation is correct, copy this script into a file named `dnarecords-test.py`.
+
+
+```python
+import dnarecords as dr
+
+
+hl = dr.helper.DNARecordsUtils.init_hail()
+hl.utils.get_1kg('/tmp/1kg')
+mt = hl.read_matrix_table('/tmp/1kg/1kg.mt').head(100, 100)
+mt = mt.annotate_entries(dosage=hl.pl_dosage(mt.PL))
+
+path = '/tmp/dnarecords'
+writer = dr.writer.DNARecordsWriter(mt.dosage)
+writer.write(path, sparse=True, write_mode='overwrite', gzip=True,
+             sample_wise=True, variant_wise=True,
+             tfrecord_format=True, parquet_format=True)
+
+spark_reader = dr.reader.DNASparkReader(path)
+cols = ['key', 'chr1_indices', 'chr1_values', 'chr1_dense_shape']
+spark_reader.sample_wise_dnarecords().select(cols).show(1)
+spark_reader.sample_wise_dnaparquet().select(cols).show(1)
+spark_reader.variant_wise_dnarecords().show(1)
+spark_reader.variant_wise_dnaparquet().show(1)
+
+tensor_reader = dr.reader.DNARecordsReader(path)
+print(next(iter(tensor_reader.sample_wise_dataset())))
+print(next(iter(tensor_reader.variant_wise_dataset())))
+```
+
+## Conda
+
+```bash
+$ conda create --prefix ./dna-conda pip
+$ conda activate ./dna-conda
+$ pip install dnarecords
+$ python dnarecords-test.py
+```
+
+Or, if you prefer, you can test it from a jupyter-lab notebook:
+
+```bash
+$ pip install jupyterlab
+$ jupyter-lab
+```
+
+And now, in any cell:
+
+```python
+%run dnarecords-test.py
+```
+
+
+## venv
+
+```bash
+$ python3 -m venv dna-venv
+$ source dna-venv/bin/activate
+$ pip install dnarecords
+$ python dnarecords-test.py
+```
+
+You can run it from a jupyter-lab notebook as well.
+
+## poetry
+
+```bash
+$ git clone https://github.com/amanas/dnarecords.git dna-poetry
+$ cd dna-poetry
+$ poetry shell
+$ pip install dnarecords
+$ python ../dnarecords-test.py
+```
+
+You can run it from a jupyter-lab notebook as well.
diff --git a/README.md b/README.md
index 150e89c..885b9db 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@
 ![example workflow](https://github.com/amanas/dnarecords/actions/workflows/ci-cd.yml/badge.svg)
 [![codecov](https://codecov.io/gh/amanas/dnarecords/branch/main/graph/badge.svg)](https://codecov.io/gh/amanas/dnarecords)
 ![pylint Score](https://mperlet.github.io/pybadge/badges/9.97.svg)
+[![semantic-release: angular](https://img.shields.io/badge/semantic--release-angular-e10079?logo=semantic-release)](https://github.com/semantic-release/semantic-release)
 
 **Genomics data ML ready.**
 
@@ -29,7 +30,9 @@ For that reason, we recommend following these installation tips.
 $ pip install dnarecords
 ```
 
-### **On a Hail cluster or summiting a job to it**
+For further details (or if you run into trouble), see the [Local environments](LOCAL_ENVS.md) section.
+
+### **On a Hail cluster or submitting a job to it**
 
 You will already have Pyspark installed and will not intend to install Tensorflow.
 
@@ -42,7 +45,7 @@ $ /opt/conda/miniconda3/bin/python -m pip install dnarecords --no-deps
 ```
 *Note: assuming Hail python executable is /opt/conda/miniconda3/bin/python*
 
-### **On a Tensorflow environment or summiting a job to it**
+### **On a Tensorflow environment or submitting a job to it**
 
 You will already have Tensorflow installed and will not intend to install Pyspark.
 
diff --git a/docs/index.md b/docs/index.md
index 6fcdc0e..67703e0 100755
--- a/docs/index.md
+++ b/docs/index.md
@@ -6,6 +6,7 @@
 :hidden:
 
 example.ipynb
+local_envs.md
 changelog.md
 contributing.md
 conduct.md
diff --git a/docs/local_envs.md b/docs/local_envs.md
new file mode 100755
index 0000000..9a26fbc
--- /dev/null
+++ b/docs/local_envs.md
@@ -0,0 +1,2 @@
+```{include} ../LOCAL_ENVS.md
+```
\ No newline at end of file
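
The venv and poetry sections above mention that the test script can also be run from a jupyter-lab notebook. A minimal sketch of that flow, mirroring the conda instructions already shown and assuming the environment is activated and `dnarecords-test.py` sits in the current directory:

```bash
# Inside the activated venv/poetry environment (assumed to be set up as above)
$ pip install jupyterlab
$ jupyter-lab
```

Once JupyterLab is running, the same `%run dnarecords-test.py` cell shown in the conda section applies unchanged.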