From 28679f05940a79687bc01431019e3dbb8c8e4240 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Ma=C3=B1as?=
Date: Fri, 13 May 2022 13:20:48 +0200
Subject: [PATCH] docs: improved documentation for dev/local environments

---
 LOCAL_ENVS.md      | 87 ++++++++++++++++++++++++++++++++++++++++++++++
 README.md          |  7 ++--
 docs/index.md      |  1 +
 docs/local_envs.md |  2 ++
 4 files changed, 95 insertions(+), 2 deletions(-)
 create mode 100644 LOCAL_ENVS.md
 create mode 100755 docs/local_envs.md

diff --git a/LOCAL_ENVS.md b/LOCAL_ENVS.md
new file mode 100644
index 0000000..75f8cbd
--- /dev/null
+++ b/LOCAL_ENVS.md
@@ -0,0 +1,87 @@
+# Local environments
+
+When you work in a local environment, you expect all dependencies of DNARecords to be properly installed in
+that environment.
+
+**It is highly recommended to install DNARecords into a new, empty environment**, whichever environment
+manager you prefer.
+
+Here are a few ways to set up a local/dev environment.
+
+## Test script
+
+To test that the installation is correct, copy this script into a file named `dnarecords-test.py`.
+
+
+```python
+import dnarecords as dr
+
+
+hl = dr.helper.DNARecordsUtils.init_hail()
+hl.utils.get_1kg('/tmp/1kg')
+mt = hl.read_matrix_table('/tmp/1kg/1kg.mt').head(100, 100)
+mt = mt.annotate_entries(dosage=hl.pl_dosage(mt.PL))
+
+path = '/tmp/dnarecords'
+writer = dr.writer.DNARecordsWriter(mt.dosage)
+writer.write(path, sparse=True, write_mode='overwrite', gzip=True,
+             sample_wise=True, variant_wise=True,
+             tfrecord_format=True, parquet_format=True)
+
+spark_reader = dr.reader.DNASparkReader(path)
+cols = ['key', 'chr1_indices', 'chr1_values', 'chr1_dense_shape']
+spark_reader.sample_wise_dnarecords().select(cols).show(1)
+spark_reader.sample_wise_dnaparquet().select(cols).show(1)
+spark_reader.variant_wise_dnarecords().show(1)
+spark_reader.variant_wise_dnaparquet().show(1)
+
+tensor_reader = dr.reader.DNARecordsReader(path)
+print(next(iter(tensor_reader.sample_wise_dataset())))
+print(next(iter(tensor_reader.variant_wise_dataset())))
+```
+
+## Conda
+
+```bash
+$ conda create --prefix ./dna-conda pip
+$ conda activate ./dna-conda
+$ pip install dnarecords
+$ python dnarecords-test.py
+```
+
+Or, if you prefer, you can test it from a jupyter-lab notebook:
+
+```bash
+$ pip install jupyterlab
+$ jupyter-lab
+```
+
+And now, in any cell:
+
+```python
+%run dnarecords-test.py
+```
+
+
+## venv
+
+```bash
+$ python3 -m venv dna-venv
+$ source dna-venv/bin/activate
+$ pip install dnarecords
+$ python dnarecords-test.py
+```
+
+You can run it from a jupyter-lab notebook as well.
+
+## poetry
+
+```bash
+$ git clone https://github.com/amanas/dnarecords.git dna-poetry
+$ cd dna-poetry
+$ poetry shell
+$ pip install dnarecords
+$ python ../dnarecords-test.py
+```
+
+You can run it from a jupyter-lab notebook as well.
diff --git a/README.md b/README.md
index 150e89c..885b9db 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@
 ![example workflow](https://github.com/amanas/dnarecords/actions/workflows/ci-cd.yml/badge.svg)
 [![codecov](https://codecov.io/gh/amanas/dnarecords/branch/main/graph/badge.svg)](https://codecov.io/gh/amanas/dnarecords)
 ![pylint Score](https://mperlet.github.io/pybadge/badges/9.97.svg)
+[![semantic-release: angular](https://img.shields.io/badge/semantic--release-angular-e10079?logo=semantic-release)](https://github.com/semantic-release/semantic-release)
 
 **Genomics data ML ready.**
 
@@ -29,7 +30,9 @@ For that reason, we recommend following these installation tips.
 $ pip install dnarecords
 ```
 
-### **On a Hail cluster or summiting a job to it**
+For further details (or if you run into trouble), see the [Local environments](LOCAL_ENVS.md) section.
+
+### **On a Hail cluster or submitting a job to it**
 
 You will already have Pyspark installed and will not intend to install Tensorflow.
 
@@ -42,7 +45,7 @@ $ /opt/conda/miniconda3/bin/python -m pip install dnarecords --no-deps
 ```
 *Note: assuming Hail python executable is /opt/conda/miniconda3/bin/python*
 
-### **On a Tensorflow environment or summiting a job to it**
+### **On a Tensorflow environment or submitting a job to it**
 
 You will already have Tensorflow installed and will not intend to install Pyspark.
 
diff --git a/docs/index.md b/docs/index.md
index 6fcdc0e..67703e0 100755
--- a/docs/index.md
+++ b/docs/index.md
@@ -6,6 +6,7 @@
 :hidden:
 
 example.ipynb
+local_envs.md
 changelog.md
 contributing.md
 conduct.md
diff --git a/docs/local_envs.md b/docs/local_envs.md
new file mode 100755
index 0000000..9a26fbc
--- /dev/null
+++ b/docs/local_envs.md
@@ -0,0 +1,2 @@
+```{include} ../LOCAL_ENVS.md
+```
\ No newline at end of file
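
The venv and poetry sections above mention that the test script can also be run from a jupyter-lab notebook. A minimal sketch of that flow, mirroring the conda instructions already shown and assuming the environment is activated and `dnarecords-test.py` sits in the current directory:

```bash
# Inside the activated venv/poetry environment (assumed to be set up as above)
$ pip install jupyterlab
$ jupyter-lab
```

Once JupyterLab is running, the same `%run dnarecords-test.py` cell shown in the conda section applies unchanged.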