# Download ATACseq Level2 (bam) & Level3 (peaks) data from a Galaxy history

In [1]:
import sys
import os
from glob import glob as GlobToFilePaths
from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.histories import HistoryClient
from bioblend.galaxy.tools import ToolClient
from bioblend.galaxy.workflows import WorkflowClient
from bioblend.galaxy.datasets import DatasetClient
from bioblend.galaxy.libraries import LibraryClient

## I. Connect to Galaxy Instance

#### You will need a `.env` file in the notebook's working directory that defines `API_KEY` (your Galaxy API key)

In [2]:
import environ

# '.' is the directory the notebook is run from; the .env file is looked up there.
ROOT_DIR = environ.Path('.')

# Load operating system environment variables and then prepare to use them
env = environ.Env()

# Operating System Environment variables have precedence over variables defined in the .env file,
# that is to say variables from the .env files will only be used if not defined
# as environment variables.
env_file = str(ROOT_DIR.path('.env'))
print('Loading : {}'.format(env_file))
if os.path.isfile(env_file):
    env.read_env(env_file)
    print('The .env file has been loaded.')
else:
    # Only report success when there is actually a file to load. A missing
    # .env is not fatal here because API_KEY may still come from the OS
    # environment; the lookup of API_KEY will fail later if it is set nowhere.
    print('No .env file found at {}; relying on OS environment variables only.'.format(env_file))

Loading : /Users/alex/Documents/galaxy-neurolincs/bioblend/.env
The .env file has been loaded.


In [3]:
# Galaxy server to talk to, and the personal API key read from the
# environment / .env file (never hard-code the key in the notebook).
GALAXY_URL = "https://answer.csbi.mit.edu"
API_KEY = env("API_KEY")

# Sanity check without printing the secret: this should display False.
# `is None` is the correct identity test for None (PEP 8), unlike `== None`.
API_KEY is None

False

In [4]:
# Single authenticated client used by every later cell to query histories
# and datasets on the Galaxy server.
galaxyInstance = GalaxyInstance(
    url=GALAXY_URL,
    key=API_KEY,
)

## II. Get All ATACseq Histories

In [5]:
# Fetch the histories visible to this API key, then keep only the ones
# flagged as published.
all_histories = galaxyInstance.histories.get_histories()
published_histories = [entry for entry in all_histories if entry['published']]
published_histories

[{'annotation': None,
  'deleted': False,
  'id': '0b900c60f93c0654',
  'model_class': 'History',
  'name': 'NeuroLINCS iMNs: (level1 > level3) RNAseq pipeline history',
  'published': True,
  'purged': False,
  'tags': [],
  'url': '/api/histories/0b900c60f93c0654'},
 {'annotation': None,
  'deleted': False,
  'id': '6b7e1d14aa0742ec',
  'model_class': 'History',
  'name': 'NeuroLINCS iPSC: (level1 > level3) RNAseq pipeline history',
  'published': True,
  'purged': False,
  'tags': [],
  'url': '/api/histories/6b7e1d14aa0742ec'},
 {'annotation': None,
  'deleted': False,
  'id': '1e3a7ada6d556982',
  'model_class': 'History',
  'name': 'Peak_Calling_Test_iMNs',
  'published': True,
  'purged': False,
  'tags': [],
  'url': '/api/histories/1e3a7ada6d556982'},
 {'annotation': None,
  'deleted': False,
  'id': 'd0bfe935d0f5258d',
  'model_class': 'History',
  'name': 'NeuroLINCS Exp 2: iMNs - ATAC pipeline',
  'published': True,
  'purged': False,
  'tags': [],
  'url': '/api/histories/

In [6]:
# Hand-picked IDs of the published ATAC-seq pipeline histories.
# The first entry is the one selected for download further down.
ATACseq_history_IDs = [
    'd0bfe935d0f5258d',
    '36ddb788a0f14eb3',
    'f0f309c56aff0025',
    '52e496b945151ee8',
    'f7bb1edd6b95db62',
]

In [8]:
# Collect the display names (not the full records) of the published
# histories whose ID is in the ATAC-seq list, in the order the server
# returned them.
published_ATACseq_histories = []
for published in published_histories:
    if published['id'] in ATACseq_history_IDs:
        published_ATACseq_histories.append(published['name'])
published_ATACseq_histories

['NeuroLINCS Exp 2: iMNs - ATAC pipeline',
 'NeuroLINCS Exp 3: d18 diMNs - ATAC pipeline',
 'NeuroLINCS iMNs Johns Hopkins - ATAC pipeline',
 'd32 diMNs Johns Hopkins - ATAC pipeline',
 'NeuroLINCS Exp 4: d32 diMNs - ATAC pipeline']

## III. Choose ATAC history, find relevant dataset IDs

In [13]:
# Work on the first ATAC-seq history in the list.
history_id = ATACseq_history_IDs[0]

# Ask Galaxy for the items of that history, then reduce each item to a
# (dataset_id, name) pair; .get() yields None for items lacking either key.
raw_history_items = galaxyInstance.histories.show_history(history_id, contents=True)
history_contents = [
    (item.get('dataset_id'), item.get('name'))
    for item in raw_history_items
]
history_contents

[('4f7cc1bb8a9d4ca5', 'A-042-00iCTR-iMNs_merged.fastq'),
 ('facef090b66760d3', 'A-042-25iCTR-iMNs_merged.fastq'),
 ('caf2f9e90e891d99', 'A-042-28iALS-iMNs_merged.fastq'),
 ('d3aa84ec8de72a88', 'A-042-29iALS-iMNs_merged.fastq'),
 ('753ad6be8086242f', 'A-042-30iALS-iMNs_merged.fastq'),
 ('29abca4d77da1498', 'A-042-32iSMA-iMNs_merged.fastq'),
 ('1cfde0ac77382e47', 'A-042-52iALS-iMNs_merged.fastq'),
 ('346a1b5d64cf73f2', 'A-042-77iSMA-iMNs_merged.fastq'),
 ('852e556f71dcab41', 'A-042-83iCTR-iMNs_merged.fastq'),
 ('c8c1111eb1801d20', 'A-042-83iSMA-iMNs_merged.fastq'),
 ('47ccc7c6a9e6bfad', 'A-042-00iCTR-iMNs_merged.fastq'),
 ('9c2f6b6c1e8ba742', 'A-042-25iCTR-iMNs_merged.fastq'),
 ('15d4615b874e6e06', 'A-042-28iALS-iMNs_merged.fastq'),
 ('c2be911056a03e76', 'A-042-29iALS-iMNs_merged.fastq'),
 ('59452c8c6453038f', 'A-042-30iALS-iMNs_merged.fastq'),
 ('6c5ceca5232566bb', 'A-042-32iSMA-iMNs_merged.fastq'),
 ('e0a91154092cc382', 'A-042-52iALS-iMNs_merged.fastq'),
 ('32be888db814288a', 'A-042-77

In [27]:
# Select the history items to download by their Galaxy display name:
#   * names starting with 'Sort on data'  (presumably the sorted BAMs, i.e. Level2 -- confirm)
#   * names ending with '(narrow Peaks)'  (MACS2 narrowPeak outputs, i.e. Level3)
# startswith/endswith avoid hand-counted slice lengths that silently stop
# matching if either literal is ever edited.
SORT_NAME_PREFIX = 'Sort on data'
NARROW_PEAKS_NAME_SUFFIX = '(narrow Peaks)'

# Each kept element is the same (dataset_id, name) tuple found in history_contents.
desired_dataset_ids = [
    history_item
    for history_item in history_contents
    if history_item[1].startswith(SORT_NAME_PREFIX)
    or history_item[1].endswith(NARROW_PEAKS_NAME_SUFFIX)
]
desired_dataset_ids

[('a9fd212685f6022b', 'Sort on data 63'),
 ('055c74093e851952', 'Sort on data 64'),
 ('209778aefcd144c0', 'Sort on data 65'),
 ('9d9ddb54d4721c8c', 'Sort on data 66'),
 ('d14ebfc5853c403d', 'Sort on data 67'),
 ('c71394df20d61ef7', 'Sort on data 68'),
 ('5e83903d11b4dc3e', 'Sort on data 69'),
 ('a334f9576e3b2235', 'Sort on data 70'),
 ('3c1244bd0e96458d', 'Sort on data 71'),
 ('bfcec2067b3505d8', 'Sort on data 72'),
 ('ea3adf04ae4fa3cd', 'Sort on data 73'),
 ('6621d3c323040a23', 'MACS2 callpeak on data 24 and data 75 (narrow Peaks)'),
 ('fc39724f289d6f0e', 'MACS2 callpeak on data 24 and data 76 (narrow Peaks)'),
 ('c5c14ec17f785f9e', 'MACS2 callpeak on data 24 and data 77 (narrow Peaks)'),
 ('00921075858e2c4f', 'MACS2 callpeak on data 24 and data 78 (narrow Peaks)'),
 ('2e751b854e2da6ae', 'MACS2 callpeak on data 24 and data 79 (narrow Peaks)'),
 ('ec1c8a683f62fd23', 'MACS2 callpeak on data 24 and data 80 (narrow Peaks)'),
 ('aac596740088c04a', 'MACS2 callpeak on data 24 and data 81 (na

## IV. Download

In [None]:
# Stream every selected dataset to disk, one file per dataset.
# When download_dataset is called without `file_path` it returns the dataset
# contents instead of writing a file, which would hold every BAM/peak file in
# memory and make the notebook try to render them as the cell output.
DOWNLOAD_DIR = os.path.join('downloads', history_id)
if not os.path.isdir(DOWNLOAD_DIR):
    os.makedirs(DOWNLOAD_DIR)

# NOTE(review): the [:-1] slice skips the last selected dataset. It is kept
# unchanged here -- confirm that leaving the last item out is intentional.
for dataset_id, dataset_name in desired_dataset_ids[:-1]:
    print('Downloading {} ({})'.format(dataset_name, dataset_id))
    # With use_default_filename=True, file_path is treated as a directory and
    # the file name is chosen by Galaxy.
    galaxyInstance.datasets.download_dataset(
        dataset_id,
        file_path=DOWNLOAD_DIR,
        use_default_filename=True,
    )