# Jupyter to Galaxy for nextflow with data from MGnify

* Upload the Nextflow directory with its files and dependencies
* Trigger the Galaxy workflow
* Retrieve the results

In [None]:
import sys
import os
import io

if 'google.colab' in str(get_ipython()):
    # On Colab, clone the momics-demos repository to use the utils module from there
    # TODO: eventually utils from momics will be used for that
    import subprocess

    repo_dir = '/content/momics-demos'
    if os.path.isdir(repo_dir):
        # Re-running this cell: `git clone` refuses to write into an existing
        # non-empty directory, so reuse the checkout that is already there.
        print("Repository already present, skipping clone")
    else:
        try:
            # A failed clone is reported through the exit status, not an exception,
            # so the return code has to be inspected explicitly.
            clone = subprocess.run(
                ['git', 'clone', 'https://github.com/palec87/momics-demos.git', repo_dir],
                check=False,
            )
        except OSError as e:
            # Raised only when the git executable itself cannot be started.
            print(f"An error occurred while cloning the repository: {e}")
        else:
            if clone.returncode == 0:
                print("Repository cloned")
            else:
                print(
                    "An error occurred while cloning the repository: "
                    f"git exited with status {clone.returncode}"
                )

    sys.path.insert(0, repo_dir)

else:
    # Outside Colab, make the parent directory importable so `utils` can be found.
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from utils import init_setup, get_notebook_environment
init_setup()

# Initialize the environment variable
notebook_environment = 'unknown'
# Determine the notebook environment
env = get_notebook_environment()
print(f"Environment: {env}")

## XML for galaxy

**XML taken from [here](https://galaxyproject.org/blog/2022-08-15-making-nextflow-work-with-galaxy-at-cfsan-fda/)**
```xml
<requirements>
    <requirement type="package" version="22.04">nextflow</requirement>
    <requirement type="package">graphviz</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
    nextflow run $__tool_directory__/nf-pipelines/my_pipeline.nf --input /path/to/input
]]></command>
```

**nf-core/rnaseq**
```bash
nextflow run nf-core/rnaseq \
    --input <SAMPLESHEET> \
    --outdir <OUTDIR> \
    --gtf <GTF> \
    --fasta <GENOME FASTA> \
    -profile <docker/singularity/.../institute>
```

**with the input samplesheet (`.csv`) as follows**
```csv
sample,fastq_1,fastq_2,strandedness
CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,auto
CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,auto
CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,auto
```


In [None]:
import os
import sys
import json
from datetime import datetime
from platform import python_version
import logging

from jsonapi_client import Session as APISession
from jsonapi_client import Modifier
import requests

# Import
import bioblend.galaxy as g  # BioBlend is a Python library, wrapping the functionality of Galaxy and CloudMan APIs
# import boto3
import pandas as pd
from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.datasets import DatasetClient

from momics.galaxy.blue_cloud import BCGalaxy
# Load the variables defined in a local .env file into the environment;
# this can be used instead of the dotenv Jupyter magic.
from dotenv import load_dotenv
load_dotenv()

In [3]:
# Galaxy connection settings, taken from the environment (i.e. your .env file).
# Where to find the API key: log in -> 'user' (top right) -> 'preferences'
# -> 'Manage API Key' (menu to the left) -> click the icon to 'copy key'.
# Both values are None when the corresponding variable is not set.
GALAXY_URL = os.environ.get("GALAXY_EARTH_URL")  # e.g. "https://earth-system.usegalaxy.eu/"
GALAXY_KEY = os.environ.get("GALAXY_EARTH_KEY")

## Downloading FASTQ files from ENA (under heavy construction)

In [None]:
# Query the `results` endpoint of the ENA Portal API for the `ena` data portal
# and print the raw response body.
# A timeout is set because requests otherwise waits indefinitely on a stalled server.
r = requests.get(
    "https://www.ebi.ac.uk/ena/portal/api/results?dataPortal=ena",
    timeout=60,
)
content = r.content.decode('utf-8')
print(content)

In [None]:
# Query the `searchFields` endpoint of the ENA Portal API for the `read_run`
# result type and print the raw response body.
# A timeout is set because requests otherwise waits indefinitely on a stalled server.
r = requests.get(
    "https://www.ebi.ac.uk/ena/portal/api/searchFields?result=read_run",
    timeout=60,
)
content = r.content.decode('utf-8')
print(content)

In [None]:
# Search the ENA Portal API for the read runs of a single sample.
# The accession has to be sent as a `query` expression and all parameters must be
# joined with `&`. Passing them via `params` lets requests build and URL-encode
# the query string (including the double quotes around the accession).
# NOTE(review): ERS accessions are assumed to match the
# `secondary_sample_accession` field of `read_run`; confirm against the
# searchFields output.
search = "ERS1153743"
r = requests.get(
    "https://www.ebi.ac.uk/ena/portal/api/search",
    params={
        "result": "read_run",
        "query": f'secondary_sample_accession="{search}"',
        "dataPortal": "ena",
    },
    timeout=60,  # avoid blocking the notebook forever on a stalled server
)
content = r.content.decode('utf-8')
print(content)

In [None]:
# Request the FASTA record for the accession held in `search` from the
# ENA Browser API and print the raw response body.
# A timeout is set because requests otherwise waits indefinitely on a stalled server.
r = requests.get(
    f"https://www.ebi.ac.uk/ena/browser/api/fasta/{search}",
    timeout=60,
)
content = r.content.decode('utf-8')
print(content)

## Loading local FASTQ files