In [4]:
import requests
import pandas as pd
import json
import subprocess
import os
import urllib.request

In [12]:
# Helper functions

def delete_file(file_path):
    """Remove ``file_path`` from disk if it is present.

    The outcome is reported on stdout rather than through the return value
    (the function always returns ``None``). Any exception raised while
    checking for or removing the path is caught and printed, so this helper
    never raises.

    Args:
        file_path: Path of the file to remove.
    """
    try:
        # Guard clause: nothing to remove, just say so.
        if not os.path.exists(file_path):
            print(f"File {file_path} does not exist.")
            return
        os.remove(file_path)
        print(f"File {file_path} has been deleted.")
    except Exception as e:
        print(f"Error occurred while trying to delete the file: {e}")

def download_ftp_file(ftp_url, local_directory):
    """Download ``ftp_url`` into ``local_directory`` unless it is already there.

    The data is first written to ``<target>.part`` and only renamed to the
    final name once the transfer has completed. An interrupted or failed
    download therefore never leaves a truncated file under the final name,
    which the "already exists" check would otherwise skip on the next run.

    Args:
        ftp_url: URL of the file to fetch; the local file name is the last
            path component of this URL.
        local_directory: Directory to store the file in (created if missing).

    Returns:
        The local path of the file, whether it was just downloaded or was
        already present.

    Raises:
        ValueError: If no file name can be derived from ``ftp_url``.
        Exception: Whatever ``urllib.request.urlretrieve`` raises on failure
            is propagated after the partial file has been cleaned up.
    """
    # Parse the filename from the URL
    filename = os.path.basename(ftp_url)
    if not filename:
        # Without this, the target path would be the directory itself and the
        # function would wrongly report "already exists".
        raise ValueError(f"Cannot derive a file name from URL: {ftp_url!r}")

    # Ensure the local directory exists
    os.makedirs(local_directory, exist_ok=True)

    # Define the local file path
    local_file_path = os.path.join(local_directory, filename)

    # Check if the file already exists
    if os.path.exists(local_file_path):
        print(f"File {local_file_path} already exists. Skipping download.")
        return local_file_path

    partial_path = local_file_path + ".part"
    print(f"Downloading {filename} to {local_file_path}...")
    try:
        urllib.request.urlretrieve(ftp_url, partial_path)
        # Publish the file under its final name only after a complete transfer.
        os.replace(partial_path, local_file_path)
    finally:
        # Runs on errors and on KeyboardInterrupt; after a successful rename
        # the partial path no longer exists and nothing is removed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"Downloaded {filename} to {local_file_path}")
    return local_file_path



Downloading SRR8052713_1.fastq.gz to ./downloaded_files/SRR8052713_1.fastq.gz...
Downloaded SRR8052713_1.fastq.gz to ./downloaded_files/SRR8052713_1.fastq.gz


In [22]:
import docker

def is_docker_running():
    """Report whether a Docker daemon can be reached from this environment.

    Builds a client from the environment configuration and pings the daemon.
    The result is also printed to stdout.

    Returns:
        True if the ping succeeds, False if the client could not be created
        or the daemon could not be contacted.
    """
    client = None
    try:
        client = docker.from_env()
        client.ping()
    except (docker.errors.DockerException, requests.exceptions.RequestException) as e:
        # APIError alone only covers error responses from a daemon that
        # answered. DockerException is the SDK's base error class, and
        # RequestException covers transport-level connection failures, so an
        # unreachable daemon yields False instead of an uncaught traceback.
        print(f"Failed to connect to Docker daemon: {e}")
        return False
    finally:
        # Release the client's connection pool whether or not the ping worked.
        if client is not None:
            client.close()
    print("Docker daemon is running.")
    return True

# Probe the Docker daemon; Docker-dependent code belongs after this check.
if not is_docker_running():
    print("Docker is not running or not reachable.")


Docker daemon is running.


In [19]:
# Query the ENA Portal API file report for every read run of the project and
# collect the FASTQ FTP locations.
bio_project_id = "PRJNA496323"

# Report columns requested from ENA (same set and order as before).
ena_fields = [
    "study_accession", "secondary_study_accession", "sample_accession",
    "secondary_sample_accession", "experiment_accession", "run_accession",
    "submission_accession", "tax_id", "scientific_name", "instrument_platform",
    "instrument_model", "library_name", "nominal_length", "library_layout",
    "library_strategy", "library_source", "library_selection", "read_count",
    "base_count", "center_name", "first_public", "last_updated",
    "experiment_title", "study_title", "study_alias", "experiment_alias",
    "run_alias", "fastq_bytes", "fastq_md5", "fastq_ftp", "fastq_aspera",
    "fastq_galaxy", "submitted_bytes", "submitted_md5", "submitted_ftp",
    "submitted_aspera", "submitted_galaxy", "submitted_format", "sra_bytes",
    "sra_md5", "sra_ftp", "sra_aspera", "sra_galaxy", "sample_alias",
    "broker_name", "sample_title", "nominal_sdev", "first_created", "bam_ftp",
    "bam_bytes", "bam_md5",
]

r = requests.get(
    "https://www.ebi.ac.uk/ena/portal/api/filereport",
    params={
        "result": "read_run",
        "accession": bio_project_id,
        "limit": 1000,
        "format": "json",
        "fields": ",".join(ena_fields),
    },
    timeout=60,  # do not let a stalled connection hang the kernel
)
# Fail loudly on HTTP errors instead of parsing an error body as run data.
r.raise_for_status()
# Despite the name, this is the decoded JSON payload, iterated below as a
# sequence of per-run records.
response_dict = r.json()

# One entry per run; each entry is a ';'-separated string of FASTQ locations.
# Runs with a missing or empty 'fastq_ftp' value are left out so that no empty
# link reaches the download step.
ftp_links = [run["fastq_ftp"] for run in response_dict if run.get("fastq_ftp")]
ftp_links

['ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/009/SRR8052709/SRR8052709_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/009/SRR8052709/SRR8052709_2.fastq.gz',
 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/003/SRR8052713/SRR8052713_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/003/SRR8052713/SRR8052713_2.fastq.gz',
 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/007/SRR8052717/SRR8052717_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/007/SRR8052717/SRR8052717_2.fastq.gz',
 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/000/SRR8052720/SRR8052720_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/000/SRR8052720/SRR8052720_2.fastq.gz',
 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/006/SRR8052726/SRR8052726_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/006/SRR8052726/SRR8052726_2.fastq.gz',
 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/000/SRR8052730/SRR8052730_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/000/SRR8052730/SRR8052730_2.fastq.gz',
 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR805/001/SRR8052731/SRR8052731_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fa

In [25]:
# Download the FASTQ files of the selected run(s) into a local folder.
local_directory = './downloaded_files'

# NOTE: only the entry at index 5 is fetched here; widen the slice to download
# more runs.
for link_pair in ftp_links[5:6]:
    # Each entry is a ';'-separated list of locations; drop empty segments so
    # an empty entry never turns into the bogus URL "ftp://".
    files = [part.strip() for part in link_pair.split(";") if part.strip()]
    for file in files:

        # The links come without a scheme; add ftp:// only when no scheme is
        # present at all, so URLs that already carry one are left untouched.
        if "://" not in file:
            file = "ftp://" + file

        download_ftp_file(file, local_directory)


Downloading SRR8052730_1.fastq.gz to ./downloaded_files/SRR8052730_1.fastq.gz...


In [None]:
# Download Salmon Docker Image
try:
    # Execute docker pull command; check=True turns a non-zero exit status
    # into CalledProcessError.
    subprocess.run(['docker', 'pull', 'combinelab/salmon:latest'], check=True)
    print("Docker image 'combinelab/salmon:latest' pulled successfully.")
except (subprocess.CalledProcessError, OSError) as e:
    # OSError covers the case where the docker executable cannot be started
    # at all (e.g. it is not on PATH), which previously escaped this handler.
    print(f"Failed to pull Docker image: {e}")


In [None]:
# Download the human reference transcript sequences (GENCODE release 39)
url = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_39/gencode.v39.transcripts.fa.gz"
filename = "gencode.v39.transcripts.fa.gz"

try:
    # Reuse the notebook's download helper: it stores the file under the URL's
    # base name (identical to `filename`) in the given directory and skips the
    # transfer when the file is already present, so re-running this cell does
    # not fetch the archive again. The helper prints its own progress messages.
    download_ftp_file(url, ".")
except Exception as e:
    print(f"Failed to download file: {e}")


In [17]:
# Build Index

# Get the current working directory; it is mounted into the container so
# Salmon can read the transcripts and write the index next to this notebook.
cwd = os.getcwd()

# Use the gzipped FASTA exactly as the download step saves it. The notebook
# has no decompression step, so the former "gencode.v39.transcripts.fa" path
# only existed if the archive had been unpacked by hand.
# NOTE(review): Salmon's documentation shows `salmon index` reading a gzipped
# transcript FASTA directly — confirm for the image version in use.
transcripts_fasta = "gencode.v39.transcripts.fa.gz"

# Define the Docker command
command = [
    "docker", "run", "--rm",
    "-v", f"{cwd}:/workdir",
    "-w", "/workdir",
    "combinelab/salmon:latest",
    "salmon", "index",
    "-t", transcripts_fasta,
    "-i", "gencode_index_2"
]

# Execute the command. Output is captured, so nothing is shown until the
# container exits.
result = subprocess.run(command, capture_output=True, text=True)

# Print the output and error (if any)
print("Output:", result.stdout)
print("Error:", result.stderr)
print("Return Code:", result.returncode)


KeyboardInterrupt: 