In [11]:
import os
import subprocess
from ftplib import FTP
from urllib.parse import quote, urlparse

# Use FTP to get the full URLs for all the samples
Use the `FTP` class from the `ftplib` library to get the full URLs for all the samples.

In [12]:
def get_full_paths(host, user, password, remote_dir='01.RawData'):
    """
    List the entries of a directory on the FTP server as full FTP URLs.

    Logs in to ``host``, changes into ``remote_dir`` and builds one URL per
    entry returned by ``ftp.nlst()``. The credentials are embedded in every
    URL (``ftp://user:password@host:21/remote_dir/entry``), so the returned
    strings must be treated as secrets.

    Parameters: host, user and password are given by Novogene
    host (str): host name
    user (str): user name
    password (str): password
    remote_dir (str): directory on the server to list (default '01.RawData')

    Returns:
    list of str: one full FTP URL per entry found in ``remote_dir``
    """

    # Percent-encode the credentials so that characters such as '@', ':',
    # '/' or '#' in the user name or password cannot break the URL structure.
    credentials = f"{quote(user, safe='')}:{quote(password, safe='')}"
    directory = f"ftp://{credentials}@{host}:21/{remote_dir}"

    # The context manager closes the connection even when login, cwd or the
    # listing raises, so a failure does not leak the socket.
    with FTP(host) as ftp:
        ftp.login(user=user, passwd=password)
        ftp.cwd(remote_dir)  # change to the target directory
        files = ftp.nlst()

    return [f"{directory}/{file}" for file in files]

## Use `subprocess`
Use `subprocess` to execute a shell command that invokes `wget_novogene.sh`. Tested on Sep. 24, 2024.

In [14]:
def check_directory(dest_path):
    """
    Report whether ``dest_path`` is an existing directory.

    Prints a message naming the path when it is not a directory.

    Returns:
    bool: True if the directory exists, False otherwise
    """
    if os.path.isdir(dest_path):
        return True
    print(f"Directory does not exist: {dest_path}")
    return False

def check_file_exists(file_path):
    """
    Report whether ``file_path`` is an existing regular file.

    Prints a message naming the path when it is not a file.

    Returns:
    bool: True if the file exists, False otherwise
    """
    found = os.path.isfile(file_path)
    if not found:
        print(f"File does not exists: {file_path}")
    return found

def get_last_level(ftp_url):
    """
    Return the last component of the path part of ``ftp_url``.

    Leading and trailing slashes of the path are ignored; an empty path
    yields an empty string.
    """
    url_path = urlparse(ftp_url).path
    # Drop the surrounding slashes, then keep only what follows the final '/'.
    _, _, last_level = url_path.strip('/').rpartition('/')
    return last_level


def summit_subprocess_slurm(slurm_shell, full_paths, dest_path,
                            mail_user,
                            account='cpu-Sarthy_lab-sponsored'):
    """
    Submit one Slurm job per sample URL with ``sbatch`` and print the outcome.

    For every URL in ``full_paths`` the command
    ``sbatch --account <account> --mail-user <mail_user> <slurm_shell> <url> <sub_dest>``
    is run, where ``<sub_dest>`` is ``dest_path`` joined with the last path
    component of the URL. After all submissions, stdout is printed for each
    successful one and stderr for each failed one.

    Parameters:
    slurm_shell (str): path to the slurm shell script; must be an existing file
    full_paths (list str): a list of full URL of Novogene FTP folders of all samples
    dest_path (str): destination path; must be an existing directory
    mail_user (str): address passed to ``sbatch --mail-user``
    account (str): sasquatch sponsored account, i.e., cpu-<assoc lab>-sponsored

    Returns:
    list of subprocess.CompletedProcess: one entry per URL, in input order,
    or None when ``dest_path`` is not a directory or ``slurm_shell`` is not
    a file (a message is printed in that case).
    """

    # sanity check dest_path and slurm_shell
    if not check_directory(dest_path):
        return None

    if not check_file_exists(slurm_shell):
        return None

    results = []
    for path in full_paths:
        # Path of the per-sample sub-directory under dest_path. Only the path
        # is computed here; this function does not create the directory.
        sub_dest = os.path.join(dest_path, get_last_level(path))
        # Pass an argument list (no shell) so URLs, credentials or paths that
        # contain spaces or shell metacharacters reach sbatch unchanged.
        command = [
            "sbatch",
            "--account", account,
            "--mail-user", mail_user,
            slurm_shell, path, sub_dest,
        ]
        results.append(subprocess.run(command, capture_output=True, text=True))

    # Report the outcome of each submission
    for completed in results:
        if completed.returncode == 0:
            print("Command executed successfully!")
            print("Output:\n", completed.stdout)
        else:
            # stderr belongs to the individual submission, not to the list
            print("Error:", completed.stderr)
    return results
    

# Actions
- `get_full_paths(host, user, password)`
- `summit_subprocess_slurm(slurm_shell, full_paths, dest_path, mail_user, account)`

## `get_full_paths()`

In [None]:

# define parameters (fill in the values provided by Novogene; do not commit them)
host = ''
user = ''
password = ''

# get full URLs for each samples
# use get_full_paths to get FTP URLs
full_paths = get_full_paths(host, user, password)
# skip entries whose URL contains 'Undetermined'
filtered_paths = [p for p in full_paths if 'Undetermined' not in p]

# Every URL embeds the FTP user name and password, so print only the sample
# folder name to keep the credentials out of the saved notebook output.
for item in filtered_paths:
    print(get_last_level(item))

## `summit_subprocess_slurm`

In [26]:
# define parameters
main_dir = '/data/hps/assoc/private/sarthy_lab/NGS/FASTQs'
dest_path = os.path.join(main_dir, 'testing_GBe11')
cluster_account = 'cpu-Sarthy_lab-sponsored'
mail_user = 'chao-jen.wong@seattlechildrens.org'
# Absolute path of the job script. It is not located under main_dir, so it is
# assigned directly (os.path.join would discard main_dir for an absolute path).
slurm_shell = '/data/hps/assoc/private/sarthy_lab/user/cwo11/projects/wget_novogene_using_slurm/wget_novogene.sh'

# call summit_subprocess_slurm
results = summit_subprocess_slurm(slurm_shell = slurm_shell,
                                  full_paths  = filtered_paths,
                                  dest_path   = dest_path,
                                  mail_user   = mail_user,
                                  account     = cluster_account)
# misspelled name kept as an alias so existing references keep working
resutls = results

Signature: ftp.login(user='', passwd='', acct='')
Docstring: Login, default anonymous.
File:      /opt/python/3.12.5/lib/python3.12/ftplib.py
Type:      method