In [2]:
import pandas as pd
import os
import shutil
from tqdm.notebook import tqdm

In [11]:
import ftplib
import ssl


class ImplicitFTP_TLS(ftplib.FTP_TLS):
    """FTP_TLS subclass that automatically wraps sockets in SSL to support implicit FTPS.

    ``ftplib.FTP_TLS`` only upgrades the control connection after an explicit
    ``AUTH TLS`` command. Here the ``sock`` attribute is a property whose
    setter wraps every plain socket with ``self.context`` as soon as it is
    assigned, so the very first bytes exchanged are already encrypted.
    """

    def __init__(self, *args, **kwargs):
        # The backing attribute must exist *before* delegating to the base
        # constructor: when a host is passed, ``FTP.__init__`` connects right
        # away and assigns ``self.sock``. Resetting ``_sock`` afterwards would
        # silently discard that freshly connected socket.
        self._sock = None
        super().__init__(*args, **kwargs)

    @property
    def sock(self):
        """Return the current control socket (``None`` when not connected)."""
        return self._sock

    @sock.setter
    def sock(self, value):
        """Store the socket, wrapping it in TLS first if it is still a plain socket."""
        # ``None`` is assigned by ftplib on close; already-wrapped sockets are
        # stored untouched so they are never wrapped twice.
        if value is not None and not isinstance(value, ssl.SSLSocket):
            value = self.context.wrap_socket(value)
        self._sock = value

In [38]:
def download_biosino(data_links_df, output_dir: str = './', user=None, passwd=None):
    """
    Download every file listed in a biosino "Export Data Links" table.

    Files are fetched from fms.biosino.org (port 2122) through ImplicitFTP_TLS
    and stored as ``<output_dir>/<run_id>/<fileName>``. A file that already
    exists locally is skipped. Each transfer is written to a ``.part`` file
    and renamed only after it completes, so an interrupted or failed download
    is retried on the next call instead of being mistaken for a finished one.

    param:data_links_df - pd.DataFrame - should be downloaded from biosino database via Export Data Links;
                          the columns ``run_id`` and ``fileName`` are read
    param:output_dir - str or os.PathLike - directory that receives one sub-directory per run
    param:user - str, optional - account name; falls back to the BIOSINO_USER environment variable
    param:passwd - str, optional - account password; falls back to the BIOSINO_PASSWORD environment variable
    return: None
    raises: ValueError if no credentials are supplied; any connection/login
            error is re-raised after a message is printed
    """
    import os
    import pandas as pd

    # Invalid arguments are reported with a message rather than an exception.
    if not isinstance(data_links_df, pd.DataFrame):
        print('data_links_df should be a pandas DataFrame')
        return
    if not isinstance(output_dir, (str, os.PathLike)):
        print('output_dir should be a str or path-like object')
        return

    # Group first: a malformed table then fails before any session is opened.
    run_ids = data_links_df.groupby('run_id')['fileName'].agg(list).to_dict()
    if not run_ids:
        print('No files to download')
        return

    # Credentials are never stored in the source; take them from the caller
    # or from the environment.
    if user is None:
        user = os.environ.get('BIOSINO_USER')
    if passwd is None:
        passwd = os.environ.get('BIOSINO_PASSWORD')
    if not user or not passwd:
        raise ValueError(
            'biosino credentials are required: pass user=/passwd= or set the '
            'BIOSINO_USER and BIOSINO_PASSWORD environment variables'
        )

    # Imported only once a download will actually run.
    from tqdm.notebook import tqdm

    ftp_client = ImplicitFTP_TLS()
    try:
        # Connect to fms.biosino.org with FTP over TLS protocol
        try:
            ftp_client.connect(host='fms.biosino.org', port=2122)
            ftp_client.login(user=user, passwd=passwd)
            ftp_client.prot_p()
        except Exception:
            # Nothing can be downloaded without a session, so report and stop
            # instead of continuing with an unusable client.
            print('An error occured while connecting to fms.biosino.org')
            raise
        print('Connected to fms.biosino.org')

        # Download files in output directory
        for run_id, file_names in tqdm(run_ids.items(), desc='Downloading data'):
            local_dir = os.path.join(output_dir, run_id)
            os.makedirs(local_dir, exist_ok=True)
            for file_name in file_names:
                local_path = os.path.join(local_dir, file_name)
                if os.path.isfile(local_path):
                    continue
                path_on_server = f'/Public/byrun/{run_id[0:5]}/{run_id[0:7]}/{run_id}/{file_name}'
                part_path = local_path + '.part'
                try:
                    print(f'Downloading {run_id}:{file_name}')
                    with open(part_path, 'wb') as part_handle:
                        ftp_client.retrbinary(f'RETR {path_on_server}', part_handle.write)
                    # Publish under the final name only after a complete transfer.
                    os.replace(part_path, local_path)
                    print(f'{run_id}:{file_name} successfully downloaded')
                except Exception as e:
                    # KeyboardInterrupt is not an Exception subclass, so it
                    # still propagates and stops the whole download.
                    print(f'An error occured while downloading {path_on_server}, trying next file')
                    print(e)
                finally:
                    # Runs on failure and on interrupt: never leave a
                    # truncated file behind.
                    if os.path.exists(part_path):
                        os.remove(part_path)
    finally:
        # Always release the control connection, even after an error.
        try:
            ftp_client.quit()
        except Exception:
            ftp_client.close()