In [51]:
# Import required libraries
import os
from os import listdir, stat, makedirs, mkdir, walk, remove, pardir, rename
from os.path import isdir, isfile, join, splitext, getmtime, basename, normpath, exists, expanduser, split, dirname, getsize, abspath
from blackfynn import Blackfynn
import json 
from datetime import datetime, timezone
import pandas as pd
import pathlib
import shutil
import gevent
import urllib.request
import copy
import re
import platform
import time
from datetime import datetime, timezone
import subprocess
# File extensions (lower case, with the leading dot, including compound ones such as '.tar.gz').
# verify_file_name() inside bf_get_dataset_files_folders() checks a file's extension against this
# list: when the extension is listed, the file name is rebuilt as <package name> + <extension>.
# NOTE(review): presumably these are the extensions Blackfynn strips from package names -- confirm.
bf_recognized_file_extensions = ['.cram','.jp2','.jpx','.lsm','.ndpi','.nifti','.oib','.oif','.roi','.rtf','.swc','.abf','.acq','.adicht','.adidat',\
                                 '.aedt','.afni','.ai','.avi','.bam','.bash','.bcl','.bcl.gz','.bin','.brik','.brukertiff.gz','.continuous','.cpp','.csv',\
                                 '.curv','.cxls','.czi','.data','.dcm','.df','.dicom','.doc','.docx','.e','.edf','.eps','.events','.fasta','.fastq','.fcs',\
                                 '.feather','.fig','.gif','.h4','.h5','.hdf4','.hdf5','.hdr','.he2','.he5','.head','.hoc','.htm','.html','.ibw','.img','.ims',\
                                 '.ipynb','.jpeg','.jpg','.js','.json','.lay','.lh','.lif','.m','.mat','.md','.mef','.mefd.gz','.mex','.mgf','.mgh','.mgh.gz',\
                                 '.mgz','.mnc','.moberg.gz','.mod','.mov','.mp4','.mph','.mpj','.mtw','.ncs','.nd2','.nev','.nex','.nex5','.nf3','.nii','.nii.gz',\
                                 '.ns1','.ns2','.ns3','.ns4','.ns5','.ns6','.nwb','.ogg','.ogv','.ome.btf','.ome.tif','.ome.tif2','.ome.tif8','.ome.tiff','.ome.xml',\
                                 '.openephys','.pdf','.pgf','.png','.ppt','.pptx','.ps','.pul','.py','.r','.raw','.rdata','.rh','.rhd','.sh','.sldasm','.slddrw',\
                                 '.smr','.spikes','.svg','.svs','.tab','.tar','.tar.gz','.tcsh','.tdm','.tdms','.text','.tif','.tiff','.tsv','.txt','.vcf','.webm',\
                                 '.xlsx','.xml','.yaml','.yml','.zip','.zsh']
# Define constants
# Home directory of the current user, as resolved by os.path.expanduser
userpath = expanduser("~")

### Internal functions
def TZLOCAL():
    """
    Return the tzinfo object describing the local time zone of this machine.

    The current UTC time is converted to local time and the tzinfo attached
    to that local datetime is returned.
    """
    utc_now = datetime.now(timezone.utc)
    local_now = utc_now.astimezone()
    return local_now.tzinfo

In [52]:
def bf_get_current_user_permission(bf, myds):
    """
    Return the role of the currently logged-in user on the selected dataset.

    Args:
        bf: logged-in Blackfynn client; its `_api._get` method is used for the request
        myds: selected Blackfynn dataset; only its `id` attribute is read
    Output:
        value stored under the 'role' key of the '/datasets/<id>/role' response
    Raises:
        whatever the underlying request or the key lookup raises (nothing is caught here)
    """
    role_endpoint = '/datasets/' + str(myds.id) + '/role'
    role_response = bf._api._get(role_endpoint)
    return role_response['role']

In [53]:
def create_high_level_manifest_files_existing_bf(soda_json_structure, bf, ds):
    """
    Function to create manifest files for each high-level SPARC folder when the dataset is
    generated onto an existing Blackfynn dataset.

    For every high-level folder listed in soda_json_structure["dataset-structure"]["folders"],
    the rows describing files already on Blackfynn (only when the folder is merged) are combined
    with rows describing the local files to be uploaded, and the result is written to
    <manifest_folder_path>/<folder>/manifest.xlsx.

    Args:
        soda_json_structure: soda dict with information about the dataset to be generated/modified
        bf: logged Blackfynn account, used for the '/packages/...' detail requests
        ds: selected Blackfynn dataset whose items are inspected
    Output:
        manifest_files_structure: dict mapping each high-level folder name to the local path
        of its generated manifest file
    Note:
        Relies on names defined outside this function: manifest_folder_path and manifest_sparc
        (module-level, not visible in this part of the file), generate_relative_path,
        get_base_file_name, bf_get_existing_folders_details and bf_get_existing_files_details.
    """

    # Column names of a manifest; the per-column lists are kept index-aligned (one index = one row)
    manifest_columns = ["filename", "timestamp", "description", "file type", "Additional Metadata"]

    def new_manifest_dict():
        # Fresh manifest container with one empty list per column
        return {column: [] for column in manifest_columns}

    def recursive_manifest_info_import_bf(my_item, my_relative_path, dict_folder_manifest, manifest_df):
        # Add one manifest row per file found on Blackfynn under my_item (recursing into collections),
        # reusing timestamp/description/additional metadata from the existing manifest when available.
        for item in my_item.items:
            if item.type == "Collection":
                relative_path = generate_relative_path(my_relative_path, item.name)
                dict_folder_manifest = recursive_manifest_info_import_bf(item, relative_path, dict_folder_manifest, manifest_df)
            elif item.name != 'manifest':
                file_details = bf._api._get('/packages/' + str(item.id) + '/view')
                file_name = file_details[0]["content"]["name"]
                # Extension comes from the underlying file, base name from the package name
                file_extension = splitext(file_name)[1]
                file_name_with_extension = splitext(item.name)[0] + file_extension
                relative_path = generate_relative_path(my_relative_path, file_name_with_extension)
                dict_folder_manifest["filename"].append(relative_path)

                #file type
                dict_folder_manifest["file type"].append(file_extension)

                #timestamp, description, Additional Metadata (empty unless found in the existing manifest)
                timestamp = ""
                description = ""
                additional_metadata = ""
                if not manifest_df.empty:
                    matching_rows = manifest_df[manifest_df["filename"] == relative_path]
                    if not matching_rows.empty:
                        # The existing manifest is only validated for "filename", "description" and
                        # "Additional Metadata"; a missing "timestamp" column must not raise KeyError.
                        if "timestamp" in matching_rows.columns:
                            timestamp = matching_rows["timestamp"].iloc[0]
                        description = matching_rows["description"].iloc[0]
                        additional_metadata = matching_rows["Additional Metadata"].iloc[0]
                dict_folder_manifest["timestamp"].append(timestamp)
                dict_folder_manifest["description"].append(description)
                dict_folder_manifest["Additional Metadata"].append(additional_metadata)

        return dict_folder_manifest


    # Merge existing folders
    def recursive_manifest_builder_existing_bf(my_folder, my_bf_folder, my_bf_folder_exists, my_relative_path, dict_folder_manifest):
        # Add one manifest row per local file of my_folder (recursing into sub-folders), applying the
        # "if-existing-files" option against the files already present in the matching Blackfynn folder.

        if "folders" in my_folder.keys():
            if my_bf_folder_exists:
                my_bf_existing_folders, my_bf_existing_folders_name = bf_get_existing_folders_details(my_bf_folder)
            else:
                my_bf_existing_folders = []
                my_bf_existing_folders_name = []

            for folder_key, folder in my_folder["folders"].items():
                relative_path = generate_relative_path(my_relative_path, folder_key)
                if folder_key in my_bf_existing_folders_name:
                    bf_folder_index = my_bf_existing_folders_name.index(folder_key)
                    bf_folder = my_bf_existing_folders[bf_folder_index]
                    bf_folder_exists = True
                else:
                    bf_folder = ''
                    bf_folder_exists = False
                dict_folder_manifest = recursive_manifest_builder_existing_bf(folder, bf_folder, bf_folder_exists, relative_path, dict_folder_manifest)

        if "files" in my_folder.keys():
            if my_bf_folder_exists:
                my_bf_existing_files, my_bf_existing_files_name, my_bf_existing_files_name_with_extension = bf_get_existing_files_details(my_bf_folder)
            else:
                my_bf_existing_files = []
                my_bf_existing_files_name = []
                my_bf_existing_files_name_with_extension = []

            for file_key, file in my_folder["files"].items():
                gevent.sleep(0)  # yield to other greenlets between files
                if file["type"] != "local":
                    continue
                file_path = file["path"]
                if not isfile(file_path):
                    continue

                desired_name = splitext(file_key)[0]
                file_extension = splitext(file_key)[1]
                already_on_bf = file_key in my_bf_existing_files_name_with_extension

                # manage existing file request
                if existing_file_option == "skip" and already_on_bf:
                    continue

                if existing_file_option == "replace" and already_on_bf:
                    #remove existing from manifest
                    filename = generate_relative_path(my_relative_path, file_key)
                    # The Blackfynn file may have no manifest row (e.g. the skipped 'manifest'
                    # package), so only delete the row when it is actually present.
                    if filename in dict_folder_manifest["filename"]:
                        index_file = dict_folder_manifest["filename"].index(filename)
                        for column in manifest_columns:
                            del dict_folder_manifest[column][index_file]

                    index_name = my_bf_existing_files_name_with_extension.index(file_key)
                    del my_bf_existing_files[index_name]
                    del my_bf_existing_files_name[index_name]
                    del my_bf_existing_files_name_with_extension[index_name]

                if desired_name not in my_bf_existing_files_name:
                    final_name = file_key
                else:
                    # expected final name: bump a "(n)" counter until the name is free
                    final_name = desired_name
                    output = get_base_file_name(desired_name)
                    if output:
                        # desired name already ends with "(n)": continue counting from n
                        base_name = output[0]
                        count_exist = output[1]
                        while final_name in my_bf_existing_files_name:
                            count_exist += 1
                            final_name = base_name + "(" + str(count_exist) + ")"
                    else:
                        count_exist = 0
                        while final_name in my_bf_existing_files_name:
                            count_exist += 1
                            final_name = desired_name + " (" + str(count_exist) + ")"

                    final_name = final_name + file_extension
                    # reserve the generated name so later files in this folder do not reuse it
                    my_bf_existing_files_name.append(splitext(final_name)[0])

                #filename
                filename = generate_relative_path(my_relative_path, final_name)
                dict_folder_manifest["filename"].append(filename)

                #timestamp (last modification time of the local file, in the local time zone)
                mtime = pathlib.Path(file_path).stat().st_mtime
                lastmodtime = datetime.fromtimestamp(mtime).astimezone(local_timezone)
                dict_folder_manifest["timestamp"].append(lastmodtime.isoformat().replace('.', ',').replace('+00:00', 'Z'))

                #description
                dict_folder_manifest["description"].append(file["description"] if "description" in file.keys() else "")

                #file type
                if file_extension == "":
                    file_extension = "None"
                dict_folder_manifest["file type"].append(file_extension)

                #additional metadata
                dict_folder_manifest["Additional Metadata"].append(file["additional-metadata"] if "additional-metadata" in file.keys() else "")

        return dict_folder_manifest

    #create local folder to save manifest files temporarily (delete any existing one first)
    if isdir(manifest_folder_path):
        shutil.rmtree(manifest_folder_path)
    makedirs(manifest_folder_path)

    # import info about files already on bf
    dataset_structure = soda_json_structure["dataset-structure"]
    manifest_dict_save = {}
    for item in ds.items:
        if item.type == "Collection" and item.name in dataset_structure["folders"].keys():

            relative_path = ""
            # Initialize dict where manifest info will be stored
            dict_folder_manifest = new_manifest_dict()

            # pull the existing manifest file of this high-level folder, if any
            manifest_df = pd.DataFrame()
            for file in item.items:
                if file.type != "Collection":
                    file_id = file.id
                    file_details = bf._api._get('/packages/' + str(file_id) +'/view')
                    file_name_with_extension = file_details[0]['content']['name']
                    if file_name_with_extension in manifest_sparc:
                        file_id_2 = file_details[0]["content"]["id"]
                        file_url_info = bf._api._get('/packages/' + str(file_id) + '/files/' + str(file_id_2))
                        file_url = file_url_info['url']
                        # NOTE(review): read_excel is used for every name in manifest_sparc; if that
                        # list contains a .csv manifest this call would need read_csv -- confirm.
                        manifest_df = pd.read_excel(file_url)
                        manifest_df = manifest_df.fillna('')
                        # discard manifests that do not have the columns read by the import helper
                        if "filename" not in manifest_df.columns or "description" not in manifest_df.columns or "Additional Metadata" not in manifest_df.columns:
                            manifest_df = pd.DataFrame()
                        break
            dict_folder_manifest = recursive_manifest_info_import_bf(item, relative_path, dict_folder_manifest, manifest_df)
            manifest_dict_save[item.name] = {'manifest': dict_folder_manifest, 'bf_folder': item}


    # import info from local files to be uploaded
    # (local_timezone and existing_file_option are read by the nested builder above)
    local_timezone = TZLOCAL()
    manifest_files_structure = {}
    existing_folder_option = soda_json_structure["generate-dataset"]["if-existing"]
    existing_file_option = soda_json_structure["generate-dataset"]["if-existing-files"]
    for folder_key, folder in dataset_structure["folders"].items():
        relative_path = ''

        if folder_key in manifest_dict_save.keys() and existing_folder_option == "merge":
            # start from the rows of the files already on Blackfynn
            bf_folder = manifest_dict_save[folder_key]['bf_folder']
            bf_folder_exists = True
            dict_folder_manifest = manifest_dict_save[folder_key]['manifest']

        elif folder_key in manifest_dict_save.keys() and existing_folder_option == "skip":
            continue

        else:
            bf_folder = ''
            bf_folder_exists = False
            dict_folder_manifest = new_manifest_dict()

        dict_folder_manifest = recursive_manifest_builder_existing_bf(folder, bf_folder, bf_folder_exists, relative_path, dict_folder_manifest)

        #create high-level folder at the temporary location
        folderpath = join(manifest_folder_path, folder_key)
        makedirs(folderpath)

        #save manifest file
        manifestfilepath = join(folderpath, 'manifest.xlsx')
        df = pd.DataFrame.from_dict(dict_folder_manifest)
        df.to_excel(manifestfilepath, index=None, header=True)

        manifest_files_structure[folder_key] = manifestfilepath

    return manifest_files_structure

In [54]:
def bf_get_dataset_files_folders(soda_json_structure, requested_sparc_only = True):
    """
    Function for importing blackfynn data files info into the "dataset-structure" key of the soda json structure,
    including metadata from any existing manifest files in the high-level folders
    (name, id, timestamp, description, additional metadata)

    Args:
        soda_json_structure: soda structure with bf account and dataset info available
            (reads ["bf-account-selected"]["account-name"] and ["bf-dataset-selected"]["dataset-name"]).
            It is modified in place: "dataset-structure" and "metadata-files" are overwritten.
        requested_sparc_only: when True (default), top-level folders whose name is not one of the
            high-level SPARC folders are not imported
    Output:
        [soda_json_structure, success_message]: same soda structure with blackfynn data file info
        included under the "dataset-structure" key, and a success message (string)
    Raises:
        KeyError if the account/dataset keys are missing from soda_json_structure;
        Exception (carrying a list of error messages) if the account or dataset cannot be opened
        or if the user's role is not owner, manager or editor
    """

    high_level_sparc_folders = ["code", "derivative", "docs", "primary", "protocol", "source"]
    manifest_sparc = ["manifest.xlsx", "manifest.csv"]
    high_level_metadata_sparc = ['submission.xlsx', 'submission.csv', 'submission.json', 'dataset_description.xlsx', 'dataset_description.csv', 'dataset_description.json', 'subjects.xlsx', 'subjects.csv', 'subjects.json', 'samples.xlsx', 'samples.csv', 'samples.json', 'README.txt', 'CHANGES.txt']

    def verify_file_name(item_name, file_name):
        # When the file's extension is in bf_recognized_file_extensions, rebuild the name from the
        # package name plus that extension; otherwise keep the file name as is.
        file_extension = os.path.splitext(file_name)[1]
        if file_extension in bf_recognized_file_extensions:
            return item_name + file_extension
        return file_name

    def recursive_dataset_import(my_item, dataset_folder, metadata_files, my_folder_name, my_level, manifest_dict):
        # Walk the Blackfynn items under my_item and mirror them into dataset_folder.
        # High-level metadata files (level 0) go to metadata_files; manifest files found directly
        # inside a top-level folder (level 1) are loaded into manifest_dict instead of being listed.
        for item in my_item:
            # both keys are created as soon as the folder is known to contain at least one item
            dataset_folder.setdefault("folders", {})
            dataset_folder.setdefault("files", {})

            if item.type == "Collection":
                folder_name = item.name
                if my_level == 0 and folder_name not in high_level_sparc_folders and requested_sparc_only:  # only import SPARC folders
                    continue
                sub_folder = {"type": "bf", "action": ["existing"], "path": item.id, "folders": {}, "files": {}}
                dataset_folder["folders"][folder_name] = sub_folder
                # The child level is computed for every folder; it used to be assigned only for the
                # first collection seen, which left it unbound when that first collection was skipped.
                recursive_dataset_import(
                    item, sub_folder, metadata_files, folder_name, my_level + 1, manifest_dict)
            else:
                package_id = item.id
                file_details = bf._api._get(
                    '/packages/' + str(package_id) + '/view')
                file_name = verify_file_name(item.name, file_details[0]["content"]["name"])

                if my_level == 0 and file_name in high_level_metadata_sparc:
                    metadata_files[file_name] = {
                        "type": "bf", "action": ["existing"], "path": item.id}
                elif my_level == 1 and file_name in manifest_sparc:
                    file_id = file_details[0]["content"]["id"]
                    manifest_url = bf._api._get(
                        '/packages/' + str(package_id) + '/files/' + str(file_id))
                    # NOTE(review): manifest_sparc also lists "manifest.csv", which read_excel is
                    # not expected to parse -- confirm whether read_csv is needed for that case.
                    manifest_dict[my_folder_name] = pd.read_excel(manifest_url['url'])
                else:
                    timestamp = file_details[0]["content"]["updatedAt"]
                    dataset_folder["files"][file_name] = {
                        "type": "bf","action": ["existing"], "path": item.id, "timestamp": timestamp}


    def recursive_manifest_info_import(my_folder, my_relative_path, manifest_df):
        # Copy "description" / "Additional Metadata" from the manifest rows onto the matching
        # file entries of my_folder, recursing into sub-folders.
        colum_headers = manifest_df.columns.tolist()

        if "files" in my_folder.keys():
            for file_key, file in my_folder["files"].items():
                # manifest paths are written with '/' (generate_relative_path), so the lookup key
                # is built the same way instead of with the OS-specific os.path.join
                filename = generate_relative_path(my_relative_path, file_key)
                matching_rows = manifest_df[manifest_df['filename'] == filename]
                if matching_rows.empty:
                    continue
                if "description" in colum_headers:
                    mydescription = matching_rows["description"].values[0]
                    if mydescription:
                        file["description"] = mydescription
                if "Additional Metadata" in colum_headers:
                    my_additional_metadata = matching_rows["Additional Metadata"].values[0]
                    # test the additional metadata itself (the description was tested here before,
                    # which also failed when the manifest had no "description" column)
                    if my_additional_metadata:
                        file["additional-metadata"] = my_additional_metadata

        if "folders" in my_folder.keys():
            for folder_key, folder in my_folder["folders"].items():
                relative_path = generate_relative_path(my_relative_path, folder_key)
                recursive_manifest_info_import(folder, relative_path, manifest_df)

    # START

    error = []

    # check that the blackfynn account is valid
    bf_account_name = soda_json_structure["bf-account-selected"]["account-name"]
    try:
        bf = Blackfynn(bf_account_name)
    except Exception:
        error.append('Error: Please select a valid Blackfynn account')
        raise Exception(error)

    # check that the blackfynn dataset is valid
    bf_dataset_name = soda_json_structure["bf-dataset-selected"]["dataset-name"]
    try:
        myds = bf.get_dataset(bf_dataset_name)
    except Exception:
        error.append('Error: Please select a valid Blackfynn dataset')
        raise Exception(error)

    # check that the user has permission to edit this dataset
    role = bf_get_current_user_permission(bf, myds)
    if role not in ['owner', 'manager', 'editor']:
        error.append("Error: You don't have permissions for uploading to this Blackfynn dataset")
        raise Exception(error)

    # import files and folders in the soda json structure
    soda_json_structure["dataset-structure"] = {}
    soda_json_structure["metadata-files"] = {}
    dataset_folder = soda_json_structure["dataset-structure"]
    metadata_files = soda_json_structure["metadata-files"]
    manifest_dict = {}
    recursive_dataset_import(myds, dataset_folder, metadata_files, "", 0, manifest_dict)

    #remove metadata files keys if empty
    if not metadata_files:
        del soda_json_structure['metadata-files']

    # pull information from the manifest files if they satisfy the SPARC format
    if "folders" in dataset_folder.keys():
        for folder_key, manifest_df in manifest_dict.items():
            manifest_df = manifest_df.fillna('')
            colum_headers = manifest_df.columns.tolist()
            folder = dataset_folder["folders"][folder_key]
            if "filename" in colum_headers and ("description" in colum_headers or "Additional Metadata" in colum_headers):
                recursive_manifest_info_import(folder, "", manifest_df)

    success_message = "Data files under a valid high-level SPARC folders have been imported"
    return [soda_json_structure, success_message]

In [55]:
def bf_update_existing_dataset(soda_json_structure, bf, ds):
    """
    Apply the changes recorded in the soda json structure to an existing Blackfynn dataset.

    Steps, in order: delete files marked "deleted", fetch the current Blackfynn structure, move
    files marked "moved" (creating destination folders as needed), rename files marked "renamed",
    delete folders marked "deleted", then call bf_generate_new_dataset to upload the rest.

    Args:
        soda_json_structure: soda dict; its "dataset-structure" is read and annotated in place
            (a 'folderpath' list is added to items, deleted items are removed), and its
            "manifest-files" and "generate-dataset" keys are overwritten before the upload
        bf: logged Blackfynn account
        ds: selected Blackfynn dataset
    Output:
        None. Progress is reported through the global main_curate_progress_message.
    """
    global main_curate_progress_message
    global main_total_generate_dataset_size
    global start_generate
    global main_initial_bfdataset_size
    bfsd = ""  # replaced in step 2 by the structure currently on Blackfynn; read by the move helper

    # Delete any files on blackfynn that have been marked as deleted
    def recursive_file_delete(folder):
        if "files" in folder.keys():
            for item in list(folder["files"]):
                if "deleted" in folder["files"][item]['action']:
                    file = bf.get(folder["files"][item]['path'])
                    file.delete()
                    del folder["files"][item]

        # a structure without a "folders" key is treated as having no sub-folders
        for item in list(folder.get("folders", {})):
            recursive_file_delete(folder["folders"][item])
        return

    # Add a new key containing the path to all the files and folders on the
    # local data structure.
    # Allows us to see if the folder path of a specific file already
    # exists on blackfynn.
    def recursive_item_path_create(folder, path):
        if "files" in folder.keys():
            for item in list(folder["files"]):
                if "folderpath" not in folder["files"][item]:
                    # a file records the path of the folder that contains it
                    folder["files"][item]['folderpath'] = path[:]

        for item in list(folder.get("folders", {})):
            sub_folder = folder["folders"][item]
            if "folderpath" not in sub_folder:
                # a folder records its own path (parent path + its name)
                sub_folder['folderpath'] = path[:]
                sub_folder['folderpath'].append(item)
            recursive_item_path_create(sub_folder, sub_folder['folderpath'][:])

        return

    # Check and create any non existing folders for the file move process.
    # Returns the Blackfynn id ("path") of the last folder of folderpath.
    def recursive_check_and_create_bf_file_path(folderpath, index, bfsd):
        folder = folderpath[index]
        bf_sub_folders = bfsd.setdefault("folders", {})

        if folder not in bf_sub_folders:
            if (index == 0):
                # top-level folders are created directly on the dataset
                new_folder = ds.create_collection(folder)
            else:
                current_folder = bf.get(bfsd["path"])
                new_folder = current_folder.create_collection(folder)
            bf_sub_folders[folder] = {"type": "bf", "action": ["existing"], "path": new_folder.id, "folders":{}, "files":{}}

        index += 1

        if index < len(folderpath):
            # propagate the id found deeper down; without this return every path longer than
            # one folder yielded None
            return recursive_check_and_create_bf_file_path(folderpath, index, bf_sub_folders[folder])
        return bf_sub_folders[folder]["path"]

    # Check for any files that have been moved and verify paths before moving
    def recursive_check_moved_files(folder):
        if "files" in folder.keys():
            for item in list(folder["files"]):
                if "moved" in folder["files"][item]['action'] and folder["files"][item]["type"] == "bf":
                    new_folder_id = recursive_check_and_create_bf_file_path(folder["files"][item]["folderpath"].copy(), 0, bfsd)
                    destination_folder = bf.get(new_folder_id)
                    bf.move(destination_folder, folder["files"][item]["path"])

        for item in list(folder.get("folders", {})):
            recursive_check_moved_files(folder["folders"][item])

        return

    # Rename any files that exist on blackfynn
    def recursive_file_rename(folder):
        if "files" in folder.keys():
            for item in list(folder["files"]):
                if "renamed" in folder["files"][item]['action'] and folder["files"][item]["type"] == "bf":
                    file = bf.get(folder["files"][item]["path"])
                    file.name = item
                    file.update()

        for item in list(folder.get("folders", {})):
            recursive_file_rename(folder["folders"][item])

        return

    # Delete any stray folders that exist on blackfynn
    # Only top level folders are deleted since the api deletes any
    # files and folders that exist inside.
    def recursive_folder_delete(folder):
        for item in list(folder.get("folders", {})):
            if "deleted" in folder["folders"][item]['action']:
                file = bf.get(folder["folders"][item]['path'])
                file.delete()
                del folder["folders"][item]
            else:
                recursive_folder_delete(folder["folders"][item])

        return

    # Rename any folders that still exist.
    # NOTE(review): this helper is not invoked by the steps below -- confirm whether a
    # folder-rename step is meant to run after the folder deletion.
    def recursive_folder_rename(folder):
        for item in list(folder.get("folders", {})):
            sub_folder = folder["folders"][item]
            if "renamed" in sub_folder['action'] and sub_folder["type"] == "bf":
                file = bf.get(sub_folder["path"])
                file.name = item
                file.update()
            # always descend so renamed folders nested deeper are handled too (the previous
            # for/else ran once after the loop and called the file-rename helper instead)
            recursive_folder_rename(sub_folder)

        return

    # 1. Remove all existing files on blackfynn, that the user deleted.
    main_curate_progress_message = "Deleting files on blackfynn"
    dataset_structure = soda_json_structure["dataset-structure"]
    recursive_file_delete(dataset_structure)
    main_curate_progress_message = "Files on blackfynn marked for deletion have been deleted"

    # 2. Get the status of all files currently on blackfynn and create
    # the folderpath for all items in both dataset structures.
    main_curate_progress_message = "Retreiving files and folders from blackfynn"
    # bf_get_dataset_files_folders overwrites "dataset-structure" and "metadata-files" on the dict
    # it receives, so it is given a shallow copy: the user's structure must stay attached to
    # soda_json_structure for the upload in step 6.
    current_bf_dataset_files_folders = bf_get_dataset_files_folders(soda_json_structure.copy())[0]
    bfsd = current_bf_dataset_files_folders["dataset-structure"]
    main_curate_progress_message = "Creating file paths for all files on blackfynn"
    recursive_item_path_create(dataset_structure, [])
    recursive_item_path_create(bfsd, [])
    main_curate_progress_message = "File paths created"


    # 3. Move any files that are marked as moved on blackfynn.
    # Create any additional folders if required
    main_curate_progress_message = "Moving all files requested by the user"
    recursive_check_moved_files(dataset_structure)
    main_curate_progress_message = "Moved all files requested by the user"

    # 4. Rename any blackfynn files that are marked as renamed.
    main_curate_progress_message = "Renaming all files requested by the user"
    recursive_file_rename(dataset_structure)
    main_curate_progress_message = "Renamed all files requested by the user"

    # 5. Delete any blackfynn folders that are marked as deleted.
    main_curate_progress_message = "Deleting any additional folders present on blackfynn"
    recursive_folder_delete(dataset_structure)
    main_curate_progress_message = "Deletion of additional folders complete"


    # 6. Run the original code to upload any new files added to the dataset.
    soda_json_structure["manifest-files"] = {"destination": "bf"}
    soda_json_structure["generate-dataset"] = {"destination" : "bf", "if-existing": "merge", "if-existing-files": "replace"}
    bf_generate_new_dataset(soda_json_structure, bf, ds)

    return

In [56]:
def bf_get_existing_folders_details(bf_folder):
    """Collect the sub-folders found directly inside a Blackfynn folder.

    Args:
        bf_folder: object exposing an ``items`` iterable whose elements carry
            ``type`` and ``name`` attributes.

    Returns:
        A ``(folders, folder_names)`` tuple of two index-aligned lists: the
        items whose ``type`` is ``"Collection"`` and the names of those items.
    """
    collections = []
    collection_names = []
    for entry in bf_folder.items:
        # Anything that is not a collection is a file-like item; ignore it here.
        if entry.type != "Collection":
            continue
        collections.append(entry)
        collection_names.append(entry.name)

    return collections, collection_names

In [57]:
def generate_relative_path(x,y):
    """Append ``y`` to ``x`` with a ``/`` separator.

    When ``x`` is falsy (for example an empty string or ``None``) there is no
    parent part to prepend, so ``y`` is returned on its own.
    """
    return x + '/' + y if x else y

In [58]:
def bf_get_existing_files_details(bf_folder):
    """Describe the non-folder items found directly inside a Blackfynn folder.

    Uses the module-level ``bf`` client: for every item the
    ``/packages/<id>/view`` endpoint is queried and the extension of the name
    found in the first entry of the response (``[0]["content"]["name"]``) is
    appended to the item's own extension-less name.

    Args:
        bf_folder: object exposing an ``items`` iterable whose elements carry
            ``type``, ``name`` and ``id`` attributes.

    Returns:
        A tuple of three index-aligned lists: the items whose ``type`` is not
        ``"Collection"``, their names without extension, and their names with
        the extension taken from the API response.
    """
    packages = []
    for entry in bf_folder.items:
        if entry.type != "Collection":
            packages.append(entry)
    stems = [splitext(package.name)[0] for package in packages]

    names_with_extension = []
    for package in packages:
        view = bf._api._get('/packages/' + str(package.id) + '/view')
        # The extension comes from the name reported by the view endpoint.
        source_extension = splitext(view[0]["content"]["name"])[1]
        names_with_extension.append(splitext(package.name)[0] + source_extension)

    return packages, stems, names_with_extension

In [115]:
def get_base_file_name(file_name):
    """Split a name ending in ``(<int>)`` into its base text and that number.

    The text between the last ``(`` and the closing ``)`` is validated with
    ``check_if_int``; the base is everything before that ``(`` (a space
    preceding the parenthesis, if any, is kept).

    Args:
        file_name: name to inspect, normally without its extension.

    Returns:
        ``[base_name, number]`` when the name ends with ``)``, contains a
        ``(`` and the enclosed text passes ``check_if_int``; otherwise ``[]``.
        Empty names also yield ``[]``.
    """
    # Guard against empty input: indexing file_name[-1] would raise IndexError.
    if not file_name or file_name[-1] != ')':
        return []

    # Last opening parenthesis; rfind cannot run past the start of the string
    # the way a manual backwards scan can (e.g. for the one-character name ")").
    open_index = file_name.rfind('(')
    if open_index == -1:
        return []

    base_name = file_name[:open_index]
    num = file_name[open_index + 1:-1]
    if check_if_int(num):
        return [base_name, int(num)]
    return []

In [116]:
def bf_generate_new_dataset(soda_json_structure, bf, ds):
    """Prepare the generation of a dataset on Blackfynn from a SODA JSON structure.

    The function creates the folder hierarchy under ``ds``, compiles the lists
    of local files, metadata files and manifest files to upload, and deletes
    existing remote items that are to be replaced. The upload/rename step
    (step 5) is currently enclosed in a string literal and therefore does not
    execute.

    Args:
        soda_json_structure: dict from which "dataset-structure" and
            "generate-dataset" (keys "if-existing", "if-existing-files",
            "destination" and optionally "dataset-name") are read, plus the
            optional "metadata-files" and "manifest-files" entries.
        bf: only forwarded to create_high_level_manifest_files_existing_bf.
        ds: root object of the destination dataset; it is stored as the
            "value" of the tracking dict and its ``update()`` method and
            ``name`` attribute are used.

    Returns:
        None.

    Raises:
        Any exception raised while processing is re-raised unchanged.

    Side effects:
        Writes the global ``main_curate_progress_message`` and adds the size of
        every file queued for upload to the global
        ``main_total_generate_dataset_size``.
    """

    global main_curate_progress_message
    global main_total_generate_dataset_size
    global start_generate
    global main_initial_bfdataset_size

    try:

        def recursive_create_folder_for_bf(my_folder, my_tracking_folder, existing_folder_option):
            """Create or reuse the remote folders listed under my_folder["folders"], recursively.

            Every folder that is kept is recorded as
            my_tracking_folder["folders"][name] = {"value": <remote folder>}.

            existing_folder_option values handled:
              - "skip": a folder whose name already exists remotely is neither
                tracked nor descended into; otherwise it is created.
              - "create-duplicate": a new folder is always created.
              - "replace": an existing folder with the same name is deleted
                first, then a new one is created.
              - "merge": an existing folder with the same name is reused,
                otherwise a new one is created.
            """

            # list of existing bf folders at this level
            my_bf_folder = my_tracking_folder["value"]
            my_bf_existing_folders, my_bf_existing_folders_name = bf_get_existing_folders_details(my_bf_folder)

            # create/replace/skip folder
            if "folders" in my_folder.keys():
                my_tracking_folder["folders"] = {}
                for folder_key, folder in my_folder["folders"].items():

                    if existing_folder_option == "skip":
                        if folder_key in my_bf_existing_folders_name:
                            continue
                        else:
                            bf_folder = my_bf_folder.create_collection(folder_key)

                    elif existing_folder_option == "create-duplicate":
                        bf_folder = my_bf_folder.create_collection(folder_key)

                    elif existing_folder_option == "replace":
                        if folder_key in my_bf_existing_folders_name:
                            index_folder = my_bf_existing_folders_name.index(folder_key)
                            bf_folder_delete = my_bf_existing_folders[index_folder]
                            bf_folder_delete.delete()
                            my_bf_folder.update()
                        bf_folder = my_bf_folder.create_collection(folder_key)

                    elif existing_folder_option == "merge":
                        if folder_key in my_bf_existing_folders_name:
                            index_folder = my_bf_existing_folders_name.index(folder_key)
                            bf_folder = my_bf_existing_folders[index_folder]
                        else:
                            bf_folder = my_bf_folder.create_collection(folder_key)
                    # NOTE(review): for any other option value no branch assigns bf_folder,
                    # so the line below fails on the first folder or reuses the folder
                    # object left over from the previous iteration.
                    bf_folder.update()
                    my_tracking_folder["folders"][folder_key] = {"value": bf_folder}
                    tracking_folder = my_tracking_folder["folders"][folder_key]
                    recursive_create_folder_for_bf(folder, tracking_folder, existing_folder_option)

        def recursive_dataset_scan_for_bf(my_folder, my_tracking_folder, existing_file_option, list_upload_files, my_relative_path):
            """Walk the dataset structure and queue the local files to upload.

            For each folder, existing remote files are deleted when
            existing_file_option is "replace", and for every local file the
            projected name, the desired name (file key without extension) and
            a final name that does not clash with names already present in
            the folder are computed.

            Reads ``existing_folder_option`` from the enclosing function's scope.

            Returns:
                list_upload_files, extended with entries of the form
                [local_paths, bf_folder, projected_names, desired_names,
                final_names, tracking_folder, relative_path].
            """

            global main_total_generate_dataset_size

            my_bf_folder = my_tracking_folder["value"]

            if "folders" in my_folder.keys():
                my_bf_existing_folders, my_bf_existing_folders_name = bf_get_existing_folders_details(my_bf_folder)

                for folder_key, folder in my_folder["folders"].items():
                    relative_path = generate_relative_path(my_relative_path, folder_key)

                    # Folders that already exist remotely are not scanned in "skip" mode
                    # (recursive_create_folder_for_bf does not add them to the tracking dict).
                    if existing_folder_option == "skip":
                        if folder_key in my_bf_existing_folders_name:
                            continue

                    tracking_folder = my_tracking_folder["folders"][folder_key]
                    list_upload_files = recursive_dataset_scan_for_bf(folder, tracking_folder, existing_file_option, list_upload_files, relative_path)

            if "files" in my_folder.keys():

                #delete files to be deleted
                # ("replace" mode only: a remote file whose name with extension equals the
                # file key is deleted when the local source file exists on disk)
                my_bf_existing_files, my_bf_existing_files_name, my_bf_existing_files_name_with_extension = bf_get_existing_files_details(my_bf_folder)
                for file_key, file in my_folder["files"].items():
                    if file["type"] == "local":
                        file_path = file["path"]
                        if isfile(file_path):
                            if existing_file_option == "replace":
                                if file_key in my_bf_existing_files_name_with_extension:
                                    index_file = my_bf_existing_files_name_with_extension.index(file_key)
                                    my_file = my_bf_existing_files[index_file]
                                    my_file.delete()
                                    my_bf_folder.update()

                #create list of files to be uploaded with projected and desired names saved
                # (the remote listing is fetched again so that it reflects the deletions above)
                my_bf_existing_files, my_bf_existing_files_name, my_bf_existing_files_name_with_extension = bf_get_existing_files_details(my_bf_folder)

                list_local_files = []
                list_projected_names = []
                list_desired_names = []
                list_final_names = []
                additional_upload_lists = []
                additional_list_count = 0
                list_upload_schedule_projected_names = []
                list_initial_names = []
                for file_key, file in my_folder["files"].items():
                    if file["type"] == "local":
                        file_path = file["path"]
                        if isfile(file_path):

                            # initial_* describe the file as named on disk; desired_name is the
                            # name requested in the dataset structure, without extension.
                            initial_name = splitext(basename(file_path))[0]
                            initial_extension = splitext(basename(file_path))[1]
                            initial_name_with_extention = basename(file_path)
                            desired_name = splitext(file_key)[0]

                            if existing_file_option == "skip":
                                if file_key in my_bf_existing_files_name_with_extension:
                                    continue

                            # check if initial filename exists on Blackfynn dataset and get the projected name of the file after upload
                            # Recognized extensions are compared against the extension-less names
                            # and get "name (n)"; other extensions are compared against the names
                            # with extension and get "name (n).ext".
                            # NOTE(review): presumably this mirrors how Blackfynn names uploaded
                            # packages - confirm against the platform behaviour.
                            count_done = 0
                            count_exist = 0
                            if initial_extension in bf_recognized_file_extensions:
                                projected_name = initial_name
                                while count_done == 0:
                                    if projected_name in my_bf_existing_files_name:
                                        count_exist += 1
                                        projected_name = initial_name + " (" + str(count_exist) + ")"
                                    else:
                                        count_done = 1
                            else:
                                # (both counters are already 0 here; this reset is redundant)
                                count_done = 0
                                count_exist = 0
                                projected_name = initial_name_with_extention
                                while count_done == 0:
                                    if projected_name in my_bf_existing_files_name_with_extension:
                                        count_exist += 1
                                        projected_name = initial_name + " (" + str(count_exist) + ")" + initial_extension
                                    else:
                                        count_done = 1

                            # expected final name
                            # If the desired name already ends in "(n)", counting continues from n
                            # on the text preceding the parenthesis; otherwise " (n)" is appended
                            # to the desired name, starting at 1.
                            count_done = 0
                            final_name = desired_name
                            output = get_base_file_name(desired_name)
                            if output:
                                base_name = output[0]
                                count_exist = output[1]
                                while count_done == 0:
                                    if final_name in my_bf_existing_files_name:
                                        count_exist += 1
                                        final_name = base_name + "(" + str(count_exist) + ")"
                                    else:
                                        count_done = 1
                            else:
                                count_exist = 0
                                while count_done == 0:
                                    if final_name in my_bf_existing_files_name:
                                        count_exist += 1
                                        final_name = desired_name + " (" + str(count_exist) + ")"
                                    else:
                                        count_done = 1

                            # save in list accordingly
                            # A file whose names overlap with files already queued in the main
                            # batch for this folder is queued as its own single-file entry.
                            if initial_name in list_initial_names or initial_name in list_final_names or projected_name in list_final_names or final_name in list_projected_names:
                                additional_upload_lists.append([[file_path], my_bf_folder, [projected_name], [desired_name], [final_name], my_tracking_folder, my_relative_path])
                            else:
                                list_local_files.append(file_path)
                                list_projected_names.append(projected_name)
                                list_desired_names.append(desired_name)
                                list_final_names.append(final_name)
                                list_initial_names.append(initial_name)

                            # Register the final name as taken so that the following files of
                            # this folder are de-duplicated against it as well.
                            my_bf_existing_files_name.append(final_name)
                            if initial_extension in bf_recognized_file_extensions:
                                my_bf_existing_files_name_with_extension.append(final_name)
                            else:
                                my_bf_existing_files_name_with_extension.append(final_name + initial_extension)

                            # add to projected dataset size to be generated
                            main_total_generate_dataset_size += getsize(file_path)

                # The main batch goes first, followed by the single-file entries.
                if list_local_files:
                    list_upload_files.append([list_local_files, my_bf_folder, list_projected_names, list_desired_names, list_final_names, my_tracking_folder, my_relative_path])

                for item in additional_upload_lists:
                    list_upload_files.append(item)

            return list_upload_files


        # 1. Scan the dataset structure to create all non-existent folders
        # create a tracking dict which would track the generation of the dataset on Blackfynn
        main_curate_progress_message = "Creating folder structure"
        dataset_structure = soda_json_structure["dataset-structure"]
        tracking_json_structure = {"value": ds}
        existing_folder_option = soda_json_structure["generate-dataset"]["if-existing"]
        recursive_create_folder_for_bf(dataset_structure, tracking_json_structure, existing_folder_option)

        # 2. Scan the dataset structure and compile a list of files to be uploaded along with desired renaming
        ds.update()
        main_curate_progress_message = "Preparing a list of files to upload"
        existing_file_option = soda_json_structure["generate-dataset"]["if-existing-files"]
        list_upload_files = []
        relative_path = ds.name
        list_upload_files = recursive_dataset_scan_for_bf(dataset_structure, tracking_json_structure, existing_file_option, list_upload_files, relative_path)
        # Debug output: prints the complete upload plan.
        print(list_upload_files)

        # 3. Add high-level metadata files to a list
        ds.update()
        list_upload_metadata_files = []
        if "metadata-files" in soda_json_structure.keys():

            my_bf_existing_files, my_bf_existing_files_name, my_bf_existing_files_name_with_extension = bf_get_existing_files_details(ds)
            metadata_files = soda_json_structure["metadata-files"]
            for file_key, file in metadata_files.items():
                if file["type"] == "local":
                    metadata_path = file["path"]
                    if isfile(metadata_path):
                        # The comparison uses the extension-less name of the local file,
                        # not the metadata file key.
                        initial_name = splitext(basename(metadata_path))[0]
                        if existing_file_option == "replace":
                            if initial_name in my_bf_existing_files_name:
                                index_file = my_bf_existing_files_name.index(initial_name)
                                my_file = my_bf_existing_files[index_file]
                                my_file.delete()

                        if existing_file_option == "skip":
                            if initial_name in my_bf_existing_files_name:
                                continue

                        list_upload_metadata_files.append(metadata_path)
                        main_total_generate_dataset_size += getsize(metadata_path)

        # 4. Prepare and add manifest files to a list
        list_upload_manifest_files = []
        if "manifest-files" in soda_json_structure.keys():

            # prepare manifest files
            if soda_json_structure["generate-dataset"]["destination"] == "bf" and "dataset-name" not in soda_json_structure["generate-dataset"]:
                #generating dataset on an existing bf dataset - account for existing files and manifest files
                manifest_files_structure = create_high_level_manifest_files_existing_bf(soda_json_structure, bf, ds)
            else:
                #generating on new bf
                manifest_files_structure = create_high_level_manifest_files(soda_json_structure)

            # add manifest files to list after deleting existing ones
            # (the list is already empty at this point; this re-initialisation is redundant)
            list_upload_manifest_files = []
            for key in manifest_files_structure.keys():
                manifestpath = manifest_files_structure[key]
                # Each key is looked up as a top-level folder name in the tracking dict;
                # a key without a tracked folder raises KeyError here.
                item = tracking_json_structure['folders'][key]['value']
                destination_folder_id = item.id
                #delete existing manifest files
                for subitem in item:
                    if subitem.name == "manifest":
                        subitem.delete()
                        item.update()
                #upload new manifest files
                list_upload_manifest_files.append([[manifestpath], item])
                main_total_generate_dataset_size += getsize(manifestpath)

        # 5. Upload files, rename, and add to tracking list
        # NOTE: everything between the triple quotes below is a plain string expression,
        # so this step is not executed: nothing is uploaded or renamed, and
        # start_generate / main_initial_bfdataset_size are never assigned.
        '''
        main_initial_bfdataset_size = bf_dataset_size()
        start_generate = 1
        for item in list_upload_files:
            list_upload = item[0]
            bf_folder = item[1]
            list_projected_names = item[2]
            list_desired_names = item[3]
            list_final_names = item[4]
            tracking_folder = item[5]
            relative_path = item[6]

            #upload
            main_curate_progress_message = "Uploading files in " + str(relative_path)
            bf_folder.upload(*list_upload)
            #bf_folder.update()

            #rename to final name
            for index, projected_name in enumerate(list_projected_names):
                final_name = list_final_names[index]
                desired_name = list_desired_names[index]
                if final_name != projected_name:
                    bf_item_list = bf_folder.items
                    my_bf_existing_files, my_bf_existing_files_name, my_bf_existing_files_name_with_extension = bf_get_existing_files_details(bf_folder)
                    for item in my_bf_existing_files:
                        if item.name == projected_name:
                            item.name = final_name
                            item.update()
                            if "files" not in tracking_folder:
                                tracking_folder["files"] = {}
                            tracking_folder["files"][desired_name] = {"value": item}

        if list_upload_metadata_files:
            main_curate_progress_message = "Uploading metadata files in high-level dataset folder " + str(ds.name)
            ds.upload(*list_upload_metadata_files)

        if list_upload_manifest_files:
            for item in list_upload_manifest_files:
                manifest_file = item[0]
                bf_folder = item[1]
                main_curate_progress_message = "Uploading manifest file in " + str(bf_folder.name) + " folder"
                bf_folder.upload(*manifest_file)
                #bf_folder.update()
        shutil.rmtree(manifest_folder_path) if isdir(manifest_folder_path) else 0
        '''

    # Errors are not handled here; they are re-raised unchanged to the caller.
    except Exception as e:
        raise e

In [117]:
# Hand-written SODA JSON structure used to exercise the functions above.
# It mixes items already on Blackfynn ("type": "bf", "action": ["existing"]) with
# new local folders/files ("type": "local", "action": ["new"]).
# NOTE(review): the local paths are absolute paths on the author's machine.
sd = {
    "bf-account-selected":{"account-name":"calmilinux"},
    "bf-dataset-selected":{"dataset-name":"testddataset"},
    "dataset-structure":{
        "folders":{
            "code":{"type":"bf","action":["existing"],"path":"N:collection:cdeabbcd-47b6-4f0b-b500-56f5e67fc0b6",
                    "folders":{
                        "fhj":{"type":"bf","action":["existing"],"path":"N:collection:67f32258-459e-4db6-a884-859ab38b2efb",
                               "folders":{
                                   "png":{"type":"local","path":"/home/dev/Desktop/SODA/src/assets/app-icon/png",
                                          "folders":{},
                                          "files":{
                                              "icon.ico":{"path":"/home/dev/Desktop/SODA/src/assets/app-icon/png/icon.ico","description":"","additional-metadata":"","type":"local","action":["new"]},
                                              "soda_icon.png":{"path":"/home/dev/Desktop/SODA/src/assets/app-icon/png/soda_icon.png","description":"","additional-metadata":"","type":"local","action":["new"]}},
                                          "action":["new"]}},
                               "files":{}},},
                    "files":{
                        "test.odb":{"type":"bf","action":["existing"],"path":"N:package:7e2651f9-3124-4ade-aff1-9d36bf3c45e2","timestamp":"2020-12-21T23:28:11.934797Z"},
                        "soda_icon2.png":{"type":"bf","action":["existing"],"path":"N:package:ce7e3c3d-7111-4c4c-8a7c-ef1554dcaa9f","timestamp":"2020-12-21T18:41:52.399846Z"},
                        "soda_icon.png":{"type":"bf","action":["existing"],"path":"N:package:5294f17d-0de1-4023-9322-4eae7c60eff0","timestamp":"2020-12-21T18:41:51.311691Z"}}},
            "primary":{"type":"bf","action":["existing"],"path":"N:collection:95155ae6-df69-45f4-8493-0b988720c92a",
                       "folders":{
                           "png":{"type":"local","path":"/home/dev/Desktop/SODA/src/assets/app-icon/png",
                                  "folders":{},
                                  "files":{
                                      "icon.ico":{"path":"/home/dev/Desktop/SODA/src/assets/app-icon/png/icon.ico","description":"","additional-metadata":"","type":"local","action":["new"]},
                                      "soda_icon.png":{"path":"/home/dev/Desktop/SODA/src/assets/app-icon/png/soda_icon.png","description":"","additional-metadata":"","type":"local","action":["new"]}},
                                  "action":["new"]}},
                       "files":{}},
            "source":{"type":"bf","action":["existing"],"path":"N:collection:2994d497-37c0-41c9-b88a-1daff9a7c0bc",
                      "folders":{
                          "png":{"type":"bf","action":["existing"],"path":"N:collection:252325e6-ee96-4a79-9379-1b3536d33877",
                                 "folders":{},
                                 "files":{
                                     "soda_icon (1).png":{"type":"bf","action":["existing"],"path":"N:package:ed1ae9a2-5067-428a-9dde-65a76b5f2732","timestamp":"2020-12-21T19:16:17.28051Z"},
                                     "soda_icon2.png":{"type":"bf","action":["existing"],"path":"N:package:ab3029d9-31f0-49d5-9e37-97cd1066692a","timestamp":"2020-12-21T19:16:18.86915Z"},
                                     "icon.ico":{"type":"bf","action":["existing"],"path":"N:package:1d277d08-b129-4684-b611-1d33ddcdda2a","timestamp":"2020-12-21T19:16:18.124946Z"}}}},
                      "files":{}}}},
    "metadata-files":{
        "submission.csv":{"type":"bf","action":["existing"],"path":"N:package:3b504e70-58ca-4bfb-8c80-f214f32ccaa0"},
        "subjects.csv":{"type":"bf","action":["existing"],"path":"N:package:41295a11-4fe0-4d4d-95fb-26b50646f74b"},
        "dataset_description.csv":{"type":"bf","action":["existing"],"path":"N:package:69a01be0-42c3-4129-9898-4aa47734b4ea"}},
    "manifest-files":{},
    "generate-dataset":{},
    "starting-point":"bf"}
# sdmy: the dataset structure as requested locally.
# sdbf: the "dataset-structure" entry of the first element returned by
# bf_get_dataset_files_folders for the same selection.
# NOTE: dict.copy() is shallow, so the nested dicts are shared between each copy and
# its source (sd / sdmy / sdbf).
sdmy = sd["dataset-structure"]
sdbf = bf_get_dataset_files_folders(sd.copy())[0]["dataset-structure"]
mysd = sdmy.copy()
bfsd = sdbf.copy()
#path to local SODA folder for saving manifest files
# NOTE(review): userpath is not defined in this cell; it is expected to come from an
# earlier cell - confirm.
manifest_sparc = ["manifest.xlsx", "manifest.csv"]
manifest_folder_path = join(userpath, 'SODA', 'manifest_files')
# Starting value of the global size counter that bf_generate_new_dataset adds to.
main_total_generate_dataset_size = 10

In [118]:
# Generation settings for the test run: manifests go to Blackfynn, existing
# folders are merged into, and existing files are neither replaced nor skipped.
sd["manifest-files"] = {
    "destination": "bf",
}
sd["generate-dataset"] = {
    "destination": "bf",
    "if-existing": "merge",
    "if-existing-files": "create-duplicate",
}

# Connect with the test account profile and fetch the test dataset.
bf = Blackfynn("calmilinux")
myds = bf.get_dataset("testddataset")

In [119]:
# Run the generation with the structure, client and dataset set up in the previous cells.
bf_generate_new_dataset(soda_json_structure=sd, bf=bf, ds=myds)

{'path': '/home/dev/Desktop/SODA/src/assets/app-icon/png/icon.ico', 'description': '', 'additional-metadata': '', 'type': 'local', 'action': ['new']}
{'path': '/home/dev/Desktop/SODA/src/assets/app-icon/png/soda_icon.png', 'description': '', 'additional-metadata': '', 'type': 'local', 'action': ['new']}
{'path': '/home/dev/Desktop/SODA/src/assets/app-icon/png/icon.ico', 'description': '', 'additional-metadata': '', 'type': 'local', 'action': ['new']}
{'path': '/home/dev/Desktop/SODA/src/assets/app-icon/png/soda_icon.png', 'description': '', 'additional-metadata': '', 'type': 'local', 'action': ['new']}
[[['/home/dev/Desktop/SODA/src/assets/app-icon/png/icon.ico', '/home/dev/Desktop/SODA/src/assets/app-icon/png/soda_icon.png'], <Collection name='png' id='N:collection:3281ef66-20b0-4122-a768-d1eb45fa7b38'>, ['icon.ico', 'soda_icon'], ['icon', 'soda_icon'], ['icon', 'soda_icon'], {'value': <Collection name='png' id='N:collection:3281ef66-20b0-4122-a768-d1eb45fa7b38'>, 'folders': {}}, 'tes