Import the library used by the functions below, in case it has not been imported already

In [None]:
import os

Define constants used in this exercise

In [None]:
# name of the DAT CORE dataset used to run this demo
# change this to match the name of the dataset that you are using
DATCORE_DATASET = 'SPARC December 2018 Presentation'

In [None]:
# Base folder for everything this package reads and writes: the parent of the
# current working directory.
# NOTE(review): the original comment calls this "the folder where you saved this
# notebook together with all the others in SPARC 201812 package" - confirm that
# the notebooks are run from a sub-folder of that location.
BASE_PATH = os.path.join(os.getcwd(), os.pardir)

# Folder names
# - DATASET_FOLDER  : where the test dataset is located; used by the
#                     "SPARC_201812_create_test_dataset" notebook to create the test dataset
# - DOWNLOAD_FOLDER : where we will save the files downloaded from DAT CORE
DATASET_FOLDER = 'SPARC_local_test_dataset'
DOWNLOAD_FOLDER = 'SPARC_dataset_download'

# Full paths to the two folders above.
# If you do not change them, both folders are created directly under BASE_PATH.
DATASET_PATH = os.path.join(BASE_PATH, DATASET_FOLDER)
DOWNLOAD_PATH = os.path.join(BASE_PATH, DOWNLOAD_FOLDER)

In [None]:
# names of the models that we will create on DAT CORE
#
# subject model name
DC_MODEL_SUBJECT = 'Subject'
# emg channels model name
DC_MODEL_EMG = 'Emg'

In [3]:
# names and descriptions of the relationships
#
# subject -> emg relationship (going by the constant names)
DC_SUBJECT_TO_EMG_NAME = 'has_emg_channel'
DC_SUBJECT_TO_EMG_DESCRIPTION = 'The subject has emg channel data recording'
# subject -> data file relationship (going by the constant names)
DC_SUBJECT_TO_FILE_NAME = 'has_data_file'
DC_SUBJECT_TO_FILE_DESCRIPTION = 'The subject has this data file associated'

In [4]:
# image file names
# These are the images shown in the notebooks throughout the presentation.
# Paths are relative ('../images/...').
DC_IMAGE_FILES_1 = '../images/dataset_files_uploaded_1.png'
# This line used to assign DC_IMAGE_FILES_1 a second time, which discarded the
# path above and left DC_IMAGE_FILES_2 undefined.
DC_IMAGE_FILES_2 = '../images/dataset_files_uploaded_2.png'
DC_IMAGE_FILES_3 = '../images/files_downloaded.png'
DC_IMAGE_MODEL_1 = '../images/first_model_created.png'
DC_IMAGE_MODEL_2 = '../images/first_model_fields.png'
DC_IMAGE_RECORDS_1 = '../images/first_model_records.png'
DC_IMAGE_RECORDS_2 = '../images/emg_model_records.png'
DC_IMAGE_RELATIONSHIPS = '../images/records_relationships.png'
DC_IMAGE_RELATIONSHIPS_1 = '../images/records_relationships_1.png'
DC_IMAGE_RELATIONSHIPS_2 = '../images/records_relationships_2.png'
DC_IMAGE_RELATIONSHIPS_3 = '../images/records_relationships_3.png'

In [None]:
# constants used in code and functions
#
# number of spaces used for each indentation level by the prettyShow* functions
PRETTY_INDENTATION = 4

Define useful functions

In [None]:
def createDirIfNeeded(path):
    '''
    Make sure that a directory exists at path, creating it together with any
    missing parent folders when it is not there yet.

    Input
    - path = directory to check / create

    Output
    - True when path is a directory once the function is done
    '''
    if os.path.isdir(path):
        # already there, nothing to create
        return True
    os.makedirs(path)
    # report whether the directory is actually there now
    return os.path.isdir(path)

In [None]:
def prettyShowFolderTree(path):
    '''
    Print the folder tree rooted at path, one entry per line.

    Folders are printed with a trailing '/', files with their bare name. Each
    nesting level is indented by PRETTY_INDENTATION spaces. Inside a folder the
    files are listed first, then its sub-folders, both in alphabetical order.

    Input
    - path = folder to show; a trailing path separator is accepted
    '''
    for root, dirs, files in os.walk(path):
        # sorting in place makes os.walk visit the sub-folders alphabetically,
        # so the output is the same on every run
        dirs.sort()
        # Depth is taken from the path relative to the top folder. The previous
        # root.replace(path, '') removed every occurrence of path (not only the
        # leading one) and lost one level when path ended with a separator.
        relative = os.path.relpath(root, path)
        level = 0 if relative == os.curdir else relative.count(os.sep) + 1
        indent = ' ' * PRETTY_INDENTATION * level
        # normpath drops a trailing separator, which would make basename return ''
        print('{}{}/'.format(indent, os.path.basename(os.path.normpath(root))))
        subindent = ' ' * PRETTY_INDENTATION * (level + 1)
        for f in sorted(files):
            print('{}{}'.format(subindent, f))

In [None]:
def prettyShowDatCoreDatasetFiles(hDC,level=0):
    '''
    Print a well formatted representation of all the folders and files saved on
    a DAT CORE dataset, descending recursively into every folder.

    Each entry of hDC.items is printed as "<type> = <name>":
    - Collection  : printed with its name, then listed recursively one level deeper
    - DataPackage : printed with the comma separated "<name>.<type>" of its files,
                    followed by one "File" line per file showing the file name and
                    the last segment of its s3_key
    - anything else is printed as "Unknown" with an empty name

    Input
    - hDC   = handle to DAT CORE dataset (or, while recursing, a Collection);
              must expose .name and .items
    - level = the level of the current folder; the banner is printed only at level 0

    NOTE(review): Collection and DataPackage are not imported in the visible
    cells - they must already be in scope when this function is called.
    '''
    if level == 0:
        print('--------------------------------------------')
        print('DAT CORE')
        print('Dataset : {}'.format(hDC.name))
        print('Collections(Folders) and DataPackages(Files)')
        print('--------------------------------------------')
    for entry in hDC.items:
        # classify the entry once; a Collection is never treated as a DataPackage
        isFolder = isinstance(entry,Collection)
        isPackage = (not isFolder) and isinstance(entry,DataPackage)
        if isFolder:
            kind = 'Collection'
            label = entry.name
        elif isPackage:
            packageFiles = entry.files
            kind = 'DataPackages' if len(packageFiles)>1 else 'DataPackage'
            label = ', '.join(['.'.join([member.name,member.type.lower()]) for member in packageFiles])
        else:
            kind = 'Unknown'
            label = ''
        # line describing this entry
        entryIndent = ' '*PRETTY_INDENTATION*(1+level)
        print('{:s}{:10s} = {:s}'.format(entryIndent,kind,label))
        if isFolder:
            # folders are listed recursively, one level deeper
            prettyShowDatCoreDatasetFiles(entry,level+1)
        elif isPackage:
            # files of a package are shown one level below the package itself
            fileIndent = ' '*PRETTY_INDENTATION*(2+level)
            for member in entry.files:
                storedName = member.s3_key.split('/')[-1]
                print('{:s}{:10s} = {:s} ({:s})'.format(fileIndent,'File',member.name,storedName))

In [None]:
def prettyShowDatCoreDatasetModels(hDC):
    '''
    Print a summary of the models and relationships defined on a DAT CORE dataset.

    For every model: its name, the number of records and the display name of
    each property in its schema. For every relationship: its name.

    Input
    - hDC = handle to DAT CORE dataset; must expose .name, .models() and
            .relationships(), the last two returning dictionaries keyed by name
    '''
    print('--------------------------------------------')
    print('DAT CORE')
    print('Dataset : {}'.format(hDC.name))
    print('Models, and Records')
    print('--------------------------------------------')
    # Query the handle that was passed in. The previous code read a global named
    # dcDataset here, so it failed (or reported a different dataset than the one
    # named in the banner) unless the caller happened to use that exact name.
    for mName, mObject in hDC.models().items():
        # build the schema once instead of calling as_dict() twice per model
        schema = mObject.as_dict()['schema']
        print('{:s} Model        : {:s}'.format(' '*PRETTY_INDENTATION,mName))
        print('{:s} Records #    : {:d}'.format(' '*PRETTY_INDENTATION*2,mObject.count))
        print('{:s} Properties   : {:d}'.format(' '*PRETTY_INDENTATION*2,len(schema)))
        for prop in schema:
            print('{:s} {:s}'.format(' '*PRETTY_INDENTATION*4,prop['displayName']))
    print('--------------------------------------------')
    print('Relationships')
    print('--------------------------------------------')
    # only the relationship names are shown
    for rName in hDC.relationships():
        print('{:s} Relationship : {:s}'.format(' '*PRETTY_INDENTATION,rName))
        

In [None]:
def schemaReport(schema):
    '''
    Print one line per schema entry with its presence count, the number of
    types recorded for it and the list of those types.

    Input
    - schema = dictionary mapping each key to a dictionary holding
               'presence' (integer) and 'types' (sized collection)
    '''
    rowTemplate = "{:>30} => Presence : {:d} , Types : {:d} , Types list : {}"
    for fieldName,fieldInfo in schema.items():
        presence = fieldInfo['presence']
        typeList = fieldInfo['types']
        print(rowTemplate.format(fieldName,presence,len(typeList),typeList))

In [1]:
def schemaReportSimplified(schema):
    '''
    Print one line per schema entry with its presence count and the first of
    the types recorded for it.

    Input
    - schema = dictionary mapping each key to a dictionary holding
               'presence' (integer) and 'types' (non-empty sequence)
    '''
    rowTemplate = "{:>30} => Presence : {:d} , Type : {}"
    for fieldName,fieldInfo in schema.items():
        presence = fieldInfo['presence']
        firstType = fieldInfo['types'][0]
        print(rowTemplate.format(fieldName,presence,firstType))