# Validate your dataset

In [1]:
from IPython.display import Markdown, display
import ipywidgets as widgets # load library for interactive widgets (drop-down lists, button, etc.)
from ipywidgets import HBox, Label, Layout
from IPython.display import clear_output # clear output if you click on button several times
import os
from os import listdir, walk
from os.path import isfile, join
import pandas as pd

### Define some useful functions

# Print with style (bold etc.)
def printmd(string):
    """Show ``string`` in the notebook output, rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)

# Dropdown list
def mydropdownlist(listoptions):
    """Return an enabled dropdown offering ``listoptions``.

    The first entry of ``listoptions`` is pre-selected, so the sequence
    must contain at least one item.
    """
    initial_choice = listoptions[0]
    return widgets.Dropdown(options=listoptions, value=initial_choice, disabled=False)

# Button
def mybutton(mydescription):
    """Return an enabled button labelled ``mydescription`` with no button style."""
    settings = {
        'description': mydescription,
        'disabled': False,
        'button_style': '',  # 'success', 'info', 'warning', 'danger' or ''
    }
    return widgets.Button(**settings)

# Textbox
def mytextbox(placeholder):
    """Return an enabled text input showing ``placeholder`` as its hint.

    The widget's layout width is set to 60%.
    """
    box_layout = Layout(width='60%')
    return widgets.Text(placeholder=placeholder, disabled=False, layout=box_layout)

### Data type selection box

# List of validators currently included in the program, based on the Excel
# workbooks found in the "Validators" folder of the current working directory.
datatypelist = ['Select']
current_folder = os.getcwd()
validators_folder = r'Validators'
validators_path = os.path.join(current_folder, validators_folder)  # full path = main folder/Validators
# Only ".xlsx" files are offered: the check step opens "<selected name>.xlsx",
# so any other file in the folder would be selectable but could never be loaded.
# Names starting with '~' are temporary Excel files and are discarded.
# Sorted because os.listdir returns entries in arbitrary order.
validators_names = sorted(
    os.path.splitext(f)[0]  # validator name = file name without extension
    for f in os.listdir(validators_path)
    if os.path.isfile(os.path.join(validators_path, f))
    and f.endswith('.xlsx')
    and not f.startswith('~')
)
datatypelist = datatypelist + validators_names  # dropdown list options

# Dropdown for choosing the data type
select_datatype = mydropdownlist(datatypelist)

# Text box where the dataset folder location is typed
folderlocation_textbox = mytextbox(r'e.g. C:\User\Desktop\DOPEv1.1\examples\validators\Computational model dataset')

# Button that triggers the dataset check
checkdataset_button = mybutton('Check dataset')

# Pair each input with its label, then show both rows followed by the button
selection_box = HBox([Label('Select your data type:'), select_datatype])
folderlocation_box = HBox([Label('Specify the path to your dataset folder:'), folderlocation_textbox])
for widget_row in (selection_box, folderlocation_box, checkdataset_button):
    display(widget_row)

### Check datatype
listfoldername = r'Validation'  # not referenced in this cell; kept in case other cells read it
# Output area that captures whatever the click handler prints or displays
output = widgets.Output()


@output.capture()
def on_button_clicked(b):
    """Check the dataset folder against the selected validator's expected entries.

    Reads the ``Expected files`` column of ``<validators_path>/<data type>.xlsx``
    and prints, for each entry, whether a file or folder of that name sits
    directly inside the folder typed in the path text box.

    Args:
        b: The clicked button, as passed by the ``on_click`` callback; unused.
    """
    clear_output()
    if select_datatype.value == 'Select':
        printmd('**Please select data type**')
        return

    printmd('**Checking the files in your dataset**')

    # Check minimum file/folder requirement is met
    validationfile = os.path.join(validators_path, select_datatype.value + '.xlsx')
    df = pd.read_excel(validationfile)
    # Blank spreadsheet cells are read as NaN; drop them so they are not
    # reported as missing files.
    expectedfiles = df['Expected files'].dropna()

    # Leading/trailing whitespace is a common copy/paste artefact in typed paths
    mypath = folderlocation_textbox.value.strip()
    try:
        # Names of the files and folders located directly inside the dataset folder
        present = {
            entry for entry in listdir(mypath)
            if isfile(join(mypath, entry)) or os.path.isdir(join(mypath, entry))
        }
    except (OSError, ValueError):
        # Only path problems (missing folder, not a folder, no permission,
        # malformed path) are reported here; any other error is left to surface.
        clear_output()
        print('Dataset folder could not be located')
        return

    for myfilename in expectedfiles:
        status = 'Checked' if myfilename in present else 'Missing!'
        print(myfilename, ': ' + status)

    # TODO: check manifest.xlsx is included in each non-empty folder and filled
    #       out properly (if an empty folder is found, generate a warning)
    # TODO: check submission.xlsx is filled out properly
    # TODO: check dataset_description.xlsx is filled out properly


checkdataset_button.on_click(on_button_clicked)
display(output)

HBox(children=(Label(value='Select your data type:'), Dropdown(options=('Select', 'Minimum SPARC requirements'…

HBox(children=(Label(value='Specify the path to your dataset folder:'), Text(value='', layout=Layout(width='60…

Button(description='Check dataset', style=ButtonStyle())

Output()