# Demo: using the DataLoader to upload KGTK files (\*.tsv) and annotated spreadsheets (\*.csv or \*.xlsx) to Datamart

## Current Support
---
`DataLoader` currently supports the following functionalities:
1. `template` ++ `[spreadsheet]+` => `annotated_spreadsheet` => `Datamart` => `None`
2. `annotated_spreadsheet` ++ `[yaml_file]?` => `Datamart` => **UNION**[`t2wml_output`, `exploded_kgtk`, `None`]
3. `exploded_kgtk` => `Datamart` => `None`

## Parameters

Users can inject their parameters through the command line. Currently the following parameters are supported:

In [None]:
# Parameters to be injected
# NOTE(review): presumably these defaults are overridden by a command-line
# notebook runner — confirm how the injection is wired up.

# Base URL of the Datamart API; passed as the first argument to every
# upload/erase/get call in this notebook.
datamart_api_url = 'http://localhost:12543'
# [optional]
# Dataset identifier. When left as None and both template_path and dataset_path
# are set, it is read from position [0, 1] of the template table. It is also
# passed to the tar upload and to the DEBUG variable checks.
dataset_id = None
# Forwarded to the upload helpers as `put_data`.
put_data = False
# When True, the variable metadata / data / trend cells at the end are executed.
DEBUG = False
# Credentials; an auth tuple is built only when both are truthy.
username = None
password = None
# When False, the DEBUG trend check stops after the first variable that is
# fetched and plotted without an exception.
TEST_ALL = False


# [params] combining template and data
# Both must be set for the bulk template + spreadsheet upload to run.
template_path = None
dataset_path = None
# [optional params]
# Passed positionally to upload.submit_sheet_bulk.
flag_combine_files = False
# When not None, the template read from template_path is saved to this path.
save_template_path = None
# Forwarded to the annotated-sheet uploads as `save_tsv`.
save_tsv_path = None
# Forwarded to the annotated-sheet uploads as `save_files`.
save_t2wml_path = None
# Forwarded to the annotated-sheet and tar uploads as `verbose`.
Verbose = False


# [params] submitting one annotated spreadsheet
# Path handed to upload.submit_annotated_sheet.
annotated_path = None
# Path handed to upload.submit_annotated_dir.
annotated_dir_path = None
# [optional params]
# Passed as the third positional argument of both annotated-sheet uploads.
yamlfile_path = None
# Forwarded as `wikifier_file`.
wikifier_path = None
# Forwarded as `extra_edges_file`.
extra_edges_path = None
# Forwarded as `validate`.
validate = True

# [params] submitting kgtk file
# Path handed to upload.submit_tsv.
tsv_path = None
# Path handed to upload.submit_tar (together with dataset_id).
tsv_tar_path = None


# [params] erase one dataset
# When truthy, this dataset is erased via erase.erase_dataset.
dataset_id_to_erase = None

In [None]:
# Parse authentication: credentials are used only when both parts are supplied.
auth = (username, password) if username and password else None

### Import python modules and utilities

In [None]:
from utils import spreadsheet, utility, upload, erase, template, get_, plot_
%matplotlib inline

### Delete a dataset if needed

In [None]:
# Erase the requested dataset; skipped when no dataset id was given.
# NOTE(review): unlike the upload/get calls in this notebook, no `auth` is
# passed here — confirm whether erase_dataset needs credentials.
if dataset_id_to_erase:
    erase.erase_dataset(datamart_api_url, dataset_id_to_erase)

### Build the annotated sheet, and add it to Datamart

In [None]:
# Running totals reported at the end of the notebook.
nfiles = nsheets = 0

# The bulk upload needs both a template and the spreadsheets to combine with it.
if template_path and dataset_path:
    # Fall back to the id stored at position [0, 1] of the template table.
    if dataset_id is None:
        dataset_id = utility.read_tsv(template_path).iat[0, 1]

    nfiles, nsheets = upload.submit_sheet_bulk(
        datamart_api_url,
        template_path,
        dataset_path,
        flag_combine_files,
        put_data,
        auth=auth,
    )

### If needed, save the template

In [None]:
# Save the annotation template only when a destination was requested. The
# template is re-read from template_path, so that must be set as well;
# otherwise report the problem instead of handing None to read_tsv.
if save_template_path is not None:
    if template_path:
        template.save_annotation_template(utility.read_tsv(template_path), save_template_path)
    else:
        print('Error: save_template_path is set but template_path is missing; template not saved.')

### Get the annotated sheet, and add it to Datamart

Returned files are saved to `save_tsv_path` or `save_t2wml_path` (passed to the upload call as `save_tsv` and `save_files`).

In [None]:
# Submit a single annotated spreadsheet; both counters advance only when a path
# was given and the upload call returns a truthy result.
if annotated_path and upload.submit_annotated_sheet(
    datamart_api_url,
    annotated_path,
    yamlfile_path,
    put_data=put_data,
    verbose=Verbose,
    save_tsv=save_tsv_path,
    save_files=save_t2wml_path,
    auth=auth,
    validate=validate,
    wikifier_file=wikifier_path,
    extra_edges_file=extra_edges_path,
):
    nsheets += 1
    nfiles += 1

In [None]:
# Hand the directory of annotated spreadsheets to the uploader. The call
# returns a (success, ct) pair: ct is added to both counters, while success is
# not consulted in this cell.
if annotated_dir_path:
    success, ct = upload.submit_annotated_dir(
        datamart_api_url,
        annotated_dir_path,
        yamlfile_path,
        put_data=put_data,
        verbose=Verbose,
        save_tsv=save_tsv_path,
        save_files=save_t2wml_path,
        auth=auth,
        validate=validate,
        wikifier_file=wikifier_path,
        extra_edges_file=extra_edges_path,
    )
    nsheets += ct
    nfiles += ct

### Get tsv file, and add it to Datamart

In [None]:
# Upload one kgtk tsv file; count it only when a path was given and the upload
# call returns a truthy result.
if tsv_path and upload.submit_tsv(
    datamart_api_url,
    tsv_path,
    put_data=put_data,
    auth=auth,
):
    nfiles += 1

In [None]:
# Upload a tar archive for dataset_id; count it only when a path was given and
# the upload call returns a truthy result.
if tsv_tar_path and upload.submit_tar(
    datamart_api_url,
    tsv_tar_path,
    dataset_id,
    put_data=put_data,
    verbose=Verbose,
    auth=auth,
):
    nfiles += 1

### Generate statistics for bug checking

In [None]:
# Report the running totals accumulated by the upload cells, one per line.
print(
    f'{nfiles} files processed.',
    f'{nsheets} sheets uploaded.',
    sep='\n',
)

### Check the state after uploading datasets

#### metadata

In [None]:
# Imported here because this is the first cell that calls display(); without
# the import the cell only works where the kernel pre-defines display.
from IPython.display import display

# Fetch the metadata from the Datamart API and show it.
metadata = get_.metadata(datamart_api_url, auth=auth)
display(metadata)

#### variable metadata, data, trends

In [None]:
from IPython.display import display

In [None]:
# Only in DEBUG mode and with a known dataset id: fetch the variable metadata
# for that dataset and show it. var_metadata is not assigned otherwise.
if DEBUG and dataset_id is not None:
    var_metadata = get_.variable_metadata(datamart_api_url, dataset_id, auth=auth)
    display(var_metadata)

In [None]:
# Debug-only spot check: fetch, show and plot the data of the variables listed
# in var_metadata['variable_id'].
# Guarded by the same condition as the cell that assigns var_metadata, so a
# missing dataset_id no longer surfaces as a caught NameError.
if DEBUG and dataset_id is not None:
    try:
        for variable_id in var_metadata['variable_id']:
            try:
                data = get_.variable_data(datamart_api_url, dataset_id, variable_id, auth=auth)
                display(data)

                plot_.trend_df(data, variable_id)
            except Exception as ex:
                # Best-effort: report the failure and try the next variable.
                print('Error:', ex)
            else:
                # One successfully checked variable is enough unless TEST_ALL
                # asks for every variable to be exercised.
                if not TEST_ALL:
                    break

    except Exception as e:
        # Failures outside the per-variable handling (e.g. reading the
        # 'variable_id' column) are reported rather than raised.
        print('Error:', e)