# Dataload

This notebook walks through how to load the prerequisite data into ESCALATE. It assumes that all database tables are completely empty to start.

In [None]:
import os
import sys
# Add the directory two levels above the current working directory to the
# import search path (only once, so re-running this cell does not add duplicates).
# NOTE(review): presumably this is where the local `escalateclient` module lives - confirm.
module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
import escalateclient
import importlib
import pandas as pd

In [None]:
# Reload the client module so that edits to it are picked up without a kernel restart
importlib.reload(escalateclient)

# Imported here so this cell stays self-contained
from getpass import getpass

# Connection settings.
# The password is never hardcoded: it is read from the ESCALATE_PASSWORD
# environment variable, falling back to an interactive prompt. This keeps the
# secret out of the saved notebook and out of version control.
# The server URL and username can be overridden through ESCALATE_SERVER_URL and
# ESCALATE_USERNAME; their defaults match the values previously used here.
server_url = os.environ.get('ESCALATE_SERVER_URL', 'http://localhost:8000')
# Alternative test server:
# server_url = 'https://escalate.cs.haverford.edu/test_server'
username = os.environ.get('ESCALATE_USERNAME', 'vshekar')
password = os.environ.get('ESCALATE_PASSWORD') or getpass('ESCALATE password: ')
client = escalateclient.ESCALATEClient(server_url, username, password)

# Users and Organizations

In [None]:
# Status descriptions to register
statuses = [
    "inactive",
    "active",
    "test",
    "do_not_use",
    "prototype",
]

# Make sure a status entry exists for each description
for status_description in statuses:
    status_response = client.get_or_create(
        endpoint='status',
        data={'description': status_description},
    )

# Fetch the "active" status; other cells reference it as active_status[0]['url']
active_status = client.get_or_create(
    endpoint='status',
    data={'description': 'active'},
)

In [None]:
# Organizations can be hierarchical: the parent is created first and the
# child then refers to it through its `parent` field.

# University-level organization
cu_info = dict(
    description="University of Colorado",
    full_name="University of Colorado at Boulder",
    short_name="CU",
    address1="Boulder, CO 80309",
    address2="",
    city="Boulder",
    state_province="CO",
    zip="80309",
    country="USA",
    phone='123456',
    website_url='www.colorado.edu',
)
cu_response = client.get_or_create(endpoint='organization', data=cu_info)

# Lab-level organization that sits inside the university
neilson_lab_data = dict(
    description='Neilson Lab',
    address1='Boulder, CO 80309',
    address2='',
    city='Boulder',
    state_province='CO',
    zip='80309',
    country='USA',
    phone='123456',
    full_name='Neilson Lab',
    short_name='NL',
    website_url='www.colorado.edu',
    # Pointing `parent` at the university's URL establishes the hierarchy
    parent=cu_response[0]['url'],
)
nl_response = client.get_or_create(endpoint='organization', data=neilson_lab_data)

In [None]:
# Register a person entry for James Neilson.
# Note: If a user account is created, a person entry is automatically created for that user

# Fields that are sent as empty strings for this entry (order is preserved)
_blank_person_fields = (
    "address1", "address2", "city", "state_province", "zip",
    "country", "phone", "email", "title", "suffix",
)

neilson_data = {
    "first_name": "James",
    "middle_name": "",
    "last_name": "Neilson",
    **dict.fromkeys(_blank_person_fields, ""),
    # Links the person to the University of Colorado organization record
    "organization": cu_response[0]['url'],
}

neilson_response = client.get_or_create('person', data=neilson_data)
# Look up the actor that refers to this person
neilson_actor = client.get(
    'actor',
    data={'person': neilson_response[0]['url']},
)

In [8]:
# People to add to the db: each entry is [first name, last name]
person_data = [
    ["Wesley", "Wang"],
    ["Isaac", "Chang"],
    ["Shekar", "V"],
    ["Nicole", "Smina"],
    ["Philip", "Nega"],
    ["Mansoor", "Nellikkal"],
    ["Ian", "Pendleton"],
    ["Minji", "Lee"],
    ["Mike", "Tynes"],
    ["Liana", "Alves"],
    ["Zhi", "Li"],
    ["Gary", "Cattabriga"],
    ["T", "Testuser"],
    ["Matt", "Castillo"],
    ["Joseph", "Kawamura"],
]

for first_name, last_name in person_data:
    # Add person data to db
    person_response = client.get_or_create(
        endpoint='person',
        data={'first_name': first_name, 'last_name': last_name},
    )

    # Create an actor for each person entry.
    # The actor refers to the person by URL (the same form used for the
    # James Neilson actor lookup), not by the whole response object.
    actor_response = client.get_or_create(
        endpoint="actor",
        data={'person': person_response[0]['url']},
    )
    

Wesley Wang
Isaac Chang
Shekar V
Nicole Smina
Philip Nega
Mansoor Nellikkal
Ian Pendleton
Minji Lee
Mike Tynes
Liana Alves
Zhi Li
Gary Cattabriga
T Testuser
Matt Castillo
Joseph Kawamura


In [None]:
# Organizations (e.g. labs) to add to the db: each entry is
# [description, full name, abbreviation]
org_data = [
    ["Cheminfomatics software", "ChemAxon", "ChemAxon"],
    ["Laboratory", "Emerald Cloud Lab", "ECL"],
    ["DBMS", "PostgreSQL", "postgres"],
    ["Chemical vendor", "Sigma-Aldrich", "Sigma-Aldrich"],
    ["Chemical vendor", "Greatcell Solar", "Greatcell"],
    ["Test Co", "TestCo", "TC"],
    ["Cheminfomatics software", "RDKit open source software", "RDKit"],
    ["Laboratory", "Norquist Lab", "NL"],
    ["College", "Haverford College", "HC"],
    ["Laboratory", "Lawrence Berkeley National Laboratory", "LBL"],
]

for description, full_name, short_name in org_data:
    # Add organization data to db
    org_response = client.get_or_create(
        endpoint='organization',
        data={'description': description, 'full_name': full_name, 'short_name': short_name},
    )

    # Create an actor for each organization entry.
    # The actor refers to the organization by URL (the `[0]['url']` form used
    # for every other cross-reference in this notebook), not by the whole response.
    actor_response = client.get_or_create(
        endpoint="actor",
        data={'organization': org_response[0]['url']},
    )

# Inventory

In [None]:
# Inventory details: James Neilson's actor fills every actor-valued role
neilson_actor_url = neilson_actor[0]['url']

nl_inventory = {
    # NOTE(review): spelled "Nielson" here but "Neilson" elsewhere; left as-is
    # because get_or_create matches on this exact text - confirm before changing.
    "description": "Nielson Lab Inventory",
    "status": active_status[0]['url'],  # active status
    "actor": neilson_actor_url,
    "owner": neilson_actor_url,         # James Neilson as owner
    "operator": neilson_actor_url,      # James Neilson as operator
    # NOTE(review): this is the person's actor, not an actor looked up for the
    # Neilson Lab organization - confirm this is the intended value for `lab`.
    "lab": neilson_actor_url,
}

# Create the inventory (or fetch it if it already exists)
nl_inventory_response = client.get_or_create(
    endpoint='inventory',
    data=nl_inventory,
)

# Materials

In [None]:
# Material identifier definitions to register
material_identifier_defs = [
    "SMIRKS",
    "Chemical_Name",
    "InChI",
    "SMILES",
    "Molecular_Formula",
    "SMARTS",
    "RInChI",
    "InChIKey",
    "Abbreviation",
]

# One get_or_create call per identifier definition
for identifier_description in material_identifier_defs:
    mid_response = client.get_or_create(
        endpoint='material-identifier-def',
        data={'description': identifier_description},
    )

# Material types to register
material_types = [
    "solvent",
    "antisolvent",
    "acid",
    "organic",
    "inorganic",
    "polymer",
]

# One get_or_create call per material type
for type_description in material_types:
    mt_response = client.get_or_create(
        endpoint='material-type',
        data={'description': type_description},
    )

In [None]:
# Material data: one row per chemical, with blanks normalised to empty strings
chem_list = pd.read_csv('Chemicals List.csv')
chem_list = chem_list.fillna('')
chem_list['Material type'] = chem_list['Material type'].str.replace('Gas', 'flux')

for i, row in chem_list.iterrows():
    chemical_name = row['Chemical Name']

    # A cell may list several comma-separated material types. Trim whitespace
    # around each name (so "solvent, acid" does not create a separate " acid"
    # type) and skip blanks (so an empty cell does not create a material type
    # with an empty description).
    # A dedicated name is used so the `material_types` list defined in the
    # previous cell is not overwritten.
    row_material_types = [
        name.strip()
        for name in row['Material type'].lower().split(',')
        if name.strip()
    ]

    # Get (or create) each material type from its own database table
    mt_responses = [
        client.get_or_create('material-type', data={'description': name})[0]
        for name in row_material_types
    ]

    # Add material to db
    material_data = {
        'description': chemical_name,
        'material_type': [mtr['url'] for mtr in mt_responses],
        'material_class': 'model',
    }
    material_response = client.get_or_create('material', data=material_data)

    # Add material to inventory; the inventory name is appended when present
    if row['Inventory Name']:
        description = f"{chemical_name} {row['Inventory Name']}"
    else:
        description = chemical_name
    im_data = {
        "description": description,
        "part_no": f"{row['CAS Num']}",
        "phase": row['Phase'].lower(),
        "inventory": nl_inventory_response[0]['url'],  # Associates inventory material with Neilson lab inventory
        "material": material_response[0]['url'],  # Associates inventory material with material
    }
    im_response = client.get_or_create('inventory-material', data=im_data)

# Vessels

In [None]:
# Vessel data, with blanks normalised to empty strings
vessels_list = pd.read_csv('load_opentrons_vessels.csv').fillna('')

for i, row in vessels_list.iterrows():
    vessel_name = row['description']

    # The volume is entered as a value field: the first two whitespace-separated
    # tokens are taken as value and unit. With fewer than two tokens no volume is sent.
    volume_tokens = row["total_volume"].split()
    total_volume = (
        {"value": volume_tokens[0], "unit": volume_tokens[1], "type": "num"}
        if len(volume_tokens) > 1
        else None
    )

    vessel_data = {
        'description': vessel_name,
        'total_volume': total_volume,
        'well_number': row["well_number"],
        'column_order': row["column_order"],
        'status': active_status[0]['url'],
    }
    # Add vessel data to db
    vessel_response = client.get_or_create('vessel', data=vessel_data)

# Experiment-related Definitions (Actions, Parameters, Properties)

### Properties

In [None]:
# Create default values (a zero quantity in each unit) for the property templates to point at
volume_value = {"value": 0, "unit": "ml", "type": "num"}
zero_ml_data = {"description": "Zero ml", "nominal_value": volume_value, "actual_value": volume_value,}
zero_ml_response = client.get_or_create(endpoint='default-values', data=zero_ml_data)

mass_value = {"value": 0, "unit": "g", "type": "num"}
zero_g_data = {"description": "Zero g", "nominal_value": mass_value, "actual_value": mass_value,}
zero_g_response = client.get_or_create(endpoint='default-values', data=zero_g_data)

concentration_value = {"value": 0, "unit": "M", "type": "num"}
zero_M_data = {"description": "Zero M", "nominal_value": concentration_value, "actual_value": concentration_value,}
zero_M_response = client.get_or_create(endpoint='default-values', data=zero_M_data)

# Extrinsic properties to create, mapped to the default-value response each one uses
property_data = {
    "total volume": zero_ml_response,
    "dead volume": zero_ml_response,
    "concentration": zero_M_response,
    "amount": zero_g_response,
}

# Create extrinsic properties.
# .items() is required here: iterating the dict directly yields only the keys,
# and unpacking a key string into (key, val) raises ValueError.
for key, val in property_data.items():
    prop_data = {
        "description": key,
        "property_def_class": "extrinsic",
        "short_description": key,
        "default_value": val[0]['url'],  # Associates property with default value
    }
    prop_response = client.get_or_create(endpoint='property-template', data=prop_data)

In [None]:
# Create default values (a zero quantity in each unit) for the property templates to point at
density_value = {"value": 0, "unit": "g/ml", "type": "num"}
zero_gml_data = {"description": "Zero g/ml", "nominal_value": density_value, "actual_value": density_value,}
zero_gml_response = client.get_or_create(endpoint='default-values', data=zero_gml_data)

mw_value = {"value": 0, "unit": "g/mol", "type": "num"}
zero_mw_data = {"description": "Zero g/mol", "nominal_value": mw_value, "actual_value": mw_value,}
zero_mw_response = client.get_or_create(endpoint='default-values', data=zero_mw_data)

# Intrinsic properties to create, mapped to the default-value response each one uses
property_data = {
    "density": zero_gml_response,
    "molecular weight": zero_mw_response,
}

# Create intrinsic properties.
# .items() is required here: iterating the dict directly yields only the keys,
# and unpacking a key string into (key, val) raises ValueError.
for key, val in property_data.items():
    prop_data = {
        "description": key,
        "property_def_class": "intrinsic",
        "short_description": key,
        "default_value": val[0]['url'],  # Associates property with default value
    }
    prop_response = client.get_or_create(endpoint='property-template', data=prop_data)

### Parameters

In [None]:
# Parameter definitions come from a tab-separated file; blanks become empty strings
params_list = pd.read_csv('parameter_def.csv', sep="\t").fillna('')

_param_columns = ("description", "type", "value", "unit", "required", "unit_type")

for i, row in params_list.iterrows():
    # Pull the needed cells out of the row in a fixed column order
    description, type_, raw_value, unit, required, unit_type = (
        row[column] for column in _param_columns
    )
    value_from_csv = str(raw_value)

    # NOTE(review): float() raises ValueError for a blank or non-numeric "value"
    # cell (blank cells are '' after fillna) - confirm the file never contains one.
    default_val = {'value': float(value_from_csv), 'unit': unit, 'type': type_}

    params_data = {
        "description": description,
        "default_val": default_val,
        "unit_type": unit_type,
        "required": required,
        "status": active_status[0]['url'],
    }
    params_response = client.get_or_create('parameter-def', data=params_data)

### Actions

In [None]:
# Action definitions come from a tab-separated file; blanks become empty strings
action_list = pd.read_csv('action_def.csv', sep="\t")
action_list = action_list.fillna('')


for i, row in action_list.iterrows():
    description = row["description"]
    synonym = row["synonym"]

    # The cell holds every parameter description for the action in one string.
    # Looping over that string directly would visit it one character at a time
    # and look up a parameter-def per character, so it is split into names first.
    # Blank entries are dropped, so an empty cell yields no parameters.
    # NOTE(review): assumes the names are comma-separated - confirm against action_def.csv.
    parameters = [
        name.strip()
        for name in str(row["parameter_def_descriptions"]).split(',')
        if name.strip()
    ]

    action_data = {
        "description": description,
        "synonym": synonym,
        "status": active_status[0]['url'],
        "parameter_def": [],
    }
    for x in parameters:
        param_response = client.get_or_create('parameter-def', data={'description': x})
        # Reference the parameter definition by URL, the same way material
        # types are referenced from materials, rather than by the whole response.
        action_data["parameter_def"].append(param_response[0]['url'])
    action_response = client.get_or_create('action-def', data=action_data)