# Register iNat observations in openBIS using pyBIS
Demo notebook that imports iNaturalist (iNat) observations from a CSV file into openBIS. The CSV file is the output generated by `inat_fetcher.py`.

In [None]:
import pandas as pd
import getpass, math
from datetime import datetime
from pybis import Openbis

### 1. Connect to openBIS

In [None]:
# --- openBIS connection -----------------------------------------------------
# Base URL of the openBIS server and the login name used on it.
server_url = 'https://openbis-dbgi.labnotebook.ch/'
user_name = 'hluetcke_admin'

# --- openBIS registration target --------------------------------------------
# Space, collection and sample type referenced by the later cells when
# querying existing samples and creating new ones.
space = 'MATERIALS'
collection = '/MATERIALS/SAMPLES/SAMPLES_EXP_4'
sample_type = 'OBSERVATION'

In [None]:
# Create the pyBIS client for the configured server.
# NOTE(review): certificate verification is switched off here — confirm this
# is really intended for this server.
o = Openbis(
    server_url,
    verify_certificates=False,
)

In [None]:
# Log in only when there is no active session yet; otherwise just report the
# session that is already available.
if not o.is_session_active():
    # Prompt for the password without echoing it.
    password = getpass.getpass()
    # save_token=True asks pyBIS to store the session token for later reuse.
    o.login(user_name, password, save_token=True)
    # Do not keep the clear-text password in the notebook namespace.
    del password
    print(f"Session is active: {o.is_session_active()} and token is {o.token}")
else:
    print(f"Session is active and token is {o.token}")

### 2. Read CSV file

In [None]:
# Load the observation table from the CSV file.
csv_file = '../data/out/test_inat_output_hl.csv'
df = pd.read_csv(csv_file)

# The CSV column names are the candidate property names handed to
# get_openbis_properties().
properties_csv = list(df.columns)

### 3. Register new samples
TODO: add an option for updating existing samples.

In [None]:
# Fetch the samples of this type in the collection together with their
# observation_id property; the registration loop uses them to detect
# observations that are already registered.
samples = o.get_samples(
    type=sample_type,
    collection=collection,
    props='observation_id',
)

In [None]:
def get_openbis_properties(o, row, properties, data_types=None):
    """Build the openBIS property dictionary for one CSV row.

    CSV column names are translated to openBIS property codes and each value
    is adjusted to the data type openBIS reports for that property.

    Args:
        o: openBIS connection; only ``o.get_property_type(code).dataType``
            is used.
        row: Mapping from CSV column name to value (e.g. a DataFrame row).
        properties: Iterable of CSV column names to transfer.
        data_types: Optional dict caching ``property code -> dataType``.
            Pass the same dict for every row so that each property type is
            looked up only once; when omitted, a fresh cache is used for
            this call only.

    Returns:
        dict mapping openBIS property codes to the values to register.
    """
    if data_types is None:
        data_types = {}

    # columns with these prefixes are not transferred to openBIS
    skipped_prefixes = ('observed_on_details', 'created_at_details')

    properties_ob = {}
    for prop in properties:
        val_ob = row[prop]

        # translate the CSV column name into the openBIS property code
        if prop == 'id':
            prop_ob = 'observation_id'
        elif prop.startswith(skipped_prefixes):
            continue
        elif prop.startswith('ofvs'):
            # of the 'ofvs' columns only ofvs.15466 is transferred
            if prop != 'ofvs.15466':
                continue
            prop_ob = 'emi_external_id'
        else:
            # dots in column names become underscores (no-op without dots)
            prop_ob = prop.replace('.', '_')

        if is_nan(val_ob):  # NaN causes error. This should be fixed in pyBIS.
            val_ob = 0

        # get the expected data type for the property in openBIS, asking the
        # server only when the code is not in the cache yet
        if prop_ob not in data_types:
            data_types[prop_ob] = o.get_property_type(prop_ob).dataType
        ob_dataType = data_types[prop_ob]

        if ob_dataType == 'HYPERLINK' and val_ob == 0:
            # a 0 here normally stems from the NaN replacement above; use a
            # placeholder URL instead
            val_ob = "http://123456"

        if ob_dataType == 'INTEGER':
            # Values that already are ints are kept untouched: a round trip
            # through float would corrupt integers above 2**53.
            if isinstance(val_ob, bool) or not isinstance(val_ob, int):
                val_ob = int(float(val_ob))

        properties_ob[prop_ob] = val_ob

        # the observation date is additionally stored as time_observed_at
        if prop == 'observed_on':
            properties_ob['time_observed_at'] = val_ob

    return properties_ob

In [None]:
def is_nan(x):
    """Return the result of ``x != x``.

    A float NaN is the usual value that compares unequal to itself, so this
    detects NaN without raising for non-numeric values such as strings.
    """
    differs_from_itself = x != x
    return differs_from_itself

In [None]:
# Register every CSV observation that is not in openBIS yet, stopping once
# max_new_samples samples have been registered in this run.
max_new_samples = 30

# Build the lookup of already registered observation ids once instead of
# scanning the whole sample table for every CSV row.
# NOTE(review): an empty result is treated as "nothing registered yet";
# confirm whether pyBIS provides the OBSERVATION_ID column in that case.
if samples.df.empty:
    known_ids = set()
else:
    known_ids = set(samples.df['OBSERVATION_ID'].astype(str))

registered_samples = 0
for index, row in df.iterrows():
    observation_id = str(row['id'])

    if observation_id in known_ids:
        print(f"{row['id']} - sample has been registered before")
        # option to update sample metadata
        continue

    print(f"{row['id']} - sample has not been registered before. registering now ...")

    properties_ob = get_openbis_properties(o, row, properties_csv)

    sample = o.new_sample(
        type       = sample_type,
        space      = space,
        experiment = collection,
        props      = properties_ob
    )
    sample.save()
    print('sample has been registered with code ' + sample.code)

    # Remember the id so that a duplicated CSV row is not registered twice.
    known_ids.add(observation_id)
    registered_samples += 1

    if registered_samples >= max_new_samples:
        break

print(f"Registered {registered_samples} samples.")