# Register iNat observations in openBIS using pyBIS
Demo notebook to import iNat observation in CSV file into openBIS. The CSV file is output generated by `inat_fetcher.py`.

In [1]:
import pandas as pd
import getpass, math
from datetime import datetime
from pybis import Openbis

### 1. Connect to openBIS

In [2]:
server_url = 'https://openbis-dbgi.labnotebook.ch/' # URL of the openBIS server
user_name = 'hluetcke_admin' # user login for openBIS server

# further openBIS specific info
sample_type = 'OBSERVATION'
space = 'MATERIALS'
collection = '/MATERIALS/SAMPLES/SAMPLES_EXP_5'

In [3]:
o = Openbis(server_url, verify_certificates=False)

In [None]:
if o.is_session_active():
    print(f"Session is active and token is {o.token}")
else:
    password = getpass.getpass()
    o.login(user_name, password, save_token=True)   # save the session token in ~/.pybis/example.com.token
    del password
    print(f"Session is active: {o.is_session_active()} and token is {o.token}")

### 2. Read CSV file

In [5]:
csv_file = '../data/out/test_inat_output_hl_short.csv'

df = pd.read_csv(csv_file)

properties_csv = df.columns.tolist()

### 3. Register new samples
Todo: option for updating existing samples

In [6]:
def get_openbis_properties(o, row, properties):
    # get properties for openBIS sample
    properties_ob = {}
    for prop in properties:
        val_ob = row[prop]
        prop_ob = prop
        if prop == 'id':
            prop_ob = 'observation_id'
        elif prop.startswith('observed_on_details') or prop.startswith('created_at_details'):
            continue
        elif prop.startswith('ofvs'):
            if prop == 'ofvs.15466':
                prop_ob = 'emi_external_id'
                prop = 'emi_external_id' 
            else:
                continue
        elif "." in prop:
            prop_ob = prop.replace('.','_')
    
        if is_nan(val_ob): # NaN causes error. This should be fixed in pyBIS.
            val_ob = 0

        # get expected data type for the property in openBIS
        # TODO: this should happen only once, not for every sample!
        ob_dataType = o.get_property_type(prop_ob).dataType

        if ob_dataType == 'HYPERLINK' and val_ob == 0:
            val_ob = None

        if ob_dataType == 'INTEGER':
            val_ob = int(float(val_ob))
         
        properties_ob[prop_ob] = val_ob

        if prop == 'observed_on':
            properties_ob['time_observed_at'] = val_ob

    return properties_ob

In [7]:
def is_nan(x):
    return (x != x)

In [10]:
def check_if_sample_registered(row, samples):
    # check if a sample with observation_id has been registered before
    if samples.df.shape[0] > 0:
        tf = samples.df['OBSERVATION_ID'] == str(row['id'])
        tf = tf.any()
    else:
        tf = False

    return tf

In [14]:
samples = o.get_samples(type=sample_type, collection=collection, props='observation_id')
break_tf = False
registered_samples = 0
updated_samples = 0
for index, row in df.iterrows():

    tf = check_if_sample_registered(row, samples)
    
    if tf:
        print(f"{row['id']} - sample has been registered before")
        # option to update sample metadata
        break
        updated_samples = updated_samples + 1
    else:
        print(f"{row['id']} - sample has not been registered before. registering now ...")

        properties_ob = get_openbis_properties(o, row, properties_csv)

        sample = o.new_sample(
            type       = sample_type,
            space      = space,
            experiment = collection,
            props      = properties_ob
        )
        sample.save()
        print('sample has been registered with code ' + sample.code)
        registered_samples = registered_samples + 1

        # if registered_samples >= 20:
        #     break_tf = True
    
    if break_tf:
        break

print(f"Registered {registered_samples} samples.")
print(f"Updated {updated_samples} samples.")

109442071 - sample has been registered before
Registered 0 samples.
Updated 0 samples.


In [15]:
samples.df

Unnamed: 0,permId,identifier,registrationDate,modificationDate,type,registrator,modifier,OBSERVATION_ID
0,20231130144435315-1260,/MATERIALS/SAMPLES/O59,2023-11-30 14:44:35,2023-11-30 14:44:35,OBSERVATION,hluetcke_admin,hluetcke_admin,109442071
1,20231130144436455-1261,/MATERIALS/SAMPLES/O60,2023-11-30 14:44:36,2023-11-30 14:44:36,OBSERVATION,hluetcke_admin,hluetcke_admin,110560140
2,20231130144436810-1262,/MATERIALS/SAMPLES/O61,2023-11-30 14:44:37,2023-11-30 14:44:37,OBSERVATION,hluetcke_admin,hluetcke_admin,110561024
3,20231130144437270-1263,/MATERIALS/SAMPLES/O62,2023-11-30 14:44:37,2023-11-30 14:44:37,OBSERVATION,hluetcke_admin,hluetcke_admin,112079178
4,20231130144438265-1264,/MATERIALS/SAMPLES/O63,2023-11-30 14:44:38,2023-11-30 14:44:38,OBSERVATION,hluetcke_admin,hluetcke_admin,112079181
5,20231130144438777-1265,/MATERIALS/SAMPLES/O64,2023-11-30 14:44:39,2023-11-30 14:44:39,OBSERVATION,hluetcke_admin,hluetcke_admin,112079182
6,20231130144439211-1266,/MATERIALS/SAMPLES/O65,2023-11-30 14:44:39,2023-11-30 14:44:39,OBSERVATION,hluetcke_admin,hluetcke_admin,117770383
7,20231130144439633-1267,/MATERIALS/SAMPLES/O66,2023-11-30 14:44:40,2023-11-30 14:44:40,OBSERVATION,hluetcke_admin,hluetcke_admin,117770414
8,20231130144440124-1268,/MATERIALS/SAMPLES/O67,2023-11-30 14:44:40,2023-11-30 14:44:40,OBSERVATION,hluetcke_admin,hluetcke_admin,117770416
9,20231130144440506-1269,/MATERIALS/SAMPLES/O68,2023-11-30 14:44:41,2023-11-30 14:44:41,OBSERVATION,hluetcke_admin,hluetcke_admin,117770434


In [26]:
type(row['id'])

int

In [34]:
# get openBIS permId to retrieve the sample
permId = samples.df.loc[samples.df['OBSERVATION_ID'] == str(row['id']), 'permId'].iloc[0]
sample_ob = o.get_sample(permId)

In [36]:
sample_ob.props

property,value,description,type,mandatory
observation_id,109442071,inaturalist observation ID,INTEGER,False
quality_grade,casual,quality grade,VARCHAR,False
time_observed_at,2022-03-25 10:33:03 +0100,time of observation,TIMESTAMP,False
taxon_geoprivacy,0,taxon geoprivacy,VARCHAR,False
annotations,[],annotations,VARCHAR,False
uuid,71517a11-d415-4ea3-9ff6-1cc54a839811,uuid,MULTILINE_VARCHAR,False
cached_votes_total,0,cached votes total,INTEGER,False
identifications_most_agree,True,identifications most agree,BOOLEAN,False
species_guess,Euphorbia resinifera,name of species guess,VARCHAR,False
identifications_most_disagree,False,identifications most disagree,BOOLEAN,False
