# What

LOINC property is very critical in linking process. Component along with other details can have multiple LOINC ids for different properties.

There are multiple ways to handle unit > properties. 
1. Directly mapping unit > properties
2. Going from unit > UCUM > properties 

In this notebook we explore option 1, using example units provided by LOINC for different tests

In [14]:
import pandas as pd
import numpy as np
from collections import Counter

In [15]:
loinc_master_filename = '../../data/Loinc.csv'

In [16]:
loinc_data = pd.read_csv(loinc_master_filename)
loinc_data = loinc_data[loinc_data.STATUS=='ACTIVE']

  exec(code_obj, self.user_global_ns, self.user_ns)


In [17]:
loinc_data.head()

Unnamed: 0,LOINC_NUM,COMPONENT,PROPERTY,TIME_ASPCT,SYSTEM,SCALE_TYP,METHOD_TYP,CLASS,VersionLastChanged,CHNG_TYPE,...,COMMON_ORDER_RANK,COMMON_SI_TEST_RANK,HL7_ATTACHMENT_STRUCTURE,EXTERNAL_COPYRIGHT_LINK,PanelType,AskAtOrderEntry,AssociatedObservations,VersionFirstReleased,ValidHL7AttachmentRequest,DisplayName
0,10000-8,R wave duration.lead AVR,Time,Pt,Heart,Qn,EKG,EKG.MEAS,2.48,MIN,...,0,0,,,,,,1.0i,,
1,10001-6,R wave duration.lead I,Time,Pt,Heart,Qn,EKG,EKG.MEAS,2.48,MIN,...,0,0,,,,,,1.0i,,
2,10002-4,R wave duration.lead II,Time,Pt,Heart,Qn,EKG,EKG.MEAS,2.48,MIN,...,0,0,,,,,,1.0i,,
3,10003-2,R wave duration.lead III,Time,Pt,Heart,Qn,EKG,EKG.MEAS,2.48,MIN,...,0,0,,,,,,1.0i,,
4,10004-0,R wave duration.lead V1,Time,Pt,Heart,Qn,EKG,EKG.MEAS,2.48,MIN,...,0,0,,,,,,1.0i,,


In [64]:
property_units = []
for ind, row in loinc_data[loinc_data.EXAMPLE_UCUM_UNITS.notna()|loinc_data.EXAMPLE_UNITS.notna()].iterrows():
    units = []
    if str(row.EXAMPLE_UCUM_UNITS) != 'nan':
        units.extend(row.EXAMPLE_UCUM_UNITS.split(';'))
    if str(row.EXAMPLE_UNITS) != 'nan':
        units.extend(row.EXAMPLE_UNITS.split(';'))
    if str(row.EXAMPLE_SI_UCUM_UNITS) != 'nan':
        units.extend(row.EXAMPLE_SI_UCUM_UNITS.split(';'))        
    for u in np.unique(units):
        property_units.append(dict(property=row.PROPERTY, unit=u.lower().strip()))

In [66]:
property_units = pd.DataFrame(property_units)

In [39]:
units = Counter([x.strip() for x in "||".join(property_units[property_units.property=='NCnc'].unit.values).split("||")])

In [40]:
units

Counter({'10*3/ul': 419,
         '10*6/ml': 13,
         '/l': 7,
         '10*9/l': 195,
         '/ml': 29,
         '/ul': 318,
         '10*6/l': 82,
         '/mm3': 9,
         '10*6/ul': 11,
         'wbcs/ul': 1,
         'leu/ul': 1,
         'rbcs/ul': 1,
         'ery/ul': 1,
         'copies/ml': 205,
         '{copies}/ml': 207,
         '10*6 eq/ml': 2,
         '10*6{viral equivalents}/ml': 2,
         'cells/ul': 70,
         '10*3/mm3': 18,
         '10*12/l': 12,
         '10*3': 1,
         'copies/ul': 2,
         '{copies}/ul': 2,
         '10*9/ul': 4,
         '10*3/l': 3,
         'copies': 1,
         '{copies}': 1,
         'copies/l': 2,
         'ctcs/7.5 ml': 3,
         '{ctcs}/(7.5.ml)': 3,
         '#/ml': 15,
         '{#}/ml': 16,
         '{cells}/ul': 44,
         'cfu/ml': 1,
         'cells/mcl': 1})

In [41]:
loinc_data[(loinc_data.PROPERTY=='Ratio')&(loinc_data.EXAMPLE_UNITS=='ug/mmol')].EXAMPLE_UNITS

91262    ug/mmol
Name: EXAMPLE_UNITS, dtype: object

In [23]:
loinc_data[loinc_data.COMPONENT=='Lutropin/Creatinine'].EXAMPLE_UNITS

20874    IU/g creatinine
26624    IU/g creatinine
Name: EXAMPLE_UNITS, dtype: object

In [24]:
loinc_data.STATUS.value_counts()

ACTIVE    87583
Name: STATUS, dtype: int64

# Computing probability of unitt > property

In [67]:
vc1 = property_units.groupby('unit').property.value_counts()
vc2 = property_units.groupby('unit').property.count()

In [68]:
vc1.sort_values()

unit             property
                 Len            1
nmol/mg protein  Ratio          1
g/4 h            MRat           1
nmol/hr/ml       CCnc           1
nmol/h/g{hb}     CCnt           1
                             ... 
ug/ml            MCnc        2000
ng/ml            MCnc        2570
titer            Titr        2769
{titer}          Titr        2771
[arb'u]/ml       ACnc        2829
Name: property, Length: 1411, dtype: int64

In [69]:
vc = vc1/vc2

In [70]:
vc.to_csv('unit_to_prop_prob.csv')

In [62]:
!pwd

/Users/naad/Work/Orbi/code/Lo-INcly/notebooks/exps
