In [1]:
# Print the version of the Python interpreter running this kernel.
import platform
print (platform.python_version())

3.5.3


In [None]:
""" 
Straightforward table converter to convert Excel tables into ontology files. 
See the inline documentation in the notebook.

7-19-18:
1. Start with Chris' i2b2 Hierarchy View
2. Last column can optionally be comments
3. File is "*i2b2 Hierarchy View.xlsx"
4. By default process all sheets in a file
5. There will be a "ready for i2b2" folder
"""

In [40]:
# Import and set paths
import glob
import os
from urllib.parse import quote

import keyring
import numpy as np
import pandas as pd

# basepath: folder holding the input Excel workbooks; outpath: folder that receives the generated CSV files.
# NOTE(review): both are absolute paths on one user's machine - adjust them before running elsewhere.
basepath="/Users/jeffklann/Dropbox (Partners HealthCare)/CONCERN All Team Work/Data Elements/Data Structures/Ready for i2b2 Hierarchy Views/"
outpath="/Users/jeffklann/Dropbox (Partners HealthCare)/CONCERN All Team Work/Data Elements/Data Structures/i2b2-ontologies/"
# The database password is read from the system keyring instead of being stored in the notebook.
# NOTE(review): keyring presumably returns None when no entry exists - confirm and handle before connecting.
password = keyring.get_password(service_name='db.concern_phs',username='concern_user') # You need to previously have set it with set_password

In [41]:
# Connect to SQL for persistence
%load_ext sql
connect = "mssql+pymssql://concern_user:%s@phssql2193.partners.org/CONCERN_DEV?charset=utf8" % password
%sql $connect
%sql USE CONCERN_DEV

import sqlalchemy
engine = sqlalchemy.create_engine(connect)

The sql extension is already loaded. To reload it, use:
  %reload_ext sql
 * mssql+pymssql://concern_user:***@phssql2193.partners.org/CONCERN_DEV?charset=utf8
Done.


In [29]:
# (Re)create the target ontology table
sql = """
CREATE TABLE [dbo].[autoprocessed_i2b2ontology]  ( 
    [index]                 int NOT NULL,
	[C_HLEVEL]          	int NOT NULL,
	[C_FULLNAME]        	varchar(4000) NOT NULL,
	[C_NAME]            	varchar(2000) NOT NULL,
	[C_SYNONYM_CD]      	char(1) NOT NULL,
	[C_VISUALATTRIBUTES]	char(3) NOT NULL,
	[C_TOTALNUM]        	int NULL,
	[C_BASECODE]        	varchar(250) NULL,
	[C_METADATAXML]     	varchar(max) NULL,
	[C_FACTTABLECOLUMN] 	varchar(50) NOT NULL,
	[C_TABLENAME]       	varchar(50) NOT NULL,
	[C_COLUMNNAME]      	varchar(50) NOT NULL,
	[C_COLUMNDATATYPE]  	varchar(50) NOT NULL,
	[C_OPERATOR]        	varchar(10) NOT NULL,
	[C_DIMCODE]         	varchar(700) NOT NULL,
	[C_COMMENT]         	varchar(max) NULL,
	[C_TOOLTIP]         	varchar(900) NULL,
	[M_APPLIED_PATH]    	varchar(700) NOT NULL,
	[UPDATE_DATE]       	datetime NULL,
	[DOWNLOAD_DATE]     	datetime NULL,
	[IMPORT_DATE]       	datetime NULL,
	[SOURCESYSTEM_CD]   	varchar(50) NULL,
	[VALUETYPE_CD]      	varchar(50) NULL,
	[M_EXCLUSION_CD]    	varchar(25) NULL,
	[C_PATH]            	varchar(300) NULL,
	[C_SYMBOL]          	varchar(100) NULL 
	)
ON [PRIMARY]
	TEXTIMAGE_ON [PRIMARY]
	WITH (
		DATA_COMPRESSION = NONE
	)
"""
# Drop the table only when it exists, so this cell also works on a fresh database
# (the previous unconditional "drop table" raised on the first run).
drop_if_present = ("IF OBJECT_ID('dbo.autoprocessed_i2b2ontology', 'U') IS NOT NULL "
                   "DROP TABLE dbo.autoprocessed_i2b2ontology")
# engine.begin() commits both statements together on success and rolls back on error.
# An explicit transaction is required because the conditional drop does not start with DROP.
with engine.begin() as conn:
    conn.execute(sqlalchemy.text(drop_if_present))
    conn.execute(sqlalchemy.text(sql))

<sqlalchemy.engine.result.ResultProxy at 0x117103080>

In [30]:
""" Input a df with columns (minimally): Name, Code, [Ancestor_Code]*
     Will add additional columns: tooltip, h_level, fullname 
     
     Derived from ontology_gen_flowsheet.py
     """
def OntProcess(rootName, df):
    """Compute i2b2 path information for a hierarchy table.

    Args:
        rootName: Backslash-separated root path (for example 'CONCERN\\Notes') that is
            prefixed to every row's path and also becomes an extra root row.
        df: Table whose first column is the display name ('Name') and whose remaining
            columns are 'Code' followed by zero or more ancestor-code columns
            (nearest ancestor first). The caller's frame is not modified.

    Returns:
        A new DataFrame holding the input rows plus one root row, with the added columns
        'fullname' (backslash-delimited path), 'tooltip', 'path', 'h_level'
        (number of backslashes in 'fullname' minus one) and 'has_children'
        (negative when the row sits above the deepest level, i.e. is a folder).
    """
    # Work on a copy: the caller's frame (often a dropna() result) must not gain columns.
    df = df.copy()
    df['fullname'] = ''
    df['tooltip'] = ''
    df['path'] = ''
    df['h_level'] = np.nan
    df['has_children'] = 0
    # Count the code/ancestor columns now, before the root row is added.
    n_levels = len(df.columns[1:-5])
    df = doNonrecursive(df)
    # Empty ancestor levels leave leading separators behind; trim them (and stray colons).
    df['fullname'] = df['fullname'].str.strip(':\\')
    df['fullname'] = '\\' + rootName + '\\' + df['fullname'].map(str) + "\\"
    # Add root node. pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    root_label = rootName.replace('\\', ' ')
    root_row = {'fullname': '\\' + rootName + '\\', 'Name': root_label, 'Code': 'toplevel|' + root_label}
    # Aligning both frames on one column list keeps the column order on every pandas version.
    all_cols = list(df.columns) + [c for c in root_row if c not in df.columns]
    df = pd.concat([df.reindex(columns=all_cols), pd.DataFrame([root_row], columns=all_cols)],
                   ignore_index=True)
    df['h_level'] = df['fullname'].str.count('\\\\') - 1
    # Depth contributed by the root itself; this was hard-coded as 2 (i.e. 'CONCERN\\<name>').
    root_depth = rootName.count('\\') + 1
    df['has_children'] = df['h_level'] - n_levels - root_depth
    return df

def _levelToText(value):
    """Render one code/ancestor cell as path text.

    Strings pass through, missing values become '', whole numbers lose the '.0'
    that appears when a column holding blanks is read as float, and other floats
    keep their digits instead of being rounded.
    """
    if isinstance(value, str):
        return value
    if pd.isna(value):
        return ''
    if isinstance(value, float) and not value.is_integer():
        return str(value)
    return "{:.0f}".format(value)

def doNonrecursive(df):
    """Append every code/ancestor column to df['fullname'], outermost ancestor first.

    Expects the five helper columns added by OntProcess to be the last five columns;
    the columns between the first one and those five are the hierarchy levels.
    Modifies df in place and returns it.
    """
    cols = df.columns[1:-5][::-1]  # hierarchy columns, reversed so the farthest ancestor comes first
    print(cols)
    for col in cols:
        level_text = df[col].map(_levelToText).astype(str)
        df['fullname'] = df['fullname'].str.cat(level_text, sep='\\', na_rep='')
    return df

""" Input a df with (minimally): Name, Code, [Ancestor_Code]*, fullname, path, h_level
       Outputs an i2b2 ontology compatible df. 
        """
def OntBuild(df):
    """Convert a processed hierarchy table into an i2b2-ontology-shaped DataFrame.

    Reads the columns 'h_level', 'fullname', 'has_children', 'Name', 'path' and 'Code'
    from df and returns a new frame with the lower-case i2b2 column names, in a fixed order.
    """
    concept_path = df['fullname']
    concept_code = df['Code']  # Assume here leafs are unique, not dependent on parent code (unlike flowsheets)
    # Rows flagged with a negative has_children are folders, everything else is a leaf.
    node_kind = df['has_children'].apply(lambda flag: 'FAE' if flag<0 else 'LAE')

    # (column name, value) pairs in output order; scalars are broadcast to every row.
    column_spec = [
        ('c_hlevel', df['h_level']),
        ('c_fullname', concept_path),
        ('c_visualattributes', node_kind),
        ('c_name', df['Name']),
        ('c_path', df['path']),
        ('c_basecode', concept_code),
        ('c_symbol', concept_code),
        ('c_synonym_cd', 'N'),
        ('c_facttablecolumn', 'concept_cd'),
        ('c_tablename', 'concept_dimension'),
        ('c_columnname', 'concept_path'),
        ('c_columndatatype', 'T'),  # this is not the tval/nval switch
        ('c_totalnum', ''),
        ('c_operator', 'LIKE'),
        ('c_dimcode', concept_path),
        ('c_comment', None),
        ('c_tooltip', concept_path),  # Tooltip right now is just the fullname again
        ('m_applied_path', '@'),
    ]
    odf = pd.DataFrame()
    for column_name, column_value in column_spec:
        odf[column_name] = column_value
    return odf

In [36]:
# Main loop to process all files in a directory, export to csv, and upload the concatenated version to a database
dfs = []
# Sorted so the row order of the output is reproducible (glob order is otherwise arbitrary).
for f in sorted(glob.glob(basepath+"*.xlsx")): # the old place, multi-directory - now all in one dir"**/*i2b2 Hierarchy View*.xlsx"):
    dfd = pd.read_excel(f,sheet_name=None)
    # Skip workbooks without 'Sheet1'. Previously `df` kept the previous file's data in this case
    # (or was undefined on the first file) and was processed again under the wrong name.
    if 'Sheet1' not in dfd:
        print("Skipping (no 'Sheet1' sheet): "+f)
        continue
    df = dfd['Sheet1'].dropna(axis='columns',how='all')
    if len(df.columns)<=1:
        continue
    print(f)
    # Short name = file name up to the "i2b2" marker; fall back to the name without its extension
    # when the marker is absent (find() returning -1 used to chop off the last character).
    shortf = os.path.basename(f)
    marker = shortf.find("i2b2")
    shortf = (shortf[:marker] if marker>=0 else os.path.splitext(shortf)[0]).strip(' ')
    df = df.rename(columns={'Code (concept_CD/inpatient note type CD)':'Code'}) # Hack bc one file has wrong col name
    df = df.drop(['Definition','definition'],axis=1,errors='ignore') # Drop occasional definition columns
    print(df.columns)
    df = OntProcess('CONCERN\\'+shortf,df)
    ndf = OntBuild(df)
    dfs.append(ndf)
    #ndf.to_csv(outpath+shortf+"_autoprocessed.csv")
outdf = pd.concat(dfs,ignore_index=True)
# Add the overall CONCERN root node. pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# the row is built on outdf's columns so the column order is kept and absent fields stay empty.
root_row = {'c_hlevel':1,'c_fullname':'\\CONCERN\\','c_name':'CONCERN Root','c_basecode':'.dummy','c_visualattributes':'CAE','c_synonym_cd':'N','c_facttablecolumn':'concept_cd','c_tablename':'concept_dimension','c_columnname':'concept_path','c_columndatatype':'T','c_operator':'LIKE','c_dimcode':'\\CONCERN\\','m_applied_path':'@'}
outdf = pd.concat([outdf, pd.DataFrame([root_row],columns=outdf.columns)],ignore_index=True)
outdf.to_csv(outpath+"autoprocessed_i2b2ontology.csv")
engine.execute("delete from autoprocessed_i2b2ontology") # if we use SQLMagic in the same cell as SQLAlchemy, it seems to hang
outdf.to_sql('autoprocessed_i2b2ontology',con=engine,if_exists='append')

/Users/jeffklann/Dropbox (Partners HealthCare)/CONCERN All Team Work/Data Elements/Data Structures/Ready for i2b2 Hierarchy Views/Notes i2b2 Hierarchy View.xlsx
Index(['Name', 'Code'], dtype='object')
Index(['Code'], dtype='object')
/Users/jeffklann/Dropbox (Partners HealthCare)/CONCERN All Team Work/Data Elements/Data Structures/Ready for i2b2 Hierarchy Views/Outcome Rapid Response and Cardiopulmonary Arrest i2b2 Hierarchy View-5-30-2018.xlsx
Index(['Name', 'Code', 'Parent', 'Grandparent'], dtype='object')
Index(['Grandparent', 'Parent', 'Code'], dtype='object')
/Users/jeffklann/Dropbox (Partners HealthCare)/CONCERN All Team Work/Data Elements/Data Structures/Ready for i2b2 Hierarchy Views/Outcome Readmission i2b2 Hierarchy View.xlsx
Index(['Name', 'Code', 'Parent'], dtype='object')
Index(['Parent', 'Code'], dtype='object')
/Users/jeffklann/Dropbox (Partners HealthCare)/CONCERN All Team Work/Data Elements/Data Structures/Ready for i2b2 Hierarchy Views/Outcome Sepsis i2b2 Hierarchy Vie

In [None]:
# Special hacked code for the weird ADT table file format
# Each sheet of the ADT workbook is processed into its own ontology CSV.
dfs = []
dfd=pd.read_excel(basepath+"ADT/ADTEventHierarchy AND LocationHierarchy for Each site i2b2 June 21 2018_update.xlsx",
                  sheet_name=None)
for k,v in dfd.items():
    # Short name = first two space-separated words of the sheet name joined with '_'.
    # The previous find()-based slice dropped the last character of names with fewer than two spaces.
    shortf='_'.join(k.split(' ')[:2])
    print(shortf)
    df=v.dropna(axis='columns',how='all')
    df = df.drop(['C_TOOLTIP','c_tooltip'],axis=1,errors='ignore')
    print(df.columns)
    df = OntProcess('CONCERN\\'+shortf,df)
    ndf = OntBuild(df)
    dfs.append(ndf)
    ndf.to_csv(outpath+shortf+"_autoprocessed.csv")
#tname = 'out_'+shortf
#globals()[tname]=ndf
#%sql DROP TABLE $tname
#%sql PERSIST $tname

# End of main code...
------------------------

In [None]:
# Example of persisting table with SQL Magic
# Builds a two-row demo DataFrame named `zoop`, then issues DROP TABLE and PERSIST through the %sql magic.
# NOTE(review): PERSIST presumably uploads the DataFrame variable whose name is given - confirm against the
# installed ipython-sql version. The DROP presumably errors when the table does not exist yet.
testdict={"animal":["dog",'cat'],'size':[30,15]}
zoop = pd.DataFrame(testdict)
tname = 'zoop'
%sql DROP TABLE $tname
%sql PERSIST $tname

In [25]:
# Inspect the uploaded ontology table (selects every row and column).
%sql SELECT * from autoprocessed_i2b2ontology
#engine.execute("SELECT * FROM autoprocessed_i2b2ontology").fetchall()

 * mssql+pymssql://concern_user:***@phssql2193.partners.org/CONCERN_DEV?charset=utf8
Done.


index,C_HLEVEL,C_FULLNAME,C_NAME,C_SYNONYM_CD,C_VISUALATTRIBUTES,C_TOTALNUM,C_BASECODE,C_METADATAXML,C_FACTTABLECOLUMN,C_TABLENAME,C_COLUMNNAME,C_COLUMNDATATYPE,C_OPERATOR,C_DIMCODE,C_COMMENT,C_TOOLTIP,M_APPLIED_PATH,UPDATE_DATE,DOWNLOAD_DATE,IMPORT_DATE,SOURCESYSTEM_CD,VALUETYPE_CD,M_EXCLUSION_CD,C_PATH,C_SYMBOL
0,3,\CONCERN\Notes\NOTE:28\,Assessment & Plan Note,N,LAE,0.0,NOTE:28,,concept_cd,concept_dimension,concept_path,T,LIKE,\CONCERN\Notes\NOTE:28\,,\CONCERN\Notes\NOTE:28\,@,,,,,,,,NOTE:28
1,3,\CONCERN\Notes\NOTE:1000076\,Family Meeting,N,LAE,0.0,NOTE:1000076,,concept_cd,concept_dimension,concept_path,T,LIKE,\CONCERN\Notes\NOTE:1000076\,,\CONCERN\Notes\NOTE:1000076\,@,,,,,,,,NOTE:1000076
2,3,\CONCERN\Notes\NOTE:4\,H&P,N,LAE,0.0,NOTE:4,,concept_cd,concept_dimension,concept_path,T,LIKE,\CONCERN\Notes\NOTE:4\,,\CONCERN\Notes\NOTE:4\,@,,,,,,,,NOTE:4
3,3,\CONCERN\Notes\NOTE:70\,Nursing Note,N,LAE,0.0,NOTE:70,,concept_cd,concept_dimension,concept_path,T,LIKE,\CONCERN\Notes\NOTE:70\,,\CONCERN\Notes\NOTE:70\,@,,,,,,,,NOTE:70
4,3,\CONCERN\Notes\NOTE:1000015\,Nursing Summary,N,LAE,0.0,NOTE:1000015,,concept_cd,concept_dimension,concept_path,T,LIKE,\CONCERN\Notes\NOTE:1000015\,,\CONCERN\Notes\NOTE:1000015\,@,,,,,,,,NOTE:1000015
5,3,\CONCERN\Notes\NOTE:1000001\,Plan of Care,N,LAE,0.0,NOTE:1000001,,concept_cd,concept_dimension,concept_path,T,LIKE,\CONCERN\Notes\NOTE:1000001\,,\CONCERN\Notes\NOTE:1000001\,@,,,,,,,,NOTE:1000001
6,3,\CONCERN\Notes\NOTE:3\,Procedures,N,LAE,0.0,NOTE:3,,concept_cd,concept_dimension,concept_path,T,LIKE,\CONCERN\Notes\NOTE:3\,,\CONCERN\Notes\NOTE:3\,@,,,,,,,,NOTE:3
7,3,\CONCERN\Notes\NOTE:1\,Progress Notes,N,LAE,0.0,NOTE:1,,concept_cd,concept_dimension,concept_path,T,LIKE,\CONCERN\Notes\NOTE:1\,,\CONCERN\Notes\NOTE:1\,@,,,,,,,,NOTE:1
8,3,\CONCERN\Notes\NOTE:100004\,Rapid Response Documentation,N,LAE,0.0,NOTE:100004,,concept_cd,concept_dimension,concept_path,T,LIKE,\CONCERN\Notes\NOTE:100004\,,\CONCERN\Notes\NOTE:100004\,@,,,,,,,,NOTE:100004
9,3,\CONCERN\Notes\NOTE:1000007\,Significant Event,N,LAE,0.0,NOTE:1000007,,concept_cd,concept_dimension,concept_path,T,LIKE,\CONCERN\Notes\NOTE:1000007\,,\CONCERN\Notes\NOTE:1000007\,@,,,,,,,,NOTE:1000007
