In [1]:
%matplotlib inline
from redcap import Project
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
sns.set_context('notebook')

In [3]:
api_url = 'https://redcap.vanderbilt.edu/api/'

hospitalized_key = open("token.txt").read()
hospitalized_proj = Project(api_url, hospitalized_key)
hospitalized_raw = hospitalized_proj.export_records(format='df', 
                            df_kwargs={'index_col': hospitalized_proj.field_names[0]})

hospitalized_raw.to_csv("kawasaki.csv")
metadata = hospitalized_proj.export_metadata()

In [9]:
data = hospitalized_raw.copy()

data['race_new'] = 'Other'
data.loc[data.race==4, 'race_new'] = 'White'
data.loc[data.race==3, 'race_new'] = 'Black'
data.loc[data.race==1, 'race_new'] = 'Asian'
data.loc[data.race==8, 'race_new'] = 'Hispanic'

summary = (data.race_new.value_counts()/data.shape[0]).round(2)

summary['foo'] = 'bar'
summary = summary.drop('foo')
sexcount = data.sex.value_counts()
summary['Male:Female'] = float(sexcount.ix[1])/sexcount.ix[0]

def getage(td):
    tdr = repr(td)
    return int(tdr[tdr.index('\'')+1:tdr.find('days')])/365.25

dob = pd.to_datetime(data.dob)
adm = pd.to_datetime(data.date_admission)
age = adm-dob
age = age[age>np.timedelta64(0)]

medstr = repr(age.median())
summary['Median age'] = np.round(getage(age.median()),1)
summary['Age range'] = np.round(getage(age.min()),2), np.round(getage(age.max()), 2)
summary['Subjects'] = data.shape[0]

In [25]:
summary_df = pd.DataFrame(dict(value=summary, type='Demographic'))
summary_df

Unnamed: 0,type,value
White,Demographic,0.62
Black,Demographic,0.23
Other,Demographic,0.1
Asian,Demographic,0.04
Hispanic,Demographic,0.01
Male:Female,Demographic,1.97619
Median age,Demographic,2.8
Age range,Demographic,"(0.16, 14.06)"
Subjects,Demographic,375


In [11]:
cc = ['clinical_criteria___0','clinical_criteria___1','clinical_criteria___2','clinical_criteria___3','clinical_criteria___4']

clinical = data[cc].mean().round(2)
clinical.index = 'Fever','Rash','Conjunctivitis','Mucositis','Cervical node'

In [13]:
clinical

Fever             0.99
Rash              0.91
Conjunctivitis    0.93
Mucositis         0.90
Cervical node     0.28
dtype: float64

In [26]:
level_incomplete = data.level_incomplete.replace({1:'Meeting AHA lab criteria',
                              2:'Not meeting AHA criteria',
                              3:'Meeting AHA echo criteria',
                              4:'Meeting AHA lab and echo criteria'}).value_counts()
clinical = clinical.append(level_incomplete/data.level_incomplete.notnull().sum())
clinical_df = pd.DataFrame(dict(value=clinical, type='Clinical'))
clinical_df

Unnamed: 0,type,value
Fever,Clinical,0.99
Rash,Clinical,0.91
Conjunctivitis,Clinical,0.93
Mucositis,Clinical,0.9
Cervical node,Clinical,0.28
Not meeting AHA criteria,Clinical,0.5
Meeting AHA lab criteria,Clinical,0.25
Meeting AHA echo criteria,Clinical,0.15
Meeting AHA lab and echo criteria,Clinical,0.1
Not meeting AHA criteria,Clinical,0.5


In [31]:
ab = ['abnormality___1','abnormality___2','abnormality___3','abnormality___4']

abnormality = data[ab].mean().round(3)
abnormality.index = 'Ectasia','Aneurysm','Pericardial effusion','Mitral regurgitation'
abnormality['Aneurysm or ectasia'] = data[ab[:2]].sum(axis=1).astype(bool).mean().round(3)
abnormality_df = pd.DataFrame(dict(value=abnormality, type='Abnormal initial echo'))
abnormality_df

Unnamed: 0,type,value
Ectasia,Abnormal initial echo,0.197
Aneurysm,Abnormal initial echo,0.027
Pericardial effusion,Abnormal initial echo,0.163
Mitral regurgitation,Abnormal initial echo,0.192
Aneurysm or ectasia,Abnormal initial echo,0.203


In [37]:
clinical_course = pd.Series([])
clinical_course['Median days of fever before IVIG treatment'] = data.illness_day_at_rx.median()
clinical_course['Range days of fever before IVIG treatment'] = (data.illness_day_at_rx.min(),
                                                                data.illness_day_at_rx.max())
clinical_course

Median days of fever before IVIG treatment              6
Range days of fever before IVIG treatment     (2.0, 50.0)
dtype: object

In [49]:
ivig_within_10 = ((pd.to_datetime(data.kd_therapy_0) - adm) < np.timedelta64(10, 'D'))
clinical_course['IVIG within 10 days'] = ivig_within_10.dropna().mean()

In [55]:
clinical_course['Readmitted'] = data.readmission_date.notnull().mean().round(2)
clinical_course['KDSS'] = data.kdss.mean().round(2)

In [57]:
clinical_course_df = pd.DataFrame(dict(value=clinical_course, type='Clinical course'))

In [102]:
table = pd.concat([summary_df, clinical_df, abnormality_df, clinical_course_df])
table.index.name = 'variable'

In [105]:
value_series = table.value
value_series.index = [table.type, table.index]

In [107]:
pd.DataFrame(value_series)

Unnamed: 0_level_0,Unnamed: 1_level_0,value
type,variable,Unnamed: 2_level_1
Demographic,White,0.62
Demographic,Black,0.23
Demographic,Other,0.1
Demographic,Asian,0.04
Demographic,Hispanic,0.01
Demographic,Male:Female,1.97619
Demographic,Median age,2.8
Demographic,Age range,"(0.16, 14.06)"
Demographic,Subjects,375
Clinical,Fever,0.99
