In [None]:
# Ignore irritating pandas warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import numpy as np
import sweetviz as sv
from redcap import Project
from tableone import TableOne
# Project root on the shared drive. The trailing slash matters: paths below
# are built by plain string concatenation.
wd = '/Volumes/PEDS/RI Biostatistics Core/Shared/Shared Projects/Laura/BDC/'\
    'Projects/Laurel Messer/SEEDS/'
# Read the REDCap API token as a plain string. pd.read_table() would return a
# DataFrame, which is not a valid token to hand to Project().
# NOTE(review): assumes the token is on the first line of the file - confirm.
with open(wd + 'Data_Raw/api_token.txt') as token_file:
    api = token_file.readline().strip()
api_url = 'https://redcap.ucdenver.edu/api/'
project = Project(api_url, api)
# Export every record using the label form of each field (raw_or_label='label').
df = pd.DataFrame(project.export_records(raw_or_label='label'))
# Treat empty strings as missing values; reassign rather than mutate in place
# so re-running the cell always starts from the fresh export.
df = df.replace('', np.nan)


In [None]:
# Data cleaning
# Real records only. Take an explicit copy so the assignments below act on an
# independent frame instead of a view of the raw export (avoids
# SettingWithCopyWarning and stays correct under pandas copy-on-write).
df = df[df['real_record'] == 'Real'].copy()
# Re-file free-text "other" specialties under an existing specialty label.
df.loc[df['otherspecialty'] == 'Pediatric Diabetology',
       'specialty'] = 'Pediatric Endocrinology'
df.loc[df['otherspecialty'] == 'Adult Gerontology Primary Care',
       'specialty'] = 'Primary care-family medicine'
# Specialty and provider type
# Collapse every primary-care flavour into a single 'Primary Care' level.
# This must run after the re-filing above, which can produce
# 'Primary care-family medicine'.
primary_care_labels = ['Primary care-family medicine', 'Primary care-internal medicine',
                       'Primary care-pediatrics', 'Primary care-med/peds']
df['specialty'] = df['specialty'].replace(
    dict.fromkeys(primary_care_labels, 'Primary Care'))
# Map each provider type to prescriber status.
prescribe_replace = dict.fromkeys(['Physician (MD, DO or equivalent)',
                                   'Advance practice professional (Physician assistant, nurse practitioner)', 'Endocrinology fellow'], 'Prescriber')
# dict.update() stores keyword arguments as extra keys, so no `inplace`
# argument here: it would add a bogus {'inplace': True} entry to the map.
prescribe_replace.update(dict.fromkeys(
    ['Diabetes care and education specialist (diabetes educator, nurse, dietitian, other)', 'Medical resident'], 'Non-Prescriber'))
# Assign the result back instead of calling replace(inplace=True) on the
# column selection, which is not guaranteed to write through to df.
df['providertype'] = df['providertype'].replace(prescribe_replace)
# Drop 'Other' providers
df = df.loc[df['specialty'] != 'Other', :]


# Table 1: Participant Characteristics


In [None]:
# Columns that TableOne should summarise as categorical.
cat_vars = [
    'gender', 'race', 'providertype', 'specialty', 'practicesetting',
    'medicare', 'medicaid', 'privateins', 'uninsured',
]
# Full column list for Table 1: the remaining (non-categorical) columns first,
# followed by the categorical ones.
t1_vars = [
    'age', 'practiceyears', 'cgmpatientratio', 'pumppatientratio',
    'penpatientratio', 'hclpatientratio', 'educationtime',
    *cat_vars,
]
# Stratify the table by specialty.
group = 'specialty'
# dip_test / normal_test / tukey_test switch on tableone's optional
# distribution checks (see the tableone documentation for what each reports).
t1 = TableOne(
    data=df,
    columns=t1_vars,
    categorical=cat_vars,
    groupby=group,
    dip_test=True,
    normal_test=True,
    tukey_test=True,
)
# Bare last expression so the notebook renders the table.
t1

# Exploratory data analysis

In [None]:
# Sweetviz report over the whole cleaned dataset, written as HTML under
# <wd>Reports/.
report = sv.analyze(df)
report.show_html(
    filepath=wd + 'Reports/seeds_eda_all.html'
)

# Same dataset split by prescriber status. The boolean mask is True only for
# rows labelled exactly 'Prescriber'; every other row (including any
# provider type that was not recoded) falls on the other side of the split.
compare = sv.compare_intra(
    df,
    df['providertype'].eq('Prescriber'),
    ['Prescriber', 'Non-Prescriber'],
)
compare.show_html(
    filepath=wd + 'Reports/seeds_eda_by_prescriber.html'
)