In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
# Load data into a pandas dataframe
df = pd.read_csv("~/Data/all_subspecialty_geocoded_230315.csv")

In [None]:
data =df.loc[((df["timepoint"]==2021)|
            (df["timepoint"]==2022)) &
           (df["insurance_cat2"]!="Other") &
            (df["race_ethnic_cat2"]!= "Other") &
            (df["encounter_status_name"]!= "Canceled")]

In [None]:
# Display the filtered cohort (the notebook renders a truncated preview)
data

In [None]:
# Number of rows for each timepoint value remaining after the filter
data["timepoint"].value_counts()

In [None]:
# Number of rows for each month_quarter label; these labels are recoded to an
# ordinal column in the next cell
data["month_quarter"].value_counts()

In [None]:
# Ordinal code for each quarter label (1 = "Jan to Mar 2021" ... 6 = "Apr to Jun 2022").
# Any label not listed here, including a missing one, is coded 0.
quarter_order = {
    "Jan to Mar 2021": 1,
    "Apr to Jun 2021": 2,
    "Jul to Sep 2021": 3,
    "Oct to Dec 2021": 4,
    "Jan to Mar 2022": 5,
    "Apr to Jun 2022": 6,
}
data["month_quarter_ord"] = (
    data["month_quarter"].map(quarter_order).fillna(0).astype("int64")
)
data["month_quarter_ord"].value_counts()

In [None]:
data["New_Patient"].value_counts()

In [None]:
data["visit_weekDiff_cat"] = data.apply(lambda x: 0 if x["New_Patient"] == 1
                                                          else (
                                                          1 if x["visit_weekDiff"] < 3 else
                                                          (2 if x["visit_weekDiff"]< 12 else (3))),axis=1)
data["visit_weekDiff_cat"].value_counts()

In [None]:
data["noshow"] = data["encounter_status_name"].apply(lambda x: 1 if x == "No Show" 
                                                          else 0)
data["noshow"].value_counts()

In [None]:
data["encounter_status_name"].value_counts()

In [None]:
data["Household Median Income"].describe()

In [None]:
data["Median_Income_Cat"] = data["Household Median Income"].apply(lambda x: 1 if(x<50000)
                                                                                      else (2 if x<=100000
                                                                                            else (3)))
data["Median_Income_Cat"].value_counts()

In [None]:
# Share of rows in each income category, computed from the data rather than from
# counts copied by hand out of an earlier output (which go stale when the cohort
# or the category definition changes).
data["Median_Income_Cat"].value_counts(normalize=True)

In [None]:
data["insurance_cat2"].value_counts()

In [None]:
data['insurance_cat2'].mask(data['insurance_cat2'] == 'Medicare Advantage', "Medicare", inplace=True)
data["insurance_cat2"].value_counts()

In [None]:
# Age at the earliest visit within each ir_id group (presumably one group per
# patient - confirm). Age cannot decrease from one visit to the next, so the
# group minimum is the age at the first visit whatever order the rows are in.
# transform("first") returned the age from whichever row happened to come first
# in the CSV, and nothing in this notebook sorts the rows by visit date.
data["age_at_firstVisit"] = data.groupby("ir_id")["age_at_visit"].transform("min")
data[["ir_id", "age_at_visit", "age_at_firstVisit"]]

In [None]:
# Age bands for age at first visit.
# right=False makes the bins left-closed - [0, 40), [40, 65), [65, 150) - so that
# they agree with the labels. With the default right-closed bins a 40-year-old was
# labelled '18-39' and a 65-year-old was labelled '40-64'.
# NOTE(review): the lowest bin starts at 0, so any age below 18 is also labelled
# '18-39' - confirm the cohort contains adults only.
age_cut_labels = ['18-39', '40-64', '65+']
age_cut_bins = [0, 40, 65, 150]
data['age_firstVisit_cat'] = pd.cut(
    data['age_at_firstVisit'],
    bins=age_cut_bins,
    labels=age_cut_labels,
    right=False,
)
data['age_firstVisit_cat'].value_counts(dropna=False)

In [None]:
data["Distance"].describe()

In [None]:
distance_cut_labels = [1,2,3,4]
distance_cut_bins = [0,3.152,6.137,12.327,350]
data['distance_cat'] = pd.cut(data['Distance'], bins=distance_cut_bins, labels=distance_cut_labels)
data['distance_cat'].value_counts(dropna=False)

In [None]:
data["Charlson_cat"].value_counts()

In [None]:
#combining 1-2 as one category
data["Charlson_cat"].mask(data['Charlson_cat'] == '2', "1", inplace=True)
data["Charlson_cat"].value_counts()

In [None]:
# Recode Charlson_cat to integers: "0" -> 0, "1" -> 1, anything else -> 2.
# The integer keys make this cell safe to re-run. Previously a second execution
# compared the already-recoded integers with the strings "0" and "1", matched
# nothing, and turned every row into 2.
# NOTE(review): a missing Charlson_cat is also coded 2, as before - confirm that
# this is intended.
charlson_codes = {"0": 0, "1": 1, 0: 0, 1: 1}
data["Charlson_cat"] = (
    data["Charlson_cat"].map(charlson_codes).fillna(2).astype("int64")
)
data["Charlson_cat"].value_counts()

# basic stats for the paper

In [None]:
#determining basic stats for the paper
print(data["ir_id"].nunique())

In [None]:
data.shape

In [None]:
data[data["is_telehealth"]==True].shape

In [None]:
246014/2791924*100

In [None]:
data[data["is_telehealth"]==False].shape

In [None]:
#data.to_csv("~/Data/tableone_data_230813.csv")

In [None]:
data["encounter_status_name"].value_counts()

In [None]:
1888808/2791924*100

In [None]:
818826/2791924*100

In [None]:
84290/2791924*100

In [None]:
# Row counts per gender value.
# NOTE(review): `data` appears to hold one row per visit, so this counts visits
# rather than distinct ir_id values - confirm which the paper reports.
data["gender"].value_counts()

In [None]:
# Same counts expressed as proportions of all rows
data["gender"].value_counts(normalize=True)

In [None]:
# Mean of age_at_firstVisit over all rows.
# NOTE(review): the value is repeated on every row of an ir_id, so ids with more
# rows carry more weight - confirm whether a per-id mean was intended.
data["age_at_firstVisit"].mean()

In [None]:
# Standard deviation of age_at_firstVisit over all rows (same caveat as the mean)
data["age_at_firstVisit"].std()

In [None]:
# List every column name in the cohort frame
data.columns.values

In [None]:
# Binomial family for the 0/1 noshow outcome (statsmodels uses the logit link by
# default, which is why coefficients are exponentiated to odds ratios below)
fam = sm.families.Binomial()

In [None]:
# Independence working correlation structure; this single instance is passed as
# cov_struct to every GEE model in the notebook
ind = sm.cov_struct.Independence()

# mod8: multivariable GEE with insurance-by-telehealth and race/ethnicity-by-telehealth interactions

In [None]:
mod8 = smf.gee("noshow ~ C(provider_specialty_cat,Treatment('Primary Care')) + distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)+ C(Charlson_cat)+C(gender)+C(month_quarter) + C(insurance_cat2,Treatment('Commercial')):C(is_telehealth) + C(race_ethnic_cat2,Treatment('Non-Hispanic White')):C(is_telehealth)", "ir_id", data, cov_struct=ind, family=fam)


In [None]:
res8 = mod8.fit()

In [None]:
print(res8.summary())

In [None]:
params = res8.params
conf = res8.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
res8.summary()

# mod9: multivariable GEE with main effects only (race/ethnicity, insurance, telehealth)

In [None]:
mod9 = smf.gee("noshow ~ C(provider_specialty_cat,Treatment('Primary Care')) + distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)+ C(Charlson_cat)+C(gender)+C(month_quarter)+ C(race_ethnic_cat2,Treatment('Non-Hispanic White')) + C(insurance_cat2,Treatment('Commercial')) + C(is_telehealth,Treatment(False))", "ir_id", data, cov_struct=ind, family=fam)


In [None]:
res9 = mod9.fit()

In [None]:
print(res9.summary())

In [None]:
params = res9.params
conf = res9.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
res9.summary()

In [None]:
# Split the cohort by visit modality for the stratified models
telehealth_flag = data["is_telehealth"]
data_telehealth = data[telehealth_flag == True]
data_inperson = data[telehealth_flag == False]

# mod9 (telehealth visits only)

In [None]:
mod9 = smf.gee("noshow ~ C(provider_specialty_cat,Treatment('Primary Care')) + distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)+ C(Charlson_cat)+C(gender)+C(month_quarter)+ C(race_ethnic_cat2,Treatment('Non-Hispanic White')) + C(insurance_cat2,Treatment('Commercial'))", "ir_id", data_telehealth, cov_struct=ind, family=fam)


In [None]:
res9 = mod9.fit()

In [None]:
print(res9.summary())

In [None]:
params = res9.params
conf = res9.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
res9.summary()

# mod9 (in-person visits only)

In [None]:
mod9 = smf.gee("noshow ~ C(provider_specialty_cat,Treatment('Primary Care')) + distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)+ C(Charlson_cat)+C(gender)+C(month_quarter)+ C(race_ethnic_cat2,Treatment('Non-Hispanic White')) + C(insurance_cat2,Treatment('Commercial'))", "ir_id", data_inperson, cov_struct=ind, family=fam)


In [None]:
res9 = mod9.fit()

In [None]:
print(res9.summary())

In [None]:
params = res9.params
conf = res9.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
res9.summary()

# mod9 + race interactions

In [None]:
mod9_race = smf.gee("noshow ~ C(provider_specialty_cat,Treatment('Primary Care')) + distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)+ C(Charlson_cat)+C(gender)+C(month_quarter)+ C(race_ethnic_cat2,Treatment('Non-Hispanic White')):C(is_telehealth,Treatment(False))", "ir_id", data, cov_struct=ind, family=fam)


In [None]:
res9_race = mod9_race.fit()

In [None]:
print(res9_race.summary())

In [None]:
params = res9_race.params
conf = res9_race.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

# mod9 + insurance interactions

In [None]:
mod9_insurance = smf.gee("noshow ~ C(provider_specialty_cat,Treatment('Primary Care')) + distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)+ C(Charlson_cat)+C(gender)+C(month_quarter)+ C(insurance_cat2,Treatment('Commercial')):C(is_telehealth,Treatment(False))", "ir_id", data, cov_struct=ind, family=fam)


In [None]:
res9_insurance = mod9_insurance.fit()

In [None]:
print(res9_insurance.summary())

In [None]:
params = res9_insurance.params
conf = res9_insurance.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

# mod9 + specialty-by-new-patient interaction (fit separately for telehealth and in-person visits)

In [None]:
data_telehealth = data[data["is_telehealth"]==True]
data_inperson = data[data["is_telehealth"]==False]

In [None]:
mod9_sp = smf.gee("noshow ~ C(provider_specialty_cat,Treatment('Primary Care')) + distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)+ C(Charlson_cat)+C(gender)+C(month_quarter)+ C(race_ethnic_cat2,Treatment('Non-Hispanic White')) + C(insurance_cat2,Treatment('Commercial'))  + C(provider_specialty_cat,Treatment('Primary Care'))*New_Patient", "ir_id", data_telehealth, cov_struct=ind, family=fam)


In [None]:
res9_sp = mod9_sp.fit()

In [None]:
print(res9_sp.summary())

In [None]:
params = res9_sp.params
conf = res9_sp.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
mod9_sp = smf.gee("noshow ~ C(provider_specialty_cat,Treatment('Primary Care')) + distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)+ C(Charlson_cat)+C(gender)+C(month_quarter)+ C(race_ethnic_cat2,Treatment('Non-Hispanic White')) + C(insurance_cat2,Treatment('Commercial'))  + C(provider_specialty_cat,Treatment('Primary Care'))*New_Patient", "ir_id", data_inperson, cov_struct=ind, family=fam)


In [None]:
res9_sp = mod9_sp.fit()

In [None]:
print(res9_sp.summary())

In [None]:
params = res9_sp.params
conf = res9_sp.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

# univariate analyses

In [None]:
mod = smf.gee("noshow ~ C(provider_specialty_cat,Treatment('Primary Care'))", "ir_id", data, cov_struct=ind, family=fam)
res = mod.fit()
res.summary()

In [None]:
params = res.params
conf = res.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
mod = smf.gee("noshow ~ distance_cat", "ir_id", data, cov_struct=ind, family=fam)
res = mod.fit()
res.summary()

In [None]:
params = res.params
conf = res.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
mod = smf.gee("noshow ~ C(age_firstVisit_cat)", "ir_id", data, cov_struct=ind, family=fam)
res = mod.fit()
res.summary()

In [None]:
params = res.params
conf = res.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
mod = smf.gee("noshow ~ New_Patient", "ir_id", data, cov_struct=ind, family=fam)
res = mod.fit()
res.summary()

In [None]:
params = res.params
conf = res.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
mod = smf.gee("noshow ~ C(Median_Income_Cat)", "ir_id", data, cov_struct=ind, family=fam)
res = mod.fit()
res.summary()

In [None]:
params = res.params
conf = res.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
mod = smf.gee("noshow ~ C(Charlson_cat)", "ir_id", data, cov_struct=ind, family=fam)
res = mod.fit()
res.summary()

In [None]:
params = res.params
conf = res.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
mod = smf.gee("noshow ~ C(provider_specialty_cat,Treatment('Primary Care')) + distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)+ C(Charlson_cat)+C(gender)+C(month_quarter) + C(insurance_cat2,Treatment('Commercial')):C(is_telehealth) + C(race_ethnic_cat2,Treatment('Non-Hispanic White')):C(is_telehealth)", "ir_id", data, cov_struct=ind, family=fam)


In [None]:
mod = smf.gee("noshow ~ C(gender)", "ir_id", data, cov_struct=ind, family=fam)
res = mod.fit()
res.summary()

In [None]:
params = res.params
conf = res.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
mod = smf.gee("noshow ~ C(month_quarter)", "ir_id", data, cov_struct=ind, family=fam)
res = mod.fit()
res.summary()

In [None]:
params = res.params
conf = res.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
mod = smf.gee("noshow ~ C(insurance_cat2,Treatment('Commercial')):C(is_telehealth)", "ir_id", data, cov_struct=ind, family=fam)
res = mod.fit()
res.summary()

In [None]:
params = res.params
conf = res.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)

In [None]:
mod = smf.gee("noshow ~ C(race_ethnic_cat2,Treatment('Non-Hispanic White')):C(is_telehealth)", "ir_id", data, cov_struct=ind, family=fam)
res = mod.fit()
res.summary()

In [None]:
params = res.params
conf = res.conf_int()
conf['OR'] = params
conf.columns = ["Lower CI", "Upper CI", "OR"]
np.exp(conf)