In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
# Load the raw CSV extract into a pandas DataFrame.
# NOTE(review): the path is hard-coded under the user's home directory;
# consider a configurable data directory so the notebook runs elsewhere.
df = pd.read_csv("~/Data/all_subspecialty_geocoded_230315.csv")

In [None]:
# Analysis frame: 2021-2022 "Mental Health and Behavioral Health" encounters,
# excluding the "Other" insurance and race/ethnicity categories and canceled
# encounters.
# .copy() makes `data` an independent frame. Later cells add and overwrite
# columns on it; without the copy those writes target a slice of `df`, which
# raises SettingWithCopyWarning and is not guaranteed to behave consistently.
data = df.loc[
    df["timepoint"].isin([2021, 2022])
    & (df["insurance_cat2"] != "Other")
    & (df["race_ethnic_cat2"] != "Other")
    & (df["provider_specialty_cat"] == "Mental Health and Behavioral Health")
    & (df["encounter_status_name"] != "Canceled")
].copy()

In [None]:
# Side frame used only to count canceled visits: same year, insurance,
# race/ethnicity and specialty filters as the analysis frame, but canceled
# encounters are kept.
data2 = df[
    df["timepoint"].isin([2021, 2022])
    & df["insurance_cat2"].ne("Other")
    & df["race_ethnic_cat2"].ne("Other")
    & df["provider_specialty_cat"].eq("Mental Health and Behavioral Health")
]

# Encounter counts per status, canceled included.
data2["encounter_status_name"].value_counts()

In [None]:
# Number of rows per primary financial class in the analysis frame.
data["primary_financial_class"].value_counts()

In [None]:
# Display the filtered analysis frame.
data

In [None]:
# Number of rows per timepoint (only 2021 and 2022 remain after filtering).
data["timepoint"].value_counts()

In [None]:
# Number of rows per quarter label; shows which labels are present.
data["month_quarter"].value_counts()

In [None]:
# Ordinal position of each quarter label, 1 (Jan-Mar 2021) to 6 (Apr-Jun 2022).
# Any label not listed here, including a missing value, is coded 0.
# NOTE(review): if later 2022 quarters exist in the data they also get 0 — confirm.
_QUARTER_ORDER = {
    "Jan to Mar 2021": 1,
    "Apr to Jun 2021": 2,
    "Jul to Sep 2021": 3,
    "Oct to Dec 2021": 4,
    "Jan to Mar 2022": 5,
    "Apr to Jun 2022": 6,
}
data["month_quarter_ord"] = data["month_quarter"].apply(
    lambda label: _QUARTER_ORDER.get(label, 0)
)
data["month_quarter_ord"].value_counts()

In [None]:
# Number of rows per New_Patient value.
data["New_Patient"].value_counts()

In [None]:
# Number of rows per encounter status (canceled encounters are already excluded).
data["encounter_status_name"].value_counts()

In [None]:
# Binary outcome: 1 when the encounter status is exactly "No Show", else 0
# (a missing status compares unequal and is therefore coded 0).
data["noshow"] = data["encounter_status_name"].eq("No Show").astype(int)
data["noshow"].value_counts()

In [None]:
# Summary statistics for the household median income column.
data["Household Median Income"].describe()

In [None]:
# Household median income tiers:
#   1: income < 50,000
#   2: 50,000 <= income <= 100,000
#   3: income > 100,000
# Missing income stays missing. The earlier chained-ternary lambda fell through
# to tier 3 for NaN (every comparison with NaN is False), which silently placed
# visits with unknown income in the highest tier.
# The column is float because it has to hold NaN, so its levels read 1.0/2.0/3.0.
data["Median_Income_Cat"] = np.select(
    [
        data["Household Median Income"] < 50000,
        data["Household Median Income"] <= 100000,
        data["Household Median Income"] > 100000,
    ],
    [1, 2, 3],
    default=np.nan,
)
# dropna=False so that any missing tier is visible in the counts.
data["Median_Income_Cat"].value_counts(dropna=False)

In [None]:
# Share of the first count among the three counts.
# NOTE(review): these numbers are hard-coded, presumably copied from a
# value_counts() output — confirm their source and compute the proportion
# from `data` (e.g. value_counts(normalize=True)) so it cannot go stale.
297279/(297279+970774+1523871)

In [None]:
# Number of rows per insurance category before recoding.
data["insurance_cat2"].value_counts()

In [None]:
# Fold "Medicare Advantage" into "Medicare".
# The result is assigned back to the column. Calling
# data[col].mask(..., inplace=True) mutates a temporary Series (chained
# assignment) and leaves `data` unchanged when pandas Copy-on-Write is active.
data["insurance_cat2"] = data["insurance_cat2"].mask(
    data["insurance_cat2"] == "Medicare Advantage", "Medicare"
)
data["insurance_cat2"].value_counts()

In [None]:
# For every row, store the first age_at_visit value found within its ir_id group.
# NOTE(review): "first" follows the current row order, so this equals the age at
# the earliest visit only if rows are chronological within each ir_id — confirm.
data["age_at_firstVisit"]=data.groupby("ir_id")["age_at_visit"].transform("first")
# Show the id, per-visit age and derived column side by side as a visual check.
data[["ir_id","age_at_visit","age_at_firstVisit"]]

In [None]:
# Age-at-first-visit groups. right=False makes the bins left-closed:
# [0, 40) -> '18-39', [40, 65) -> '40-64', [65, 150) -> '65+'.
# With pandas' default right-closed bins, ages of exactly 40 and 65 were
# placed in '18-39' and '40-64', contradicting the labels.
# NOTE(review): the lowest bin starts at 0 although it is labelled '18-39';
# presumably the frame holds adults only — confirm.
age_cut_labels = ['18-39', '40-64', '65+']
age_cut_bins = [0, 40, 65, 150]
data['age_firstVisit_cat'] = pd.cut(
    data['age_at_firstVisit'],
    bins=age_cut_bins,
    labels=age_cut_labels,
    right=False,
)
# dropna=False exposes ages that fall outside every bin.
data['age_firstVisit_cat'].value_counts(dropna=False)

In [None]:
# Summary statistics for the Distance column.
data["Distance"].describe()

In [None]:
# Four distance bands labelled 1-4.
# include_lowest=True closes the first bin on the left, so a distance of
# exactly 0 lands in band 1 instead of becoming NaN (pd.cut bins are
# right-closed and exclude the lowest edge by default).
# NOTE(review): the cut points are hard-coded and look like quartiles copied
# from a describe() output, with 350 as the assumed maximum — confirm, or
# derive them from the data. Distances above 350 still become NaN.
distance_cut_labels = [1, 2, 3, 4]
distance_cut_bins = [0, 3.152, 6.137, 12.327, 350]
data['distance_cat'] = pd.cut(
    data['Distance'],
    bins=distance_cut_bins,
    labels=distance_cut_labels,
    include_lowest=True,
)
# dropna=False exposes distances that fall outside every bin.
data['distance_cat'].value_counts(dropna=False)

In [None]:
# Number of rows per Charlson_cat level before recoding.
data["Charlson_cat"].value_counts()

In [None]:
# Combine Charlson levels "1" and "2" into a single category "1".
# The result is assigned back to the column. Calling
# data[col].mask(..., inplace=True) mutates a temporary Series (chained
# assignment) and leaves `data` unchanged when pandas Copy-on-Write is active.
data["Charlson_cat"] = data["Charlson_cat"].mask(data["Charlson_cat"] == "2", "1")
data["Charlson_cat"].value_counts()

In [None]:
# Convert Charlson_cat to integer codes: "0" -> 0, "1" -> 1, anything else
# (including a missing value) -> 2.
# The lookup goes through str(level) so the cell is safe to re-run: codes that
# are already 0/1/2 map back to themselves. The earlier version compared
# against the strings "0"/"1" only, so a second run turned every row into 2.
data["Charlson_cat"] = data["Charlson_cat"].apply(
    lambda level: {"0": 0, "1": 1}.get(str(level), 2)
)
data["Charlson_cat"].value_counts()

# Basic descriptive statistics for the paper

In [None]:
# Number of distinct ir_id values in the analysis frame.
print(data["ir_id"].nunique())

In [None]:
# (rows, columns) of the analysis frame.
data.shape

In [None]:
# Number of rows per is_telehealth value.
data["is_telehealth"].value_counts()

In [None]:
# Same breakdown expressed as proportions of all rows.
data["is_telehealth"].value_counts(normalize=True)

In [None]:
# Disabled export of the analysis frame to CSV; uncomment to write the file.
#data.to_csv("~/Data/tableone_data_mh_230813.csv")

In [None]:
# List the columns available in the analysis frame.
data.columns

In [None]:
# Binomial family, passed to the GEE models of the 0/1 noshow outcome.
fam = sm.families.Binomial()

In [None]:
# Independence working covariance structure, passed to the GEE models.
ind = sm.cov_struct.Independence()

In [None]:
# Binomial GEE of noshow on all visits, with rows grouped by ir_id.
# Reference levels: 'Commercial' insurance, 'Non-Hispanic White'
# race/ethnicity, and is_telehealth == False.
mod8 = smf.gee(
    formula=(
        "noshow ~ distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)"
        "+ C(Charlson_cat)+C(gender)+C(month_quarter)"
        "+C(insurance_cat2,Treatment('Commercial'))"
        " + C(race_ethnic_cat2,Treatment('Non-Hispanic White'))"
        "+ C(is_telehealth,Treatment(False))"
    ),
    groups="ir_id",
    data=data,
    cov_struct=ind,
    family=fam,
)


In [None]:
# Fit the GEE model currently held in mod8 (the all-visits model).
res8 = mod8.fit()

In [None]:
# Plain-text summary of the fitted model (coefficients are on the log-odds scale).
print(res8.summary())

In [None]:
# Odds-ratio table for the fit held in res8: put the coefficient estimates
# next to their confidence bounds, then exponentiate everything.
params = res8.params
conf = (
    res8.conf_int()
    .assign(OR=params)
    .set_axis(["Lower CI", "Upper CI", "OR"], axis=1)
)
np.exp(conf)

In [None]:
# Same summary as the printed one, shown as the cell's rich output.
res8.summary()

In [None]:
# Odds ratios with confidence bounds for the fit held in res8.
# NOTE(review): this cell repeats an earlier odds-ratio cell for the same fit.
params = res8.params
conf = (
    res8.conf_int()
    .assign(OR=params)
    .set_axis(["Lower CI", "Upper CI", "OR"], axis=1)
)
np.exp(conf)

In [None]:
# Split the analysis frame by visit modality for the stratified models.
# Rows where is_telehealth is neither True nor False end up in neither frame.
telehealth_dat = data[data["is_telehealth"].eq(True)]
inperson_dat = data[data["is_telehealth"].eq(False)]

# Telehealth visits only

In [None]:
# Same binomial GEE specification without the is_telehealth term, fitted on
# telehealth visits only. This rebinds mod8, replacing the all-visits model.
mod8 = smf.gee(
    formula=(
        "noshow ~ distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)"
        "+ C(Charlson_cat)+C(gender)+C(month_quarter)"
        "+C(insurance_cat2,Treatment('Commercial'))"
        " + C(race_ethnic_cat2,Treatment('Non-Hispanic White'))"
    ),
    groups="ir_id",
    data=telehealth_dat,
    cov_struct=ind,
    family=fam,
)


In [None]:
# Fit the telehealth-only model; res8 is rebound, so the all-visits result
# is no longer reachable under this name.
res8 = mod8.fit()

In [None]:
# Plain-text summary of the telehealth-only fit.
print(res8.summary())

In [None]:
# Same summary shown as the cell's rich output.
res8.summary()

In [None]:
# Odds ratios with confidence bounds for the telehealth-only fit held in res8.
params = res8.params
conf = (
    res8.conf_int()
    .assign(OR=params)
    .set_axis(["Lower CI", "Upper CI", "OR"], axis=1)
)
np.exp(conf)

# In-person visits only

In [None]:
# Same binomial GEE specification without the is_telehealth term, fitted on
# in-person visits only. This rebinds mod8, replacing the previous model.
mod8 = smf.gee(
    formula=(
        "noshow ~ distance_cat + C(age_firstVisit_cat) + New_Patient + C(Median_Income_Cat)"
        "+ C(Charlson_cat)+C(gender)+C(month_quarter)"
        "+C(insurance_cat2,Treatment('Commercial'))"
        " + C(race_ethnic_cat2,Treatment('Non-Hispanic White'))"
    ),
    groups="ir_id",
    data=inperson_dat,
    cov_struct=ind,
    family=fam,
)


In [None]:
# Fit the in-person-only model; res8 is rebound, so the previous result
# is no longer reachable under this name.
res8 = mod8.fit()

In [None]:
# Plain-text summary of the in-person-only fit.
print(res8.summary())

In [None]:
# Odds ratios with confidence bounds for the in-person-only fit held in res8.
params = res8.params
conf = (
    res8.conf_int()
    .assign(OR=params)
    .set_axis(["Lower CI", "Upper CI", "OR"], axis=1)
)
np.exp(conf)

In [None]:
# Same summary shown as the cell's rich output.
res8.summary()