In [4]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.linear_model import ElasticNet, ElasticNetCV, LassoCV, Lasso
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
from sklearn.preprocessing import StandardScaler, RobustScaler, scale
import seaborn as sns 

In [54]:
place_abbr = 'houston'

tracts_deserts_path = 'data/tract_desert_measures/houston_desert_tracts.csv'

save_data_dir = 'data/zcta_health/'
save_data_name =  place_abbr+'_zcta_health.csv'
save_transf_data_name =  place_abbr+'_zcta_health_logt.csv'

save_regression_path = 'data/zcta_health/results/'
save_lasso_name = place_abbr+'_zcta_lasso_results'
save_enet_name = place_abbr+'_zcta_enet_results'

# Aggregation/Cleaning

In [55]:
tracts_deserts = pd.read_csv(tracts_deserts_path, dtype={'GEOID':'str'})

In [56]:
zip_to_tract = pd.read_csv("data/zcta_to_tract10.csv", dtype=str)
zip_to_tract = zip_to_tract[['GEOID', 'ZCTA5']]

In [57]:
tracts_zcta_deserts = pd.merge(tracts_deserts, zip_to_tract, on='GEOID').groupby('ZCTA5', as_index=False).median()

In [58]:
zcta_health = pd.read_csv("data/PLACES__ZCTA_Data__GIS_Friendly_Format___2021_release.csv", dtype={'ZCTA5':'str', 'GEOID':'str'})
zcta_health = pd.merge(zcta_health, tracts_zcta_deserts, on='ZCTA5')

income_vars = pd.read_csv('data/ZCTA_median_incomes.csv', dtype=str).rename(columns={'ZCTA':'ZCTA5'})
zcta_health = pd.merge(zcta_health, income_vars, on='ZCTA5')

zcta_health.to_csv(save_data_dir+save_data_name, index=False)

In [59]:
zcta_health['Household median income'].value_counts()

-         5
36943     1
55498     1
59792     1
55333     1
         ..
94957     1
99614     1
164564    1
56927     1
37823     1
Name: Household median income, Length: 228, dtype: int64

In [18]:
desert_measures = ['food_closest_travel_times', 'physical_closest_dist', 'transport_closest_dist', 'education_closest_travel_times', 'worship_closest_travel_times']

zcta_health_logt = zcta_health.copy()
zcta_health_logt[desert_measures] = zcta_health_logt[desert_measures].apply(lambda x: np.log(x+1))
zcta_health_logt.to_csv(save_transf_data_name, index=False)

# Analysis

In [9]:
name_mapping = {'access2': 'Health insurance access', 'arthritis': 'Arthritis prevalence', 'binge': 'Binge drinking prevalence',
               'bphigh': 'High blood pressure prevalence', 'bpmed': 'Medium blood pressure prevalence', 'cancer': 'Cancer prevalence',
               'casthma': 'Asthma prevalence', 'cervical': 'Cervical cancer screenings', 'chd': 'Coronary heart disease prevalence',
               'checkup': 'Routine checkups', 'cholscreen': 'Cholesterol screenings', 'colon_screen': 'Colon cancer screenings',
               'copd': 'COPD prevalence', 'corem': 'Core men\'s health', 'corew': 'Core women\'s health', 'csmoking': 'Smoking prevalence',
               'dental': 'Dental checkups', 'depression': 'Depression prevalence', 'diabetes': 'Diabetes prevalence', 'ghlth': 'General poor health prevalence',
               'highchol': 'High cholesterol prevalence', 'kidney': 'Chronic kidney disease', 'lpa': 'No physical activity', 'mammouse': 'Mammograms',
               'mhlth': 'Poor mental health prevalence', 'obesity': 'Obesity prevalence', 'phlth': 'Poor physical health', 'sleep': 'Poor sleep prevalence',
               'stroke': 'Stroke prevalence', 'teethlost': 'Teeth loss prevalence'}

results_nan = pd.DataFrame({'Health condition': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')], 'Food': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')],
                       'Physical health': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')], 'Public transport': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')],
                        'Education': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')], 'Houses of worship': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')],
                       'RSquared': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')], 'MSE': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')]})

desert_measures = ['food_closest_travel_times', 'physical_closest_dist', 'transport_closest_dist', 'education_closest_travel_times', 'worship_closest_travel_times']

In [10]:
zcta_health

Unnamed: 0,ZCTA5,TotalPopulation,ACCESS2_CrudePrev,ACCESS2_Crude95CI,ARTHRITIS_CrudePrev,ARTHRITIS_Crude95CI,BINGE_CrudePrev,BINGE_Crude95CI,BPHIGH_CrudePrev,BPHIGH_Crude95CI,...,STROKE_CrudePrev,STROKE_Crude95CI,TEETHLOST_CrudePrev,TEETHLOST_Crude95CI,Geolocation,food_closest_travel_times,physical_closest_dist,transport_closest_dist,education_closest_travel_times,worship_closest_travel_times
0,30024,64614,15.2,"(14.4, 16.0)",18.4,"(18.1, 18.7)",18.6,"(18.4, 18.8)",26.8,"(26.5, 27.2)",...,2.0,"( 2.0, 2.1)",7.0,"( 6.2, 7.9)",POINT (-84.09063164 34.06238772),289.4,0.963302,3.946356,273.9,111.7
1,30097,41715,14.7,"(13.8, 15.7)",17.9,"(17.6, 18.3)",16.4,"(16.3, 16.6)",26.9,"(26.5, 27.4)",...,2.1,"( 2.0, 2.2)",6.9,"( 5.9, 8.2)",POINT (-84.14699136 34.02600275),289.4,0.963302,3.946356,273.9,111.7
2,30076,42678,22.4,"(21.0, 23.8)",19.5,"(19.2, 19.9)",18.1,"(17.8, 18.3)",27.0,"(26.6, 27.5)",...,2.5,"( 2.4, 2.6)",8.4,"( 7.0, 10.2)",POINT (-84.315186 34.00958099),289.4,0.963302,3.946356,273.9,111.7
3,30009,13722,14.9,"(13.7, 16.2)",20.4,"(19.9, 21.0)",18.2,"(17.9, 18.5)",26.9,"(26.3, 27.5)",...,2.5,"( 2.3, 2.6)",9.0,"( 6.3, 12.9)",POINT (-84.281277 34.06727006),289.4,0.963302,3.946356,273.9,111.7
4,30092,31704,19.4,"(18.4, 20.4)",17.9,"(17.5, 18.2)",18.8,"(18.6, 19.0)",28.1,"(27.7, 28.5)",...,2.2,"( 2.1, 2.2)",8.0,"( 6.5, 9.9)",POINT (-84.23516941 33.97167427),289.4,0.963302,3.946356,273.9,111.7
5,30005,34442,13.5,"(12.5, 14.6)",17.0,"(16.6, 17.4)",18.6,"(18.4, 18.9)",23.5,"(23.0, 24.0)",...,1.8,"( 1.7, 1.9)",6.2,"( 5.0, 7.7)",POINT (-84.21592752 34.08624059),289.4,0.963302,3.946356,273.9,111.7
6,30004,53033,14.6,"(13.6, 15.8)",18.4,"(18.0, 18.8)",19.2,"(19.0, 19.5)",24.6,"(24.1, 25.1)",...,2.0,"( 1.9, 2.1)",6.6,"( 5.6, 7.8)",POINT (-84.2974445 34.15130862),289.4,0.963302,3.946356,273.9,111.7
7,30075,52573,13.0,"(12.2, 13.8)",21.4,"(21.1, 21.8)",18.3,"(18.1, 18.5)",27.8,"(27.4, 28.2)",...,2.4,"( 2.3, 2.5)",6.5,"( 5.3, 7.9)",POINT (-84.38742064 34.05252514),162.6,1.102478,4.840824,231.6,172.0
8,30022,64359,13.7,"(13.1, 14.4)",18.5,"(18.2, 18.8)",18.4,"(18.2, 18.5)",25.1,"(24.8, 25.5)",...,2.1,"( 2.0, 2.1)",6.4,"( 5.5, 7.3)",POINT (-84.24677105 34.02781659),289.4,0.963302,3.946356,273.9,111.7
9,30344,31776,25.4,"(24.0, 27.2)",22.8,"(22.4, 23.2)",14.9,"(14.7, 15.1)",38.0,"(37.5, 38.5)",...,4.4,"( 4.2, 4.6)",19.0,"(16.3, 22.0)",POINT (-84.4653345 33.68225982),124.4,1.43827,3.464802,122.7,78.9


## Elastic Net

In [11]:
if all_deserts == True:
    desert_measures = ['food_closest_travel_times', 'physical_closest_dist', 'transport_closest_dist', 'education_closest_travel_times', 'worship_closest_travel_times']
    results = results_all_nan.copy()
    update_method = update_all_vars
else:
    desert_measures = ['food_closest_travel_times', 'physical_closest_dist']
    results = results_nan.copy()
    update_method = update_vars
if log_transf == True:
    df = tracts_health_logt
else:
    df = tracts_health
alphas = []
l1_ratios = []
i=0


for c in df.columns:
    if c.endswith('CrudePrev'):
        name = name_mapping[c[:-10].lower()]
        x = df[desert_measures+add_vars].to_numpy(copy=True)
        scaler = StandardScaler()
        xscale = scaler.fit_transform(x)
        y = df[c].to_numpy(copy=True)
        xscale = xscale[~np.isnan(y)]
        y = y[~np.isnan(y)]
        #create function to center data
        center_function = lambda x: x - x.mean()

        #apply function to original NumPy array
        data_centered = center_function(y)

        X_train, X_test, y_train, y_test = train_test_split(xscale, 
                                                    y, 
                                                    test_size=0.25, 
                                                    random_state=42)

        #l1 ratio is from suggested values in ElasticNetCV documentation
        enet_cv = ElasticNetCV(l1_ratio = [.1, .5, .7, .9, .95, .99, 1], 
                                     cv = 10, normalize=True).fit(X_train,y_train)

        alpha = enet_cv.alpha_
        l1 = enet_cv.l1_ratio_
        alphas.append(alpha)
        l1_ratios.append(l1)

        regr = ElasticNet(alpha=alpha, l1_ratio = l1, normalize=True)  # Could try others, or other parameters?
        regr.fit(X_train, y_train.reshape(-1, 1))

        predictions = regr.predict(X_test)
        y_train_pred = regr.predict(X_train)
        mse_test = mean_squared_error(y_test, predictions)

        if all_deserts==True:
            update_all_vars()
        else:
            update_vars()

        i += 1

results_round = results.round({'Food': 4, 'Physical health':4, 'Public transport':4, 'Education':4,
             'House of worship':4, 'RSquared':4})
results_round.sort_values(by='RSquared', ascending=False).reset_index(drop=True)

Unnamed: 0,Health condition,Food,Physical health,Public transport,Education,Houses of worship,RSquared,MSE
0,Mammograms,0.2173,-0.7886,-0.3198,-0.1547,0.446637,0.5715,0.812013
1,Poor mental health prevalence,1.1942,-2.7121,-2.0779,-0.037,2.359593,0.3319,8.10571
2,Cancer prevalence,0.0,0.5343,0.0,0.0,0.0,0.2233,2.447715
3,Core men's health,-2.3269,6.1669,5.1025,0.0,-6.234011,0.2146,13.681497
4,Smoking prevalence,1.421,-2.9801,-4.1822,0.2299,5.261401,0.1965,6.069259
5,Cholesterol screenings,-0.0,1.1851,0.3764,0.0528,0.0,0.1647,34.858415
6,High cholesterol prevalence,0.2949,0.4594,0.176,0.6535,0.334879,0.1555,31.907988
7,Asthma prevalence,0.4054,-1.4555,-0.9667,0.0476,1.649222,0.1485,0.838952
8,Obesity prevalence,1.4916,-4.9422,-5.1141,0.487,8.271425,0.0542,29.332406
9,Cervical cancer screenings,-0.0,0.3048,0.8881,0.0,0.0,0.0298,73.500884


<Figure size 1440x1080 with 0 Axes>

## LASSO

In [12]:
if all_deserts == True:
    desert_measures = ['food_closest_travel_times', 'physical_closest_dist', 'transport_closest_dist', 'education_closest_travel_times', 'worship_closest_travel_times']
    results = results_all_nan.copy()
    update_method = update_all_vars
else:
    desert_measures = ['food_closest_travel_times', 'physical_closest_dist']
    results = results_nan.copy()
    update_method = update_vars
if log_transf == True:
    df = tracts_health_logt
else:
    df = tracts_health
alphas = []
l1_ratios = []
i=0


for c in df.columns:
    if c.endswith('CrudePrev'):
        name = name_mapping[c[:-10].lower()]
        
        x = df[desert_measures+add_vars].to_numpy(copy=True)
        scaler = StandardScaler()
        xscale = scaler.fit_transform(x)
        y = df[c].to_numpy(copy=True)
        xscale = xscale[~np.isnan(y)]
        y = y[~np.isnan(y)]
        #create function to center data
        center_function = lambda x: x - x.mean()

        #apply function to original NumPy array
        y = center_function(y)
        
        X_train, X_test, y_train, y_test = train_test_split(xscale, 
                                                    y, 
                                                    test_size=0.3, 
                                                    random_state=42)
        
        #l1 ratio is from suggested values in ElasticNetCV documentation
        lasso_cv = LassoCV(cv = 5, normalize=True).fit(X_train,y_train)
        
        alpha = lasso_cv.alpha_
        alphas.append(alpha)
        
        regr = Lasso(alpha=alpha, normalize=True)  # Could try others, or other parameters?
        regr.fit(X_train, y_train.reshape(-1, 1))
        
        predictions = regr.predict(X_test)
        y_train_pred = regr.predict(X_train)
        mse_test = mean_squared_error(y_test, predictions)
        
        if all_deserts==True:
            update_all_vars()
        else:
            update_vars()

        i += 1

results_round = results.round({'Food': 4, 'Physical health':4, 'Public transport':4, 'Education':4,
             'House of worship':4, 'RSquared':4})
results_round.sort_values(by='RSquared', ascending=False).reset_index(drop=True)

Unnamed: 0,Health condition,Food,Physical health,Public transport,Education,Houses of worship,RSquared,MSE
0,Asthma prevalence,0.1735,-0.833,-0.9535,0.0,1.255686,0.3451,0.645229
1,Poor mental health prevalence,0.0351,-1.4097,-2.3822,-0.0,2.565178,0.2989,8.505751
2,Dental checkups,-2.1001,3.1627,9.5252,-0.0,-8.993622,0.2807,38.552597
3,Core men's health,-1.723,4.1689,4.2052,-0.1271,-4.145545,0.2629,12.840901
4,Poor sleep prevalence,0.5647,-1.9131,-4.4327,0.5262,5.245035,0.2474,14.151897
5,No physical activity,1.4879,-1.2748,-6.2818,0.0,5.859538,0.2151,19.201334
6,Health insurance access,1.2617,-1.1389,-5.4394,0.0,4.884732,0.1872,12.907359
7,Mammograms,0.0,-0.3955,-0.0,0.0,0.0,0.1702,1.572563
8,Smoking prevalence,0.9648,-1.2215,-4.2895,0.0,4.250716,0.152,6.406008
9,Cancer prevalence,0.082,0.1712,0.1251,0.0237,0.007252,0.123,2.763667


<Figure size 1440x1080 with 0 Axes>

In [13]:
if all_deserts == True:
    desert_measures = ['food_closest_travel_times', 'physical_closest_dist', 'transport_closest_dist', 'education_closest_travel_times', 'worship_closest_travel_times']
    results = results_all_nan.copy()
    update_method = update_all_vars
else:
    desert_measures = ['food_closest_travel_times', 'physical_closest_dist']
    results = results_nan.copy()
    update_method = update_vars
if log_transf == True:
    df = tracts_health_logt
else:
    df = tracts_health
alphas = []
l1_ratios = []
i=0


for c in df.columns:
    if c.endswith('CrudePrev'):
        name = name_mapping[c[:-10].lower()]
        
        x = df[desert_measures+add_vars].to_numpy(copy=True)
        scaler = StandardScaler()
        xscale = scaler.fit_transform(x)
        y = df[c].to_numpy(copy=True)
        xscale = xscale[~np.isnan(y)]
        y = y[~np.isnan(y)]
        #create function to center data
        center_function = lambda x: x - x.mean()

        #apply function to original NumPy array
        y = center_function(y)
        
        X_train, X_test, y_train, y_test = train_test_split(xscale, 
                                                    y, 
                                                    test_size=0.3, 
                                                    random_state=42)
        
        #l1 ratio is from suggested values in ElasticNetCV documentation
        lasso_cv = LassoCV(cv = 5, normalize=True).fit(X_train,y_train)
        
        alpha = lasso_cv.alpha_
        alphas.append(alpha)
        
        regr = Lasso(alpha=alpha, normalize=True)  # Could try others, or other parameters?
        regr.fit(xscale, y.reshape(-1, 1))
        
        predictions = regr.predict(xscale)
        mse_test = mean_squared_error(y, predictions)
        
        if all_deserts==True:
            update_all_vars()
        else:
            update_vars()

        i += 1

results_round = results.round({'Food': 4, 'Physical health':4, 'Public transport':4, 'Education':4,
             'House of worship':4, 'RSquared':4})
results_round.sort_values(by='RSquared', ascending=False).reset_index(drop=True)

Unnamed: 0,Health condition,Food,Physical health,Public transport,Education,Houses of worship,RSquared,MSE
0,Core men's health,-1.194,3.5721,4.8541,-0.1963,-4.68541,0.5715,14.720126
1,Core women's health,-0.9995,2.7312,4.7902,0.5845,-4.65181,0.532,11.249901
2,Teeth loss prevalence,1.0054,-2.4274,-7.1614,0.0817,5.859983,0.4584,23.610178
3,Asthma prevalence,0.031,-0.693,-0.9372,0.0452,1.248946,0.4559,0.868392
4,Dental checkups,-1.6113,2.5298,9.1389,-0.5385,-8.23393,0.3077,52.12779
5,Smoking prevalence,0.7333,-0.9252,-4.2061,0.1543,3.962132,0.2993,8.567985
6,Poor sleep prevalence,0.0,-1.3354,-4.4817,0.7097,5.219873,0.2852,15.95603
7,Health insurance access,1.2836,-0.739,-5.5991,0.0,4.671376,0.2807,15.153113
8,Poor mental health prevalence,0.2133,-1.0951,-2.4997,0.0,2.38689,0.2623,11.185556
9,No physical activity,1.0844,-0.8845,-6.1266,0.3395,5.451597,0.2505,24.30145


<Figure size 1440x1080 with 0 Axes>

In [14]:
alphas = []
l1_ratios = []
i=0
results = results_nan.copy()

plt.figure(figsize=(20, 15))
plt.subplots_adjust(hspace=0.5)

for c in zcta_health.columns:
    if c.endswith('CrudePrev'):
        name = name_mapping[c[:-10].lower()]
        
        x = np.log(zcta_health[desert_measures].to_numpy(copy=True)+1)
        scaler = StandardScaler()
        xscale = scaler.fit_transform(x)
        y = zcta_health[c].to_numpy(copy=True)
        xscale = xscale[~np.isnan(y)]
        y = y[~np.isnan(y)]
        #create function to center data
        center_function = lambda x: x - x.mean()

        #apply function to original NumPy array
        y = center_function(y)
        
        X_train, X_test, y_train, y_test = train_test_split(xscale, 
                                                    y, 
                                                    test_size=0.3, 
                                                    random_state=42)
        
        #l1 ratio is from suggested values in ElasticNetCV documentation
        lasso_cv = LassoCV(cv = 5, normalize=True).fit(X_train,y_train)
        
        alpha = lasso_cv.alpha_
        alphas.append(alpha)
        
        regr = Lasso(alpha=alpha, normalize=True)  # Could try others, or other parameters?
        regr.fit(X_train, y_train.reshape(-1, 1))
        
        y_test_pred = regr.predict(X_test)
        y_train_pred = regr.predict(X_train)
        mse_test = mean_squared_error(y_test, y_test_pred)
        
        results.iat[i, 0] = name
        results.iat[i, 1] = regr.coef_[1]
        results.iat[i, 2] = regr.coef_[3]
        results.iat[i, 3] = regr.coef_[4]
        results.iat[i, 4] = regr.coef_[0]
        results.iat[i, 5] = regr.coef_[2]
        results.iat[i, 6] = regr.score(X_test, y_test)
        results.iat[i, 7] = mse_test
            
        i += 1

results_round = results.round({'Food': 4, 'Physical health':4, 'Public transport':4, 'Education':4,
             'House of worship':4, 'RSquared':4}).sort_values(by='RSquared', ascending=False).reset_index(drop=True)
results_round

Unnamed: 0,Health condition,Food,Physical health,Public transport,Education,Houses of worship,RSquared,MSE
0,Core men's health,-2.2131,5.5926,4.718,-0.0,-5.248973,0.6026,13.652933
1,Core women's health,-1.2919,4.2951,5.0076,0.5045,-5.656782,0.4962,12.110018
2,Asthma prevalence,0.0872,-1.301,-1.0172,0.1846,1.847794,0.2428,1.208409
3,Obesity prevalence,0.3527,-4.1558,-5.2177,0.8631,8.435008,0.2118,29.415934
4,Arthritis prevalence,0.0,-0.0,-2.0964,0.7079,2.971363,0.2069,21.04223
5,Mammograms,0.0,-0.4922,-0.0,-0.0,0.0,0.1975,1.55394
6,Teeth loss prevalence,0.5731,-4.6344,-8.6742,1.229,8.94985,0.1946,35.109009
7,Smoking prevalence,0.7061,-2.2584,-4.5791,0.6849,5.477235,0.1781,10.050061
8,Poor mental health prevalence,0.6725,-2.2438,-2.4454,0.3212,2.854491,0.1683,12.610833
9,Cancer prevalence,0.0,0.3481,0.0,0.0,0.0,0.1169,2.784196


<Figure size 1440x1080 with 0 Axes>

In [15]:
alphas = []
l1_ratios = []
i=0
results = results_nan.copy()

plt.figure(figsize=(20, 15))
plt.subplots_adjust(hspace=0.5)

for c in zcta_health.columns:
    if c.endswith('CrudePrev'):
        name = name_mapping[c[:-10].lower()]
        
        x = np.log(zcta_health[desert_measures].to_numpy(copy=True)+1)
        scaler = StandardScaler()
        xscale = scaler.fit_transform(x)
        y = zcta_health[c].to_numpy(copy=True)
        xscale = xscale[~np.isnan(y)]
        y = y[~np.isnan(y)]
        #create function to center data
        center_function = lambda x: x - x.mean()

        #apply function to original NumPy array
        y = center_function(y)
        
        X_train, X_test, y_train, y_test = train_test_split(xscale, 
                                                    y, 
                                                    test_size=0.3, 
                                                    random_state=42)
        
        #l1 ratio is from suggested values in ElasticNetCV documentation
        lasso_cv = LassoCV(cv = 5, normalize=True).fit(X_train,y_train)
        
        alpha = lasso_cv.alpha_
        alphas.append(alpha)
        
        regr = Lasso(alpha=alpha, normalize=True)  # Could try others, or other parameters?
        regr.fit(xscale, y.reshape(-1, 1))
        
        y_pred = regr.predict(xscale)
        mse = mean_squared_error(y, y_pred)
        
        results.iat[i, 0] = name
        results.iat[i, 1] = regr.coef_[1]
        results.iat[i, 2] = regr.coef_[3]
        results.iat[i, 3] = regr.coef_[4]
        results.iat[i, 4] = regr.coef_[0]
        results.iat[i, 5] = regr.coef_[2]
        results.iat[i, 6] = regr.score(xscale, y)
        results.iat[i, 7] = mse
            
        i += 1

results_round = results.round({'Food': 4, 'Physical health':4, 'Public transport':4, 'Education':4,
             'House of worship':4, 'RSquared':4}).sort_values(by='RSquared', ascending=False).reset_index(drop=True)
results_round.to_csv(save_results_name)
results_round

Unnamed: 0,Health condition,Food,Physical health,Public transport,Education,Houses of worship,RSquared,MSE
0,Core men's health,-1.6829,5.1211,3.4804,0.0,-4.828722,0.6875,13.798901
1,Obesity prevalence,0.0,-2.6067,-5.1042,-0.5588,8.484329,0.6793,13.822789
2,Teeth loss prevalence,1.1708,-4.3609,-6.7575,-0.3165,7.839691,0.6758,17.881753
3,Smoking prevalence,0.4613,-2.1146,-3.5536,-0.1231,5.111602,0.6622,6.702221
4,Asthma prevalence,0.0121,-1.0802,-0.7869,-0.0745,1.597257,0.653,0.795431
5,Core women's health,-0.8601,4.1036,3.533,0.0,-4.79343,0.6529,11.11841
6,Poor sleep prevalence,0.055,-2.6178,-3.8688,-0.5209,6.458747,0.633,10.266124
7,Dental checkups,-2.8314,6.039,6.5712,0.3682,-8.312981,0.6092,39.697172
8,General poor health prevalence,0.5241,-2.8172,-4.8478,-0.0,6.981239,0.6006,18.152815
9,No physical activity,0.7567,-2.7645,-4.8001,-0.3773,6.983092,0.5934,19.576225


<Figure size 1440x1080 with 0 Axes>