In [193]:
import pandas as pd
import altair as alt 
import numpy as np
import datetime as dt
from datetime import timedelta
from vega_datasets import data

# Route chart data through the VegaFusion data transformer.
# NOTE(review): presumably enabled because the survey frame (~15k rows) is larger
# than Altair's default inline-row limit — confirm.
alt.data_transformers.enable("vegafusion")
# Enable the "fivethirtyeight" theme for charts created in this notebook.
alt.themes.enable("fivethirtyeight")

ThemeRegistry.enable('fivethirtyeight')

In [194]:
# NOTE(review): this install cell sits after the import cell, which already needs
# vega_datasets; on a fresh environment the import would run first. Consider
# installing dependencies outside the notebook or moving this cell to the top.
!uv add vega_datasets

[2mResolved [1m108 packages[0m [2min 0.71ms[0m[0m
[2mAudited [1m104 packages[0m [2min 0.65ms[0m[0m


In [195]:
# Altair choropleth map information
# https://altair-viz.github.io/altair-tutorial/notebooks/09-Geographic-plots.html

# Altair color schemes 
# https://vega.github.io/vega/docs/schemes/


In [196]:
# Load the per-facility survey table. The `zip` column is forced to string dtype
# at read time so ZIP codes are never parsed as numbers.
survey_months_overdue_df = pd.read_csv(
    '/Users/ghazalin/NH_inspections_static/OutputData/NH_months_since_last_standard_survey.csv',
    dtype={'zip': str},
)

# Show the dtype pandas assigned to every column.
print(survey_months_overdue_df.dtypes)

Unnamed: 0               int64
CCN                     object
survey_date             object
survey_type             object
survey_cycle             int64
process_date            object
provider_name           object
State                   object
zip                     object
_merge                  object
date_diff               object
months_since_survey    float64
months_overdue         float64
overdue_ind              int64
dtype: object


In [197]:
# Preview the first five rows of the survey table as plain text.
print(survey_months_overdue_df.head(5))

   Unnamed: 0     CCN survey_date      survey_type  survey_cycle process_date  \
0      161368  745049  2024-06-21  Health Standard             1   2024-09-01   
1      161363  745040  2023-10-13  Health Standard             1   2024-09-01   
2      161361  745039  2023-06-20  Health Standard             1   2024-09-01   
3      161359  745038  2023-05-19  Health Standard             1   2024-09-01   
4      161349  745022  2023-10-25  Health Standard             1   2024-09-01   

                                       provider_name State    zip _merge  \
0  LAS ALTURAS NURSING & TRANSITIONAL CARE BROWNS...    TX  78521   both   
1                      THE SARAH ROBERTS FRENCH HOME    TX  78201   both   
2                  MIDTOWNE MEADOWS HEALTH AND REHAB    TX  76065   both   
3      TIERRA ESTE NURSING AND REHABILITATION CENTER    TX  79938   both   
4                          WARE MEMORIAL CARE CENTER    TX  79101   both   

  date_diff  months_since_survey  months_overdue  overdu

In [198]:
# Import ZIP (ZCTA) coordinates from the 2024 national Gazetteer file.
# NOTE(review): the original comment called these "population-weighted zip code
# centroids"; nothing in this notebook establishes how the coordinates were
# derived — confirm against the file's documentation.
#
# The seven source columns are renamed by position, only the ZIP key and its
# latitude/longitude are kept, and the dtypes are pinned (ZIP as string,
# coordinates as float).
zip_df = (
    pd.read_table(
        '/Users/ghazalin/NH_inspections_static/data/2024_Gaz_zcta_national.txt',
        delimiter="\t",
        dtype={'GEOID': 'str'},
    )
    .set_axis(
        ['ZIP', 'ALAND', 'AWATER', 'ALAND_SQMI', 'AWATER_SQMI', 'ZIP_LAT', 'ZIP_LON'],
        axis=1,
    )
    .loc[:, ['ZIP', 'ZIP_LAT', 'ZIP_LON']]
    .astype({'ZIP_LAT': 'float', 'ZIP_LON': 'float', 'ZIP': 'str'})
)

In [199]:
# Confirm that only the ZIP key and its coordinates remain.
print(zip_df.columns)

Index(['ZIP', 'ZIP_LAT', 'ZIP_LON'], dtype='object')


In [200]:
def add_zip_coord(df, zip_df): 
    """Left-join ZIP coordinates onto a facility-level frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``zip`` column; it is compared as a string.
    zip_df : pandas.DataFrame
        Must contain a ``ZIP`` key column; every other column is carried over.

    Returns
    -------
    pandas.DataFrame
        Every row of ``df`` (with ``zip`` cast to string) plus the columns of
        ``zip_df`` and a ``zip_merge`` indicator column. Rows whose ZIP has no
        match are kept, flagged ``left_only``, with missing coordinates.

    Notes
    -----
    The caller's ``df`` is not modified; the string cast happens on a copy.
    The merge-indicator counts are printed as a quick match-rate check.
    """
    # Cast on a copy so re-running the cell never alters the input frame.
    left_df = df.assign(zip=df['zip'].astype('str'))
    merge_df = pd.merge(left_df, zip_df, how='left', left_on='zip', right_on='ZIP', indicator='zip_merge')
    print(merge_df['zip_merge'].value_counts())

    # TODO: rows flagged 'left_only' have no coordinates yet; add their
    # latitudes and longitudes later.

    return merge_df
# Attach ZIP latitude/longitude to every surveyed facility.
survey_overdue_zip_df = add_zip_coord(survey_months_overdue_df, zip_df)

zip_merge
both          14729
left_only        88
right_only        0
Name: count, dtype: int64


In [201]:
# Confirm the coordinate columns and the merge indicator were added.
print(survey_overdue_zip_df.columns)

Index(['Unnamed: 0', 'CCN', 'survey_date', 'survey_type', 'survey_cycle',
       'process_date', 'provider_name', 'State', 'zip', '_merge', 'date_diff',
       'months_since_survey', 'months_overdue', 'overdue_ind', 'ZIP',
       'ZIP_LAT', 'ZIP_LON', 'zip_merge'],
      dtype='object')


In [210]:
def overdue_ind_map(): 
    """Build a US dot map with one point per facility, coloured by survey status.

    Reads the notebook-level ``survey_overdue_zip_df`` and uses its ``ZIP_LON``,
    ``ZIP_LAT`` and ``overdue_ind`` columns. The frame itself is never modified.

    Returns
    -------
    alt.LayerChart
        State outlines (Albers USA projection) with the facility points layered
        on top, on a white background.
    """
    # Colour range is applied in domain order: 'On Time' -> olive, 'Overdue' -> garnet.
    garnet_olive = ['#948000','#74372F']
    status_labels = ['On Time', 'Overdue']
    states = alt.topo_feature(data.us_10m.url, feature='states')

    us_background = alt.Chart(states).mark_geoshape(
        # 'white' is the named colour; the previous '#white' is not a valid colour string.
        fill='white',
        stroke='#B89C84'
    ).project('albersUsa').properties(
        width=500,
        height=300
    )

    # Derive readable legend labels on a local copy. Doing this on the shared
    # frame would make the cell non-idempotent: a second run would compare the
    # already-converted strings to 1 and label every facility 'On Time'.
    # Rows without coordinates (ZIPs that failed the merge) cannot be drawn,
    # so they are dropped explicitly.
    plot_df = (
        survey_overdue_zip_df[['ZIP_LON', 'ZIP_LAT', 'overdue_ind']]
        .dropna(subset=['ZIP_LON', 'ZIP_LAT'])
        .assign(survey_status=lambda frame: np.where(frame['overdue_ind'] == 1, 'Overdue', 'On Time'))
    )

    points = alt.Chart(plot_df).mark_circle().encode(
        longitude='ZIP_LON:Q',
        latitude='ZIP_LAT:Q',
        size=alt.value(7),
        color=alt.Color('survey_status:N',
                        legend=alt.Legend(title='')
                       ).scale(domain=status_labels, range=garnet_olive)
    ).properties(title=alt.TitleParams(text='Nursing Home Survey Status: Overdue vs. On-Time', fontSize=15, dx=100))

    overdue_ind = us_background + points
    overdue_ind = overdue_ind.properties(background='white')

    return overdue_ind

# Build the chart once, then both display it and write it to disk.
overdue_dot_map = overdue_ind_map()
display(overdue_dot_map)
overdue_dot_map.save('/Users/ghazalin/NH_inspections_static/images/overdue_dot_map.svg')

In [233]:
def prop_overdue_surveys(df): 
    """Compute the share of overdue surveys per state.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per facility, with a ``State`` column and a numeric
        ``overdue_ind`` column that is summed per state.

    Returns
    -------
    pandas.DataFrame
        Columns ``State``, ``overdue_ind`` (sum per state), ``NH_count``
        (rows per state) and ``prop_overdue`` (their ratio), sorted by
        ``prop_overdue`` ascending.

    Notes
    -----
    The input frame is left untouched: the per-state row count comes from the
    group size rather than from a helper column written onto ``df``.
    """
    df_groupby = (
        df.groupby(['State'])
        .agg(overdue_ind=('overdue_ind', 'sum'), NH_count=('overdue_ind', 'size'))
        .reset_index()
    )
    df_groupby['prop_overdue'] = df_groupby['overdue_ind']/df_groupby['NH_count']
    df_groupby = df_groupby.sort_values(by=['prop_overdue'])
    return df_groupby
    

In [243]:
# Split facilities by survey status using the 0/1 overdue indicator.
is_overdue = survey_overdue_zip_df['overdue_ind'] == 1
is_ontime = survey_overdue_zip_df['overdue_ind'] == 0
ontime_surveys = survey_overdue_zip_df.loc[is_ontime]
overdue_surveys = survey_overdue_zip_df.loc[is_overdue]

# Per-state overdue rates, with GU, PR and DC left out.
overdue_rate_df = prop_overdue_surveys(survey_overdue_zip_df)
overdue_rate_df = overdue_rate_df.loc[~overdue_rate_df['State'].isin(['GU', 'PR', 'DC'])]


   State  overdue_ind  NH_count  prop_overdue
27    MT            0        59      0.000000
30    NE            0       184      0.000000
29    ND            0        75      0.000000
39    PA            0       668      0.000000
15    IL            1       680      0.001471
16    IN            1       511      0.001957
24    MN            1       343      0.002915
37    OK            1       287      0.003484
19    LA            1       267      0.003745
2     AR            1       220      0.004545
23    MI            2       424      0.004717
41    RI            1        74      0.013514
31    NH            1        74      0.013514
33    NM            1        68      0.014706
34    NV            1        67      0.014925
13    IA            8       404      0.019802
12    HI            1        42      0.023810
52    WY            1        35      0.028571
50    WI           17       326      0.052147
43    SD            6        97      0.061856
49    WA           12       193   

In [264]:
# State FIPS lookup table. Its header names carry a leading space (' st',
# ' stusps'); the postal abbreviations are stripped before being used as a join key.
fips_df = pd.read_csv('/Users/ghazalin/NH_inspections_static/data/us-state-ansi-fips.csv')
fips_df[' stusps'] = fips_df[' stusps'].str.strip()

# Attach each state's FIPS code to its overdue rate, then expose the code as `id`.
overdue_rate_fips_df = overdue_rate_df.merge(
    fips_df,
    how='left',
    left_on='State',
    right_on=' stusps',
    indicator=True,
)
overdue_rate_fips_df = overdue_rate_fips_df.rename(columns={' st': 'id'})
print(overdue_rate_fips_df.head(5))

       stname   st  stusps
0     Alabama    1      AL
1      Alaska    2      AK
2     Arizona    4      AZ
3    Arkansas    5      AR
4  California    6      CA
  State  overdue_ind  NH_count  prop_overdue        stname  id  stusps _merge
0    MT            0        59      0.000000       Montana  30      MT   both
1    NE            0       184      0.000000      Nebraska  31      NE   both
2    ND            0        75      0.000000  North Dakota  38      ND   both
3    PA            0       668      0.000000  Pennsylvania  42      PA   both
4    IL            1       680      0.001471      Illinois  17      IL   both


In [291]:
def overdue_rate_chloropleth_map(): 
    """Build a state-level choropleth of the share of overdue surveys.

    Reads the notebook-level ``overdue_rate_fips_df`` and joins its
    ``prop_overdue`` column onto the state shapes through the ``id`` column.

    Returns
    -------
    alt.Chart
        State shapes (Albers USA projection) shaded by ``prop_overdue`` with
        the 'browns' colour scheme, on a white background.
    """
    states = alt.topo_feature(data.us_10m.url, 'states')

    chloropleth = alt.Chart(states).mark_geoshape().encode(
        color=alt.Color(
            'prop_overdue:Q',
            # prop_overdue is a 0-1 proportion; format it as a percentage so
            # the tick labels agree with the '%' in the legend title.
            legend=alt.Legend(title='% Surveys Overdue', format='.0%')
        ).scale(scheme='browns')
    ).transform_lookup(
        lookup='id',
        # Only the encoded field needs to be pulled onto the shapes.
        from_=alt.LookupData(overdue_rate_fips_df, 'id', ['prop_overdue'])
    ).properties(
        background='white',
        title=alt.TitleParams(text='Overdue Survey Rates by State', fontSize=25, dx=130),
        width=500,
        height=300
    ).project(
        type='albersUsa'
    )
    return chloropleth

# Call the function defined above; the previous name, overdue_amt_chloropleth_map,
# is not defined in this notebook and raises NameError on a fresh kernel.
overdue_rate_chloropleth_map().save('/Users/ghazalin/NH_inspections_static/images/overdue_rates_chloropleth.svg')
                            

In [292]:
# Render the choropleth inline. overdue_rate_chloropleth_map is the function this
# notebook defines; overdue_amt_chloropleth_map is not defined in this notebook.
display(overdue_rate_chloropleth_map())