In [1]:
import pandas as pd
import numpy as np
import datetime
import altair as alt
import os

In [2]:
data_dir = os.environ["DATA_DIR"]
raw_data = data_dir + "/raw/"
health_file = os.path.join(raw_data, 'healthcare_facility_locations.csv')
health_raw = pd.read_csv(health_file)

In [3]:
health_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14361 entries, 0 to 14360
Data columns (total 59 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   LICENSED_CERTIFIED              14361 non-null  object 
 1   FLAG                            733 non-null    object 
 2   T18_19                          2250 non-null   object 
 3   FACID                           14361 non-null  int64  
 4   FAC_STATUS_TYPE_CODE            14361 non-null  object 
 5   ASPEN_FACID                     14152 non-null  object 
 6   CCN                             9692 non-null   object 
 7   TERMINAT_SW                     251 non-null    object 
 8   PARTICIPATION_DATE              9625 non-null   object 
 9   APPROVAL_DATE                   9149 non-null   object 
 10  NPI                             8322 non-null   object 
 11  CAN_BE_DEEMED_FAC_TYPE          12502 non-null  object 
 12  CAN_BE_CERTIFIED_FAC_TYPE       

In [4]:
health_raw.FAC_STATUS_TYPE_CODE.unique()

array(['OPEN', 'CERTIFIED ONLY'], dtype=object)

In [5]:
health_raw.COUNTY_NAME.unique()

array(['SONOMA', 'DEL NORTE', 'HUMBOLDT', 'MENDOCINO', 'NAPA', 'SOLANO',
       'LAKE', 'MARIN', 'ALAMEDA', 'CONTRA COSTA', 'SACRAMENTO',
       'SANTA CLARA', 'PLACER', 'CALAVERAS', 'STANISLAUS', 'TUOLUMNE',
       'EL DORADO', 'AMADOR', 'SAN JOAQUIN', 'YOLO', 'FRESNO', 'MADERA',
       'MERCED', 'KINGS', 'MARIPOSA', 'VENTURA', 'SAN LUIS OBISPO',
       'SANTA BARBARA', 'LOS ANGELES', 'ORANGE', 'SAN BERNARDINO',
       'SAN DIEGO', 'IMPERIAL', 'RIVERSIDE', 'MONTEREY', 'SANTA CRUZ',
       'SAN BENITO', 'SAN MATEO', 'KERN', 'SAN FRANCISCO', 'BUTTE',
       'SHASTA', 'TULARE', 'NEVADA', 'SISKIYOU', 'SUTTER', 'YUBA',
       'LASSEN', 'PLUMAS', 'GLENN', 'MODOC', 'TEHAMA', 'TRINITY',
       'COLUSA', 'SIERRA', 'MONO', 'INYO', 'CURRY'], dtype=object)

Here, we are going to filter our data by 'FAC_STATUS_TYPE_CODE', as we only need the OPEN ones.

In [6]:
health_open = health_raw[health_raw['FAC_STATUS_TYPE_CODE'] == 'OPEN']

In [7]:
health_open.reset_index()

Unnamed: 0,index,LICENSED_CERTIFIED,FLAG,T18_19,FACID,FAC_STATUS_TYPE_CODE,ASPEN_FACID,CCN,TERMINAT_SW,PARTICIPATION_DATE,...,OSHPD_ID,CCLHO_CODE,CCLHO_NAME,FIPS_COUNTY_CODE,BIRTHING_FACILITY_FLAG,TRAUMA_PED_CTR,TRAUMA_CTR,TYPE_OF_CARE,CRITICAL_ACCESS_HOSPITAL,DATA_DATE
0,0,LICENSED AND CERTIFIED,,T18 OR T18/19,10000001,OPEN,CA010000001,555120,,7-Nov-80,...,206492251.0,49,SONOMA,6097,,,,,,15-Mar-22
1,1,LICENSED AND CERTIFIED,,T18 OR T18/19,10000003,OPEN,CA010000003,056090,,1-Feb-74,...,206490940.0,49,SONOMA,6097,,,,,,15-Mar-22
2,2,LICENSED AND CERTIFIED,,T18 OR T18/19,10000004,OPEN,CA010000004,056296,,14-Sep-71,...,206080930.0,8,DEL NORTE,6015,,,,,,15-Mar-22
3,3,LICENSED AND CERTIFIED,,T18 OR T18/19,10000005,OPEN,CA010000005,555703,,18-Apr-97,...,206490931.0,49,SONOMA,6097,,,,,,15-Mar-22
4,4,LICENSED AND CERTIFIED,,T18 OR T18/19,10000024,OPEN,CA010000024,056300,,10-Jul-73,...,206120955.0,12,HUMBOLDT,6023,,,,,,15-Mar-22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12788,14356,LICENSED AND CERTIFIED,,,980002541,OPEN,CA980002541,551653,,7-Jun-12,...,406334626.0,33,RIVERSIDE,6065,,,,,,15-Mar-22
12789,14357,LICENSED AND CERTIFIED,,,980002546,OPEN,HH98002546,053164,,6-Mar-18,...,406197019.0,19,LOS ANGELES,6037,,,,,,15-Mar-22
12790,14358,LICENSED AND CERTIFIED,,,980002547,OPEN,HH980002547,058309,,4-Aug-05,...,406196990.0,19,LOS ANGELES,6037,,,,,,15-Mar-22
12791,14359,LICENSED AND CERTIFIED,,,980002548,OPEN,HH980002548,058468,,9-Jun-08,...,406196618.0,19,LOS ANGELES,6037,,,,,,15-Mar-22


In [8]:
health_open.FAC_FDR.unique()

array(['SKILLED NURSING FACILITY',
       'INTERMEDIATE CARE FACILITY-DD/H/N/CN/IID', 'HOME HEALTH AGENCY',
       'CONGREGATE LIVING HEALTH FACILITY', 'HOSPICE FACILITY',
       'GENERAL ACUTE CARE HOSPITAL', 'PRIMARY CARE CLINIC',
       'ACUTE PSYCHIATRIC HOSPITAL', 'ALTERNATIVE BIRTHING CENTER',
       'PSYCHOLOGY CLINIC', 'OTHER', 'HOSPICE', 'SURGICAL CLINIC',
       'REHABILITATION CLINIC', 'ADULT DAY HEALTH CARE',
       'INTERMEDIATE CARE FACILITY',
       'PEDIATRIC DAY HEALTH & RESPITE CARE FACILITY',
       'CHRONIC DIALYSIS CLINIC', 'CHEMICAL DEPENDENCY RECOVERY HOSPITAL',
       'CORRECTIONAL TREATMENT CENTER', 'REFERRAL AGENCY'], dtype=object)

Here, we create a new column per main category: primary_care, nursing, pediatric, mental_health. **We can modify it as we want.**

- primary_care = PRIMARY CARE CLINIC + GENERAL ACUTE CARE HOSPITAL
- nursing = SKILLED NURSING FACILITY + ADULT DAY HEALTH CARE
- pediatric = PEDIATRIC DAY HEALTH & RESPITE CARE FACILITY + ALTERNATIVE BIRTHING CENTER
- mental_health = ACUTE PSYCHIATRIC HOSPITAL + PSYCHOLOGY CLINIC + REHABILITATION CLINIC

In [9]:
health_open['primary_care'] = health_open.FAC_FDR.str.contains('PRIMARY CARE CLINIC|GENERAL ACUTE CARE HOSPITAL', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  health_open['primary_care'] = health_open.FAC_FDR.str.contains('PRIMARY CARE CLINIC|GENERAL ACUTE CARE HOSPITAL', regex=True)


In [10]:
health_open['nursing'] = health_open.FAC_FDR.str.contains('SKILLED NURSING FACILITY|HOSPICE|ADULT DAY HEALTH CARE', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  health_open['nursing'] = health_open.FAC_FDR.str.contains('SKILLED NURSING FACILITY|HOSPICE|ADULT DAY HEALTH CARE', regex=True)


In [11]:
health_open['pediatric'] = health_open.FAC_FDR.str.contains('PEDIATRIC DAY HEALTH & RESPITE CARE FACILITY', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  health_open['pediatric'] = health_open.FAC_FDR.str.contains('PEDIATRIC DAY HEALTH & RESPITE CARE FACILITY', regex=True)


In [12]:
health_open['mental_health'] = health_open.FAC_FDR.str.contains('ACUTE PSYCHIATRIC HOSPITAL|PSYCHOLOGY CLINIC', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  health_open['mental_health'] = health_open.FAC_FDR.str.contains('ACUTE PSYCHIATRIC HOSPITAL|PSYCHOLOGY CLINIC', regex=True)


In [13]:
health_open.LICENSE_STATUS_DESCRIPTION.unique()

array(['ACTIVE', nan, 'INACTIVE - CAPEN DECISION'], dtype=object)

Here, we are filtering again, this time by 'LICENSE_STATUS_DESCRIPTION', as we only need the ACTIVE.

In [14]:
health_open_active = health_open[health_open.LICENSE_STATUS_DESCRIPTION == 'ACTIVE']

In [15]:
health_open_active.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12560 entries, 0 to 14360
Data columns (total 63 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   LICENSED_CERTIFIED              12560 non-null  object 
 1   FLAG                            706 non-null    object 
 2   T18_19                          2249 non-null   object 
 3   FACID                           12560 non-null  int64  
 4   FAC_STATUS_TYPE_CODE            12560 non-null  object 
 5   ASPEN_FACID                     12379 non-null  object 
 6   CCN                             8311 non-null   object 
 7   TERMINAT_SW                     140 non-null    object 
 8   PARTICIPATION_DATE              8254 non-null   object 
 9   APPROVAL_DATE                   7840 non-null   object 
 10  NPI                             7209 non-null   object 
 11  CAN_BE_DEEMED_FAC_TYPE          10972 non-null  object 
 12  CAN_BE_CERTIFIED_FAC_TYPE       

In [16]:
health_open_active

Unnamed: 0,LICENSED_CERTIFIED,FLAG,T18_19,FACID,FAC_STATUS_TYPE_CODE,ASPEN_FACID,CCN,TERMINAT_SW,PARTICIPATION_DATE,APPROVAL_DATE,...,BIRTHING_FACILITY_FLAG,TRAUMA_PED_CTR,TRAUMA_CTR,TYPE_OF_CARE,CRITICAL_ACCESS_HOSPITAL,DATA_DATE,primary_care,nursing,pediatric,mental_health
0,LICENSED AND CERTIFIED,,T18 OR T18/19,10000001,OPEN,CA010000001,555120,,7-Nov-80,13-Jul-21,...,,,,,,15-Mar-22,False,True,False,False
1,LICENSED AND CERTIFIED,,T18 OR T18/19,10000003,OPEN,CA010000003,056090,,1-Feb-74,,...,,,,,,15-Mar-22,False,True,False,False
2,LICENSED AND CERTIFIED,,T18 OR T18/19,10000004,OPEN,CA010000004,056296,,14-Sep-71,26-Dec-19,...,,,,,,15-Mar-22,False,True,False,False
3,LICENSED AND CERTIFIED,,T18 OR T18/19,10000005,OPEN,CA010000005,555703,,18-Apr-97,13-Aug-19,...,,,,,,15-Mar-22,False,True,False,False
4,LICENSED AND CERTIFIED,,T18 OR T18/19,10000024,OPEN,CA010000024,056300,,10-Jul-73,13-Aug-21,...,,,,,,15-Mar-22,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14356,LICENSED AND CERTIFIED,,,980002541,OPEN,CA980002541,551653,,7-Jun-12,14-Oct-21,...,,,,,,15-Mar-22,False,True,False,False
14357,LICENSED AND CERTIFIED,,,980002546,OPEN,HH98002546,053164,,6-Mar-18,23-Aug-18,...,,,,,,15-Mar-22,False,False,False,False
14358,LICENSED AND CERTIFIED,,,980002547,OPEN,HH980002547,058309,,4-Aug-05,24-Apr-20,...,,,,,,15-Mar-22,False,False,False,False
14359,LICENSED AND CERTIFIED,,,980002548,OPEN,HH980002548,058468,,9-Jun-08,27-Nov-18,...,,,,,,15-Mar-22,False,False,False,False


In [21]:
mental_health_facilities = health_open_active[health_open_active['mental_health'] == True]

In [22]:
mental_health_facilities

Unnamed: 0,LICENSED_CERTIFIED,FLAG,T18_19,FACID,FAC_STATUS_TYPE_CODE,ASPEN_FACID,CCN,TERMINAT_SW,PARTICIPATION_DATE,APPROVAL_DATE,...,BIRTHING_FACILITY_FLAG,TRAUMA_PED_CTR,TRAUMA_CTR,TYPE_OF_CARE,CRITICAL_ACCESS_HOSPITAL,DATA_DATE,primary_care,nursing,pediatric,mental_health
291,LICENSED AND CERTIFIED,USE PARENT MATCH,,30000301,OPEN,CA030000146,050127,,1-Jul-66,13-Aug-19,...,,,,,,15-Mar-22,False,False,False,True
292,LICENSED AND CERTIFIED,,,30000320,OPEN,CA030000320,054087,,17-Nov-86,30-Apr-18,...,,,,,,15-Mar-22,False,False,False,True
307,LICENSED AND CERTIFIED,,,30000796,OPEN,CA030000796,054096,,28-Jun-88,7-Oct-20,...,,,,,,15-Mar-22,False,False,False,True
310,LICENSED AND CERTIFIED,,,30000822,OPEN,CA030000822,054104,,13-Jan-89,26-Jun-19,...,,,,,,15-Mar-22,False,False,False,True
314,LICENSED AND CERTIFIED,,,30000862,OPEN,CA030000862,054123,,4-Oct-90,16-Nov-16,...,,,,,,15-Mar-22,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13566,LICENSED ONLY,,,960000226,OPEN,CA960000226,,,,,...,,,,,,15-Mar-22,False,False,False,True
13570,LICENSED ONLY,,,960000239,OPEN,CA960000239,,,,,...,,,,,,15-Mar-22,False,False,False,True
13786,LICENSED ONLY,,,960002235,OPEN,CA960002235,,,,,...,,,,,,15-Mar-22,False,False,False,True
13837,LICENSED ONLY,,,960002487,OPEN,CA960002487,,,,,...,,,,,,15-Mar-22,False,False,False,True


In [23]:
mental_health_facilities.to_csv(r'/Users/irenecasado/Desktop/code/healthcare-facility/mental_health_facilities.csv')