In [1]:
import pandas as pd
import numpy as np



In [2]:
# Load the hand-collected facility list. Addresses were split into
# street / city / state by hand before this step.
df = pd.read_csv(filepath_or_buffer='monte-nido-raw.csv')
# Preview the first two rows to confirm the file parsed as expected.
df.head(n=2)

Unnamed: 0,address,street,city,state,program_type,program_type_specific,org
0,"1 Saint Andrews Lane, Glen Cove, NY",1 Saint Andrews Lane,Glen Cove,NY,Residential,Adults of all genders,Monte Nido
1,"100 South Broadway, Irvington, NY",100 South Broadway,Irvington,NY,Residential,Adult females,Monte Nido


In [3]:
# Pre-create every flag column, empty (None), in a fixed order. Only the flags
# that apply to this organisation are filled in afterwards.

# treatment types: hospital inpatient, residential, partial hospitalization day treatment, outpatient
treatment_type_cols = ['hi', 'res', 'phdt', 'op']
# facility types
facility_type_cols = ['cbhc', 'msmh', 'omh', 'ores', 'ph', 'psy', 'rtca', 'rtcc', 'ipsy', 'shp', 'vahc']
# age groups: child (<12), young adult (18-25), adult (25-64), senior (65+)
age_group_cols = ['chld', 'yad', 'adlt', 'snr']
# special programs offered ('ped' is the ED code)
special_program_cols = [
    'tay', 'se', 'gl', 'vet', 'adm', 'mf', 'cj', 'co', 'hv', 'dv',
    'trma', 'tbi', 'alz', 'ped', 'pefp', 'ptsd', 'sed', 'smi',
]

df[treatment_type_cols + facility_type_cols + age_group_cols + special_program_cols] = None

# Show the resulting column order.
df.columns

Index(['address', 'street', 'city', 'state', 'program_type',
       'program_type_specific', 'org', 'hi', 'res', 'phdt', 'op', 'cbhc',
       'msmh', 'omh', 'ores', 'ph', 'psy', 'rtca', 'rtcc', 'ipsy', 'shp',
       'vahc', 'chld', 'yad', 'adlt', 'snr', 'tay', 'se', 'gl', 'vet', 'adm',
       'mf', 'cj', 'co', 'hv', 'dv', 'trma', 'tbi', 'alz', 'ped', 'pefp',
       'ptsd', 'sed', 'smi'],
      dtype='object')

In [4]:
def _flag(text, pattern):
    """Return an object array holding 1 where `text` matches `pattern`, else None.

    text    -- pandas Series of strings to search.
    pattern -- regular expression passed to `Series.str.contains`.

    Missing values count as "no match" (`na=False`). Without it,
    `str.contains` yields NaN for a missing value and `np.where` treats NaN
    as true, so a row with no description at all would be flagged 1.
    """
    return np.where(text.str.contains(pattern, na=False), 1, None)


# Level-of-care wording (e.g. "Residential").
program_text = df['program_type']
# In the previewed rows the age/gender wording (e.g. "Adult females") sits in
# program_type_specific, not program_type, so the age-group patterns are
# searched across both columns. Anything that matched in program_type alone
# still matches.
audience_text = program_text.fillna('') + ' ' + df['program_type_specific'].fillna('')

# treatment types
# residential
df['res'] = _flag(program_text, 'Residential')
# partial hospitalization / day treatment
df['phdt'] = _flag(program_text, 'PHP')
# outpatient
df['op'] = _flag(program_text, 'IOP')

# facility types
# msmh - Multi-setting mental health facility (e.g., non-hospital residential plus either outpatient and/or partial hospitalization/day treatment)
df['msmh'] = _flag(program_text, 'PHP/IOP and Residential')
# omh - outpatient mental health
df['omh'] = _flag(program_text, 'IOP|outpatient')
# ph - partial hospitalization / day treatment facility
df['ph'] = _flag(program_text, 'PHP')

# age groups
df['chld'] = _flag(audience_text, 'ages 11-')
# Adult programs are flagged for all three adult bands (young adult, adult, senior).
for adult_col in ('yad', 'adlt', 'snr'):
    df[adult_col] = _flag(audience_text, 'Adult|Women')

# special services offered - ED treatment for all, set to 1
df['ped'] = 1

In [5]:
# Write the full flagged table to disk, leaving out the row index.
df.to_csv(path_or_buf='monte-nido-clean.csv', index=False)

In [6]:
# Restrict to rows flagged as residential (res == 1); only these are written
# to the concat file.
is_residential = df['res'].eq(1)
df_res = df.loc[is_residential]

In [7]:
# Write the residential-only subset to disk, leaving out the row index.
df_res.to_csv(path_or_buf='monte-nido-for-concat.csv', index=False)