The purpose of this script is to read in and process data from the Urban Ministries of Durham (UMD) shelter.

In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
c = os.getcwd()
c

'/Users/alvin/Documents/GitHub/bios611-projects-fall-2019-alvinthomas/project_3/scripts'

In [3]:
if os.path.exists(c + '/scripts') :
    # Change the current working Directory    
    os.chdir(c + '/scripts')
else:
    print("Can't change the Current Working Directory")   

Can't change the Current Working Directory


In [4]:
os.getcwd()

'/Users/alvin/Documents/GitHub/bios611-projects-fall-2019-alvinthomas/project_3/scripts'

In [5]:
c2 = os.getcwd()

First, read in a dataset that contains demographic information on clients

In [6]:
client = pd.read_csv('../data/CLIENT_191102.tsv', sep='\t')
list(client.columns) 

['EE Provider ID',
 'EE UID',
 'Client Unique ID',
 'Client ID',
 'Client Age at Entry',
 'Client Age at Exit',
 'Client Gender',
 'Client Primary Race',
 'Client Ethnicity',
 'Client Veteran Status']

Drop columns we do not intend to use.

In [7]:
client.drop(['EE Provider ID', 'EE UID'], axis=1)

Unnamed: 0,Client Unique ID,Client ID,Client Age at Entry,Client Age at Exit,Client Gender,Client Primary Race,Client Ethnicity,Client Veteran Status
0,pbkf09291954p610b236,397941,60.0,61.0,Female,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD)
1,kdaf01071967k400d635,130335,48.0,48.0,Female,Black or African American (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD)
2,smrf06211973s620m640,188933,42.0,42.0,Female,Black or African American (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD)
3,abrm07251958a416b600,168290,57.0,57.0,Male,White (HUD),Hispanic/Latino (HUD),No (HUD)
4,wbom01251964w450b620,123122,51.0,51.0,Male,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD)
...,...,...,...,...,...,...,...,...
5294,jlpm01021947j520l125,370291,68.0,68.0,Male,Black or African American (HUD),Non-Hispanic/Non-Latino (HUD),Yes (HUD)
5295,clom10111953c430l563,175198,61.0,61.0,Male,Black or African American (HUD),Non-Hispanic/Non-Latino (HUD),Yes (HUD)
5296,htnm05151961h560t550,401978,54.0,54.0,Male,White (HUD),Non-Hispanic/Non-Latino (HUD),Yes (HUD)
5297,osrm07241965o632s645,401495,49.0,50.0,Male,Black or African American (HUD),Non-Hispanic/Non-Latino (HUD),Yes (HUD)


Next, read in data about client history.

In [8]:
entry = pd.read_csv('../data/EE_UDES_191102.tsv', sep='\t')
list(entry.columns) 

['EE Provider ID',
 'Entry Exit Provider Program Type Code',
 'EE UID',
 'Client Unique ID',
 'Client ID',
 'Client Location(4378)',
 'Zip Code (of Last Permanent Address, if known)(1932)',
 'Relationship to Head of Household(4374)',
 'Prior Living Situation(43)',
 'Length of Stay in Previous Place(1934)',
 'Did you stay less than 7 nights?(5164)',
 'Did you stay less than 90 days?(5163)',
 'On the night before did you stay on the streets, ES or SH?(5165)',
 'Regardless of where they stayed last night - Number of times the client has been on the streets, in ES, or SH in the past three years including today(5167)',
 'Total number of months homeless on the street, in ES or SH in the past three years(5168)',
 'Housing Status(2703)',
 'Does the client have a disabling condition?(1935)',
 'Covered by Health Insurance(4376)',
 'Domestic violence victim/survivor(341)',
 'If yes for Domestic violence victim/survivor, when experience occurred(1917)',
 'Date of Birth(893)']

In [9]:
entry = entry.filter(items=['Client Unique ID', 'Zip Code (of Last Permanent Address, if known)(1932)', 'Housing Status(2703)', 'Does the client have a disabling condition?(1935)', 'Covered by Health Insurance(4376)', 'Date of Birth(893)'])

Merge client demographics and history.

In [10]:
new_df = client.merge(entry, left_on='Client Unique ID', right_on='Client Unique ID')
new_df.head()

Unnamed: 0,EE Provider ID,EE UID,Client Unique ID,Client ID,Client Age at Entry,Client Age at Exit,Client Gender,Client Primary Race,Client Ethnicity,Client Veteran Status,"Zip Code (of Last Permanent Address, if known)(1932)",Housing Status(2703),Does the client have a disabling condition?(1935),Covered by Health Insurance(4376),Date of Birth(893)
0,Urban Ministries of Durham - Durham County - S...,687901,pbkf09291954p610b236,397941,60.0,61.0,Female,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD),27701.0,Category 1 - Homeless (HUD),Yes (HUD),Yes (HUD),9/29/1954
1,Urban Ministries of Durham - Durham County - S...,687901,pbkf09291954p610b236,397941,60.0,61.0,Female,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD),,Category 1 - Homeless (HUD),Yes (HUD),Yes (HUD),9/29/1954
2,Urban Ministries of Durham - Durham County - S...,687901,pbkf09291954p610b236,397941,60.0,61.0,Female,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD),,Category 1 - Homeless (HUD),Yes (HUD),Yes (HUD),9/29/1954
3,Urban Ministries of Durham - Durham County - S...,687901,pbkf09291954p610b236,397941,60.0,61.0,Female,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD),27701.0,Category 1 - Homeless (HUD),Yes (HUD),Yes (HUD),9/29/1954
4,XXXClosed2015 Urban Ministries of Durham- Durh...,658801,pbkf09291954p610b236,397941,60.0,60.0,Female,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD),27701.0,Category 1 - Homeless (HUD),Yes (HUD),Yes (HUD),9/29/1954


The next dataset contains information on the current visit.

In [11]:
visit = pd.read_csv('../data/ENTRY_EXIT_191102.tsv',sep='\t')
list(visit.columns) 

['EE Provider ID',
 'EE UID',
 'Client Unique ID',
 'Client ID',
 'Entry Exit Group Id',
 'Entry Exit Household Id',
 'Unnamed: 6',
 'Entry Date',
 'Housing Move-in Date(5584)',
 'Exit Date',
 'Destination',
 'Reason for Leaving',
 'Entry Exit Type',
 'Entry Exit Date Added',
 'Entry Exit Date Updated']

In [12]:
visit = visit.drop(['Client ID','EE Provider ID', 'EE UID','Entry Exit Group Id','Entry Exit Household Id', 'Unnamed: 6','Housing Move-in Date(5584)','Destination','Entry Exit Type','Entry Exit Date Added','Entry Exit Date Updated'], axis=1)
list(visit.columns) 

['Client Unique ID', 'Entry Date', 'Exit Date', 'Reason for Leaving']

We will use date times to estimate the length of the visit

In [13]:
visit['Entry'] =  pd.to_datetime(visit['Entry Date'], format='%m/%d/%Y')
visit['Exit'] =  pd.to_datetime(visit['Exit Date'], format='%m/%d/%Y')
visit['Total Nights'] = visit['Exit'] - visit['Entry']  # in days

In [14]:
visit["Total Nights"] = visit["Total Nights"].apply(lambda row: row.days)
visit["Total Nights"]

0       331.0
1        16.0
2        35.0
3       205.0
4         9.0
        ...  
5294     29.0
5295     34.0
5296     18.0
5297     44.0
5298     24.0
Name: Total Nights, Length: 5299, dtype: float64

In [15]:
visit = visit.drop(['Entry Date','Exit Date'], axis=1)

In [16]:
new_df = new_df.merge(visit, left_on='Client Unique ID', right_on='Client Unique ID')
new_df.head()

Unnamed: 0,EE Provider ID,EE UID,Client Unique ID,Client ID,Client Age at Entry,Client Age at Exit,Client Gender,Client Primary Race,Client Ethnicity,Client Veteran Status,"Zip Code (of Last Permanent Address, if known)(1932)",Housing Status(2703),Does the client have a disabling condition?(1935),Covered by Health Insurance(4376),Date of Birth(893),Reason for Leaving,Entry,Exit,Total Nights
0,Urban Ministries of Durham - Durham County - S...,687901,pbkf09291954p610b236,397941,60.0,61.0,Female,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD),27701.0,Category 1 - Homeless (HUD),Yes (HUD),Yes (HUD),9/29/1954,Completed program,2015-08-15,2016-07-11,331.0
1,Urban Ministries of Durham - Durham County - S...,687901,pbkf09291954p610b236,397941,60.0,61.0,Female,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD),27701.0,Category 1 - Homeless (HUD),Yes (HUD),Yes (HUD),9/29/1954,Other,2015-04-20,2015-04-26,6.0
2,Urban Ministries of Durham - Durham County - S...,687901,pbkf09291954p610b236,397941,60.0,61.0,Female,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD),27701.0,Category 1 - Homeless (HUD),Yes (HUD),Yes (HUD),9/29/1954,Completed program,2015-05-04,2015-06-09,36.0
3,Urban Ministries of Durham - Durham County - S...,687901,pbkf09291954p610b236,397941,60.0,61.0,Female,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD),27701.0,Category 1 - Homeless (HUD),Yes (HUD),Yes (HUD),9/29/1954,Other,2015-06-09,2015-08-14,66.0
4,Urban Ministries of Durham - Durham County - S...,687901,pbkf09291954p610b236,397941,60.0,61.0,Female,White (HUD),Non-Hispanic/Non-Latino (HUD),No (HUD),,Category 1 - Homeless (HUD),Yes (HUD),Yes (HUD),9/29/1954,Completed program,2015-08-15,2016-07-11,331.0


This is our final product, ready for R.

In [17]:
new_df.to_csv('../data/for_r.csv', encoding='utf-8', index=False)

In [18]:
c3 = c2[:-8]
c3

'/Users/alvin/Documents/GitHub/bios611-projects-fall-2019-alvinthomas/project_3'

In [19]:
os.chdir(c3)