In [2]:
import pandas as pd
import numpy as np
from values import dismissal

In [3]:
# Step 1: Load the aggregator report, relabel "Program Start Date" as
# "Date of Identification", and discard the Relationship / Family Name / Name columns.
df = (
    pd.read_csv("BFZ_Aggregator_Report.csv")
    .rename(columns={"Program Start Date": "Date of Identification"})
    .drop(columns=["Relationship", "Family Name", "Name"])
)
df

Unnamed: 0,Case Number,Program Name,Veteran Status (HUD),Date of Identification,Program End Date,Dismissal Reason
0,48450,Arlington Zero: Chronic - Veterans Only,No,1/31/2019 0:00,6/19/2019 0:00,Exit to Permanent Housing
1,7649,Arlington Zero: Chronic - Veterans Only,No,8/27/2015 0:00,10/30/2015 0:00,Exit to Permanent Housing
2,8039,Arlington Zero: Chronic - Veterans Only,No,1/20/2015 0:00,1/22/2015 0:00,Service Completed Successfully
3,48894,Arlington Zero: Single Adults,No,7/1/2019 0:00,7/24/2020 0:00,Exit to Permanent Housing
4,52633,Arlington Zero: Families,No,6/23/2021 0:00,,
...,...,...,...,...,...,...
442,43654,Arlington Zero: Chronic - Veterans Only,No,3/28/2017 0:00,6/12/2017 0:00,Exit to Permanent Housing
443,50467,Arlington Zero: Single Adults,No,7/1/2019 0:00,2/26/2020 0:00,Exit to Permanent Housing
444,51829,Arlington Zero: Chronic - Veterans Only,Yes,3/17/2020 0:00,11/19/2020 0:00,Exit to Permanent Housing
445,10194,Arlington Zero: Chronic - Veterans Only,No,11/2/2015 0:00,8/8/2016 0:00,Exit to Permanent Housing


In [4]:
# Step 2: Derive the "Household Type" column from "Program Name".
# Program names that are not keys of this lookup come out as NaN.
household_type = {
    "Arlington Zero: Chronic - Veterans Only": "Single Adults",
    "Arlington Zero: Single Adults": "Single Adults",
    "Arlington Zero: Families": "Families",
    "Arlington Zero: TAY": "Youth",
}
program_names = df['Program Name']
df['Household Type'] = program_names.map(household_type)
df

Unnamed: 0,Case Number,Program Name,Veteran Status (HUD),Date of Identification,Program End Date,Dismissal Reason,Household Type
0,48450,Arlington Zero: Chronic - Veterans Only,No,1/31/2019 0:00,6/19/2019 0:00,Exit to Permanent Housing,Single Adults
1,7649,Arlington Zero: Chronic - Veterans Only,No,8/27/2015 0:00,10/30/2015 0:00,Exit to Permanent Housing,Single Adults
2,8039,Arlington Zero: Chronic - Veterans Only,No,1/20/2015 0:00,1/22/2015 0:00,Service Completed Successfully,Single Adults
3,48894,Arlington Zero: Single Adults,No,7/1/2019 0:00,7/24/2020 0:00,Exit to Permanent Housing,Single Adults
4,52633,Arlington Zero: Families,No,6/23/2021 0:00,,,Families
...,...,...,...,...,...,...,...
442,43654,Arlington Zero: Chronic - Veterans Only,No,3/28/2017 0:00,6/12/2017 0:00,Exit to Permanent Housing,Single Adults
443,50467,Arlington Zero: Single Adults,No,7/1/2019 0:00,2/26/2020 0:00,Exit to Permanent Housing,Single Adults
444,51829,Arlington Zero: Chronic - Veterans Only,Yes,3/17/2020 0:00,11/19/2020 0:00,Exit to Permanent Housing,Single Adults
445,10194,Arlington Zero: Chronic - Veterans Only,No,11/2/2015 0:00,8/8/2016 0:00,Exit to Permanent Housing,Single Adults


In [5]:
# Step 3: Add "Case Number Counter" - the number of rows sharing each Case Number.
# The tally is taken from the Case Number values themselves rather than from the
# non-null "Program Name" entries per group, so a gap in an unrelated column
# cannot undercount a case.
records_per_case = df['Case Number'].value_counts()
df['Case Number Counter'] = df['Case Number'].map(records_per_case)
# Display only the cases that appear on more than one row.
df[df['Case Number Counter'] > 1]

Unnamed: 0,Case Number,Program Name,Veteran Status (HUD),Date of Identification,Program End Date,Dismissal Reason,Household Type,Case Number Counter
36,44078,Arlington Zero: Chronic - Veterans Only,No,8/17/2016 0:00,8/17/2016 0:00,Removed from Pool,Single Adults,2
37,44078,Arlington Zero: Chronic - Veterans Only,No,3/22/2016 0:00,5/12/2017 0:00,Exit to Permanent Housing,Single Adults,2
49,10489,Arlington Zero: Chronic - Veterans Only,No,7/10/2020 0:00,,,Single Adults,4
50,10489,Arlington Zero: Single Adults,No,7/1/2019 0:00,6/22/2021 0:00,Removed from Pool,Single Adults,4
51,10489,Arlington Zero: Chronic - Veterans Only,No,7/10/2020 0:00,,,Single Adults,4
...,...,...,...,...,...,...,...,...
433,8563,Arlington Zero: Chronic - Veterans Only,No,11/30/2016 0:00,2/27/2017 0:00,Discharged for Non-Compliance,Single Adults,3
434,8563,Arlington Zero: Chronic - Veterans Only,No,10/15/2015 0:00,3/29/2016 0:00,Removed from Pool,Single Adults,3
435,8563,Arlington Zero: Chronic - Veterans Only,No,9/19/2017 0:00,6/30/2018 0:00,,Single Adults,3
439,40681,Arlington Zero: Families,Data Not Collected,9/4/2021 0:00,,,Families,2


In [6]:
# Step 4: Add the "Chronic" column.
# A client in "Arlington Zero: Chronic - Veterans Only" whose veteran status is "No"
# is flagged as chronic ("Yes"); every other row is left missing (NaN).
is_chronic = (
    (df['Program Name'] == "Arlington Zero: Chronic - Veterans Only")
    & (df['Veteran Status (HUD)'] == "No")
)
# Create the column with object dtype up front: writing the string "Yes" into a
# float64 all-NaN column relies on silent upcasting, which newer pandas deprecates.
df['Chronic'] = pd.Series(np.nan, index=df.index, dtype=object)
df.loc[is_chronic, 'Chronic'] = "Yes"
df['Chronic'].value_counts()

Yes    233
Name: Chronic, dtype: int64

In [7]:
# Step 5: Translate every raw dismissal reason through the imported `dismissal` mapping.
# NOTE(review): if `dismissal` is a dict, Series.map yields NaN for any value it does
# not contain, so re-running this cell on already-remapped values may blank the
# column - confirm against the contents of `dismissal`.
reason_column = 'Dismissal Reason'
df[reason_column] = df[reason_column].map(dismissal)
df.head()

Unnamed: 0,Case Number,Program Name,Veteran Status (HUD),Date of Identification,Program End Date,Dismissal Reason,Household Type,Case Number Counter,Chronic
0,48450,Arlington Zero: Chronic - Veterans Only,No,1/31/2019 0:00,6/19/2019 0:00,Housed,Single Adults,1,Yes
1,7649,Arlington Zero: Chronic - Veterans Only,No,8/27/2015 0:00,10/30/2015 0:00,Housed,Single Adults,1,Yes
2,8039,Arlington Zero: Chronic - Veterans Only,No,1/20/2015 0:00,1/22/2015 0:00,Housed,Single Adults,1,Yes
3,48894,Arlington Zero: Single Adults,No,7/1/2019 0:00,7/24/2020 0:00,Housed,Single Adults,1,
4,52633,Arlington Zero: Families,No,6/23/2021 0:00,,,Families,1,


In [10]:
# Step 6: "Housing Move-In Date" is the Program End Date on rows whose dismissal
# reason is "Housed"; all other rows are left as NaN.
was_housed = df["Dismissal Reason"] == "Housed"
df['Housing Move-In Date'] = df["Program End Date"].where(was_housed)
df['Housing Move-In Date']

Unnamed: 0,Housing Move-In Date,Program End Date
0,6/19/2019 0:00,6/19/2019 0:00
1,10/30/2015 0:00,10/30/2015 0:00
2,1/22/2015 0:00,1/22/2015 0:00
3,7/24/2020 0:00,7/24/2020 0:00
4,,
...,...,...
442,6/12/2017 0:00,6/12/2017 0:00
443,2/26/2020 0:00,2/26/2020 0:00
444,11/19/2020 0:00,11/19/2020 0:00
445,8/8/2016 0:00,8/8/2016 0:00


In [16]:
# Step 7: "Inactive Date" is the Program End Date on rows whose dismissal reason is
# anything other than "Housed"; "Housed" rows are left as NaN.
# Open question: should rows with a Program End Date but a null dismissal reason be
# considered inactive? As written they are, because a missing reason compares as
# not equal to "Housed".
not_housed = df["Dismissal Reason"] != "Housed"
df['Inactive Date'] = df['Program End Date'].where(not_housed)
df['Inactive Date']

0                 NaN
1                 NaN
2                 NaN
3                 NaN
4                 NaN
            ...      
442               NaN
443               NaN
444               NaN
445               NaN
446    5/29/2019 0:00
Name: Inactive Date, Length: 447, dtype: object

In [None]:
#Step 8, Calculate Returned to Active Date (Date of Identification on second record)