In [2]:
import pandas as pd
import numpy as np
from values import dismissal

In [3]:
#Step 1: Import document, Change Program Start Date to Date of Identification
df = pd.read_csv("BFZ_Aggregator_Report.csv")
df = df.rename(columns={"Program Start Date":"Date of Identification",'Case Number':'Client ID','Veteran Status (HUD)':'Veteran Status'})
df = df.drop(["Relationship","Family Name",'Name'],axis=1)
df = df.drop_duplicates()
df = df.reset_index()
df = df.drop(['index'],axis=1)
df

Unnamed: 0,Client ID,Program Name,Veteran Status,Date of Identification,Program End Date,Dismissal Reason
0,48450,Arlington Zero: Chronic - Veterans Only,No,1/31/2019 0:00,6/19/2019 0:00,Exit to Permanent Housing
1,7649,Arlington Zero: Chronic - Veterans Only,No,8/27/2015 0:00,10/30/2015 0:00,Exit to Permanent Housing
2,8039,Arlington Zero: Chronic - Veterans Only,No,1/20/2015 0:00,1/22/2015 0:00,Service Completed Successfully
3,48894,Arlington Zero: Single Adults,No,7/1/2019 0:00,7/24/2020 0:00,Exit to Permanent Housing
4,52633,Arlington Zero: Families,No,6/23/2021 0:00,,
...,...,...,...,...,...,...
431,43654,Arlington Zero: Chronic - Veterans Only,No,3/28/2017 0:00,6/12/2017 0:00,Exit to Permanent Housing
432,50467,Arlington Zero: Single Adults,No,7/1/2019 0:00,2/26/2020 0:00,Exit to Permanent Housing
433,51829,Arlington Zero: Chronic - Veterans Only,Yes,3/17/2020 0:00,11/19/2020 0:00,Exit to Permanent Housing
434,10194,Arlington Zero: Chronic - Veterans Only,No,11/2/2015 0:00,8/8/2016 0:00,Exit to Permanent Housing


In [4]:
#Step 2: Adding Household Type Column
household_type = {"Arlington Zero: Chronic - Veterans Only":"Single Adults",
"Arlington Zero: Single Adults":"Single Adults",
"Arlington Zero: Families":"Families",
"Arlington Zero: TAY":"Youth"}
df['Household Type'] = df['Program Name'].map(household_type)
df

Unnamed: 0,Client ID,Program Name,Veteran Status,Date of Identification,Program End Date,Dismissal Reason,Household Type
0,48450,Arlington Zero: Chronic - Veterans Only,No,1/31/2019 0:00,6/19/2019 0:00,Exit to Permanent Housing,Single Adults
1,7649,Arlington Zero: Chronic - Veterans Only,No,8/27/2015 0:00,10/30/2015 0:00,Exit to Permanent Housing,Single Adults
2,8039,Arlington Zero: Chronic - Veterans Only,No,1/20/2015 0:00,1/22/2015 0:00,Service Completed Successfully,Single Adults
3,48894,Arlington Zero: Single Adults,No,7/1/2019 0:00,7/24/2020 0:00,Exit to Permanent Housing,Single Adults
4,52633,Arlington Zero: Families,No,6/23/2021 0:00,,,Families
...,...,...,...,...,...,...,...
431,43654,Arlington Zero: Chronic - Veterans Only,No,3/28/2017 0:00,6/12/2017 0:00,Exit to Permanent Housing,Single Adults
432,50467,Arlington Zero: Single Adults,No,7/1/2019 0:00,2/26/2020 0:00,Exit to Permanent Housing,Single Adults
433,51829,Arlington Zero: Chronic - Veterans Only,Yes,3/17/2020 0:00,11/19/2020 0:00,Exit to Permanent Housing,Single Adults
434,10194,Arlington Zero: Chronic - Veterans Only,No,11/2/2015 0:00,8/8/2016 0:00,Exit to Permanent Housing,Single Adults


In [50]:
#Step 3: Adding Case Number Counter
# df['Case Number Counter'] = df['Case Number'].map(df.groupby('Case Number').count()['Program Name'])
df['Client ID Counter'] = df['Client ID'].map(df.groupby('Client ID').agg({'Client ID':'count'})['Client ID'])
# df[df['Client ID Counter'] > 1]
counter = {}
for i in df['Client ID']:
    if i not in counter:
        counter[i] = {"Single Adults":0,"Families":0,"Youth":0}
for j in counter:
    for k in df[df['Client ID']==j]['Household Type']:
        counter[j][k] += 1

df['Client ID Household Counter'] = np.nan
for i in df.index:
    df.loc[i,'Client ID Household Counter'] = counter[df.loc[i,'Client ID']][df.loc[i,'Household Type']]
df[df['Client ID']==52736]

Unnamed: 0,Client ID,Program Name,Veteran Status,Date of Identification,Program End Date,Dismissal Reason,Household Type,Client ID Counter,Client ID Household Counter
132,52736,Arlington Zero: Families,No,8/5/2021 0:00,10/28/2021 0:00,Exit to Permanent Housing,Families,2,1.0
133,52736,Arlington Zero: TAY,No,4/7/2021 0:00,,,Youth,2,1.0


In [15]:
#Step 4: Adding Chronic Column
#If client is in Chronic & Vet and 'No' to Veteran Status, then they are chronic
df['Chronic Status'] = np.nan
df.loc[(df['Program Name']=="Arlington Zero: Chronic - Veterans Only") & (df['Veteran Status']=="No")
,"Chronic Status"] = "Yes"
df['Chronic Status'].value_counts()

Yes    225
Name: Chronic Status, dtype: int64

In [16]:
#Step 5, remap all dismissal reasons
df['Dismissal Reason'] = df['Dismissal Reason'].map(dismissal)

In [17]:
#Step 6, Populate Housing Move-In Date
df['Housing Move-In Date'] = df["Program End Date"][df["Dismissal Reason"]=="Housed"]
df['Housing Move-In Date']

0       6/19/2019 0:00
1      10/30/2015 0:00
2       1/22/2015 0:00
3       7/24/2020 0:00
4                  NaN
            ...       
431     6/12/2017 0:00
432     2/26/2020 0:00
433    11/19/2020 0:00
434      8/8/2016 0:00
435                NaN
Name: Housing Move-In Date, Length: 436, dtype: object

In [18]:
#Step 7, Populate Inactive Date
#Do we consider those with program end date and null dismissal reasons as inactive?
df['Inactive Date'] = df['Program End Date'][df["Dismissal Reason"]!="Housed"]
df['Inactive Date']

0                 NaN
1                 NaN
2                 NaN
3                 NaN
4                 NaN
            ...      
431               NaN
432               NaN
433               NaN
434               NaN
435    5/29/2019 0:00
Name: Inactive Date, Length: 436, dtype: object

In [19]:
#Step 8, Calculate 1stDateofID, then calculate Returned to Active Date (Date of Idenfication on second record)
#{FIXED [ClientID],[Household Type]: MIN([Date of Identification]) }
#IF [NumRows_ClientID_HouseholdType]> 1 AND [1stDateofID] <> [Date of Identification] THEN [Date of Identification] ELSE NULL END
#If [Household Type Case Counter]>1 AND [1stDateofID] != [Date of Identification] THEN [Date of Identification] ELSE np.nan
df['1stDateofID'] = df['Client ID'].map(df.groupby('Client ID').agg({'Date of Identification':'min'})['Date of Identification']) #works
df['Return to Active Date'] = np.nan
df.loc[(df['Client ID Counter']>1) & (df['1stDateofID']!=df['Date of Identification'])
,"Return to Active Date"] = df['Date of Identification']
df['Return to Active Date'].value_counts()

7/1/2019 0:00      9
7/1/2018 0:00      3
8/17/2016 0:00     2
8/29/2018 0:00     2
8/9/2016 0:00      2
4/11/2016 0:00     1
5/17/2017 0:00     1
8/7/2015 0:00      1
8/29/2016 0:00     1
9/19/2017 0:00     1
5/25/2017 0:00     1
9/4/2021 0:00      1
9/28/2020 0:00     1
9/26/2018 0:00     1
7/10/2020 0:00     1
8/22/2017 0:00     1
11/30/2016 0:00    1
12/27/2020 0:00    1
3/3/2018 0:00      1
6/2/2015 0:00      1
8/5/2021 0:00      1
4/2/2020 0:00      1
1/8/2015 0:00      1
2/22/2018 0:00     1
2/8/2017 0:00      1
9/28/2016 0:00     1
7/6/2015 0:00      1
8/27/2020 0:00     1
Name: Return to Active Date, dtype: int64

In [37]:
#Step 9, Determine clients that "No longer meets population criteria" by demographic info
#All persons, all singles, veterans, chronic, chronic veteran, youth, families
print("How many clients this month No longer meet population criteria?")
print("All clients ",df['Dismissal Reason'].where(df['Dismissal Reason']=="No longer meets population criteria").count())
print("Singles ",df['Dismissal Reason'].where((df['Dismissal Reason']=="No longer meets population criteria")&(df['Household Type']=='Single Adults')).count())
print("Veterans ",df['Dismissal Reason'].where((df['Dismissal Reason']=="No longer meets population criteria")&(df['Veteran Status']=='Yes')).count())
print("Chronic ",df['Dismissal Reason'].where((df['Dismissal Reason']=="No longer meets population criteria")&(df['Chronic Status']=='Yes')).count())
print("Chronic Veterans ",df['Dismissal Reason'].where((df['Dismissal Reason']=="No longer meets population criteria")&(df['Chronic Status']=='Yes')&(df['Veteran Status']=='Yes')).count())
print("Youth ",df['Dismissal Reason'].where((df['Dismissal Reason']=="No longer meets population criteria")&(df['Household Type']=='Youth')).count())
print("Families ",df['Dismissal Reason'].where((df['Dismissal Reason']=="No longer meets population criteria")&(df['Household Type']=='Families')).count())


How many clients this month No longer meet population criteria?
All clients  34
Singles  34
Veterans  2
Chronic  22
Chronic Veterans  0
Youth  0
Families  0
