In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("Z4A_Report.csv",parse_dates=['Program Start Date','Program End Date'])
df = df.rename(columns={"Program Start Date":"Date of Identification",'Case Number':'Client ID','Veteran Status (HUD)':'Veteran Status',"CH":"Chronic Status"})
df = df.drop(['Name','Gender (HUD)','Race and Ethnicity (HUD)','Family Enterprise Identifier','DOB'],axis=1)
df = df.drop_duplicates()
df = df.reset_index(drop=True)
df

Unnamed: 0,Client ID,Family Name,Relationship,Site Name,Program Name,Date of Identification,Program End Date,Dismissal Reason,Age,Veteran Status,Chronic Status
0,5556,Bowman Family 508,Self/Head of Household,PathForward,HSC - Shelter,2022-02-15,2022-02-21,Declined shelter placement/Remained homeless,54,No,Not CH
1,5556,Roderic Welcher Family,Self/Head of Household,PathForward,HSC - Shelter,2022-02-15,2022-02-21,Declined shelter placement/Remained homeless,54,No,Not CH
2,10479,Taty Fikre Family,Self/Head of Household,Bridges to Independence,Sullivan House,2023-05-02,2023-08-02,Criminal Activity/Destruction of Property/Viol...,43,No,Not CH
3,10480,Taty Fikre Family,Child,Bridges to Independence,Sullivan House,2023-05-02,2023-08-02,Criminal Activity/Destruction of Property/Viol...,16,No,Not CH
4,3734,Mauricio Prunty Family,Self/Head of Household,DHS,DHS-Treatment on Wheels (TOW),2021-04-12,2022-02-24,Exit to Permanent Housing,27,No,Not CH
...,...,...,...,...,...,...,...,...,...,...,...
1093,56759,TSION FILFILE Family,Self/Head of Household,New Hope Housing,Residential Program Center (RPC),2024-02-23,NaT,,36,No,Not CH
1094,42223,Horace Grant Family,Self/Head of Household,New Hope Housing,Residential Program Center (RPC),2024-02-21,NaT,,55,No,Not CH
1095,56776,Wayne Tracy Hicks-Miles Family,Self/Head of Household,PathForward,HSC - Shelter,2024-01-18,NaT,,21,No,Not CH
1096,56780,Robert Pace Family,Self/Head of Household,PathForward,Opportunity Place - Arlington,2023-07-31,NaT,,72,No,Not CH


In [3]:
#Chronic Column Fixing
df['Chronic Status'] = df['Chronic Status'].map({'CH':"Yes",'Not CH':np.nan})
df['Chronic Status'].value_counts()

Yes    57
Name: Chronic Status, dtype: int64

In [4]:
# #Chronic Column Addition
# chronic = pd.read_csv("July_Chronic.csv",usecols=['Case Number','CH/Not CH'])
# chronic = chronic.rename(columns={"Case Number":'Client ID'})
# chronic = chronic[chronic['CH/Not CH']=='CH']
# chronic['CH/Not CH'] = "Yes"
# chronic = chronic.drop_duplicates()

# df['Chronic Status'] = df['Client ID'].map(dict(zip(chronic['Client ID'],chronic['CH/Not CH'])))
# df['Chronic Status'].value_counts()

In [5]:
#Step 2: Determining Household Type
for i in df.index:
    df.loc[i,'Household Size'] = len(df[df['Family Name']==df.loc[i,'Family Name']].drop_duplicates(subset='Client ID'))
    df.loc[i,'Oldest Household Member'] = df[df['Family Name']==df.loc[i,'Family Name']].agg({'Age':'max'})['Age']
df

Unnamed: 0,Client ID,Family Name,Relationship,Site Name,Program Name,Date of Identification,Program End Date,Dismissal Reason,Age,Veteran Status,Chronic Status,Household Size,Oldest Household Member
0,5556,Bowman Family 508,Self/Head of Household,PathForward,HSC - Shelter,2022-02-15,2022-02-21,Declined shelter placement/Remained homeless,54,No,,1.0,54.0
1,5556,Roderic Welcher Family,Self/Head of Household,PathForward,HSC - Shelter,2022-02-15,2022-02-21,Declined shelter placement/Remained homeless,54,No,,1.0,54.0
2,10479,Taty Fikre Family,Self/Head of Household,Bridges to Independence,Sullivan House,2023-05-02,2023-08-02,Criminal Activity/Destruction of Property/Viol...,43,No,,2.0,43.0
3,10480,Taty Fikre Family,Child,Bridges to Independence,Sullivan House,2023-05-02,2023-08-02,Criminal Activity/Destruction of Property/Viol...,16,No,,2.0,43.0
4,3734,Mauricio Prunty Family,Self/Head of Household,DHS,DHS-Treatment on Wheels (TOW),2021-04-12,2022-02-24,Exit to Permanent Housing,27,No,,1.0,27.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1093,56759,TSION FILFILE Family,Self/Head of Household,New Hope Housing,Residential Program Center (RPC),2024-02-23,NaT,,36,No,,1.0,36.0
1094,42223,Horace Grant Family,Self/Head of Household,New Hope Housing,Residential Program Center (RPC),2024-02-21,NaT,,55,No,,1.0,55.0
1095,56776,Wayne Tracy Hicks-Miles Family,Self/Head of Household,PathForward,HSC - Shelter,2024-01-18,NaT,,21,No,,1.0,21.0
1096,56780,Robert Pace Family,Self/Head of Household,PathForward,Opportunity Place - Arlington,2023-07-31,NaT,,72,No,,1.0,72.0


In [6]:
df['Household Type'] = 'Youth'
for i in df.index:
    if df.loc[i,'Oldest Household Member'] > 24:
        if df.loc[i,'Household Size'] > 1:
            df.loc[i,'Household Type'] = 'Family'
        elif df.loc[i,'Household Size'] == 1:
            df.loc[i,'Household Type'] = 'Single Adults'
df[df['Household Type']=='Youth']['Household Size'].value_counts()

1.0    34
2.0    15
3.0    10
5.0     5
0.0     4
Name: Household Size, dtype: int64

In [7]:
df[df['Client ID']==54322]

Unnamed: 0,Client ID,Family Name,Relationship,Site Name,Program Name,Date of Identification,Program End Date,Dismissal Reason,Age,Veteran Status,Chronic Status,Household Size,Oldest Household Member,Household Type
518,54322,Jose Transito Mendiola Reyna Family,Self/Head of Household,PathForward,HSC - Shelter,2022-04-13,2022-09-19,Discharged for Non-Compliance,67,No,,1.0,67.0,Single Adults
547,54322,,,DHS,DHS-Treatment on Wheels (TOW),2022-09-06,2022-12-01,Client not engaged within the past 30 days,67,No,,0.0,,Youth


In [8]:
#Step 3: Adding Client ID Counter and Client ID Household Counter
#Client ID Counter
df['Client ID Counter'] = df['Client ID'].map(df.groupby('Client ID').agg({'Client ID':'count'})['Client ID'])
#Client ID Household Counter
counter = {}
for i in df['Client ID']:
    if i not in counter:
        counter[i] = {"Single Adults":0,"Family":0,"Youth":0}
for j in counter:
    for k in df[df['Client ID']==j]['Household Type']:
        counter[j][k] += 1
df['Client ID Household Counter'] = np.nan
for i in df.index:
    df.loc[i,'Client ID Household Counter'] = counter[df.loc[i,'Client ID']][df.loc[i,'Household Type']]
df['Client ID Household Counter'] = df['Client ID Household Counter'].apply(int)
df['Client ID Household Counter'].value_counts()

1    635
2    312
3     90
4     44
6     12
5      5
Name: Client ID Household Counter, dtype: int64

In [9]:
#Step 4: Adding Chronic Column
#If client is in Chronic & Vet and 'No' to Veteran Status, then they are chronic
#df['Chronic Status'] = np.nan
#SKIP FOR NOW
#SKIP FOR NOW
#SKIP FOR NOW

In [10]:
#Step 5, Remap all dismissal reasons
from values import dismissal
df['Dismissal Reason'] = df['Dismissal Reason'].map(dismissal)
df['Dismissal Reason'].value_counts()

Housed                                 513
No longer meets population criteria    242
Inactive                               105
Name: Dismissal Reason, dtype: int64

In [11]:
#Step 6, Populate Housing Move-In Date
df['Housing Move-In Date'] = df["Program End Date"][df["Dismissal Reason"]=="Housed"]
df['Housing Move-In Date']

0             NaT
1             NaT
2             NaT
3             NaT
4      2022-02-24
          ...    
1093          NaT
1094          NaT
1095          NaT
1096          NaT
1097          NaT
Name: Housing Move-In Date, Length: 1098, dtype: datetime64[ns]

In [12]:
#Step 7, Populate Inactive Date
#Do we consider those with program end date and null dismissal reasons as inactive?
df['Inactive Date'] = df['Program End Date'][df["Dismissal Reason"]=="Inactive"]
df['Inactive Date']

0      2022-02-21
1      2022-02-21
2             NaT
3             NaT
4             NaT
          ...    
1093          NaT
1094          NaT
1095          NaT
1096          NaT
1097          NaT
Name: Inactive Date, Length: 1098, dtype: datetime64[ns]

In [13]:
df.groupby('Client ID').agg({'Date of Identification':'min'})['Date of Identification']


Client ID
84      2022-01-25
319     2016-07-01
720     2023-08-23
903     2021-10-19
1022    2018-01-06
           ...    
56715   2024-02-13
56759   2024-02-23
56776   2024-01-18
56780   2023-07-31
56800   2024-02-06
Name: Date of Identification, Length: 828, dtype: datetime64[ns]

In [14]:
#Step 8, Calculate 1stDateofID, then calculate Returned to Active Date (Date of Idenfication on second record)
#Consider revising Return to Active Date formula
df['1stDateofID'] = df['Client ID'].map(df.groupby('Client ID').agg({'Date of Identification':'min'})['Date of Identification'])
df['Return to Active Date'] = np.nan
df.loc[(df['Client ID Household Counter']>1) & (df['1stDateofID']!=df['Date of Identification'])
,"Return to Active Date"] = df['Date of Identification']
df['Return to Active Date'] = pd.to_datetime(df['Return to Active Date'])

In [15]:
#Step 9, Calculate most recent move-in or inactive dates
df['Most Recent Move-In Date'] = df['Client ID'].map(df.groupby('Client ID').agg({'Housing Move-In Date':'max'})['Housing Move-In Date'])
df['Most Recent Inactive Date'] = df['Client ID'].map(df.groupby('Client ID').agg({'Inactive Date':'max'})['Inactive Date'])

In [16]:
#Step 10, Narrow down dataframe down to active clients and newly exited clients for the reporting month
dates = {}
dates["Reporting Year"] = input("Reporting Year? Enter four digits: ")
dates["Reporting Month"] = input("Reporting Month? Enter either proper string or number ")
dates['Start Date'] = pd.to_datetime(dates["Reporting Year"]+dates["Reporting Month"],format='%Y%m',errors='ignore')
dates['Last Day'] = dates['Start Date'].days_in_month
dates['Reporting Date'] = dates['Start Date'].replace(day=dates['Last Day'])

active_df = df[(df['Date of Identification']<=dates['Reporting Date']) &
((df['Program End Date'].isnull()==True) | (df['Program End Date']<df['Date of Identification']) | (df['Program End Date']>dates['Reporting Date']))]
exited_df = df[(df['Program End Date']>=dates['Start Date']) & (df['Program End Date']<=dates['Reporting Date'])]
filtered_df = pd.concat([active_df,exited_df])
# filtered_df = filtered_df.reset_index()
# filtered_df = filtered_df.drop(['index'],axis=1)

In [17]:
filtered_df = filtered_df.sort_values('Date of Identification').drop_duplicates('Client ID',keep='first')
filtered_df = filtered_df.reset_index(drop=True)
filtered_df

Unnamed: 0,Client ID,Family Name,Relationship,Site Name,Program Name,Date of Identification,Program End Date,Dismissal Reason,Age,Veteran Status,...,Oldest Household Member,Household Type,Client ID Counter,Client ID Household Counter,Housing Move-In Date,Inactive Date,1stDateofID,Return to Active Date,Most Recent Move-In Date,Most Recent Inactive Date
0,9689,Jeffery P. Porter Family,Self/Head of Household,PathForward,Opportunity Place - Arlington,2016-07-01,NaT,,68,No,...,68.0,Single Adults,1,1,NaT,NaT,2016-07-01,NaT,NaT,NaT
1,9562,Phanyah Sensouvanh Family,Self/Head of Household,PathForward,Opportunity Place - Arlington,2016-07-01,NaT,,63,No,...,63.0,Single Adults,1,1,NaT,NaT,2016-07-01,NaT,NaT,NaT
2,319,Efrain Fernandez Family,Self/Head of Household,PathForward,Opportunity Place - Arlington,2016-07-01,NaT,,69,No,...,69.0,Single Adults,1,1,NaT,NaT,2016-07-01,NaT,NaT,NaT
3,1897,Carlos R. Guzman Family,Self/Head of Household,PathForward,Opportunity Place - Arlington,2016-07-01,NaT,,59,No,...,59.0,Single Adults,1,1,NaT,NaT,2016-07-01,NaT,NaT,NaT
4,39248,Fidel Portillo Family,Self/Head of Household,PathForward,Opportunity Place - Arlington,2016-10-01,NaT,,45,No,...,45.0,Single Adults,1,1,NaT,NaT,2016-10-01,NaT,NaT,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199,4734,ronald crespo Family,Self/Head of Household,PathForward,Opportunity Place - Arlington,2024-02-22,NaT,,41,No,...,41.0,Single Adults,1,1,NaT,NaT,2024-02-22,NaT,NaT,NaT
200,56381,DAVID WHATLEY Family,Self/Head of Household,New Hope Housing,Residential Program Center (RPC),2024-02-22,NaT,,45,No,...,45.0,Single Adults,1,1,NaT,NaT,2024-02-22,NaT,NaT,NaT
201,56759,TSION FILFILE Family,Self/Head of Household,New Hope Housing,Residential Program Center (RPC),2024-02-23,NaT,,36,No,...,36.0,Single Adults,1,1,NaT,NaT,2024-02-23,NaT,NaT,NaT
202,39730,Martin Walker Family,Self/Head of Household,New Hope Housing,Residential Program Center (RPC),2024-02-28,NaT,,52,No,...,52.0,Single Adults,3,3,NaT,NaT,2021-07-19,2024-02-28,NaT,2022-08-04


In [18]:
#Step 11, Determine clients that "No longer meets population criteria" by demographic info
#All persons, all singles, veterans, chronic, chronic veteran, youth, families
print("\nHow many clients this month No longer meet population criteria?")
print("All clients ",exited_df['Dismissal Reason'].where(exited_df['Dismissal Reason']=="No longer meets population criteria").count())
print("Singles ",exited_df['Dismissal Reason'].where((exited_df['Dismissal Reason']=="No longer meets population criteria")&(exited_df['Household Type']=='Single Adults')).count())
print("Veterans ",exited_df['Dismissal Reason'].where((exited_df['Dismissal Reason']=="No longer meets population criteria")&(exited_df['Veteran Status']=='Yes')).count())
print("Chronic ",exited_df['Dismissal Reason'].where((exited_df['Dismissal Reason']=="No longer meets population criteria")&(exited_df['Chronic Status']=='Yes')).count())
print("Chronic Veterans ",exited_df['Dismissal Reason'].where((exited_df['Dismissal Reason']=="No longer meets population criteria")&(exited_df['Chronic Status']=='Yes')&(exited_df['Veteran Status']=='Yes')).count())
print("Youth ",exited_df['Dismissal Reason'].where((exited_df['Dismissal Reason']=="No longer meets population criteria")&(exited_df['Household Type']=='Youth')).count())
print("Families ",exited_df['Dismissal Reason'].where((exited_df['Dismissal Reason']=="No longer meets population criteria")&(exited_df['Household Type']=='Families')).count())


How many clients this month No longer meet population criteria?
All clients  4
Singles  4
Veterans  0
Chronic  0
Chronic Veterans  0
Youth  0
Families  0


In [19]:
#Step 12, Calculate BFZ Reporting Metrics to make sure numbers match
print("\nBFZ Reporting Metrics")
print("Actively Homeless ", len(active_df),
"+",exited_df['Client ID'].where(exited_df['Dismissal Reason']=='No longer meets population criteria').count(),'No longer meet population criteria')
print("Housing Placements ", exited_df['Client ID'].where(exited_df['Dismissal Reason']=='Housed').count())
print("Moved to Inactive ", exited_df['Client ID'].where(exited_df['Dismissal Reason']=='Inactive').count())
print("Newly Identified Inflow ",len(active_df.loc[(dates['Start Date']<=active_df['Date of Identification']) & (active_df['Date of Identification']<=dates['Reporting Date'])]))
def housing_lot():
    housed = exited_df[exited_df['Dismissal Reason']=='Housed']
    lot = [(housed.loc[i,'Program End Date'] - housed.loc[i,'Date of Identification']) for i in housed.index]
    average = 0
    for i in lot:
        average += i.days
    return average/len(lot)
print("Average Length of Time from ID to Housing Placement ",housing_lot())
def rtad_counter():
    rtad = active_df[active_df['Return to Active Date'].isnull()==False]
    counter = 0
    for i in rtad.index:
        if (rtad.loc[i,"Return to Active Date"].year==dates['Reporting Date'].year)& (rtad.loc[i,"Return to Active Date"].month==dates['Reporting Date'].month):
            counter += 1
    return counter
print("Returned to Active ",rtad_counter())
# print("Number of children ",active_df['Relationship'].value_counts()['Child'])
# print("Number of families ",active_df['Family Name'].where(active_df['Household Type']=='Family').nunique())


BFZ Reporting Metrics
Actively Homeless  209 + 4 No longer meet population criteria
Housing Placements  17
Moved to Inactive  2
Newly Identified Inflow  20
Average Length of Time from ID to Housing Placement  204.94117647058823
Returned to Active  7


In [20]:
#filtered_df.to_csv('July_ZFA.csv')