In [None]:
import datetime
import os
import time
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
from sqlalchemy import create_engine

In [None]:
# Read the database connection settings from environment variables.
user = os.getenv('PGUSER')
password = os.getenv('PGPASSWORD')
host = os.getenv('PGHOST')
port = os.getenv('PGPORT')
database = os.getenv('PGDATABASE')

# Fail early with a readable message instead of silently formatting the literal
# string "None" into the connection URL when a variable is not set.
required_settings = {'PGUSER': user, 'PGHOST': host, 'PGPORT': port, 'PGDATABASE': database}
missing_settings = [name for name, value in required_settings.items() if not value]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variables: {}".format(", ".join(missing_settings))
    )

# Percent-encode the credentials so characters such as '@', ':' or '/' cannot
# corrupt the URL. The password part is omitted when PGPASSWORD is not set.
credentials = quote_plus(user)
if password is not None:
    credentials = "{}:{}".format(credentials, quote_plus(password))

# configure connection to postgres
engine = create_engine("postgresql://{}@{}:{}/{}".format(credentials, host, port, database))

# open a connection
db_conn = engine.connect()

In [None]:
# SQL used to build the analysis frame. It is made of three CTEs:
#   * evictions  - distinct (matter_id, client_hash, filingdt, ofp_issue_dt,
#                  order_for_possession) combinations from
#                  clean.eviction_client_matches left-joined to clean.eviction
#                  (the GROUP BY over every selected column acts as a de-duplication).
#   * hmis       - client hash, HUD project type id/description and enrollment
#                  start/end dates from clean.hmis_details.
#   * hud_joined - every pipeline.cohort row, left-joined to that client's evictions
#                  that were filed before the cohort as_of_date and have
#                  order_for_possession = true, and left-joined to that client's
#                  HMIS rows.
# Derived columns in hud_joined:
#   * date_diff_filing_ofp        - days from filingdt to ofp_issue_dt
#   * date_diff_hmis_ofp          - days from enrollment_start_dt to ofp_issue_dt
#   * hmis_between_filing_and_ofp - 1 when enrollment_start_dt falls strictly between
#                                   filingdt and ofp_issue_dt, otherwise 0
# NOTE: the HMIS join is on client_hash only, so a cohort row is repeated once per
# matching (eviction, HMIS row) pair; row-level aggregates are weighted accordingly.
sql_query =  """

		with evictions as (
		select 
			matter_id,
			hashed_mci_uniq_id as client_hash,
			filingdt,
			ofp_issue_dt,
			order_for_possession
		from clean.eviction_client_matches ecm
		left join clean.eviction using(matter_id)
		group by 1, 2, 3, 4,5
		order by matter_id
		),
		
		 hmis as(
			select 
			hashed_mci_uniq_id as client_hash ,
			hud_project_type_id,
			hud_project_type_desc,
			enrollment_start_dt,
			enrollment_end_dt
			from clean.hmis_details 
		),
		
		hud_joined as(
			select 
			cohort.client_hash ,
			cohort.as_of_date,
			hmis.hud_project_type_id,
			hmis.hud_project_type_desc,
			hmis.enrollment_start_dt,
			hmis.enrollment_end_dt,
			e.ofp_issue_dt,
			e.filingdt,
			DATE_PART('day', e.ofp_issue_dt::date::timestamp- e.filingdt::timestamp) as date_diff_filing_ofp, 
			DATE_PART('day', e.ofp_issue_dt::date::timestamp- hmis.enrollment_start_dt::timestamp) as date_diff_hmis_ofp, 
			case when e.filingdt::timestamp < hmis.enrollment_start_dt::timestamp and e.ofp_issue_dt::timestamp > hmis.enrollment_start_dt::timestamp then 1 else 0 end hmis_between_filing_and_ofp
			----case when e.ofp_issue_dt is not null and cohort.as_of_date::date::timestamp > e.ofp_issue_dt::timestamp then DATE_PART('day', cohort.as_of_date::date::timestamp - e.ofp_issue_dt::timestamp) else 99999 end day_diff_ofp 
			from pipeline.cohort as cohort
			left join evictions e
				on cohort.client_hash = e.client_hash
				and e.filingdt < cohort.as_of_date and e.order_for_possession = true
			left join hmis on cohort.client_hash = hmis.client_hash
			---group by cohort.client_hash , cohort.as_of_date 
		)
		
	select * from hud_joined
    ;
"""

In [None]:

# Load the query result into a DataFrame.
df = pd.read_sql(sql_query, db_conn)

# Preview only the first rows: an expression in the middle of a cell is never
# displayed, and echoing the whole frame adds nothing but bulk to the notebook.
df.head()

In [None]:
# Keep only the rows whose HMIS enrollment started between the eviction filing
# and the order for possession (flag computed in the SQL query).
started_between_filing_and_ofp = df['hmis_between_filing_and_ofp'].eq(1)
df_ev_ofp = df.loc[started_between_filing_and_ofp]

# Average number of days from filing to OFP across those rows.
mean_days_filing_to_ofp = df_ev_ofp['date_diff_filing_ofp'].mean()
print(mean_days_filing_to_ofp)


TODO: visualizations or computations still needed:
- most common programs, plus side-by-side bar plots comparing the most common programs for people who enrolled in them between filing and OFP versus those who did not
- average amount of time spent in those programs
- number of distinct HMIS programs accessed where `hmis_between_filing_and_ofp = 1`
- relationship between `date_diff_filing_ofp` and `date_diff_hmis_ofp`

In [None]:
# Row counts per HUD project type, split by whether the HMIS enrollment started
# between the eviction filing and the order for possession (OFP).
fig = px.histogram(
    df,
    x="hud_project_type_desc",
    color="hmis_between_filing_and_ofp",
    barmode="group",  # side-by-side bars instead of the default stacked bars
    height=800,
    title="HUD project types by whether enrollment started between filing and OFP",
    labels={
        "hud_project_type_desc": "HUD project type",
        "hmis_between_filing_and_ofp": "Enrollment started between filing and OFP",
    },
)

# Order the categories by total count so the most common programs come first.
fig.update_xaxes(
    categoryorder="total descending",
    tickangle=45,
    tickfont=dict(color='crimson', size=14),
)
fig.show()

In [None]:
# Mean number of days from HMIS enrollment start to the OFP issue date, per HUD
# project type, for rows where the enrollment started between filing and OFP.
# Sorted ascending so the programs entered closest to the OFP are listed first.
(
    df_ev_ofp
    .groupby('hud_project_type_desc', as_index=False)['date_diff_hmis_ofp']
    .mean()
    .sort_values('date_diff_hmis_ofp')
)

It seems that people access permanent supportive housing closest to their OFP date. Emergency shelter enrollments start furthest from the order for possession (that is, closest to the eviction filing date), along with rapid rehousing and homelessness prevention services.