In [None]:
import datetime
import os
import time
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
from sqlalchemy import create_engine

In [None]:
# get credentials from environment variables
user = os.getenv('PGUSER')
password = os.getenv('PGPASSWORD')
host = os.getenv('PGHOST')
port = os.getenv('PGPORT')
database = os.getenv('PGDATABASE')

# fail early with a readable message: an unset variable would otherwise be
# formatted into the connection URL as the literal text "None"
pg_settings = {
    'PGUSER': user,
    'PGPASSWORD': password,
    'PGHOST': host,
    'PGPORT': port,
    'PGDATABASE': database,
}
missing_settings = [name for name, value in pg_settings.items() if value is None]
if missing_settings:
    raise RuntimeError(
        "missing environment variable(s) for the postgres connection: "
        + ", ".join(missing_settings)
    )

# configure connection to postgres
# user and password are URL-encoded so that special characters such as
# '@', ':', '/' or '%' cannot break the structure of the connection URL
engine = create_engine(
    "postgresql://{}:{}@{}:{}/{}".format(
        quote_plus(user), quote_plus(password), host, port, database
    )
)

# open a connection (reused by the queries further down in the notebook)
db_conn = engine.connect()

In [None]:
#with open("/mnt/data/projects/acdhs-housing/jbaumann/acdhs_housing/src/eda/entry_into_homelessness.sql", "r") as f:
#    sql = f.read()
#df = pd.read_sql(sql, db_conn)

In [None]:
#df = pd.read_sql("select * from modelling.entry_into_homelessness_50000;", db_conn)
# read every row of modelling.entry_into_homelessness_final through the open connection
entry_query = "select * from modelling.entry_into_homelessness_final;"
df = pd.read_sql(sql=entry_query, con=db_conn)

In [None]:
# inspect the table that was just loaded from the database
# NOTE(review): this is presumably client-level data -- confirm the rendered
# output is cleared or redacted before the notebook is shared or committed
df

In [None]:
# plotly parameters
# use the white built-in template for every figure created afterwards
pio.templates.default = "plotly_white"

# layout overrides shared by the figures: visible legend, larger base font, larger title font
layout_update_dict = {
    "showlegend": True,
    "font": dict(size=20),
    "title": dict(font=dict(size=24)),
}

In [None]:
# select individuals for different min_days_not_hl
# min_days_not_hl: how many days an individual needs to be outside of any DHS homelessness system
#   to count as an entry into the homelessness system and not as already homeless
# min_days_not_hl = 0 : we always consider all homeless individuals
# NOTE(review): the filter below is a strict ">", so rows with days_since_last_hl == 0 (if any exist)
#   are left out even for min_days_not_hl = 0 -- confirm that this matches the intent stated above

# earliest prog_dt in the data, computed once instead of on every iteration;
# Series.min() ignores missing values, which the builtin min() does not
first_prog_dt = df["prog_dt"].min()

# rows with no previous homelessness record (days_since_last_hl is null), reused by every selection
never_hl_before = df["days_since_last_hl"].isnull()

# collect one count table per threshold and concatenate once at the end
# (growing a DataFrame from an empty pd.DataFrame() inside the loop is quadratic and
#  can emit a FutureWarning about empty entries on recent pandas versions)
count_frames = []
#for min_days_not_hl in range(0,187,31):
for min_days_not_hl in [0, 32, 186, 373]:
    # number of rows per prog_dt that were out of the system longer than the threshold, or never in it
    temp_df = (
        df[(df["days_since_last_hl"] > min_days_not_hl) | never_hl_before]
        .groupby(['prog_dt'])
        .size()
        .reset_index(name='n')
    )
    # legend label: threshold expressed in whole 31-day months
    temp_df["min_days_not_hl"] = str(int(min_days_not_hl/31)) + " months"
    if min_days_not_hl > 0:
        # if we are not considering all homeless individuals, drop the first month, because they would all count as new entries
        temp_df = temp_df[temp_df.prog_dt != first_prog_dt]

    count_frames.append(temp_df)

# now select first time homeless individuals
x_first_time = df[never_hl_before].groupby(['prog_dt']).size().reset_index(name='n')
x_first_time["min_days_not_hl"] = "never"
x_first_time = x_first_time[x_first_time.prog_dt != first_prog_dt]
count_frames.append(x_first_time)

# long-format result: one row per (prog_dt, min_days_not_hl label) with the count in n
x = pd.concat(count_frames, axis=0, ignore_index=True)

In [None]:
# inspect the stacked counts (columns: prog_dt, n, min_days_not_hl)
x

In [None]:
# one line per min_days_not_hl label: number of rows (n) per prog_dt
fig = px.line(
    data_frame=x,
    x='prog_dt',
    y='n',
    color="min_days_not_hl",
    title="Entry into homelessness over time<br><sup><i>by nr of days out of homelessness system to count as new</i></sup>",
    labels=dict(prog_dt="Date", n="# clients"),
)
#fig.update_yaxes(range=[0, 600])
# apply the shared legend / font settings defined with the plotly parameters
fig.update_layout(layout_update_dict)
fig.show()

# make sure the output folder exists so the export also works on a fresh checkout
# (the path is relative to the directory the notebook is run from)
os.makedirs("plots", exist_ok=True)
fig.write_image("plots/define_entry_into_homelessness.pdf", engine = "kaleido")