In [None]:
import os
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.express as px
import kaleido

In [None]:
# plotly parameters: every figure created below uses the white template
pio.templates.default = "plotly_white"

# shared layout overrides, handed to fig.update_layout() by the plotting cells
layout_update_dict = dict(
    showlegend=False,
    font={'size': 20},
    title={'font': {'size': 24}}
)

# make sure the folder that receives the exported figures exists;
# exist_ok=True replaces the check-then-create pattern (no race between the
# check and the mkdir) and is a no-op when the folder is already there
os.makedirs("plots", exist_ok=True)


### Loading the data

In [None]:
# `sys` is not imported by the notebook's import cell, so it is imported here;
# without it the path tweak below raises NameError on a fresh kernel
import sys

# make the parent directory importable so the project-local `utils` module
# resolves; the guard keeps sys.path free of duplicates when the cell is re-run
if "../" not in sys.path:
    sys.path.append("../")
from utils import get_db_conn

In [None]:
# connect to database
# get_db_conn is imported from the project-local `utils` module; the object it
# returns is what pd.read_sql receives as its connection further down.
# NOTE(review): connection type and credentials handling live in utils and are
# not visible from this notebook — confirm there.
db_conn = get_db_conn()

##### Defining homelessness

In [None]:
# SQL template for the homelessness extract.
# `{hl_programs}` is a str.format placeholder that a later cell fills with a
# parenthesised list of program keys, so no other literal braces may appear in
# this string.
# The query keeps one row per (client_hash, prog_dt, program_key) from
# clean.involvement_feed for the selected programs (the GROUP BY acts as a
# de-duplication) and left-joins program_name / program_desc from
# lookup.program_feed on program_key.
sql_query =  """
-- list of all homeless individuals for every month-year
with hl as (
    select 
        client_hash, 
        prog_dt, 
        --count(program_key)
        program_key
    from clean.involvement_feed
    where program_key in {hl_programs}
    group by client_hash, prog_dt, program_key
),

-- program descriptions
program_desc as (
    select
        program_key, 
        program_name,
        program_desc
    from lookup.program_feed
)

-- join with data on homeless individuals
select
    *
from hl 
left join program_desc using(program_key) -- program info
;
"""

In [None]:
# defining homelessness

# load parameters: program keys substituted into the query's
# `where program_key in ...` filter (must stay a parenthesised SQL list)
hl_programs = "(263, 29, 32, 33)"

# fill the placeholder into a separate name so the `sql_query` template is left
# untouched; overwriting the template would make this cell non-idempotent
# (after the first run the placeholder is gone, so editing hl_programs and
# re-running would silently keep querying the old keys)
hl_query = sql_query.format(hl_programs=hl_programs)

# check query
#print(hl_query)

# load data: one row per (client_hash, prog_dt, program_key) plus program info
df = pd.read_sql(hl_query, db_conn)

In [None]:
# preview the first rows of the loaded query result
df.head()

### Trends in homelessness over time

In [None]:
# headline numbers: date range covered by the data and number of distinct clients.
# Series.min()/max()/nunique() are used instead of the Python builtins and
# len(unique()): they are vectorised, skip missing values (a NULL date would
# break builtin min/max, a NULL client_hash would be counted as a client), and
# match the .nunique() used for the trend plot below.
first_dt = df['prog_dt'].min()
last_dt = df['prog_dt'].max()
n_clients = df['client_hash'].nunique()
print(f"Unique homeless clients ({first_dt} to {last_dt}): {n_clients}")

In [None]:
# homeless population over time
# step 1: distinct clients per prog_dt, as a two-column frame (prog_dt, n)
clients_per_date = df.groupby(['prog_dt'])['client_hash'].nunique()
x = clients_per_date.reset_index(name='n')

# step 2: line chart of the series
axis_labels = {"prog_dt": "Date", "n": "# unique clients"}
fig = px.line(
    x,
    x='prog_dt',
    y='n',
    title="Utilization of homeless services over time",
    labels=axis_labels,
)

# step 3: shared styling, fixed y-axis window, then display and export
fig.update_layout(layout_update_dict)
fig.update_yaxes(range=[0, 2000])
fig.show()
fig.write_image("plots/hl_trends.png", engine="kaleido")

- There is **considerable fluctuation** in the number of individuals in homelessness over time. 
Although the number of homeless individuals has decreased between 2017 and 2021, there seems to have been a stark recent increase in the number of homeless individuals.

- The number of homeless individuals peaks each winter. This may be because:

    - Low temperatures during the winter months force homeless individuals into shelters, where they are registered
    
    - Some programs are administered on an annual basis, so that individuals who are homeless at any given point in time during the year only show up in our data in January.

=> To investigate these dynamics, I disaggregate homelessness by `program_key`.

In [None]:
# homeless population over time, by program key
# nunique (not count) so the series really is "# unique clients", as the axis
# label says: count() would tally rows and overcount a client whenever the
# join to the program lookup yields more than one row per program_key
x = (
    df
    .groupby(["prog_dt", "program_key", "program_name"])["client_hash"]
    .nunique()
    .reset_index(name="n")
)

fig = px.line(
    data_frame=x,
    x='prog_dt',
    y='n',
    color='program_name',
    title="Utilization of homeless services over time",
    labels=dict(prog_dt="Date", n="# unique clients", program_name="Program"),
)

# the shared layout switches the legend off; with one line per program the
# legend is the only way to tell the lines apart in the exported PNG, so it is
# re-enabled for this figure only (the shared dict itself is not modified)
fig.update_layout({**layout_update_dict, "showlegend": True})
fig.show()
fig.write_image("plots/hl_program_trends.png", engine="kaleido")

- During the winter months, the number of individuals interacting with emergency shelters and homeless services increases considerably. To account for these fluctuations, it is probably best to define our homelessness variable as something like *has been homeless in the previous 12 months*.
- It is also interesting that some categories, notably **transitional housing** and **day shelters**, are being phased out, whereas **street outreach** increases. **Emergency shelter** and **homeless services** remain fairly constant over time. Perhaps these are better (if arguably more restrictive) proxies to capture trends in homelessness over time.

- Comparing 'homeless services' to the overall plot above, it also looks like most individuals who are homeless show up as being enrolled in 'homeless services'. See also the histogram below.

In [None]:
# how many programs is a homeless individual enrolled in in any given month?
# nunique counts distinct programs per (client, date); count() would tally rows
# and inflate the number if the lookup join ever duplicates a program_key
x = (
    df
    .groupby(['client_hash', 'prog_dt'])['program_key']
    .nunique()
    .reset_index(name="n")
)

fig = px.histogram(
    data_frame=x,
    x='n',
    title="Number of programs enrolled in in any given month",
    labels=dict(n="Number of enrolled programs"),
)

# apply the shared styling and display the figure like the other plotting
# cells do — the surrounding text refers to this histogram, but the original
# cell only exported it to disk without showing it
fig.update_layout(layout_update_dict)
fig.show()
fig.write_image("plots/prog_hist.png", engine='kaleido')