<a href="https://colab.research.google.com/github/cincinnatilibrary/collection-analysis/blob/master/reports/CHPL_Hold_Shelf_Supply_Overview.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CHPL - Collection Analysis - **Hold-Shelves Supply**
<img src="https://ilsweb.cincinnatilibrary.org/img/CHPL_Brandmark_Primary.png" alt="CHPL" title="CHPL" width="300"/>

This report provides some queries and generates some visualizations for data
related to items that have been delivered to various hold-shelf locations in
the CHPL system.

Data sources:

* https://ilsweb.cincinnatilibrary.org/collection-analysis/
* https://github.com/plch/plch-holds-shelf

In [1]:
import pandas as pd
import numpy as np
import altair as alt
from urllib.parse import urlencode

# Target month for the aggregation, written as 'YYYY-MM'.
start_date = "2021-11"

# Base URL of the collection-analysis query endpoint; an output-format suffix
# and the query string are appended to it when the data is fetched.
chpl_collection_url = (
    "https://ilsweb.cincinnatilibrary.org"
    "/collection-analysis/current_collection"
)

In [2]:
# Hold-shelf Supply Query
# Where are the items that fill holds coming from, when the hold is not
# sourced and filled from the same branch?

"""
SQL text for the hold-shelf supply aggregation.

The `hold_shelf_data` CTE selects `holds_shelf` rows whose `modified_epoch`
falls inside the month named by the `:start_date` parameter ('YYYY-MM'),
left-joins `item` to pick up `item_format`, and resolves both the source and
pickup location codes to branch names (falling back to the raw location code
when a location has no branch name). The outer query returns one row per
source branch with the number of matching holds; `item_format` is selected in
the CTE but currently commented out of the final grouping.

NOTE: this excludes:
  * items where the source branch is the same as the pickup branch

NOTE(review): an earlier version of this note also listed "item types with
not more than 100 items in the set" as excluded, but no such filter appears
in the query below -- confirm whether it was dropped on purpose.

NOTE(review): when a location code has no row in "location", the branch-name
subquery yields NULL and the `!=` comparison drops that hold -- confirm this
is intended.

NOTE(review): the month bounds are computed without the 'localtime' modifier
that the date columns use -- confirm which time zone the month boundary
should follow.
"""
sql = """\
with hold_shelf_data as (
  select
    date(modified_epoch, 'unixepoch', 'localtime') as date_hold_on_holdshelf,
    date(placed_epoch, 'unixepoch', 'localtime') as date_hold_placed,
    s_location_code as item_source_location_code,
    (
      select
        coalesce(branch_name.name, s_location_code, '')
      from
        "location"
        left outer join branch on branch.code_num = "location".branch_code_num
        left outer join branch_name on branch_name.branch_id = branch.id
      where
        "location".code = s_location_code
      limit
        1
    ) as item_source_branch_name,
    item.item_format,
    pickup_location_code,
    (
      select
        coalesce(branch_name.name, pickup_location_code, '')
      from
        "location"
        left outer join branch on branch.code_num = "location".branch_code_num
        left outer join branch_name on branch_name.branch_id = branch.id
      where
        "location".code = pickup_location_code
      limit
        1
    ) as item_pickup_branch_name
  from
    holds_shelf
    left outer join item as item on item.item_record_num = holds_shelf.record_num
  where
    modified_epoch >= CAST(
      strftime('%s', :start_date || '-01') AS INT
    )
    and modified_epoch < CAST(
      strftime('%s', DATE(:start_date || '-01', '+1 months')) AS INT
    )
)
select
  :start_date as month,
  item_source_branch_name,
  -- item_format,
  count(*) as count
from
  hold_shelf_data
where
  item_source_branch_name != item_pickup_branch_name
group by
  1,
  2
"""

In [3]:
# Encode the SQL text and its `start_date` parameter into a query string,
# then read the endpoint's CSV response straight into a DataFrame.
query_string = urlencode({'sql': sql, 'start_date': start_date})
csv_url = f"{chpl_collection_url}.csv?{query_string}"
df = pd.read_csv(csv_url)

# Keep a copy of the result on disk, named after the target month.
output_path = f'{start_date}-holds-supply-output.csv'
df.to_csv(output_path, index=False)

In [4]:
# Show the column names of the fetched result; the cells below rely on
# 'month', 'item_source_branch_name' and 'count' being present.
df.columns

Index(['month', 'item_source_branch_name', 'count'], dtype='object')

In [5]:
# Chart: where are the items to fill holds coming from (when not being
# sourced and filled from the same branch)?

# Every row of `df` carries the same `month` value (the query echoes the
# `start_date` parameter), so the first row supplies the title label.
# Fall back to the configured `start_date` when the query returned no rows:
# `df.iloc[0]` would raise IndexError on an empty frame.
chart_month = start_date if df.empty else df['month'].iloc[0]

alt.Chart(df).mark_bar().encode(
    x=alt.X('count:Q'),
    # Explicit nominal type so the encoding does not depend on dtype
    # inference; bars are ordered by descending count.
    y=alt.Y('item_source_branch_name:N', sort='-x'),
    # color='item_format',
    tooltip=['month', 'item_source_branch_name', 'count']
).properties(
    title = f"Hold-shelf Supply - {chart_month}",
    width = 800
).display()

In [6]:
# Total number of items shipped through the system in the target month.
# `df` only holds rows where the source branch differs from the pickup
# branch, so holds filled from the same branch are not counted here.
total_items_shipped = df['count'].sum()
total_items_shipped

134777

In [7]:
# Average number of items shipped through the system PER DAY in the target
# month: the monthly total divided by the number of calendar days in it.
days_in_target_month = pd.Period(f'{start_date}-01').days_in_month
df['count'].sum() / days_in_target_month

4492.566666666667