In [1]:
# this will produce a series of heatmaps related to source and pickup location for item related to holds
!pip install -U pip
!pip install pandas==1.3.4
!pip install altair==4.1.0



In [2]:
import pandas as pd
import numpy as np
import altair as alt
from urllib.parse import urlencode

# Base URL of the collection-analysis database. The query cells append
# '.csv?sql=...' to it to fetch results as CSV.
chpl_collection_url = 'https://ilsweb.cincinnatilibrary.org/collection-analysis/current_collection'

# Month to report on, as 'YYYY-MM'. The SQL appends '-01' to it to obtain the
# first day of the month.
start_date = '2021-10'

In [3]:
# Count the holds-shelf rows for the report month, grouped by item format.
# The month window runs from the first day of `start_date` (inclusive) to the
# first day of the following month (exclusive), compared as epoch seconds.
# The left outer join keeps holds_shelf rows that have no matching item record,
# so `item_format` can come back null for some rows.
sql = """\
-- get the possible item_format values (and counts)
with hold_shelf_data as (
  select
    item.item_format
  from
    holds_shelf
    left outer join item on item.item_record_num = holds_shelf.record_num
  where
    modified_epoch >= CAST(strftime('%s', '{start_date}' || '-01') AS INT)
    and modified_epoch < CAST(
      strftime('%s', DATE('{start_date}' || '-01', '+1 months')) AS INT
    )
)
select
  item_format,
  count(*) as count
from
  hold_shelf_data
group by
  1
order by
  count DESC
"""

# The endpoint's `.csv` route takes the statement in the `sql` query parameter.
item_format_query = urlencode(query={'sql': sql.format(start_date=start_date)})
item_format_url = chpl_collection_url + '.csv?' + item_format_query

item_format_df = pd.read_csv(item_format_url)

item_format_df.head()

Unnamed: 0,item_format,count
0,Book,58514
1,Juvenile Book,46134
2,DVD/Videocassette,22619
3,New Release DVDs,13909
4,Music on CD,7793


In [4]:
# SQL template for the per-format source/pickup aggregates. It is filled in with
# `str.format`, which substitutes `{start_date}` ('YYYY-MM') and `{item_format}`.
# This rebinds `sql`, replacing the item-format query defined in the cell above.
#
# Structure of the statement:
#   * `hold_shelf_data` (innermost CTE): holds_shelf rows modified within the
#     month, left-joined to `item` for the format, with
#     days_to_holdshelf = round((modified_epoch - placed_epoch) / 86400.0).
#   * `data`: groups by month, item_format, source location *code* and pickup
#     location (branch name, falling back to the code), computing the average
#     days to holdshelf, the item count and the distinct patron count.
#   * final select: resolves the source location code to a branch name
#     (falling back to the code) and keeps only the requested item_format.
#
# NOTE(review): the final select does not re-aggregate after mapping source
# codes to branch names. If several source location codes resolve to the same
# branch name, the result could contain more than one row per
# source_location/pickup_location pair -- confirm against the data.
sql = """\
-- given `YYYY-MM` and `item_format` values, aggregate counts based on those
with data as (
  with hold_shelf_data as (
    select
      date(modified_epoch, 'unixepoch', 'localtime') as date_hold_on_holdshelf,
      date(placed_epoch, 'unixepoch', 'localtime') as date_hold_placed,
      cast(
        round((modified_epoch - placed_epoch) / 86400.0) as integer
      ) as days_to_holdshelf,
      s_location_code as item_source_location_code,
      patron_record_hash,
      pickup_location_code,
      holds_shelf.record_num,
      item.item_format
    from
      holds_shelf
      left outer join item on item.item_record_num = holds_shelf.record_num
    where
      modified_epoch >= CAST(strftime('%s', '{start_date}' || '-01') AS INT)
      and modified_epoch < CAST(
        strftime('%s', DATE('{start_date}' || '-01', '+1 months')) AS INT
      )
  )
  select
    strftime('%Y-%m', '{start_date}' || '-01') as month,
    item_format,
    -- pickup_location_code,
    item_source_location_code,
    coalesce(branch_name.name, pickup_location_code) as pickup_location,
    --
    round(avg(days_to_holdshelf), 2) as avg_days_to_holdshelf,
    count(record_num) as count_items,
    count(DISTINCT patron_record_hash) as count_distinct_patrons
  from
    hold_shelf_data
    left outer join "location" on "location".code = hold_shelf_data.pickup_location_code
    left outer join branch on branch.code_num = "location".branch_code_num
    left outer join branch_name on branch_name.branch_id = branch.id
  group by
    1,
    2,
    3,
    4
) -- this is messy, and i kinda hate it, but nesting the CTEs like this seems like the most efficient way to get the source
select
  month,
  item_format,
  coalesce(branch_name.name, item_source_location_code) as source_location,
  pickup_location,
  avg_days_to_holdshelf,
  count_items,
  count_distinct_patrons
from
  data
  left outer join "location" on "location".code = data.item_source_location_code
  left outer join branch on branch.code_num = "location".branch_code_num
  left outer join branch_name on branch_name.branch_id = branch.id
where
  item_format = '{item_format}'
"""  

In [5]:
# Fetch the source/pickup aggregates for every item format and combine them
# into a single frame, `df`.

# Columns returned by the query; used for the fallback frame when nothing is fetched.
result_columns = [
    'month', 'item_format', 'source_location',
    'pickup_location', 'avg_days_to_holdshelf',
    'count_items', 'count_distinct_patrons'
]

# Rows with a null item_format would be formatted into the SQL as the literal
# string 'nan', so skip them instead of issuing a request that cannot match.
format_names = item_format_df['item_format'].dropna()

fetched_frames = []
for i, format_name in format_names.items():
    print(i, format_name)

    query_sql = sql.format(
        start_date=start_date,
        # Double any single quote so the value cannot terminate the SQL
        # string literal it is substituted into.
        item_format=str(format_name).replace("'", "''"),
    )
    temp_df = pd.read_csv(
        chpl_collection_url + '.csv?' + urlencode(query={'sql': query_sql})
    )

    # Leave out empty results so their untyped columns do not influence the
    # dtypes of the combined frame.
    if not temp_df.empty:
        fetched_frames.append(temp_df)

# Concatenate once at the end: appending to a DataFrame inside the loop copies
# the accumulated data on every iteration, and DataFrame.append is deprecated
# in later pandas releases.
if fetched_frames:
    df = pd.concat(fetched_frames, ignore_index=True)
else:
    df = pd.DataFrame(columns=result_columns)

0 Book
1 Juvenile Book
2 DVD/Videocassette
3 New Release DVDs
4 Music on CD
5 Teen Book
6 Large Print Book
7 Book on CD
8 Magazine
9 Bluray
10 Juvenile Book on CD
11 Music Score
12 LP Record
13 Juvenile Music on CD
14 Juvenile Large Print Book
15 Playaway
16 Juvenile Playaway
17 nan
18 Juvenile Kit
19 Portable Technology Device
20 Teen Book on CD
21 Juvenile Magazine
22 Teen Large Print Book
23 Government Document
24 Teen Magazine
25 Teen Playaway
26 Juvenile Music Score
27 Kit
28 Braille
29 Leased DVD
30 Reference Juvenile Book
31 Book (Branches)
32 Juv Large Print Book (Branches)


In [6]:
# Sanity check: (rows, columns) of the combined per-format results.
df.shape

(12681, 7)

In [7]:
# Preview the first rows of the combined results.
df.head()

Unnamed: 0,month,item_format,source_location,pickup_location,avg_days_to_holdshelf,count_items,count_distinct_patrons
0,2021-10,Book,Main Library,Anderson,50.95,1080,609
1,2021-10,Book,Main Library,Avondale,67.57,30,19
2,2021-10,Book,Main Library,Blue Ash,53.55,737,414
3,2021-10,Book,Main Library,Bond Hill,85.5,76,40
4,2021-10,Book,Main Library,Cheviot,58.68,201,112


In [18]:
# Item format to chart; the heatmap cells below filter `df` on this value.
item_format = 'Music on CD'

In [19]:
# Heatmap of item counts for the selected format: pickup location on the
# x axis, source location on the y axis, colour = number of items.
count_rows = df[df['item_format'] == item_format]
count_tooltip = [
    'item_format', 'source_location', 'pickup_location',
    'avg_days_to_holdshelf', 'count_items', 'count_distinct_patrons',
]
count_title = '{} -- {} -- Hold Source & Hold Pickup Location by Item Count'.format(item_format, start_date)

alt.Chart(count_rows).mark_rect().encode(
    x=alt.X('pickup_location:O', title='Pickup Location'),
    y=alt.Y('source_location:O', title='Source Location'),
    color=alt.Color('count_items:Q'),
    # (disabled) size=alt.Size('count_distinct_patrons', scale=alt.Scale(range=[1, 1000])),
    tooltip=count_tooltip,
).properties(title=count_title, width=900)

In [20]:
# Same item-count heatmap, but with rows whose source is 'Main Library'
# filtered out before plotting.
is_selected_format = df['item_format'] == item_format
is_not_main_source = df['source_location'] != 'Main Library'
branch_source_rows = df[is_selected_format & is_not_main_source]

branch_source_tooltip = [
    'item_format', 'source_location', 'pickup_location',
    'avg_days_to_holdshelf', 'count_items', 'count_distinct_patrons',
]
branch_source_title = '{} -- {} -- Hold Source & Hold Pickup Location by Item Count (Main Library as source removed)'.format(item_format, start_date)

alt.Chart(branch_source_rows).mark_rect().encode(
    x=alt.X('pickup_location:O', title='Pickup Location'),
    y=alt.Y('source_location:O', title='Source Location'),
    color=alt.Color('count_items:Q'),
    # (disabled) size=alt.Size('count_distinct_patrons', scale=alt.Scale(range=[1, 1000])),
    tooltip=branch_source_tooltip,
).properties(title=branch_source_title, width=900)

In [21]:
# Heatmap for the selected format coloured by the average number of days
# between a hold being placed and the item reaching the holdshelf.
days_rows = df[df['item_format'] == item_format]
days_tooltip = [
    'item_format', 'source_location', 'pickup_location',
    'avg_days_to_holdshelf', 'count_items', 'count_distinct_patrons',
]
days_title = '{} -- {} -- Hold Source & Hold Pickup Location by Avg. Days to Holdshelf'.format(item_format, start_date)

alt.Chart(days_rows).mark_rect().encode(
    x=alt.X('pickup_location:O', title='Pickup Location'),
    y=alt.Y('source_location:O', title='Source Location'),
    color=alt.Color('avg_days_to_holdshelf:Q'),
    # (disabled) size=alt.Size('count_distinct_patrons', scale=alt.Scale(range=[1, 1000])),
    tooltip=days_tooltip,
).properties(title=days_title, width=900)