<a href="https://colab.research.google.com/github/cincinnatilibrary/collection-analysis/blob/master/reports/items_and_publication_date_aggregations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CHPL - Collection Analysis - **Items & Publication Dates**

<img src="https://ilsweb.cincinnatilibrary.org/img/CHPL_Brandmark_Primary.png" alt="CHPL" title="CHPL" width="300"/>

Show item counts by item format for titles published over the past five decades (1980–2022), faceted by decade, and then for the last five publication years (2018–2022), faceted by year.

Data sources

* https://collection-analysis.cincy.pl/

In [7]:
#@title
import pandas as pd
import numpy as np
import altair as alt
# if we want to save output as .png use this
# NOTE: use !pip install -U altair_saver above to download / install first
# from altair_saver import save
from urllib.parse import urlencode
import requests

# Base URL that every query in this notebook is sent to. Later cells append
# '.json' (with the SQL passed as a query parameter) to fetch rows, and
# '?sql=...' to build a link to the same data for viewing in a browser.
base_url = 'https://collection-analysis.cincy.pl/current_collection'

In [8]:
#@title
# Count item records per publication decade and item format (1980-2022).
# Every row of the bib/item join is one item, so count(*) is an item count.
# 'New Release DVDs' is folded into 'DVD/Videocassette' by the CASE expression.
# NOTE(review): the decade expression relies on `publish_year / 10` being
# integer division, i.e. publish_year being stored as an integer -- confirm.
sql = """\
-- group titles (bib records) by the decade in which they were published
select
  cast(round(publish_year / 10, 1) * 10 as integer) AS decade,
  case
    when item.item_format = 'New Release DVDs' then 'DVD/Videocassette'
    else item.item_format
  end as item_format,
  count(*) AS count_items
from
  bib
  join item on item.bib_record_num = bib.bib_record_num
where
  bib.publish_year >= 1980
  and bib.publish_year <= 2022
  and item_format in (
    'Book',
    'Juvenile Book',
    'Reference Book',
    'DVD/Videocassette',
    'Music on CD',
    'Teen Book',
    'Magazine',
    'Large Print Book',
    'Microfilm',
    'Rare Book',
    'Book on CD',
    'Microfiche',
    'Music Score',
    'Reference Juvenile Book',
    'LP Record',
    'New Release DVDs',
    'Juvenile Magazine',
    'Reference Magazine',
    'Juvenile Book on CD',
    'Leased Book'
  )
group by
  1,
  2
order by
  1,2
"""

# Send the query to the JSON endpoint. `_shape=array` presumably selects the
# server's "plain array of row objects" response shape (Datasette convention).
response = requests.get(
    url=base_url + '.json',
    params={
        'sql': sql,
        '_shape': 'array',
    },
    timeout=60,  # never hang the notebook indefinitely on a stalled server
)
# Fail loudly on an HTTP error instead of parsing an error payload as data.
response.raise_for_status()

# Build the frame from the decoded rows. Handing raw JSON bytes to
# pd.read_json goes through its literal-JSON input path, which recent pandas
# releases deprecate in favour of file-like objects.
df = pd.DataFrame(response.json())

# df.head()

In [9]:
#@title
# Same endpoint as the JSON request, expressed as a browsable URL that carries
# the current query text in its `sql` parameter.
raw_data_url = f"{base_url}?{urlencode({'sql': sql})}"
print('Click the link below to view the raw data:')
print(raw_data_url)

Click the link below to view the raw data:
https://collection-analysis.cincy.pl/current_collection?sql=--+group+titles+%28bib+records%29+by+the+decade+in+which+they+were+published%0Aselect%0A++cast%28round%28publish_year+%2F+10%2C+1%29+%2A+10+as+integer%29+AS+decade%2C%0A++case%0A++++when+item.item_format+%3D+%27New+Release+DVDs%27+then+%27DVD%2FVideocassette%27%0A++++else+item.item_format%0A++end+as+item_format%2C%0A++count%28%2A%29+AS+count_items%0Afrom%0A++bib%0A++join+item+on+item.bib_record_num+%3D+bib.bib_record_num%0Awhere%0A++bib.publish_year+%3E%3D+1980%0A++and+bib.publish_year+%3C%3D+2022%0A++and+item_format+in+%28%0A++++%27Book%27%2C%0A++++%27Juvenile+Book%27%2C%0A++++%27Reference+Book%27%2C%0A++++%27DVD%2FVideocassette%27%2C%0A++++%27Music+on+CD%27%2C%0A++++%27Teen+Book%27%2C%0A++++%27Magazine%27%2C%0A++++%27Large+Print+Book%27%2C%0A++++%27Microfilm%27%2C%0A++++%27Rare+Book%27%2C%0A++++%27Book+on+CD%27%2C%0A++++%27Microfiche%27%2C%0A++++%27Music+Score%27%2C%0A++++%27Referen

In [10]:
#@title
# Bar chart of item counts per item format, one facet panel per decade.
# Encodings are declared first so the chart assembly reads as a short pipeline.
format_axis = alt.X(
    'item_format',
    axis=alt.Axis(title='item format', labelAngle=-70),
)
count_axis = alt.Y('count_items:Q', title='count total items')
format_color = alt.Color('item_format')

bars = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=format_axis,
        y=count_axis,
        color=format_color,
        tooltip=['decade', 'item_format', 'count_items'],
    )
    .properties(title="", width=270)
)

# Split into one panel per decade, laid out five panels to a row.
chart = bars.facet(facet='decade', columns=5)

chart.display()

In [11]:
#@title
# Count item records per publication year and item format for 2018-2022.
# Every row of the bib/item join is one item, so count(*) is an item count.
# 'New Release DVDs' is folded into 'DVD/Videocassette' by the CASE expression.
sql = """\
select
  publish_year,
  case
    when item.item_format = 'New Release DVDs' then 'DVD/Videocassette'
    else item.item_format
  end as item_format,
  count(*) AS count_items
from
  bib
  join item on item.bib_record_num = bib.bib_record_num
where
  bib.publish_year >= 2018
  and bib.publish_year <= 2022
  and item_format in (
    'Book',
    'Juvenile Book',
    'Reference Book',
    'DVD/Videocassette',
    'Music on CD',
    'Teen Book',
    'Magazine',
    'Large Print Book',
    'Microfilm',
    'Rare Book',
    'Book on CD',
    'Microfiche',
    'Music Score',
    'Reference Juvenile Book',
    'LP Record',
    'New Release DVDs',
    'Juvenile Magazine',
    'Reference Magazine',
    'Juvenile Book on CD',
    'Leased Book'
  )
group by
  1,
  2
order by
  1,2
"""

# Send the query to the JSON endpoint. `_shape=array` presumably selects the
# server's "plain array of row objects" response shape (Datasette convention).
response = requests.get(
    url=base_url + '.json',
    params={
        'sql': sql,
        '_shape': 'array',
    },
    timeout=60,  # never hang the notebook indefinitely on a stalled server
)
# Fail loudly on an HTTP error instead of parsing an error payload as data.
response.raise_for_status()

# Build the frame from the decoded rows. Handing raw JSON bytes to
# pd.read_json goes through its literal-JSON input path, which recent pandas
# releases deprecate in favour of file-like objects.
df = pd.DataFrame(response.json())

# df.head()

In [12]:
#@title
# Bar chart of item counts per item format, one facet panel per publication year.
# Encodings are declared first so the chart assembly reads as a short pipeline.
format_axis = alt.X(
    'item_format',
    axis=alt.Axis(title='item format', labelAngle=-70),
)
count_axis = alt.Y('count_items:Q', title='count total items')
format_color = alt.Color('item_format')

bars = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=format_axis,
        y=count_axis,
        color=format_color,
        tooltip=['publish_year', 'item_format', 'count_items'],
    )
    .properties(title="", width=270)
)

# Split into one panel per publication year, laid out five panels to a row.
chart = bars.facet(facet='publish_year', columns=5)

chart.display()