<a href="https://colab.research.google.com/github/cincinnatilibrary/collection-analysis/blob/master/reports/colab_datasette_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CHPL - Collection Analysis - **Publication Dates Example**

<img src="https://ilsweb.cincinnatilibrary.org/img/CHPL_Brandmark_Primary.png" alt="CHPL" title="CHPL" width="300"/>

## Data sources

* https://collection-analysis.cincy.pl/

In [16]:
import pandas as pd
import requests
import altair as alt

# Base endpoint for the collection database; '.json' is appended below to
# request query results as JSON.
base_url = 'https://collection-analysis.cincy.pl/current_collection'

# NOTE(review): the decade expression appears to rely on `publish_year / 10`
# being integer division (publish_year stored as an integer) — confirm
# against the `bib` table schema.
sql = """\
-- group titles (bib records) by the decade in which they were published
select
  cast(round(publish_year / 10, 1) * 10 as integer) AS decade,
  count(*) AS count_titles
from
  bib
where
  publish_year >= 1800
  AND publish_year <= 2021
group by
  decade
order by
  decade
"""

# Run the query remotely. The timeout keeps the cell from hanging forever on
# an unresponsive server, and raise_for_status() surfaces HTTP errors
# (e.g. a rejected query) instead of parsing an error payload as data.
response = requests.get(
    url=base_url + '.json',
    params={
        'sql': sql,
        '_shape': 'array',  # presumably yields a JSON list of row objects
    },
    timeout=60,
)
response.raise_for_status()

# Build the frame from the decoded rows rather than handing raw bytes to
# pd.read_json; naming the columns keeps the schema stable even when the
# query returns no rows.
df = pd.DataFrame(response.json(), columns=['decade', 'count_titles'])

# generate the chart using Altair
# https://altair-viz.github.io/user_guide/encoding.html
chart = alt.Chart(df).mark_bar(color='#34B78F').encode(
    # decades are treated as nominal categories so each gets its own bar
    x=alt.X(
        'decade:N',
        axis=alt.Axis(labelAngle=-70)
    ),
    y='count_titles:Q',
    tooltip=['decade', 'count_titles']
).properties(
    title="Count CHPL Titles by Decade of Publication",
)

chart.display()


In [21]:
# Count items per (publish_year, item_format) for titles published 2018-2022.
# The CASE expression folds 'New Release DVDs' into 'DVD/Videocassette' so
# both are reported under a single format label.
sql = """\
select
  publish_year,
  case
    when item.item_format = 'New Release DVDs' then 'DVD/Videocassette'
    else item.item_format
  end as item_format,
  count(*) AS count_items
from
  bib
  join item on item.bib_record_num = bib.bib_record_num
where
  bib.publish_year >= 2018
  and bib.publish_year <= 2022
  and item_format in (
    'Book',
    'Juvenile Book',
    'Reference Book',
    'DVD/Videocassette',
    'Music on CD',
    'Teen Book',
    'Magazine',
    'Large Print Book',
    'Microfilm',
    'Rare Book',
    'Book on CD',
    'Microfiche',
    'Music Score',
    'Reference Juvenile Book',
    'LP Record',
    'New Release DVDs',
    'Juvenile Magazine',
    'Reference Magazine',
    'Juvenile Book on CD',
    'Leased Book'
  )
group by
  1,
  2
order by
  1,2
"""

# Run the query remotely (base_url is defined in the first code cell). The
# timeout keeps the cell from hanging forever on an unresponsive server, and
# raise_for_status() surfaces HTTP errors instead of parsing an error payload
# as data.
response = requests.get(
    url=base_url + '.json',
    params={
        'sql': sql,
        '_shape': 'array',  # presumably yields a JSON list of row objects
    },
    timeout=60,
)
response.raise_for_status()

# Build the frame from the decoded rows rather than handing raw bytes to
# pd.read_json; naming the columns keeps the schema stable even when the
# query returns no rows.
df = pd.DataFrame(
    response.json(),
    columns=['publish_year', 'item_format', 'count_items'],
)

# One bar chart per publication year (faceted, five panels per row), with
# bars coloured by item format.
chart = alt.Chart(df).mark_bar().encode(
    x=alt.X(
        'item_format',
        axis=alt.Axis(title='item format', labelAngle=-70)
    ),
    y=alt.Y(
        'count_items:Q',
        title='count total items'
    ),
    color=alt.Color(
        'item_format'
    ),
    tooltip=['publish_year', 'item_format', 'count_items']
).properties(
    width=270,
).facet(
    facet='publish_year',
    columns=5
).properties(
    # overall figure title, matching the titled chart in the first cell
    title="Count CHPL Items by Format and Year of Publication",
)

chart.display()