<a href="https://colab.research.google.com/github/cincinnatilibrary/collection-analysis/blob/master/reports/Titles_by_Language.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img src="https://collection-analysis.cincy.pl/static/CHPL_Brandmark_Primary.png" width=300>

# CHPL Titles By Language

Below is a query for examining titles and associated items that have a language code that is not for English ("eng").

In [1]:
import requests
import json
import pandas as pd

base_url = "https://collection-analysis.cincy.pl/current_collection.json"

session = requests.session()

sql = """\
select
  (
    SELECT
      coalesce(
        language_property.name,
        bib_record.language_code
      )
    from
      language_property
    where
      language_property.code = language_code
  ) as language_name,
  count(bib_record.record_id) as count_titles,
  count(item.item_record_num) as count_items,
  coalesce(
    sum(checkout_total) + sum(renewal_total),
    0
  ) as total_item_circ
from
  bib_record
  join bib on bib.bib_record_id = bib_record.record_id
  left outer join item on item.bib_record_num = bib.bib_record_num
where
  -- https://www.loc.gov/standards/iso639-2/php/code_list.php
  -- zxx is "No linguistic content; Not applicable"
  -- und is "Undetermined"
  language_code not in ('eng', '', 'zxx', 'und')
  and language_code is not null
group by
  1
order by
  2 desc
limit :limit offset :offset
"""

limit = 1000
offset = 0
result_rows = list()

while True:
  response = session.get(
      base_url,
      params={
          'sql': sql,
          'limit': limit,
          'offset': offset
      }
  )

  data = json.loads(response.text)
  result_rows.extend(data['rows'])
  print('.', end='')
  offset+=limit

  if (len(data['rows']) < limit):
    result_columns = data['columns']
    print('done.')
    break

.done.


In [2]:
from google.colab import data_table

# Enables DataTable as the default IPython formatter for Pandas DataFrames.
data_table.enable_dataframe_formatter()

df = pd.DataFrame(
    data=result_rows,
    columns=result_columns
)

data_table.DataTable(
    df,
    include_index=False,
    num_rows_per_page=20
)

Unnamed: 0,language_name,count_titles,count_items,total_item_circ
0,German,34018,33953,201060
1,French,27956,27914,353730
2,Spanish,20488,20435,344838
3,Italian,8338,8328,149975
4,Latin,7606,7596,67740
...,...,...,...,...
243,Chinook jargon,1,1,0
244,Bhojpuri,1,1,32
245,Aymara,1,1,0
246,Assamese,1,1,0


In [3]:
import altair as alt

count_titles = alt.Chart(df).mark_bar(color='#0092BD').encode(
    x = alt.X('count_titles'),
    y = alt.Y('language_name', sort='-x'),
    tooltip=['language_name', 'count_titles',]
).properties(
    title='Non-English Title Count By Language',
    width=500
)

count_items = alt.Chart(df).mark_bar(color='#34B78F').encode(
    x = alt.X('count_items'),
    y = alt.Y('language_name', sort='-x'),
    tooltip=['language_name', 'count_items',]
).properties(
    title='Non-English Item Count By Language',
    width=500
)


count_items_circ = alt.Chart(df).mark_bar(color='#8659B5').encode(
    x = alt.X('total_item_circ'),
    y = alt.Y('language_name', sort='-x'),
    tooltip=['language_name', 'total_item_circ',]
).properties(
    title='Non-English Total Item Circulation Count By Language',
    width=500
)


count_titles | count_items | count_items_circ