<a href="https://colab.research.google.com/github/cincinnatilibrary/collection-analysis/blob/master/reports/make_shelf_locations_spreadsheets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img src="https://collection-analysis.cincy.pl/static/CHPL_Brandmark_Primary.png" width=300>

# Shelf Locations Measurements & Statistics

This notebook uses the collection information stored here: https://collection-analysis.cincy.pl/current_collection/ (refreshed every other day) to generate an Excel Workbook consisting of multiple sheets--one for each CHPL Branch Location. Each row for a Branch Location defines a shelf location, and a number of statistics related to that location.

Each sheet then also provides **3 fields related to physical dimensions of all shelving** allotted to that particular shelf location:

1. **Total Linear Inches of shelving**
2. **Linear Inches of Shelving Filled**

   e.g. How many linear inches of shelving are occupied by physical materials
3. **Notes**

   Use this space for defining *numbers of ranges*, *shelves*, and *other information related to physical shelving space*.

In [1]:
from matplotlib import pyplot as plt
from matplotlib import colors
import requests
import pandas as pd
from datetime import date
import openpyxl
from openpyxl import load_workbook
from openpyxl.utils import quote_sheetname, get_column_letter
from openpyxl.styles import Font, PatternFill
import json

# set the API base URL (every request in this notebook is built from it)
base_url = "https://collection-analysis.cincy.pl/current_collection/"

# create a requests Session so the many API calls below reuse pooled
# connections; `requests.Session()` is the supported constructor
# (`requests.session()` is only kept as a backwards-compatible alias)
session = requests.Session()

In [2]:
def generate_colors(cmap='YlOrRd'):
  """
  build a list of hex color strings (without the leading `#`) sampled from
  the matplotlib colormap named by `cmap`; the list has one entry per
  integer percentage 0 through 100 so it can drive a heatmap-like effect
  in the spreadsheet
  """
  # resample the colormap to 101 levels so indexes 0-100 are all valid
  color_map = plt.get_cmap(cmap, 101)

  hex_codes = []
  for level in range(color_map.N):
    # the colormap yields RGBA; keep only the RGB channels
    red, green, blue = color_map(level)[:3]
    hex_string = colors.rgb2hex((red, green, blue))
    # strip the `#` in front of the hex color string
    hex_codes.append(hex_string[1:])

  return hex_codes

# palettes indexed by integer percentage (0-100):
# cell background colors come from the default 'YlOrRd' colormap ...
fill_colors = generate_colors('YlOrRd')
# ... and a second palette is generated from the 'hot' colormap
font_colors = generate_colors('hot')

In [3]:
# SQL sent to the collection-analysis API once per shelf location.
#
# accepts one named parameter, `:location_code`
#
# returns at most one row (`limit 1`) with these columns:
#   item_format              - the item_format with the highest count at the location
#   physical_format          - physical_format_name looked up from itype_property
#                              for that item_format
#   count_total_items        - items at the location in one of the listed status codes
#   count_total_checked_out  - how many of those items have a due_date set
#   pct_checked_out          - checked out / total * 100, rounded to 2 places
#   checkouts_6mo            - SUM(count_op_code) from circ_agg where op_code = 'o'
#   location_code, location_name, branch_name
#
# the `case` expression right-pads location codes shorter than 5 characters
# with spaces before comparing against circ_agg.item_location_code
# (presumably that column is stored space-padded to 5 characters -- TODO confirm)
sql_location_data = """\
with item_data as (
  select
    item.location_code,
    item.item_format,
    count(item_format) as count_item_format,
    count(item.item_record_num) as count_total,
    count(item.due_date) as count_checkout
  from
    item
  where
    -- consider these status codes as availbale
    item.item_status_code in (
      '-',
      '!',
      'b',
      'p',
      '(',
      '@',
      ')',
      '_',
      '=',
      '+',
      't'
    )
    and item.location_code = :location_code
  group by
    item.location_code,
    item.item_format
)
select
  (
    select
      item_format
    from
      item_data
    order by
      count_item_format DESC
    limit
      1
  ) as item_format,
  (
    select
      physical_format_name
    from
      itype_property
    where
      itype_property.itype_name = (
        select
          item_format
        from
          item_data
        order by
          count_item_format DESC
        limit
          1
      )
  ) as physical_format,
  --  json_group_array(
  --    json_object(
  --      'item_format',
  --      item_data.item_format,
  --      'count_items',
  --      item_data.count_total,
  --      'count_cheked_out',
  --      item_data.count_checkout
  --    )
  --  ) as item_formats,
  sum(item_data.count_total) as count_total_items,
  sum(item_data.count_checkout) as count_total_checked_out,
  round(
    sum(item_data.count_checkout) / (sum(item_data.count_total) * 1.0) * 100.0,
    2
  ) as pct_checked_out,
  (
    select
      -- item_location_code,
      -- branch_name,
      SUM(count_op_code)
    from
      circ_agg
    where
      "item_location_code" = (
        case
          WHEN length(:location_code) = 1 then :location_code || '    '
          WHEN length(:location_code) = 2 then :location_code || '   '
          WHEN length(:location_code) = 3 then :location_code || '  '
          WHEN length(:location_code) = 4 then :location_code || ' '
          else :location_code
        end
      )
      and "op_code" = 'o'
    group by
      item_location_code
    limit
      1
  ) as checkouts_6mo,
  item_data.location_code,
  location_name.name as location_name,
  branch_name.name as branch_name
from
  item_data
  join location on location.code = item_data.location_code
  join location_name on location_name.location_id = location.id
  join branch on branch.code_num = location.branch_code_num
  join branch_name on branch_name.branch_id = branch.id
limit
  1
"""

In [4]:
# build the list of branches from the first page of `location_view`:
# keep the value at index 2 of each row, skipping rows where it is blank
# (presumably index 2 is the branch name column -- TODO confirm)
# NOTE(review): `branches` is not referenced by the later cells of this notebook
first_page_rows = session.get(base_url+'location_view.json').json()['rows']

branches = []
for row in first_page_rows:
  if row[2] != '':
    branches.append(row[2])

In [5]:
# get all the location data from `collection-analysis.cincy.pl`

# this is the API endpoint where our current collection results will come from
next_url = base_url+'location_view.json'

# every page of results becomes one DataFrame; they are combined in a single
# `pd.concat` after the loop instead of re-copying a growing frame per page
pages = []

# work through the paginated results; stop once a response carries no
# `next_url` key or the key is null
while next_url is not None:
  r = session.get(next_url)
  # fail loudly on an HTTP error instead of trying to parse an error page
  r.raise_for_status()

  # decode the response body once and reuse it
  payload = r.json()

  pages.append(
      pd.DataFrame(
          payload['rows'],
          columns=payload['columns'],
          index=None
      )
  )

  # get the next url (results are paginated)
  next_url = payload.get('next_url')
  print('.', end='')

# combine all pages, then order the rows for the per-branch sheets
df = pd.concat(pages).sort_values(['branch_name', 'location_name'])

...............

In [6]:
# Build the shelf-locations workbook: a 'Summary' sheet plus one sheet per
# branch (one row per shelf location), then save it as an .xlsx file named
# with today's date.

# branches / agencies that do not get a sheet of their own
EXCLUDED_BRANCHES = (
    'Main Library',
    'Outreach Services',
    'Cincinnati Masonic Library',
    'Harriet Beecher Stowe House',
    'Madeira Historical Society',
    'W.H. Taft National Historic Site',
    'Taft Museum of Art Library',
    'Distribution Center',
    'Virtual Library'
)

# (header text, bold?) for columns A-J of every branch sheet
# NOTE(review): the notebook introduction describes columns H and I in linear
# *inches* while these headers say *feet* -- confirm which unit is intended
BRANCH_SHEET_HEADERS = (
    ('Location Code', True),
    ('Location Name', True),
    ('Item Format', False),
    ('Total Items', False),
    ('Total Checked Out', False),
    ('PCT Total Checked Out', False),
    ('Checkouts (previous 6-months)', False),
    ('Total Linear Feet of Shelving', False),
    ('Linear Feet of Shelving Filled', False),
    ('Notes', False),
)

# endpoint that runs ad-hoc SQL (the base url without its trailing `/`)
sql_endpoint = base_url.rstrip('/') + '.json'


def _fetch_location_stats(location_code):
  """
  run `sql_location_data` for one location code and return the result row as
  a dict

  returns an empty dict (after printing a warning) when the request fails,
  the body is not valid JSON, or the response holds no row -- so a failed
  lookup leaves the statistics cells blank instead of silently reusing the
  numbers fetched for the previous location
  """
  try:
    response = session.get(
        sql_endpoint,
        params={
            'sql': sql_location_data,
            'location_code': location_code,
            '_shape': 'array'
        }
    )
    response.raise_for_status()
    rows = response.json()
  except (requests.RequestException, ValueError) as error:
    print(f'\nWARNING: no stats for location {location_code!r}: {error}')
    return {}

  # `_shape=array` is expected to give a list of row objects; anything else
  # (an empty list, an error payload, ...) means there are no stats to report
  if isinstance(rows, list) and rows and isinstance(rows[0], dict):
    return rows[0]
  return {}


def _autosize_columns(sheet, padding=0, scale=1):
  """
  set every column of `sheet` to the width of its longest cell value
  (measured in characters), widened by `padding` and multiplied by `scale`;
  empty cells are ignored
  """
  for column_cells in sheet.columns:
    longest = max(
        (
            len(str(cell.value))
            for cell in column_cells
            if cell.value is not None
        ),
        default=0
    )
    column_letter = column_cells[0].column_letter
    sheet.column_dimensions[column_letter].width = (longest + padding) * scale


# Create a new Excel workbook
workbook = openpyxl.Workbook()

sheet1 = workbook.active
sheet1.title = 'Summary'
for column_letter, header in zip(
    'ABC',
    ('Branch', 'Total Items', 'Total Items Checked Out')
):
  sheet1[f'{column_letter}1'].value = header
  sheet1[f'{column_letter}1'].font = Font(bold=True)

# locations flagged 'DO NOT USE' never appear on a branch sheet
# (the filter is the same for every branch, so it is done once up front)
usable_locations = df[df['location_name'] != 'DO NOT USE']

# blank / missing branch names cannot be used as sheet titles, so skip them
branch_names = [
    name
    for name
    in df['branch_name'].unique()
    if isinstance(name, str) and name and name not in EXCLUDED_BRANCHES
]

# loop through the branches
for i, branch_name in enumerate(branch_names):
  # useful for debugging to just generate 2 sheets
  # if i >= 2:
  #   break

  # create a sheet for each branch / agency; use the sheet object returned by
  # openpyxl (and its final title) rather than looking it up by name again
  current_sheet = workbook.create_sheet(title=branch_name)
  sheet_ref = quote_sheetname(current_sheet.title)

  branch_df = usable_locations[
      usable_locations['branch_name'] == branch_name
  ].sort_values(by=['location_name', 'branch_name'])

  # header row
  for column_index, (header, is_bold) in enumerate(
      BRANCH_SHEET_HEADERS,
      start=1
  ):
    header_cell = current_sheet.cell(row=1, column=column_index, value=header)
    if is_bold:
      header_cell.font = Font(bold=True)

  # one row per shelf location, starting below the header
  for j, (location_code, location_name) in enumerate(zip(
      branch_df['location_code'].to_list(),
      branch_df['location_name'].to_list()
  )):
    row = j + 2

    # get the pct checked out and num available
    stats = _fetch_location_stats(location_code)
    item_format = stats.get('item_format')
    pct_checked_out = stats.get('pct_checked_out')

    # place values in the sheet
    current_sheet[f"A{row}"].value = location_code
    current_sheet[f"B{row}"].value = location_name
    current_sheet[f"C{row}"].value = item_format
    current_sheet[f"D{row}"].value = stats.get('count_total_items')
    current_sheet[f"E{row}"].value = stats.get('count_total_checked_out')
    current_sheet[f"F{row}"].value = pct_checked_out
    # only report 6-month checkouts when the location has a known item format
    current_sheet[f"G{row}"].value = (
        None if item_format is None else stats.get('checkouts_6mo')
    )

    # heatmap styling for the percentage cell (skipped when there is no value)
    if isinstance(pct_checked_out, (int, float)):
      # clamp so an out-of-range percentage can never index past the palette
      color_index = min(max(round(pct_checked_out), 0), len(fill_colors) - 1)
      current_sheet[f"F{row}"].fill = PatternFill(
          fill_type='solid',
          fgColor=fill_colors[color_index]
      )
      # light text on the darker (>= 50%) fills, black text otherwise
      current_sheet[f"F{row}"].font = Font(
          bold=True,
          color='eeeeee' if pct_checked_out >= 50.0 else '000000'
      )

    # progress output: location index, with a line break every 20 locations
    print(j, end=', ')
    if (j%20 == 0):
      print()

  # resize the columns in the branch sheet to fit the content
  _autosize_columns(current_sheet)

  # freeze parts of the spreadsheet
  current_sheet.freeze_panes = "D3"

  #TODO put a formula to sum all the values in the sheet

  print(f'{branch_name}: ✅\n')

  # finally, place the link to the branch on the first sheet; the sheet
  # reference comes from `quote_sheetname` so names containing an apostrophe
  # still produce a valid link and valid formulas
  summary_row = i + 2
  sheet1[f'A{summary_row}'].value = branch_name
  sheet1[f'A{summary_row}'].hyperlink = f"#{sheet_ref}!A1"
  sheet1[f'A{summary_row}'].font = Font(bold=True, color='3366cc')
  sheet1[f"B{summary_row}"].value = f"=SUM({sheet_ref}!D:D)"
  sheet1[f"C{summary_row}"].value = f"=SUM({sheet_ref}!E:E)"


# resize the columns in the summary sheet to fit the content (with some slack)
_autosize_columns(sheet1, padding=2, scale=1.2)

sheet1.freeze_panes = "D2"

# Save the workbook
workbook.save(
    filename=f"branch_shelf-locations_{date.today().strftime('%Y-%m-%d')}.xlsx"
)

0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, Anderson: ✅

0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, Avondale: ✅

0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, Blue Ash: ✅

0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, Bond Hill: ✅

0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23,

In [7]:
# cell_json = json.dumps(
#     json.loads(r[0]['item_formats']),
#     indent=4
# )


# print(
#     cell_json, 
#     len(cell_json.split('\n'))
# )

# # print(len)


# # >>> import json
# # >>> your_json = '["foo", {"bar": ["baz", null, 1.0, 2]}]'
# # >>> parsed = json.loads(your_json)
# # >>> print(json.dumps(parsed, indent=4))