## Exceptional Trends in S&co Data

This notebook explores the correlation and distribution of exceptional trends in Shakespeare and Company data, specifically in the activity by members and the books they borrowed.

The categories identified for exceptional behavior were the following:
- When a book had a borrow status of missing or unknown
- When a book was borrowed after 1942
- When a book was borrowed beyond the member's stated subscription rate
- When a book was borrowed for far longer than normal
- When a book was borrowed on a Sunday when the shop was technically closed
  
We are particularly interested in the overlap between these behaviors, whether for members (i.e., someone who might have had a special relationship with Beach) or for books (i.e., a title that for some reason was more likely to be used differently).

#### Load Libraries and Data

In [1]:

import pandas as pd
pd.options.mode.chained_assignment = None
pd.set_option('display.max_columns', None)

import altair as alt
# from altair_saver import save

from ast import literal_eval

import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append("..")
from network_analysis.load_datasets import get_updated_shxco_data

In [2]:
# Fetch the project tables once, then unpack them into the four frames used
# throughout the notebook.
shxco_tables = get_updated_shxco_data(get_subscription=False)
members_df, books_df, borrow_events, events_df = shxco_tables


#### Split and clean data

In [3]:
def split_cols(original_df):
    """Return one row per exceptional type for the rows that have any.

    Rows whose ``exceptional_types`` is null are dropped. The remaining
    ``exceptional_types`` values (and ``exceptional_counts``, when that column
    is present) are parsed with ``literal_eval`` and exploded so that every
    list element gets its own row. Exploded rows repeat the index label of the
    row they came from. ``original_df`` itself is never modified.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Frame with an ``exceptional_types`` column holding list literals as
        strings and, optionally, a parallel ``exceptional_counts`` column.
        Values that are not strings (e.g. lists that were already parsed) are
        passed through unchanged.

    Returns
    -------
    pandas.DataFrame
        A new, exploded frame.
    """
    def _parse(value):
        # literal_eval only accepts strings; anything else is already parsed.
        return literal_eval(value) if isinstance(value, str) else value

    list_cols = ['exceptional_types']
    if 'exceptional_counts' in original_df.columns:
        list_cols.append('exceptional_counts')

    # Take an explicit copy so the column assignments below never write to a
    # slice of the caller's frame (and do not depend on warnings being muted).
    df = original_df.loc[original_df['exceptional_types'].notna()].copy()
    for list_col in list_cols:
        df[list_col] = df[list_col].apply(_parse)

    # Exploding both columns together keeps each type paired with its count.
    return df.explode(list_cols)

In [4]:
def _count_exceptional_types(ex_df, dataset_label):
    """Count exploded rows per exceptional type and tag them with a dataset name.

    Returns a frame with the columns ``exceptional_types``, ``counts`` and
    ``type`` (set to ``dataset_label`` on every row).
    """
    counts = ex_df.groupby('exceptional_types').size().reset_index(name='counts')
    counts['type'] = dataset_label
    return counts


# One exploded frame and one per-type count table for each dataset.
ex_books_df = split_cols(books_df)
grouped_books = _count_exceptional_types(ex_books_df, 'books')

ex_members_df = split_cols(members_df)
grouped_members = _count_exceptional_types(ex_members_df, 'members')

ex_events_df = split_cols(events_df)
grouped_events = _count_exceptional_types(ex_events_df, 'events')


In [5]:
def update_values(df):
    """Replace raw exceptional-type keys with display labels, in place.

    Every value in ``df['exceptional_types']`` that equals one of the known
    raw keys is overwritten with its human-readable label; other values are
    left untouched. The frame passed in is modified and the same object is
    returned.
    """
    # Raw keys are matched exactly as written here, including the
    # single-"p" spelling of 'sunday_shopers'.
    display_labels = {
        'sunday_shopers': 'Sunday Shoppers',
        'longterm_borrows': 'Longterm Borrows',
        'overborrows': 'Borrows Beyond Subscription',
        'post1942_events': 'Borrows After 1942',
        'unknown_borrows': 'Borrow Status Unknown',
        'missing_events': 'Borrow Status Missing',
    }
    for raw_key, label in display_labels.items():
        matches = df['exceptional_types'] == raw_key
        df.loc[matches, 'exceptional_types'] = label
    return df


#### Generate Correlation Charts

In [17]:
def get_correlation_df(original_df, col):
    """Correlate exceptional-type counts across the entities identified by ``col``.

    The frame is exploded with ``split_cols``, counts are cast to ``int``, raw
    type keys are relabelled with ``update_values``, and counts are summed per
    (``col``, type). The sums are pivoted to one column per type (missing
    combinations become 0) and the type columns are correlated pairwise with
    ``DataFrame.corr()``.

    Returns
    -------
    (pandas.DataFrame, list)
        The correlation table, with the row type available both as
        ``exceptional_types`` and as ``cat``, and the list of type labels that
        make up its value columns.
    """
    exploded = split_cols(original_df)
    exploded['exceptional_counts'] = exploded.exceptional_counts.astype(int)
    labelled = update_values(exploded)

    totals = (
        labelled
        .groupby([col, 'exceptional_types'])['exceptional_counts']
        .sum()
        .reset_index()
    )
    wide = (
        totals
        .pivot(index=col, columns='exceptional_types', values='exceptional_counts')
        .reset_index()
        .fillna(0)
    )

    # Type labels in order of first appearance in the grouped totals.
    pivot_cols = totals['exceptional_types'].unique().tolist()
    corr_df = wide[pivot_cols].corr().reset_index()
    # Duplicate the row label under the name the charts encode on the x axis.
    corr_df['cat'] = corr_df['exceptional_types']
    return corr_df, pivot_cols

def get_correlation_chart(original_df, col, title):
    """Build a labelled correlation heatmap for one dataset.

    The correlation table from ``get_correlation_df(original_df, col)`` is
    folded so each type column becomes ``key``/``value`` rows, then drawn as
    coloured rectangles with the value printed on top of each cell.

    Returns the layered Altair chart (rectangles + text), titled ``title``.
    """
    corr_df, pivot_cols = get_correlation_df(original_df, col)

    x_encoding = alt.X("cat:N", axis=alt.Axis(title='', labelAngle=-45))
    y_encoding = alt.Y('key:N', axis=alt.Axis(title=''))
    heat_colour = alt.Color("value:Q", scale=alt.Scale(scheme="redyellowblue"))
    cell_text = alt.Text("value:Q", format="0.1f")

    grid = (
        alt.Chart(corr_df)
        .transform_fold(pivot_cols)
        .encode(x=x_encoding, y=y_encoding)
        .properties(height=300, width=300, title=title)
    )
    cells = grid.mark_rect().encode(color=heat_colour)
    values = grid.mark_text(size=5, color="grey").encode(text=cell_text)
    return cells + values

In [18]:
# Each dataset can surface a different set of exceptional types, so keep the
# per-dataset label lists separate rather than letting the second call
# overwrite the first.
members_corr, members_pivot_cols = get_correlation_df(members_df, 'member_id')
books_corr, books_pivot_cols = get_correlation_df(books_df, 'id')

# Ordered, de-duplicated union of both label lists. Folding the combined
# correlation table over this list keeps types that occur in only one dataset.
pivot_cols = list(dict.fromkeys(members_pivot_cols + books_pivot_cols))

In [19]:
# Facet titles; each correlation table is tagged with its own title so the two
# can be told apart once they are stacked into one frame.
members_type = "Correlations in Members' Borrowing Activity Beyond Guidelines"
books_type = "Correlations in Books Being Borrowed Beyond Guidelines"
for corr_table, facet_title in ((members_corr, members_type), (books_corr, books_type)):
    corr_table['type'] = facet_title

In [20]:
# Stack the members table on top of the books table (row-wise, original
# index labels kept).
corr_concat = pd.concat(
    [members_corr, books_corr],
    axis=0,
    ignore_index=False,
)

In [21]:
# Heatmap of the combined correlation table, one facet row per dataset.
x_encoding = alt.X("cat:N", axis=alt.Axis(title='', labelAngle=-45))
y_encoding = alt.Y('key:N', axis=alt.Axis(title=''))
base = (
    alt.Chart(corr_concat)
    .transform_fold(pivot_cols)
    .encode(x=x_encoding, y=y_encoding)
    .properties(height=300, width=300)
)

heat_colour = alt.Color("value:Q", scale=alt.Scale(scheme="redyellowblue"))
boxes = base.mark_rect().encode(color=heat_colour)
labels = base.mark_text(size=5, color="grey").encode(
    text=alt.Text("value:Q", format="0.1f"))
chart = boxes + labels

# Members first, then books, with the facet title printed above each panel.
facet_rows = alt.Row(
    'type:N',
    header=alt.Header(labelOrient='top'),
    sort=[members_type, books_type],
)
chart.facet(row=facet_rows)

In [25]:
# Build one heatmap per dataset, then stack them with members on top.
chart_books = get_correlation_chart(
    original_df=books_df,
    col='id',
    title='Correlations in Exceptional Books',
)
chart_members = get_correlation_chart(
    original_df=members_df,
    col='member_id',
    title='Correlations in Exceptional Members',
)

alt.vconcat(chart_members, chart_books)

#### Trends Across Datasets

In [26]:
# Stack the per-dataset count tables and swap raw type keys for display labels.
grouped_df = update_values(
    pd.concat([grouped_books, grouped_members, grouped_events])
)


In [27]:
# Show the combined table: one row per (exceptional type, dataset) with its count.
grouped_df

Unnamed: 0,exceptional_types,counts,type
0,Longterm Borrows,23,books
1,Borrow Status Missing,4,books
2,Borrows Beyond Subscription,2931,books
3,Borrows After 1942,392,books
4,Sunday Shoppers,246,books
5,Borrow Status Unknown,539,books
0,Longterm Borrows,12,members
1,Borrow Status Missing,4,members
2,Borrows Beyond Subscription,277,members
3,Borrows After 1942,119,members


In [28]:

# Datasets are ordered by their summed counts, largest first, on both the
# y axis and the colour legend.
by_total_desc = alt.EncodingSortField(field='counts', op='sum', order='descending')

bars = (
    alt.Chart(grouped_df)
    .mark_bar()
    .encode(
        x=alt.X('counts:Q', title=''),
        y=alt.Y('type:O', sort=by_total_desc, title=''),
        color=alt.Color(
            'type:N',
            legend=alt.Legend(title='Library Dataset'),
            sort=by_total_desc,
        ),
    )
    .properties(width=100, height=100)
)

# One small panel per exceptional type, three per row; each panel gets its own
# x scale because the counts differ widely between types.
chart = bars.facet(
    facet=alt.Facet('exceptional_types:O', title=None),
    columns=3,
    title='Rate of Exceptional Behavior in the Library (Scale Independent)',
).resolve_scale(x='independent')
chart
# chart.save('./visualizations/exceptional_metadata.png', scale_factor=2.0)
