In [1]:
import duckdb

# SQLite file that holds the tables queried later in this notebook
db_file = "../jupyter/ils-analytics-import/circ_trans_master.db"

# work in an in-memory DuckDB database; statements are run through `cursor`
con = duckdb.connect(':memory:')
cursor = con.cursor()

# the sqlite extension has to be installed and loaded before sqlite_attach()
# can be called
con.install_extension('sqlite')
con.load_extension('sqlite')

# attach the SQLite file so its tables can be queried from DuckDB
# (left as the last expression, so the cell shows the returned connection)
sql = f"CALL sqlite_attach('{db_file}', overwrite=false);\n"
cursor.execute(sql)

<duckdb.DuckDBPyConnection at 0x7f96da887bf0>

In [2]:
%%time

sql = """\
SELECT
    -- map_stat_group.location_code,
    map_location.name as location_name,
    count(DISTINCT circ_trans.patron_record_id) as count_distinct_patrons
FROM
    circ_trans
    left outer join map_stat_group on map_stat_group.code = circ_trans.stat_group_code_num
    left outer join map_location on map_location.code = map_stat_group.location_code
WHERE
    circ_trans.patron_record_id IN (
        SELECT
            patron_record_id
        FROM
            circ_trans
        WHERE
            circ_trans.op_code in ('o','i')
            -- AND circ_trans.transaction_gmt >= '2022-01-01'
            AND circ_trans.stat_group_code_num in (
                480,
                481,
                482,
                483,
                484
            )
            -- AND circ_trans.ptype_code < '196'
        GROUP BY 
            circ_trans.patron_record_id 
    )
    AND circ_trans.op_code in ('o','i')
    AND map_location.name NOT IN (
        'Distribution Center',
        'Main Library'
    )
GROUP BY 1
ORDER BY 2 DESC
"""

cursor.execute(sql)
data = cursor.fetchall()
columns=[description[0] for description in cursor.description]

# clean up the cursor / connection
cursor.close()
del(cursor)
con.close()

CPU times: user 1min 31s, sys: 48.1 s, total: 2min 19s
Wall time: 16.6 s


In [10]:
import pandas as pd
import altair as alt

# tabulate the fetched rows, labelled with the query's result column names
df = pd.DataFrame(
    data=data,
    columns=columns
)

# Horizontal bar chart, one bar per location, sorted so the largest count is
# at the top (sort='-x'). The chart and both axes carry readable titles so the
# figure can be understood on its own instead of showing raw column names.
alt.Chart(df).mark_bar().encode(
    x=alt.X('count_distinct_patrons', title='Distinct patrons'),
    y=alt.Y('location_name', sort='-x', title='Location'),
    tooltip=['location_name', 'count_distinct_patrons']
).properties(
    title='Distinct patrons by location',
    width=900
)

In [4]:
# show the result table (a bare last expression is rendered as the cell output)
df

Unnamed: 0,location_name,count_distinct_patrons
0,Sharonville,19481
1,Blue Ash,5102
2,Symmes Township,3936
3,Madeira,2995
4,Deer Park,2702
5,North Central,2686
6,Reading,2478
7,Anderson,2188
8,Wyoming,2108
9,Forest Park,2073


In [5]:
# Save the result table next to the notebook. index=False keeps the
# auto-generated 0..n-1 row index from being written as an unnamed first
# column, so the file contains only the two data columns.
df.to_csv('sharonville_users_other_branches.csv', index=False)