# Introduction to the Reader Toolbox module

Given a Distant Reader data set (a "study carrel"), the Reader Toolbox module provides an interface to that data set. This notebook is an introduction to that functionality.

In [None]:
# configure; name the study carrel to read and the word you find interesting
QUERY  = 'love'     # passed as query= to the concordance, word2vec, and search cells
CARREL = 'homer'    # passed as the first argument to every rdr call


In [None]:
# require; import the Reader Toolbox's functionality
import rdr


In [None]:
# provenance; look up who created this carrel and when, then report both values
creator, dateCreated = ( rdr.provenance( CARREL, field ) for field in ( 'creator', 'dateCreated' ) )
print( "       Creator: %s" % creator )
print( "  Date created: %s" % dateCreated )


In [None]:
# extents; look up the carrel's size (items, words) and readability (flesch), then report each
items, words, flesch = ( rdr.extents( CARREL, measure ) for measure in ( 'items', 'words', 'flesch' ) )
print( "      Total number of items: %s" % str( items ) )
print( "      Total number of words: %s" % str( words ) )
print( "  Average readability score: %s" % str( flesch ) )


In [None]:
# bibliography; what items are in this carrel? fetch the carrel's
# bibliography, keep it in a variable, and output it
bibliography = rdr.bibliography( CARREL )
print( bibliography )


In [None]:
# cluster; use feature reduction (PCA) to determine how holistic the carrel may be
# NOTE(review): the call's result is not captured or printed here; presumably
# the function renders its own output -- confirm against the Toolbox documentation
rdr.clusters( CARREL )


In [None]:
# ngrams; sans stop words, count & tabulate unigrams; the result is kept
# in a variable and then output
unigrams = rdr.ngramss( CARREL, count=True )
print( unigrams )


In [None]:
# visualize ngrams; output a word cloud of unigram counts & tabulations by
# making the same counting call with wordcloud=True
rdr.ngramss( CARREL, count=True, wordcloud=True )


In [None]:
# concordance; perform a rudimentary key-word-in-context search for QUERY
# against the carrel, and output the result
print( rdr.concordancing( CARREL, query=QUERY ) )


In [None]:
# semantic indexing; an additional way of knowing a word (here, QUERY) by
# the company it keeps
print( rdr.word2vec( CARREL, query=QUERY ) )


In [None]:
# full text indexing; query the carrel for QUERY and output the result --
# "Librarians like to search. Everybody else likes to find."
print( rdr.searching( CARREL, query=QUERY ) )
