# Genre Spread

This notebook will make counts and determine the spread of the top time markers across books and genres in the Hebrew Bible. The time markers with even spreads across books in the Hebrew Bible will be marked for special inquiry.

In [8]:
# import modules
import collections, pickle
from pprint import pprint
from tf.fabric import Fabric

In [28]:
TF = Fabric(modules='hebrew/etcbc4c', silent=True)
api = TF.load('''
                book chapter verse
                txt domain
              ''')
api.makeAvailableIn(globals())

  0.00s loading features ...
   |     0.01s B book                 from /Users/Cody/github/text-fabric-data/hebrew/etcbc4c
   |     0.01s B chapter              from /Users/Cody/github/text-fabric-data/hebrew/etcbc4c
   |     0.01s B verse                from /Users/Cody/github/text-fabric-data/hebrew/etcbc4c
   |     0.02s B txt                  from /Users/Cody/github/text-fabric-data/hebrew/etcbc4c
   |     0.02s B domain               from /Users/Cody/github/text-fabric-data/hebrew/etcbc4c
   |     0.00s Feature overview: 103 for nodes; 5 for edges; 1 configs; 7 computed
  4.44s All features loaded/computed - for details use loadLog()


In [4]:
# import time markers data
tm_data_file = 'data/time_markers.pickle'

# load data
with open(tm_data_file, 'rb') as infile:
    tm_data = pickle.load(infile)

print('data available: ', ', '.join(tm_data.keys()))

data available:  markers, top_markers, stats_rows, preposition_cl_lists


In [10]:
# assign the data
markers = tm_data['markers']
top_markers = tm_data['top_markers']
stats_rows = tm_data['stats_rows']

print('marker data available:')
print(markers['L <WLM'].keys())

marker data available:
dict_keys(['count', 'clauses', 'tense_cl_lists', 'tense_counts', 'tense_percents', 'example_phrase'])


## Map Clause Atom Nodes to Genre by Marker



In [43]:
time_marker_genres = collections.defaultdict(lambda: collections.defaultdict(list))
time_marker_domains = collections.defaultdict(lambda: collections.defaultdict(list))

# loop and map
for marker, count in top_markers:
        
    for clause in markers[marker]['clauses']:
                        
        # get the books embedding genre; thus index 0
        genre = F.txt.v(clause)[0]
        domain = F.domain.v(clause)
        
        time_marker_genres[marker][genre].append(clause)
        time_marker_domains[marker][domain].append(clause)
        
pprint(time_marker_domains['<D MTJ'])

defaultdict(<class 'list'>,
            {'Q': [433607,
                   433627,
                   452974,
                   454756,
                   457156,
                   462775,
                   473226,
                   473252,
                   474210,
                   476297,
                   477949,
                   484877,
                   487911,
                   492742,
                   493155,
                   493249,
                   499390,
                   499713,
                   507546]})


In [41]:
T.text(L.d(457691, otype='word'))

T.sectionFromNode(457691)

('2_Samuel', 7, 24)

In [37]:
pprint(time_marker_genres)

defaultdict(<function <lambda> at 0x1f65191e0>,
            {'<D <WLM': defaultdict(<class 'list'>,
                                    {'?': [488863, 508632, 511860],
                                     'D': [446357, 469533],
                                     'N': [433854,
                                           434088,
                                           448067,
                                           453013,
                                           453172,
                                           453278,
                                           453283,
                                           455513,
                                           455643,
                                           457650,
                                           457658,
                                           457659,
                                           457691,
                                           457696,
                                           457700,
       

                                          455204,
                                          456904,
                                          475016]}),
             'MXR': defaultdict(<class 'list'>,
                                {'N': [433446,
                                       433475,
                                       433611,
                                       433993,
                                       434480,
                                       441475,
                                       441698,
                                       441739,
                                       447986,
                                       448060,
                                       448111,
                                       449772,
                                       449801,
                                       449816,
                                       449820,
                                       452495,
                                       452729,
