## Imports

In [132]:
import pandas as pd
import bibleAnalysisFunctions as bf
import nltk as nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import numpy as np


pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows',None)


## Functions to be Reused

In [125]:
def GetVerseReferences(verseReferenceListing):
    '''Break "Book Chapter:Verse" reference strings into their components.

    Each reference is split on ':'; the text after the last colon is the
    verse, and the segment immediately before it is split on its last
    space into book and chapter. Book names that themselves contain
    spaces (e.g. "1 Samuel") are therefore kept whole.

    Parameters:
        verseReferenceListing: iterable of reference strings such as
            "Genesis 1:1".

    Returns:
        A tuple (books, chapters, verses) of three parallel lists of
        strings, in the same order as the input.

    Raises:
        IndexError: if a reference has no ':' or no space between the
            book and the chapter.
    '''
    books, chapters, verses = [], [], []

    for reference in verseReferenceListing:
        colon_parts = reference.split(':')
        # Whatever follows the last colon is the verse number.
        verses.append(colon_parts[-1])
        # The segment before the last colon holds "<book> <chapter>";
        # splitting once from the right keeps multi-word book names intact.
        book_and_chapter = colon_parts[-2].rsplit(' ', 1)
        chapters.append(book_and_chapter[-1])
        books.append(book_and_chapter[-2])

    return books, chapters, verses

def GetMaxVerseLength(verseListing):
    '''Measure verse lengths in words and report the longest.

    A verse's length is the number of pieces produced by splitting its
    text on a single space character, so an empty string counts as 1 and
    consecutive spaces contribute empty pieces to the count.

    Parameters:
        verseListing: iterable of verse text strings.

    Returns:
        A tuple (maxLen, lenDict) where lenDict maps each verse's
        zero-based position in verseListing to its word count and maxLen
        is the largest of those counts.

    Raises:
        ValueError: if verseListing is empty (there is no maximum).
    '''
    word_counts = {
        position: len(text.split(' '))
        for position, text in enumerate(verseListing)
    }
    longest = max(word_counts.values())
    return longest, word_counts

def IndicateWordPresence(bible_dataframe_verse_series, word_list):
    '''Flag which verses contain at least one of the given words.

    Every verse text is tokenized with nltk's word_tokenize and each
    token is tested for membership in word_list. With a list of strings
    this is an exact, case-sensitive comparison of whole tokens.

    Parameters:
        bible_dataframe_verse_series: iterable of verse text strings,
            e.g. the verse-text column of a Bible dataframe.
        word_list: collection of words to look for.

    Returns:
        A list of booleans, one per verse in input order; True when at
        least one token of that verse is in word_list.
    '''
    return [
        any(token in word_list for token in word_tokenize(verse_text))
        for verse_text in bible_dataframe_verse_series
    ]

def LoadBible(bible_excel_file):
    '''Read a Bible spreadsheet into a dataframe ready for analysis.

    Parameters:
        bible_excel_file: path (or other source accepted by
            pandas.read_excel) of the Excel file to load.

    Returns:
        A DataFrame with columns 'BookChapterVerse' and 'VerseText' taken
        from the sheet, plus 'Book', 'Chapter' and 'Verse' columns parsed
        out of 'BookChapterVerse' by GetVerseReferences.
    '''
    raw_sheet = pd.read_excel(bible_excel_file)
    # Discard the row labelled 0 and renumber from zero.
    # NOTE(review): presumably that row is a title/header line in the
    # spreadsheet rather than a verse - confirm against the data files.
    bible = raw_sheet.drop([0]).reset_index(drop=True)
    bible.columns = ('BookChapterVerse', 'VerseText')

    books, chapters, verses = GetVerseReferences(bible['BookChapterVerse'])
    bible['Book'] = books
    bible['Chapter'] = chapters
    bible['Verse'] = verses
    return bible

## Variables to be Referenced

In [None]:
# Tree-related vocabulary used to flag verses (passed to IndicateWordPresence
# as the word list). 'shrub' replaces the earlier misspelling 'shrug', which
# is not a tree word and meant verses mentioning shrubs were never flagged.
tree_words = ['tree','forest','shrub','bark','eucalyptus','oak','birch','pine','root','acacia','beech','trunk','leaf','grove','seed']

## Iterative Exploratory Analysis

In [128]:
# Load each Bible spreadsheet into its own dataframe via LoadBible.
# NOTE(review): the variable/file names look like translation
# abbreviations (one translation per file) - confirm.
akjv = LoadBible('akjv.xlsx')
asv = LoadBible('asv.xlsx')
# bsb = LoadBible('bsb.xlsx')
cpdv = LoadBible('cpdv.xlsx')
kjv = LoadBible('kjv.xlsx')
ylt = LoadBible('ylt.xlsx')


# Add boolean columns to the akjv dataframe only: one marking verses that
# contain any word from tree_words, one marking verses that contain the
# token 'light' or 'dark'.
akjv['tree_word_indicator'] = IndicateWordPresence(akjv.VerseText,tree_words)
akjv['light_dark_word_indicator'] = IndicateWordPresence(akjv.VerseText,['light','dark'])