# In [None]:
from collections import defaultdict, Counter
import logging
import re

from .utils import InvalidUsage

def text_statistics(text, tokens):
    """Return basic size statistics for a text and its token sequence.

    Args:
        text: The raw text; its ``len()`` is reported as "Char Length".
        tokens: The tokens of the text; their number is reported as
            "Word Count".

    Returns:
        A dict with the keys "Word Count" and "Char Length".
    """
    word_count = len(tokens)
    char_length = len(text)
    stats = {}
    stats["Word Count"] = word_count
    stats["Char Length"] = char_length
    return stats
            
def _reject(error_message):
    """Log ``error_message`` at error level and raise it as ``InvalidUsage``."""
    logging.error(error_message)
    raise InvalidUsage(error_message)


def word_counts(tokens,
    filter_words=[],
    lower_case=False):
    """Count how often each token occurs.

    Args:
        tokens: Non-empty list of str tokens.
        filter_words: List or set of str. When non-empty, only tokens that
            appear in it are kept in the result. The comparison happens
            after optional lower-casing of the tokens, and the filter words
            themselves are not lower-cased. The default list is only read,
            never mutated, so sharing it between calls is safe.
        lower_case: When True, tokens are lower-cased before counting.

    Returns:
        A dict mapping each (kept) token to its number of occurrences, in
        order of first appearance.

    Raises:
        InvalidUsage: If ``tokens`` is not a list, is empty, or contains a
            non-str element; if ``filter_words`` is not a list/set or
            contains a non-str element; or if ``lower_case`` is not a bool.
    """
    logging.info("Calling word counts")

    # Validate everything before touching the values: joining the tokens for
    # the debug log first would raise a bare TypeError on bad input instead
    # of the documented InvalidUsage.
    if not isinstance(tokens, list):
        _reject("Expected tokens to be list, recieved {}".format(type(tokens)))

    if not tokens:
        _reject("Expected list with values")

    for tok in tokens:
        if not isinstance(tok, str):
            _reject("Expected token to be str, recieved {}".format(type(tok)))

    if not isinstance(filter_words, (list, set)):
        _reject("Expected filter_words to be set or list, recieved {}".format(type(filter_words)))

    # Iterate rather than index: sets are accepted but are not subscriptable.
    for word in filter_words:
        if not isinstance(word, str):
            _reject("Expected filter_word to be str, recieved {}".format(type(word)))

    if not isinstance(lower_case, bool):
        _reject("Expected lower_case to be bool, recieved {}".format(type(lower_case)))

    logging.debug(" ".join(tokens))

    if lower_case:
        logging.info("Lower casing input tokens")
        tokens = [tok.lower() for tok in tokens]
        logging.debug(" ".join(tokens))

    token_counts = dict(Counter(tokens))

    if filter_words:
        logging.info("Filter token count")
        logging.debug("Filtered words are: " + " ".join(filter_words))
        # Build the lookup set once so each membership test is O(1).
        allowed = set(filter_words)
        token_counts = {tok: count for tok, count in token_counts.items() if tok in allowed}

    return token_counts


def keepAlpha(sentence):
    """Remove ASCII digits from ``sentence`` and collapse whitespace runs.

    Every run of the characters 0-9 is deleted first, then every run of
    whitespace is replaced by a single space. Despite the name, punctuation
    and other non-letter characters are left untouched, and leading or
    trailing whitespace is reduced to one space rather than stripped.

    Args:
        sentence: The string to clean.

    Returns:
        The cleaned string.
    """
    substitutions = (
        (r'[0-9]+', ''),   # drop digit runs
        (r'\s+', ' '),     # then squeeze whitespace left behind
    )
    cleaned = sentence
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned

def fed_law(text, string_to_search = 'Federal Law'):
    """Report whether ``string_to_search`` occurs in ``text`` as a whole phrase.

    Both strings are lower-cased and passed through ``keepAlpha`` before
    matching, so the comparison is case-insensitive. The phrase must be
    delimited by regex word boundaries on both sides.

    Args:
        text: The text to search; must be exactly of type ``str``.
        string_to_search: The phrase to look for.

    Returns:
        A dict ``{'Found': bool}``.

    Raises:
        InvalidUsage: If ``text`` is not a ``str``.
    """
    if type(text) is not str:
        error_message = "Expected text to be str, recieved {}".format(type(text))
        logging.error(error_message)
        raise InvalidUsage(error_message)

    needle = keepAlpha(string_to_search.lower())
    # Escape the phrase so characters special to regex are matched literally.
    whole_phrase = r"\b" + re.escape(needle) + r"\b"
    haystack = keepAlpha(text.lower())
    return {'Found': re.search(whole_phrase, haystack) is not None}
        
def bank_mention(text, string_to_search = 'The Bank'):
    """Report whether ``string_to_search`` occurs in ``text`` as a whole phrase.

    Both strings are lower-cased and passed through ``keepAlpha`` before
    matching, so the comparison is case-insensitive. The phrase must be
    delimited by regex word boundaries on both sides.

    Args:
        text: The text to search; must be exactly of type ``str``.
        string_to_search: The phrase to look for.

    Returns:
        A dict ``{'Found': bool}``.

    Raises:
        InvalidUsage: If ``text`` is not a ``str``.
    """
    received_type = type(text)
    if received_type is not str:
        error_message = "Expected text to be str, recieved {}".format(received_type)
        logging.error(error_message)
        raise InvalidUsage(error_message)

    # Normalise the phrase and the text the same way before comparing.
    phrase = re.escape(keepAlpha(string_to_search.lower()))
    normalised_text = keepAlpha(text.lower())
    hit = re.search(r"\b" + phrase + r"\b", normalised_text)
    return {'Found': bool(hit)}