In [59]:
import re


def find_occurrences(text, search_term):
    """Count whole-word, case-insensitive occurrences of ``search_term`` in ``text``.

    Args:
        text (str): Text to search.
        search_term (str): A single word. It is matched literally (regex
            metacharacters are escaped) and only between word boundaries, so
            ``"you"`` does not match inside ``"yourself"``.

    Returns:
        int: Number of non-overlapping matches.

    Raises:
        TypeError: If ``search_term`` is not a string.
        ValueError: If ``search_term`` is empty or contains any whitespace.
    """
    if not isinstance(search_term, str):
        raise TypeError(
            f"search_term should be a str, not {type(search_term).__name__}"
        )
    # An empty term would produce the pattern r'\b\b', which matches at every
    # word boundary; whitespace of any kind (not only " ") means the term is
    # more than one word.
    if not search_term or any(char.isspace() for char in search_term):
        raise ValueError(f"'{search_term}' is not a word")
    pattern = r'\b{}\b'.format(re.escape(search_term))
    # IGNORECASE makes the function case-insensitive on its own, instead of
    # relying on the caller to lowercase ``text`` beforehand.
    return len(re.findall(pattern, text, flags=re.IGNORECASE))


def get_columns_width(d, left_header, right_header, left_footer, right_footer):
    """Compute the content width of the left and right table columns.

    Each width is the length of the longest string that has to fit in that
    column: its header, its footer, or any key of ``d`` (left column) /
    any ``str(value)`` of ``d`` (right column).

    Args:
        d (dict): Rows of the table. Keys are measured with ``len(key)``,
            values with ``len(str(value))``. May be empty, in which case only
            the headers and footers determine the widths.
        left_header (str): Header text of the left column.
        right_header (str): Header text of the right column.
        left_footer (str): Footer text of the left column.
        right_footer (str): Footer text of the right column.

    Returns:
        tuple[int, int]: ``(left_column_width, right_column_width)``.
    """
    # ``default=0`` keeps an empty ``d`` from raising ValueError: max() of an
    # empty sequence is an error, but a table with no rows is still valid.
    left_column_width = max(
        len(left_header),
        len(left_footer),
        max((len(key) for key in d.keys()), default=0),
    )
    right_column_width = max(
        len(right_header),
        len(right_footer),
        max((len(str(value)) for value in d.values()), default=0),
    )
    return left_column_width, right_column_width


def max_string_len(l):
    """Return the length of the longest item in ``l``, or 0 if ``l`` is empty.

    Args:
        l: Iterable of items that support ``len()`` (e.g. strings).

    Returns:
        int: The largest ``len(item)``; 0 when there are no items.
    """
    # ``default=0`` avoids the ValueError max() raises on an empty iterable.
    return max((len(item) for item in l), default=0)


def pad_word(word, colum_width, direction):
    """Pad ``word`` to ``colum_width`` characters and add a one-space margin on each side.

    Args:
        word (str): Cell content.
        colum_width (int): Target width of the content, not counting the two
            margin spaces. A word longer than this is returned unpadded; it is
            never truncated.
        direction (str): ``'left'`` to left-align (padding goes after the
            word) or ``'right'`` to right-align (padding goes before it).

    Returns:
        str: ``" " + aligned word + " "``.

    Raises:
        ValueError: If ``direction`` is neither ``'left'`` nor ``'right'``.
    """
    if direction == 'left':
        aligned = word.ljust(colum_width)
    elif direction == 'right':
        aligned = word.rjust(colum_width)
    else:
        # Explicit raise instead of ``assert``: assertions are stripped under
        # ``python -O``, which would let an invalid direction fall through to
        # an UnboundLocalError at the return statement.
        raise ValueError(
            f"direction should be one of ('left', 'right') instead of '{direction}'"
        )
    return f" {aligned} "


def pad_line(left, right, left_column_width, right_column_width):
    """Build one table row of the form ``|<left cell>|<right cell>|``.

    Args:
        left (str): Left cell content; left-aligned within its column.
        right (str): Right cell content; right-aligned within its column.
        left_column_width (int): Content width of the left column.
        right_column_width (int): Content width of the right column.

    Returns:
        str: The two padded cells, delimited and surrounded by ``|``.
    """
    cells = (
        pad_word(word=left, colum_width=left_column_width, direction='left'),
        pad_word(word=right, colum_width=right_column_width, direction='right'),
    )
    return "|" + "|".join(cells) + "|"


def generate_markdown_table(d):
    """Render a word -> count mapping as a two-column text table.

    The table has a WORD/COUNT header row, one row per item of ``d`` (in
    iteration order) and a TOTAL footer row holding the sum of the values.
    A dashed separator line is placed before and after each of these three
    sections.

    Args:
        d (dict): Maps each word to its count; the values are summed for the
            footer.

    Returns:
        str: The table lines joined with newlines (no trailing newline).
    """
    header = ("WORD", "COUNT")
    footer = ("TOTAL", str(sum(d.values())))

    left_width, right_width = get_columns_width(
        d=d,
        left_header=header[0],
        right_header=header[1],
        left_footer=footer[0],
        right_footer=footer[1],
    )

    def row(left_cell, right_cell):
        # Every row of the table shares the same two column widths.
        return pad_line(left_cell, right_cell, left_width, right_width)

    # "+ 2" covers the one-space margin on each side of a padded cell.
    dashes = ("-" * (width + 2) for width in (left_width, right_width))
    rule = "|" + "|".join(dashes) + "|"

    header_row = row(*header)
    footer_row = row(*footer)
    body_rows = [row(word, str(count)) for word, count in d.items()]

    return "\n".join([rule, header_row, rule, *body_rows, rule, footer_row, rule])


def word_count_summary(file_path, search_term, encoding="utf-8"):
    """Summarise how often a word, or each word of a list, occurs in a text file.

    The search is case-insensitive: the file content is lowercased before
    it is searched.

    Args:
        file_path: Path of the text file to read.
        search_term (str | list[str]): A single word, or a list of words.
        encoding (str): Encoding used to decode the file. Defaults to UTF-8
            instead of the platform's locale encoding, so the same file is
            decoded the same way on every machine.

    Returns:
        str: For a single word, a one-line sentence with its count (ending in
            a newline); for a list, a table with one row per word and a total.
        Exception: If counting or formatting fails (e.g. a term is not a
            single word, or ``search_term`` is neither a str nor a list), the
            exception is *returned*, not raised, so that
            ``print(word_count_summary(...))`` displays its message.

    Raises:
        OSError, UnicodeDecodeError: Errors while opening or decoding the
            file are not caught here and propagate to the caller.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        # Lowercase the text so that matching is case-insensitive.
        text = file.read().lower()

    try:
        if isinstance(search_term, str):
            count = find_occurrences(text=text, search_term=search_term)
            summary = "The word '{}' appears {} time(s).\n".format(search_term.lower(), count)
        elif isinstance(search_term, list):
            count_dict = {
                term: find_occurrences(text=text, search_term=term) for term in search_term
            }
            summary = generate_markdown_table(d=count_dict)
        else:
            raise ValueError("search_term should be one of: str or list[str]")
    except Exception as e:
        # NOTE: the exception object itself is handed back (existing contract);
        # callers that need to detect failure must check the return type.
        return e
    return summary

In [60]:
# Text file searched by every example below (relative path, resolved against the working directory).
file_path = 'data/pride_and_prejudice.txt'

In [61]:
# Single-word search; the term is reported in lowercase whatever case was passed in
print(word_count_summary(file_path, "You"))

The word 'you' appears 1326 time(s).



In [62]:
# Case-insensitive search: "you" gives the same count as "You"

print(word_count_summary(file_path, "you"))

The word 'you' appears 1326 time(s).



In [63]:
# Works even if there are no results: the count is reported as 0

print(word_count_summary(file_path, "yuo"))

The word 'yuo' appears 0 time(s).



In [64]:
# Handles invalid input: a term containing a space is rejected, and the error
# is returned rather than raised, so print() shows its message
print(word_count_summary(file_path, "two words"))

'two words' is not a word


In [69]:
# List version: one table row per word, plus a TOTAL row with the summed counts
print(word_count_summary(file_path, ["you", "are", "so", "beautiful"]))

|-----------|-------|
| WORD      | COUNT |
|-----------|-------|
| you       |  1326 |
| are       |   338 |
| so        |   592 |
| beautiful |    15 |
|-----------|-------|
| TOTAL     |  2271 |
|-----------|-------|


In [70]:
# Handles invalid input in list version: a single invalid term makes the whole
# call return the error instead of a table
print(word_count_summary(file_path, ["you", "are", "so beautiful"]))

'so beautiful' is not a word


In [87]:
# The WORD column widens automatically to fit the longest word
print(word_count_summary(file_path, ["communicativeness", "misrepresentation", "the"]))

|-------------------|-------|
| WORD              | COUNT |
|-------------------|-------|
| communicativeness |     1 |
| misrepresentation |     1 |
| the               |  4333 |
|-------------------|-------|
| TOTAL             |  4335 |
|-------------------|-------|
