# Quantifying Shakespeare

In this notebook, we will:

1. Read all of Shakespeare's works into a list of strings
2. Count the frequencies of letters used
3. Visualize the letter frequencies

The dataset we will use is <https://bit.ly/shakespeare-txt>

In [None]:
# Read the lines into a list

from io import TextIOWrapper


def read_lines(filename: str) -> list[str]:
    """Read every line from filename into a list.

    Each line is lower-cased and stripped of leading and trailing
    whitespace (including the newline). Blank lines are kept as empty
    strings so the result has one entry per line of the file.

    Args:
        filename: Path of the text file to read.

    Returns:
        The normalized lines, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The with-block closes the file even if reading fails part-way through.
    with open(filename, "r") as file_handle:
        return [line.lower().strip() for line in file_handle]


# Load the whole text once, then report how many lines were read.
shakespeare_lines: list[str] = read_lines("./shakespeare.txt")
line_total: int = len(shakespeare_lines)
print(line_total)

In [None]:
# Tally up characters

# Goal: Define a function named tally
# It will take two parameters
# 1. A reference to a dictionary whose keys are str and values are int
# 2. A str that refers to a key in the dictionary you are tallying
# Logic:
    # If the str / key is in the dictionary, increase its value by one.
    # Otherwise, set the key's value to 1 in the dictionary.
    # This function returns nothing.


def tally(x: dict[str, int], y: str) -> None:
    """Add one to the count stored under key y in x.

    A key that is not yet present starts at 1. The dictionary is
    modified in place and nothing is returned.
    """
    # get() supplies 0 for an unseen key, so both cases are one assignment.
    x[y] = x.get(y, 0) + 1


def count_letters(lines: list[str]) -> dict[str, int]:
    """Count the frequencies of all letters in lines.

    Args:
        lines: Strings to scan character by character.

    Returns:
        A dictionary mapping each alphabetic character (as judged by
        str.isalpha) found in lines to the number of times it occurs.
        Non-alphabetic characters such as digits, spaces and punctuation
        are ignored.
    """
    counts: dict[str, int] = {}
    for line in lines:
        # Walk the characters of the current line, not the list of lines.
        for char in line:
            if char.isalpha():
                tally(counts, char)
    return counts


# Build the letter -> count table for every line that was read.
shakespeare_letters: dict[str, int] = count_letters(shakespeare_lines)
# Bare expression on the last line so the notebook displays the dictionary.
shakespeare_letters

In [None]:
# Sort the letter frequencies alphabetically by letter, rather than by count

# sorted() accepts the items view directly and returns a real list of
# (letter, count) pairs. Subscripting the type as `list[...]` would only
# build a type alias, not a list of the pairs.
# Keys are unique, so the pairs are ordered by letter alone.
items_in_dict: list[tuple[str, int]] = sorted(shakespeare_letters.items())
# dict() keeps insertion order, so the new dictionary is in alphabetical order.
shakespeare_letters_sorted: dict[str, int] = dict(items_in_dict)
# Bare expression on the last line so the notebook displays the result.
shakespeare_letters_sorted

In [None]:
# Make a bar chart

from matplotlib import pyplot

# Set the chart title and axis labels.
pyplot.title("Frequencies of Letters")
pyplot.xlabel("Letters")
pyplot.ylabel("Frequencies")
# Split the letter counts into two parallel lists: letters and their counts.
# NOTE(review): this reads shakespeare_letters (order of first appearance),
# not shakespeare_letters_sorted — confirm which order the chart should use.
labels: list[str] = list(shakespeare_letters.keys())
values: list[int] = list(shakespeare_letters.values())