In [24]:
from urllib.request import urlopen, Request
from urllib.error import URLError, HTTPError

# Location of the text to analyse: Project Gutenberg's cached plain-text
# file pg10.txt (ebook id 10, per the URL path).
URL = "https://www.gutenberg.org/cache/epub/10/pg10.txt"

In [25]:
# Download the document at URL into `pg10` as a str.
# On any network failure the error is printed and `pg10` stays "".

# Seconds to wait on the socket; without a timeout urlopen can block forever
# on a stalled connection and hang the kernel.
DOWNLOAD_TIMEOUT_S = 30

pg10 = ""

try:
    req = Request(URL, headers={"User-Agent": "Mozilla/5.0"})
    with urlopen(req, timeout=DOWNLOAD_TIMEOUT_S) as resp:
        raw = resp.read()  # bytes
    # Decode after the connection is released; `raw` already holds the body.
    try:
        pg10 = raw.decode("utf-8")
    except UnicodeDecodeError:
        # latin-1 maps every byte value to a character, so this cannot fail.
        pg10 = raw.decode("latin-1")
    print("Download complete. Characters in pg10:", len(pg10))
except HTTPError as e:
    # Must precede URLError: HTTPError is a subclass of it.
    print("HTTP error:", e.code, e.reason)
except URLError as e:
    print("URL error:", e.reason)
except OSError as e:
    # Errors raised while reading the body (e.g. a socket timeout) are not
    # wrapped in URLError, so they are reported here instead of crashing.
    print("Network error:", e)

Download complete. Characters in pg10: 4451818


In [26]:
def tokenize_alpha(text):
    """Lazily yield the lowercase alphabetic words found in ``text``.

    A word is a maximal run of consecutive characters for which
    ``str.isalpha()`` is true; every other character (digits, spaces,
    punctuation, ...) acts as a separator and is discarded. Each letter is
    lowercased individually before being added to the current word.
    """
    letters = []
    for char in text:
        if char.isalpha():
            letters.append(char.lower())
            continue
        # Separator: emit the pending word, if there is one.
        if not letters:
            continue
        yield "".join(letters)
        letters = []
    # The input may end in the middle of a word.
    if letters:
        yield "".join(letters)

def count_lines(text):
    """Return the number of lines in ``text`` as split by ``str.splitlines``."""
    lines = text.splitlines()
    return len(lines)

def count_words(text):
    """Return how many tokens ``tokenize_alpha`` yields for ``text``."""
    # Count lazily so the tokens are never materialised as a list.
    return sum(1 for _token in tokenize_alpha(text))

def count_word_case_insensitive(text, target):
    """Count the tokens of ``text`` that equal ``target``, ignoring case.

    ``target`` is lowercased once and compared against the already-lowercase
    tokens produced by ``tokenize_alpha``.
    """
    needle = target.lower()
    return sum(1 for token in tokenize_alpha(text) if token == needle)

def most_common_word(text):
    """Return ``(word, count)`` for the most frequent token in ``text``.

    Tokens are the lowercase words yielded by ``tokenize_alpha``. If several
    words share the highest count, the one the tally dict iterates first is
    returned. For text with no tokens the result is ``(None, 0)``.
    """
    tallies = {}
    for token in tokenize_alpha(text):
        if token in tallies:
            tallies[token] += 1
        else:
            tallies[token] = 1

    if len(tallies) == 0:
        return None, 0

    # max() returns the first maximal key it meets, matching a strict
    # "greater than" scan over the dict in iteration order.
    top_word = max(tallies, key=tallies.get)
    return top_word, tallies[top_word]


In [27]:
# Compute each statistic over the downloaded text.
line_count = count_lines(pg10)
word_count = count_words(pg10)
apostle_count = count_word_case_insensitive(pg10, "apostle")
common_word, common_count = most_common_word(pg10)

# Each row is a label followed by the value(s) printed after it,
# separated by single spaces exactly as print() does by default.
report_rows = [
    ("Lines:", line_count),
    ("Words (alphabetic-only):", word_count),
    ('Occurrences of "apostle" (case-insensitive):', apostle_count),
    ("Most common word (case-insensitive):", common_word, "→", common_count),
]
for row in report_rows:
    print(*row)

Lines: 99968
Words (alphabetic-only): 795227
Occurrences of "apostle" (case-insensitive): 47
Most common word (case-insensitive): the → 64309
