# Read Text

In [73]:
import re

In [74]:
def egrep(regex, text):
    """
    Return the lines of ``text`` in which ``regex`` matches somewhere.

    :param regex: Regular expression pattern handed to ``re.search``.
    :param text: Input string; it is broken into lines with ``str.splitlines``.
    :return: A list of the matching lines, in their original order.
    """
    return [row for row in text.splitlines() if re.search(regex, row)]


In [75]:
# Read the whole sample file into one string. The encoding is given explicitly
# so the result does not depend on the platform's default locale encoding.
with open("test.txt", "r", encoding="utf-8") as file:
    text = file.read()

In [76]:
# Example: keep only the lines of `text` that contain at least one digit
regex = r"[0-9]"  # Character class matching a single ASCII digit, 0 through 9
matches = egrep(regex, text)  # List of the lines where the pattern was found

# Print the matching lines, one per output row
for line in matches:
    print(line)

This is line 1.
This line has a number: 123.
Another number: 456.


# Counting Lines

In [77]:
# Imports needed to simulate sys.stdin with an in-memory text buffer
import sys
from io import StringIO

In [78]:
# Redirect sys.stdin to simulate input: subsequent reads of sys.stdin return
# the contents of `text` from an in-memory buffer.
# NOTE(review): nothing in the visible cells restores the original sys.stdin
# afterwards — confirm that leaving it replaced is intended.
sys.stdin = StringIO(text)

In [79]:
# Count the lines available on sys.stdin by iterating it to exhaustion.
# The stream is consumed by this pass, so re-running the cell without
# resetting sys.stdin first will count 0 lines.
count = sum(1 for _ in sys.stdin)

In [80]:
# Display the number of lines counted from sys.stdin
count

4

# Most common words from input text

In [81]:
from collections import Counter

In [88]:
def most_common_words(text, num_words):
    """
    Tally lowercase, whitespace-delimited words and return the most frequent ones.

    :param text: Input string, possibly spanning several lines.
    :param num_words: How many of the highest-count words to return.
    :return: A list of (word, count) tuples as produced by ``Counter.most_common``.
    """
    tally = Counter()
    for row in text.splitlines():
        # split() with no arguments discards leading/trailing whitespace and
        # never yields empty strings, so no extra stripping or filtering is needed.
        tally.update(token.lower() for token in row.split())
    return tally.most_common(num_words)


In [89]:
# Read text from a file
file_path = "test.txt"  # Replace with your file path
# An explicit encoding keeps the read independent of the platform's default
# locale encoding.
with open(file_path, "r", encoding="utf-8") as file:
    text = file.read()  # Read the entire file content into a string

In [90]:
# Display the raw string that was read from the file
text

'This is line 1.\nThis line has a number: 123.\nNo numbers here.\nAnother number: 456.'

In [92]:
num_words = 3  # Number of top words to show
top_words = most_common_words(text, num_words)

# Print the results as "<count><TAB><word>", most frequent first.
# The loop variable is named `occurrences` rather than `count` so it does not
# overwrite the notebook-level `count` that holds the line total.
for word, occurrences in top_words:
    print(f"{occurrences}\t{word}")

2	this
2	line
2	number:
