## Module 12 Regular Expressions 

#### Exercise 1

#### Most frequent word used in a paragraph

In [1]:
from collections import Counter
import re

# Input text whose most common word we want to find
paragraph = """I love teaching. If you do not love teaching what else can you love. 
I love Python if you do not love something which can give you all the capabilities to develop an application what else can you love."""

# Normalise the text: drop every character that is neither a word character
# nor whitespace, then fold the remainder to lowercase
cleaned_paragraph = re.sub(r'[^\w\s]', '', paragraph)
cleaned_paragraph = cleaned_paragraph.lower()

# Tokenise on whitespace and tally each token
words = cleaned_paragraph.split()
word_counts = Counter()
word_counts.update(words)

# most_common(1) yields a one-element list holding a (word, count) tuple
most_frequent_word = word_counts.most_common(1)

# Report the winner
print("Most frequent word and its count:", most_frequent_word)


Most frequent word and its count: [('love', 6)]


#### Distance Between the Furthest Particles

In [2]:
import re

# Sentence describing where the particles sit on the x-axis
text = """The position of some particles on the horizontal x-axis are -12, -4, -3 and -1 in the negative direction, 0 at origin, 4 and 8 in the positive direction."""

# Pull out every integer, keeping an optional leading minus sign
points = [int(token) for token in re.findall(r'-?\d+', text)]

# Order a copy of the positions from leftmost to rightmost
sorted_points = list(points)
sorted_points.sort()

# The furthest pair is the two ends of the ordered list
distance = sorted_points[-1] - sorted_points[0]

# Show the intermediate values and the final answer
print("Extracted points:", points)
print("Sorted points:", sorted_points)
print("Distance between the furthest particles:", distance)


Extracted points: [-12, -4, -3, -1, 0, 4, 8]
Sorted points: [-12, -4, -3, -1, 0, 4, 8]
Distance between the furthest particles: 20


### Valid Python Variable Name

A valid Python variable name has the following features:  
* Must start with a letter or an underscore.
* Can only contain alphanumeric characters and underscores.
* Cannot start with a digit.
* Cannot use Python reserved keywords.

In [4]:
import keyword
import re

def is_valid_variable(var_name):
    """Return True if ``var_name`` is usable as a Python variable name.

    A name is accepted when it starts with an ASCII letter or an underscore,
    continues with ASCII letters, digits or underscores only, and is not a
    reserved keyword such as ``class`` or ``for``.

    Args:
        var_name: Candidate name as a string.

    Returns:
        bool: True when the name satisfies every rule above, otherwise False.
    """
    pattern = r'[a-zA-Z_][a-zA-Z0-9_]*'
    # fullmatch rather than match with '^...$': '$' also matches just before
    # a trailing newline, which would let a name ending in a newline through.
    if re.fullmatch(pattern, var_name) is None:
        return False
    # Reserved words fit the pattern but can never be assigned to.
    return not keyword.iskeyword(var_name)

# Test cases: print one verdict per candidate name, in order
for candidate in ('first_name', 'first-name', '1first_name', 'firstname'):
    print(is_valid_variable(candidate))


True
False
False
True


#### Exercise 3
Clean Text and Find Most Frequent Words

In [5]:
from collections import Counter
import re

# Original sentence: the words are littered with symbol characters
# (%, $, @, &, #, ;) that are stripped by clean_text before counting
sentence = '''%I $am@% a %tea@cher%, &and& I lo%#ve %tea@ching%;. There $is nothing; &as& mo@re rewarding as educa@ting &and& @emp%o@wering peo@ple. ;I found tea@ching m%o@re interesting tha@n any other %jo@bs. %Do@es thi%s mo@tivate yo@u to be a tea@cher!?'''

def clean_text(text):
    """Strip symbol noise from ``text`` and lowercase the result.

    Every character that is not a letter, a digit or whitespace is removed;
    whitespace is kept so that word boundaries survive.

    Args:
        text: The raw string to clean.

    Returns:
        str: The lowercased text with the noise characters removed.
    """
    # '\w' also matches the underscore, so it is listed separately to be
    # removed together with the other non-alphanumeric characters.
    cleaned = re.sub(r'[^\w\s]|_', '', text).lower()
    return cleaned

def most_frequent_words(text, top_n=3):
    """Return the ``top_n`` most common whitespace-separated words in ``text``.

    The result is a list of ``(word, count)`` tuples as produced by
    ``Counter.most_common``.
    """
    return Counter(text.split()).most_common(top_n)

# Strip the symbol noise first, then rank the words that remain
cleaned_text = clean_text(sentence)
top_words = most_frequent_words(cleaned_text)

# Show the intermediate text followed by the ranking
print("Cleaned text:", cleaned_text)
print("Most frequent words:", top_words)


Cleaned text: i am a teacher and i love teaching there is nothing as more rewarding as educating and empowering people i found teaching more interesting than any other jobs does this motivate you to be a teacher
Most frequent words: [('i', 3), ('a', 2), ('teacher', 2)]
