# Regular Expressions in Python

1) Check whether the given strings contain 0xA0. Display the line and a boolean result.

In [11]:
import re

# Sample lines; only the first one mentions the address 0xA0.
lines = [
    'start address: 0xA0',
    'func address: 0xC0',
    'end address: 0xFF',
    'func address: 0xB0',
]

# Plain literal pattern, compiled once because it is applied to every line.
pattern = re.compile(r'0xA0')

for line in lines:
    # search() yields a match object or None; report it as a plain boolean.
    found = pattern.search(line) is not None
    print(line, found)


start address: 0xA0 True
func address: 0xC0 False
end address: 0xFF False
func address: 0xB0 False


2) Replace all occurrences of 5 with five for the given strings

In [13]:
# Sentences in which every literal 5 should be spelled out.
lines = [
    'They run 5 kilometers in 5 minutes',
    'They were only 5 girls',
    'We are eating 5 apples and 5 oranges',
]

pattern = re.compile(r'5')

# No count is passed to sub(), so every occurrence in a line is replaced.
# A comprehension builds the list directly instead of appending in a loop.
result = [pattern.sub('five', line) for line in lines]
print(result)


['They run five kilometers in five minutes', 'They were only five girls', 'We are eating five apples and five oranges']


3) Replace only the first occurrence of 5 with five for the given strings

In [14]:
# Sentences in which only the first literal 5 should be spelled out.
lines = [
    'They run 5 kilometers in 5 minutes',
    'They were only 5 girls',
    'We are eating 5 apples and 5 oranges',
]

pattern = re.compile(r'5')

# count=1 stops sub() after the first replacement in each line.
# A comprehension builds the list directly instead of appending in a loop.
result = [pattern.sub('five', line, count=1) for line in lines]
print(result)

['They run five kilometers in 5 minutes', 'They were only five girls', 'We are eating five apples and 5 oranges']


4) For the given list, filter all elements that contain either a or w.

In [19]:
words = ['fast', 'some', 'wonderful', 'army', 'wally',
         'waterpolo', 'food', 'eat', 'new', 'corn']

# Alternation: a match anywhere on either letter is enough.
pattern = re.compile(r'a|w')

# filter() keeps the items for which search() returns a (truthy) match object.
words = list(filter(pattern.search, words))

print(words)

['fast', 'wonderful', 'army', 'wally', 'waterpolo', 'eat', 'new']


5) For the given list, filter all elements that contain both e and n.

In [20]:
words = ['fast', 'some', 'wonderful', 'army', 'wally',
         'waterpolo', 'food', 'eat', 'new', 'corn']

# One pattern per required letter; a word must satisfy both.
pattern1 = re.compile(r'e')
pattern2 = re.compile(r'n')

words = [
    candidate
    for candidate in words
    if all(required.search(candidate) for required in (pattern1, pattern2))
]

print(words)

['wonderful', 'new']


6. For the given list, filter all strings that start with be

In [21]:
lines = ['be nice', 'I will be a king', '"best"', 'better?', ' bet']

# ^ anchors the match to the start of the string, so a leading quote or
# space disqualifies the element.
pattern = re.compile(r'^be')

lines = list(filter(pattern.search, lines))
print(lines)

['be nice', 'better?']


7. For the given input string, change only the whole word red to brown.

In [22]:
words = 'bred red spread credible red.'

# \b on both sides restricts the match to "red" as a standalone word.
pattern = re.compile(r'\bred\b')

words = re.sub(pattern, 'brown', words)
print(words)

bred brown spread credible brown.


8. For the given input list, filter all elements that contain 42 surrounded by word characters.

In [25]:
words = [
    'hi42bye',
    'nice1423',
    'bad42',
    'cool_42a',
    '42fake',
    '_42_',
    'lucky42man',
    'manimal_42 firestorm',
    'xxx\n42_27',
]

# \B is the opposite of \b: it requires a word character on the outer side
# of both the 4 and the 2.
pattern = re.compile(r'\B42\B')

words = [entry for entry in words if pattern.search(entry) is not None]
print(words)

['hi42bye', 'nice1423', 'cool_42a', '_42_', 'lucky42man']


9. For the given input list, filter all elements that start with den or end with ly.

In [27]:
words = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\n', 'dent']

# \A and \Z anchor to the very start and end of the string, so text after
# an embedded newline or before a trailing newline does not qualify.
pattern = re.compile(r'\Aden|ly\Z')

words = list(filter(pattern.search, words))
print(words)

['lovely', '2 lonely', 'dent']


10. For the given input list, replace hand with X for all elements that start with hand followed by at least one word character.

In [30]:
words = ['handed', 'hand', 'handy', 'un-handed', 'handle', 'hand-2']

# \A pins "hand" to the start of the string; the trailing \B demands that a
# word character follows it.
pattern = re.compile(r'\Ahand\B')

words = [re.sub(pattern, 'X', entry) for entry in words]
print(words)

['Xed', 'hand', 'Xy', 'un-handed', 'Xle', 'hand-2']


11. For the given strings, replace all occurrences of removed or reed or received or refused with X.

In [32]:
strings = [
    'creed refuse removed read',
    'refused reed redo received',
]

# The optional group covers removed / received / refused; when it is absent
# the pattern reduces to "reed".
pattern = re.compile(r're(mov|ceiv|fus)?ed')

strings = [pattern.sub('X', text) for text in strings]
print(strings)


['cX refuse X read', 'X X redo X']


12. Replace all occurrences of \e with e.

In [33]:
# Raw string so that every \e stays a literal backslash followed by e.
string = r'th\er\e ar\e common asp\ects among th\e alt\ernations'

# \\ in the pattern matches one literal backslash.
pattern = re.compile(r'\\e')

string = re.sub(pattern, 'e', string)
print(string)

there are common aspects among the alternations


13. For the list items, filter all elements starting with hand and ending immediately with at most one character or le.

In [35]:
items = ['handed', 'hand', 'handled', 'handy', 'unhand', 'hands', 'handle']

# "hand" at the start, then optionally one character or "le", then the end
# of the string.
pattern = re.compile(r'\Ahand(.|le)?\Z')

items = list(filter(pattern.search, items))
print(items)

['hand', 'handy', 'hands', 'handle']


14. For the given input strings, remove everything from the first occurrence of i till the end of the string.

In [36]:
strings = [
    'remove the special meaning of such constructs',
    'characters while constructing',
    'input output',
]

# The leftmost i starts the match and the greedy .* runs to the end of the
# string.
pattern = re.compile(r'i.*\Z')

strings = [re.sub(pattern, '', text) for text in strings]
print(strings)

['remove the spec', 'characters wh', '']


15. For the input list words, filter all elements starting with s or t and having a maximum of 6 characters.

In [38]:
words = ['sequoia', 'subtle', 'exhibit', 'asset', 'sets', 't set', 'site']

# Raw string: \A and \Z are regex anchors, not string escapes. Without the
# r prefix Python reports them as invalid escape sequences.
# One leading s or t plus at most five more characters gives a maximum
# length of six.
pattern = re.compile(r'\A[st].{0,5}\Z')

words = [word for word in words if pattern.search(word)]
print(words)

['subtle', 'sets', 't set', 'site']


16. For the given strings, extract the matching portion from the first is to the last t

In [39]:
strings = [
    'This the biggest fruit you have seen?',
    'Your mission is to read and practice consistently',
]

# Leftmost "is", then a greedy .* that backtracks to the last t.
pattern = re.compile('(is.*t)')

# Every input is expected to match; a miss would surface as an
# AttributeError on None.
strings = [found.group(0) for found in map(pattern.search, strings)]
print(strings)

['is the biggest fruit', 'ission is to read and practice consistent']


17. Find the starting index of the first occurrence of is or the or was or to for the given input strings.

In [40]:
strings = [
    'match after the last newline character',
    'and then you want to test',
    'this is good bye then',
    'who was there to see?',
]

# The alternatives are tried at each position, so the earliest position
# where any of them matches wins.
pattern = re.compile(r'is|the|was|to')

# Every input is expected to match; a miss would surface as an
# AttributeError on None.
indices = [found.start() for found in map(pattern.search, strings)]
print(indices)

[12, 4, 2, 4]


18. The given input string contains : exactly once. Extract all characters after the : as output.

In [42]:
items = 'fruits:apple, mango, guava, blueberry'

# Group 1 captures everything that follows the colon.
pattern = re.compile(r':(.*)')

after_colon = pattern.search(items).group(1)
print(after_colon)

apple, mango, guava, blueberry


19. Delete from ( to the next occurrence of ) unless they contain parentheses characters in between.

In [45]:
strings = [
    'def factorial()',
    'a/b(division) + c%d(#modulo) - (e+(j/k-3)*4)',
    'Hi there(greeting). Nice day(a(b)',
]

# The negated class forbids parentheses between ( and ), so only pairs
# without nested parentheses are removed.
pattern = re.compile(r'\([^\(\)]*\)')

strings = [re.sub(pattern, '', text) for text in strings]
print(strings)

['def factorial', 'a/b + c%d - (e+*4)', 'Hi there. Nice day(a']


20. Given a list of strings, find all occurrences of the digits in each string.

In [46]:
time_sentences = [
    "Monday: The doctor's appointment is at 2:45pm.",
    "Tuesday: The dentist's appointment is at 11:30 am.",
    "Wednesday: At 7:00pm, there is a basketball game!",
    "Thursday: Be back home by 11:15 pm at the latest.",
    "Friday: Take the train at 08:10 am, arrive at 09:00am.",
]

# \d matches one digit at a time, so each digit becomes its own list entry.
pattern = re.compile(r'\d')

digits = list(map(pattern.findall, time_sentences))
print(digits)

[['2', '4', '5'], ['1', '1', '3', '0'], ['7', '0', '0'], ['1', '1', '1', '5'], ['0', '8', '1', '0', '0', '9', '0', '0']]


21. Given a list of strings, group and find the hours and minutes in each string.

In [47]:
time_sentences = [
    "Monday: The doctor's appointment is at 2:45pm.",
    "Tuesday: The dentist's appointment is at 11:30 am.",
    "Wednesday: At 7:00pm, there is a basketball game!",
    "Thursday: Be back home by 11:15 pm at the latest.",
    "Friday: Take the train at 08:10 am, arrive at 09:00am.",
]

# Two groups (hours, minutes): findall() returns one tuple per time found.
pattern = re.compile(r'(\d{1,2}):(\d{2})')

times = list(map(pattern.findall, time_sentences))
print(times)


[[('2', '45')], [('11', '30')], [('7', '00')], [('11', '15')], [('08', '10'), ('09', '00')]]


22. Given a list of strings, replace weekdays with ???.

In [48]:
time_sentences = [
    "Monday: The doctor's appointment is at 2:45pm.",
    "Tuesday: The dentist's appointment is at 11:30 am.",
    "Wednesday: At 7:00pm, there is a basketball game!",
    "Thursday: Be back home by 11:15 pm at the latest.",
    "Friday: Take the train at 08:10 am, arrive at 09:00am.",
]

# Spell out the seven day names instead of matching any word that ends in
# "day", so words such as "today" or "birthday" are left untouched.
# IGNORECASE keeps lowercase spellings such as "monday" matching.
pattern = re.compile(
    r'\b(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day\b',
    re.IGNORECASE,
)

time_sentences = [pattern.sub('???', sentence) for sentence in time_sentences]
print(time_sentences)

["???: The doctor's appointment is at 2:45pm.", "???: The dentist's appointment is at 11:30 am.", '???: At 7:00pm, there is a basketball game!', '???: Be back home by 11:15 pm at the latest.', '???: Take the train at 08:10 am, arrive at 09:00am.']


23. Given a list of strings, replace weekdays with 3 letter abbreviations.

In [50]:
time_sentences = [
    "Monday: The doctor's appointment is at 2:45pm.",
    "Tuesday: The dentist's appointment is at 11:30 am.",
    "Wednesday: At 7:00pm, there is a basketball game!",
    "Thursday: Be back home by 11:15 pm at the latest.",
    "Friday: Take the train at 08:10 am, arrive at 09:00am.",
]

# Match real day names only, so words such as "birthday" are not shortened.
# IGNORECASE keeps lowercase spellings such as "monday" matching.
pattern = re.compile(
    r'\b(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day\b',
    re.IGNORECASE,
)


def abbreviate(match):
    """Return the first three letters of the matched day name."""
    return match.group()[:3]


# The replacement is computed per match, so a sentence with several day
# names gets each one abbreviated correctly, and a sentence without any day
# name passes through unchanged instead of raising on a failed search().
time_sentences = [pattern.sub(abbreviate, sentence) for sentence in time_sentences]
print(time_sentences)

["Mon: The doctor's appointment is at 2:45pm.", "Tue: The dentist's appointment is at 11:30 am.", 'Wed: At 7:00pm, there is a basketball game!', 'Thu: Be back home by 11:15 pm at the latest.', 'Fri: Take the train at 08:10 am, arrive at 09:00am.']


24. Given a list of strings, extract the entire time, the hours, the minutes, and the period for each string.

In [57]:
time_sentences = [
    "Monday: The doctor's appointment is at 2:45pm.",
    "Tuesday: The dentist's appointment is at 11:30 am.",
    "Wednesday: At 7:00pm, there is a basketball game!",
    "Thursday: Be back home by 11:15 pm at the latest.",
    "Friday: Take the train at 08:10 am, arrive at 09:00am.",
]

# A single capturing group wraps the whole time; the am/pm alternation is
# non-capturing, so findall() returns plain strings rather than tuples.
pattern = re.compile(r'(\d{1,2}:\d{2}\s?(?:am|pm))')

time_sentences = list(map(pattern.findall, time_sentences))
print(time_sentences)

[['2:45pm'], ['11:30 am'], ['7:00pm'], ['11:15 pm'], ['08:10 am', '09:00am']]


25. Replace the space character that occurs after a word ending with a or r with a newline character.

In [65]:
string = 'area not a _a2_ roar took 22'

# Match a literal space rather than \s: the task is about the space
# character, and \s would also rewrite tabs and existing newlines.
# Group 1 keeps the a or r that ends the word so it can be put back.
pattern = re.compile(r'([ar]) ')

result = pattern.sub(r'\1\n', string)
print(result)

area
not a
_a2_ roar
took 22
