In [1]:
import re

This regular expression example matches words that begin with at least two uppercase letters or two lowercase letters and prints the result.

In [2]:
pattern = re.compile(r'^[A-Z]{2}[a-zA-Z]*|^[a-z]{2}[a-zA-Z]*')


In [3]:
s = '''UCX 
AM 
PM 
hello 
University 
Programming'''

In [4]:
for word in ['UCX', 'AM', 'PM', 'hello', 'University', 'Programming']:
    match = re.search(pattern, word)
    if match:
        print(match.group())

UCX
AM
PM
hello


This following regex removes comments from code (removes whatever is to the right of the # hashtag simbol in each line).

In [5]:
comments = re.compile(r'(^ *#.*\n)|( #.*\n?)')
replacement = " "
# '(^ *#.*\n)|( #.*\n?)'gs

In [6]:
sentence = '''# this is a comment 
print("hello world") # this is another comment'''


And prints out the code contained in the text sentence....

In [7]:
print(re.sub(comments, replacement, sentence))

 print("hello world") 


Or we can do this... as an example of 

In [8]:
sentence = '''# this is a comment 
"hello world" # this is another comment
"here's some more text"
'''


In [9]:
print(re.sub(comments, replacement, sentence))

 "hello world" "here's some more text"



The follwing regex example matches valid emails.

In [10]:
emails = '''
example@gmail.com
fake_staff@berkeley.edu
test-321-check@my-work.net
another_test@hotmail.cc
test@gmail.c
'''

In [11]:
pattern = re.compile(r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]{2,}')

matches = pattern.finditer(emails)

for match in matches:
    print(match)

<re.Match object; span=(1, 18), match='example@gmail.com'>
<re.Match object; span=(19, 42), match='fake_staff@berkeley.edu'>
<re.Match object; span=(43, 69), match='test-321-check@my-work.net'>
<re.Match object; span=(70, 93), match='another_test@hotmail.cc'>


The following example standardizes a list of phone numbers ('nums' below) to XXX-XXX-XXX format.

In [12]:
# phone_search = re.compile(r'(.*\d{3}.\d{3}.\d{4})')
phone = re.compile(r'[\(]?(\d{3})[ -.]?(\d{3})[ -.]?(\d{4})')


nums = '''(415)555-1212
510-778-1234
408 555 4321
650.444.1213
7073730399'''

matches = phone.finditer(nums)

for match in matches:
    print(match.group())
    print(match)

# match = re.search(phone, nums)
# if match:
#     print(match.group())

(415)555-1212
<re.Match object; span=(0, 13), match='(415)555-1212'>
510-778-1234
<re.Match object; span=(14, 26), match='510-778-1234'>
408 555 4321
<re.Match object; span=(27, 39), match='408 555 4321'>
650.444.1213
<re.Match object; span=(40, 52), match='650.444.1213'>
7073730399
<re.Match object; span=(53, 63), match='7073730399'>


In [13]:
replacement = r'\1-\2-\3'

print(re.sub(phone, replacement, nums))

415-555-1212
510-778-1234
408-555-4321
650-444-1213
707-373-0399
