# Regular Expressions (RegEx)

In [1]:
text = "The agent's phone number is 408-55-1234. Call Soon!"

In [2]:
'phone' in text

True

In [3]:
import re

In [4]:
pattern = 'phone'

In [5]:
re.search(pattern,text)

<re.Match object; span=(12, 17), match='phone'>

In [6]:
pattern = 'NOT IN TEXT'

In [7]:
re.search(pattern,text)

In [8]:
pattern = 'phone'

In [9]:
match = re.search(pattern,text)

In [10]:
match.span()

(12, 17)

In [12]:
match.start()

12

In [13]:
match.end()

17

In [14]:
text = 'my phone once and my phone twice'

In [15]:
match = re.search(pattern,text)

In [16]:
match # Only returns first match

<re.Match object; span=(3, 8), match='phone'>

In [17]:
matches = re.findall('phone',text)

In [18]:
matches

['phone', 'phone']

In [19]:
len(matches)

2

In [21]:
for match in re.finditer('phone',text): # Iterating through match objects
    print(match.span())

(3, 8)
(21, 26)


In [22]:
text = "The agent's phone number is 408-55-1234. Call Soon!"

In [27]:
phone = re.search('408-55-1234',text)

In [28]:
phone

<re.Match object; span=(28, 39), match='408-55-1234'>

In [29]:
phone = re.search(r'\d\d\d-\d\d-\d\d\d\d',text)

In [30]:
phone

<re.Match object; span=(28, 39), match='408-55-1234'>

In [31]:
phone.group()

'408-55-1234'

In [32]:
phone = re.search(r'\d{3}-\d{2}-\d{4}',text)

In [33]:
phone

<re.Match object; span=(28, 39), match='408-55-1234'>

In [34]:
phone_pattern = re.compile(r'(\d{3})-(\d{2})-(\d{4})')

In [35]:
phone_pattern

re.compile(r'(\d{3})-(\d{2})-(\d{4})', re.UNICODE)

In [36]:
# A compiled pattern exposes search() directly, so call it on the pattern object
results = phone_pattern.search(text)

In [38]:
results.group()

'408-55-1234'

In [39]:
results.group(1)

'408'

In [40]:
results.group(2)

'55'

In [41]:
results.group(3)

'1234'

# Additional Regex Syntax

In [44]:
re.search(r'cat|dog','the dog is here') # cat or dog

<re.Match object; span=(4, 7), match='dog'>

In [48]:
re.findall(r'.at','The catin the hat went splat') # . is a wildcard: matches any single character followed by "at"

['cat', 'hat', 'lat']

In [49]:
re.findall(r'...at','The catin the hat went splat') # each . matches exactly one character (spaces included), so ...at grabs the three characters before "at"

['e cat', 'e hat', 'splat']

In [53]:
re.findall(r'^\d','1 is a number') # checks if entire text starts with number

['1']

In [54]:
re.findall(r'\d$','The 2 is a number 4') # $ anchors at the end: matches a digit only if the text ends with one

['4']

In [55]:
phrase = 'There are 3 numbers 34 inside 5 this sentence'

In [59]:
pattern = r'[^\d]+' # ^ inside [] negates the set, so [^\d] is any non-digit; + extends the match to runs of one or more

In [60]:
re.findall(pattern,phrase)

['There are ', ' numbers ', ' inside ', ' this sentence']

In [61]:
terst_phrase = 'This is a string! But it has puntuation. How can we remove it ?'

In [63]:
clean = re.findall(r'[^!.?]+',terst_phrase)

In [64]:
clean

['This is a string', ' But it has puntuation', ' How can we remove it ']

In [65]:
' '.join(clean)

'This is a string  But it has puntuation  How can we remove it '

In [71]:
text = 'Only find the hypen-word in this sentence. But you do not know how long-ish they are'

In [76]:
pattern = r'[\w]+-[\w]+' # \w matches a word character (letter, digit or underscore); + requires one or more on each side of the hyphen

In [77]:
re.findall(pattern,text)

['hypen-word', 'long-ish']

In [78]:
text = 'Hello, would you like to take a catfish?'
texttwo = "Hello, would you like to take a catnap?"
textthree = "Hello, would you like to take a caterpillar?"

In [82]:
re.search(r'cat(fish|nap|erpillar)',textthree)

<re.Match object; span=(32, 43), match='caterpillar'>

# Timing your Python Code

In [1]:
def func1(n):
    """Return the strings '0' through str(n - 1) as a list, built with a list comprehension."""
    numbers = range(n)
    return [str(value) for value in numbers]

In [2]:
func1(10)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [3]:
def func2(n):
    """Return the strings '0' through str(n - 1) as a list, built with map()."""
    as_strings = map(str, range(n))
    return list(as_strings)

In [4]:
func2(10)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [6]:
import time

In [11]:
# Timestamp before the call. perf_counter() is a monotonic, high-resolution
# clock intended for measuring intervals; time.time() follows the wall clock,
# which can be adjusted while the code is running.
start_time = time.perf_counter()

# Run the code being timed
result = func1(10000000)

# Timestamp after the call
end_time = time.perf_counter()

# Elapsed time in seconds
elapsed_time = end_time - start_time

print(elapsed_time)

2.901036024093628


In [12]:
# Timestamp before the call. perf_counter() is a monotonic, high-resolution
# clock intended for measuring intervals; time.time() follows the wall clock,
# which can be adjusted while the code is running.
start_time = time.perf_counter()

# Run the code being timed
result = func2(10000000)

# Timestamp after the call
end_time = time.perf_counter()

# Elapsed time in seconds
elapsed_time = end_time - start_time

print(elapsed_time)

2.307056188583374


## Result: func2 (map) ran faster than func1 (list comprehension) in this single timed run

In [13]:
import timeit

In [15]:
help(timeit.timeit)

Help on function timeit in module timeit:

timeit(stmt='pass', setup='pass', timer=<built-in function perf_counter>, number=1000000, globals=None)
    Convenience function to create Timer object and call timeit method.



In [34]:
stmt = '''
func1(100)
'''

In [35]:
setup = '''
def func1(n):
    return [str(num) for num in range(n)]
'''

In [36]:
timeit.timeit(stmt,setup,number=1000000)

22.827612989000045

In [37]:
stmt2 = '''
func2(100)
'''

In [38]:
setup2 = '''
def func2(n):

    return list(map(str,range(n)))
'''

In [39]:
timeit.timeit(stmt2,setup2,number=1000000)

20.374486331999947

In [43]:
%%timeit # Cell magic: only available in Jupyter/IPython
func1(100000)

29.8 ms ± 7.62 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [44]:
%%timeit # Cell magic: only available in Jupyter/IPython
func2(100000)

22.9 ms ± 3.74 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# Zipping and Unzipping Files

In [45]:
# Create (or truncate) fileone.txt. The with-block closes the handle
# automatically, even if write() raises.
with open('fileone.txt', 'w') as f:
    f.write('ONE FILE')

In [46]:
# Create (or truncate) filetwo.txt. The with-block closes the handle
# automatically, even if write() raises.
with open('filetwo.txt', 'w') as f:
    f.write('TWO FILE')

In [47]:
import zipfile

In [48]:
# Create comp_file.zip and add both text files with DEFLATE compression.
# The with-block closes the archive on exit, so it is finalised even if
# one of the writes fails.
with zipfile.ZipFile('comp_file.zip', 'w') as comp_file:
    comp_file.write('fileone.txt', compress_type=zipfile.ZIP_DEFLATED)
    comp_file.write('filetwo.txt', compress_type=zipfile.ZIP_DEFLATED)

## Extracting items from zip file

In [53]:
# Open the archive read-only and extract every member into extracted_content/.
# The with-block closes the archive handle once extraction finishes.
with zipfile.ZipFile('comp_file.zip', 'r') as zip_obj:
    zip_obj.extractall('extracted_content')

## Turning a folder into Zip file & vice-versa

In [55]:
pwd

'/Users/adityatiwari/Desktop/PythonBootcamp/MyPractice'

In [56]:
import shutil

In [57]:
# Relative to the notebook's working directory, so the notebook runs on any machine
dir_to_zip = 'extracted_content'

In [58]:
output_filename = 'Example'

In [59]:
shutil.make_archive(output_filename,'zip',dir_to_zip) # Making a Zip File

'/Users/adityatiwari/Desktop/PythonBootcamp/MyPractice/Example.zip'

In [60]:
# The file name must match the archive's exact case ('Example.zip'); a lowercase
# name only resolves on case-insensitive filesystems.
shutil.unpack_archive('Example.zip','file_unzip','zip') # Extract Example.zip into the folder file_unzip