## Regular Expression

In [1]:
import re

In [3]:
text = "The agent's phone number is 555-555-1234. Call soon!"

In [4]:
'phone' in text

True

In [5]:
pattern = 'phone'

In [6]:
re.search(pattern, text)

<re.Match object; span=(12, 17), match='phone'>

In [7]:
pattern = 'NOT IN TEXT'

In [8]:
re.search(pattern, text)

In [9]:
pattern = 'phone'

In [10]:
match = re.search(pattern, text)

In [11]:
match

<re.Match object; span=(12, 17), match='phone'>

In [12]:
match.span()

(12, 17)

In [13]:
match.start()

12

In [14]:
match.end()

17

In [15]:
text = 'my phone once, my phone twice'

In [16]:
match = re.search('phone', text)

In [18]:
match

<re.Match object; span=(3, 8), match='phone'>

In [19]:
matches = re.findall('phone', text)

In [20]:
matches

['phone', 'phone']

In [21]:
len(matches)

2

In [25]:
# finditer yields one Match object per occurrence, in left-to-right order.
for match in re.finditer('phone', text):
    print(match)
    print(match.span())
    # Print the matched text followed by a blank separator line.
    print(match.group(), end="\n\n")

<re.Match object; span=(3, 8), match='phone'>
(3, 8)
phone

<re.Match object; span=(18, 23), match='phone'>
(18, 23)
phone



## Regular Expression Pattern Codes
### Character Identifiers
         Character               Description           Example Pattern Code            Example Match
      ------------------------------------------------------------------------------------------------------------
          \d                        A Digit             file_\d\d                       file_25
          
          \w                   Alphanumeric or _        \w-\w\w\w                       A-b_1
          
          \s                   White Space               a\sb\sc                        a b c
          
          \D                   A non digit               \D\D\D                         ABC
          
          \W                  Non-Alpha Numeric          \W\W\W\W\W                     *-+=)
          
          \S                  Non-Whitespace            \S\S\S\S                        Yoyo
      

In [41]:
text = "My phone number is 123-456-7890"

In [42]:
phone = re.search(r'\d\d\d-\d\d\d-\d\d\d\d', text)

In [43]:
phone

<re.Match object; span=(19, 31), match='123-456-7890'>

In [44]:
phone.group()

'123-456-7890'

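### Quantifiers
        Character      Description                         Example Pattern Code        Example Match
      ------------------------------------------------------------------------------------------------------------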
        +              Occurs one or more times            Version \w-\w+              Version A-b1_1
        
        {3}            Occurs exactly 3 times              \D{3}                       abc
        
        {2,4}          Occurs 2 to 4 times                 \d{2,4}                     123
        
        {3,}           Occurs 3 or more                    \w{3,}                      anycharacters
        
        *              Occurs 0 or more times              A*B*C*                      AAACC
        
        ?              Once or none                        plurals?                     plural

In [45]:
text

'My phone number is 123-456-7890'

In [46]:
phone = re.search(r'\d{3}-\d{3}-\d{4}', text)

In [47]:
phone

<re.Match object; span=(19, 31), match='123-456-7890'>

In [48]:
phone_pattern = re.compile(r'(\d{3})-(\d{3})-(\d{4})')

In [49]:
# phone_pattern is already compiled, so call search() on it directly
# instead of routing it back through the module-level re.search().
results = phone_pattern.search(text)

In [51]:
results.group()

'123-456-7890'

In [52]:
results.group(1)

'123'

In [53]:
results.group(2)

'456'

## Additional Regex Syntax

In [58]:
re.search(r'cat|dog', 'The dog is here')  # | -> Search for cat or dog in the string

<re.Match object; span=(4, 7), match='dog'>

In [62]:
# . -> wildcard: each dot matches any single character except a newline,
# so '...at' grabs "at" together with the three characters right before it
re.findall(r'...at', 'The cat and the hat and sat and splat there.')

['e cat', 'e hat', 'd sat', 'splat']

In [64]:
# ^ -> anchors the match to the start of the string; with \d after it,
# the pattern matches only when the string begins with a digit
re.findall(r'^\d', "1 is a number")

['1']

In [67]:
# $ -> anchors the match to the end of the string; with \d before it,
# the pattern matches only when the string ends with a digit
re.findall(r'\d$', "Number is 2")

['2']

In [68]:
phrase = "There are 3 numbers 34 inside 5 this sentence"

In [73]:
# [^...] -> negated character class: [^\d]+ matches each run of one or more
# characters that are NOT digits, so the numbers are left out of the results
pattern = r'[^\d]+'

In [74]:
re.findall(pattern, phrase)

['There are ', ' numbers ', ' inside ', ' this sentence']

In [75]:
test_phrase = "This is a string! But is has punctuation. How can we remove it?"

In [78]:
# Collect each run of characters that is not '!', '.', '?' or a space,
# i.e. the individual words with the listed punctuation stripped off
words = re.findall(r'[^!.? ]+', test_phrase)

In [80]:
' '.join(words)

'This is a string But is has punctuation How can we remove it'

In [81]:
text = "Only find the hypen-words in this sentence. But you do not know how long-ish they are"

In [86]:
# One or more word characters, a literal hyphen, then one or more word
# characters; the + quantifiers let each side be of any length
pattern = r'[\w]+-[\w]+'

In [87]:
re.findall(pattern, text)

['hypen-words', 'long-ish']

In [88]:
text = "Hello, would you like some catfish?"
texttwo = "Hello, would you like to take a catnap?"
textthree = "Hello, have you seen this caterpillar?"

In [92]:
# (a|b|c) -> grouped alternation: match 'cat' followed by any one of the
# listed endings ('fish', 'nap' or 'erpillar')
re.search(r'cat(fish|nap|erpillar)', textthree)

<re.Match object; span=(26, 37), match='caterpillar'>