# Chapter 7 - RegEx

In [63]:
!powershell [regex]::new('') ^| Get-Member



   TypeName: System.Text.RegularExpressions.Regex

Name                MemberType Definition                                                                              
----                ---------- ----------                                                                              
Equals              Method     bool Equals(System.Object obj)                                                          
GetGroupNames       Method     string[] GetGroupNames()                                                                
GetGroupNumbers     Method     int[] GetGroupNumbers()                                                                 
GetHashCode         Method     int GetHashCode()                                                                       
GetObjectData       Method     void ISerializable.GetObjectData(System.Runtime.Serialization.SerializationInfo info,...
GetType             Method     type GetType()                                                              

In [14]:
import re

In [21]:
phone_re = re.compile(r'\d{3}-\d{3}-\d{4}')
result = phone_re.search('my number is 212-123-4567')
print(f'Found phone number {result.group()}')

Found phone number 212-123-4567


In [16]:
# https://kevinmarquette.github.io/2017-07-31-Powershell-regex-regular-expression/#select-string

In [25]:
!powershell \
$PhoneRe = [regex]::new('\d{3}-\d{3}-\d{4}') ;\
$Result = $PhoneRe.Match('my number is 212-123-4567') ;\
Write-Host "Found phone number $Result.Value"

Found phone number 212-123-4567


### Grouping with parentheses

In [54]:
phone_re = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
match = phone_re.search('my number is 212-123-4567')
print(match)
print(match.group(1))
print(match.group(2))
print(match.group(0))
print(match.group())
area_code, number = match.groups()
print(f'Area: {area_code} Number: {number}')

<re.Match object; span=(13, 25), match='212-123-4567'>
212
123-4567
212-123-4567
212-123-4567
Area: 212 Number: 123-4567


In [55]:
!powershell \
$PhoneRe = [regex]::new('(\d\d\d)-(\d\d\d-\d\d\d\d)') ;\
$Result = $PhoneRe.Match('my number is 212-123-4567') ;\
$Result ;\
$Result.Groups[1].Value ;\
$Result.Groups[2].Value ;\
$Result.Groups[0].Value ;\
$Result.Value ;\
$Full, $Area, $Number = $Result.Groups.Value ;\
Write-Host "Area: $Area Number: $Number"



Groups   : {0, 1, 2}
Success  : True
Name     : 0
Captures : {0}
Index    : 13
Length   : 12
Value    : 212-123-4567

212
123-4567
212-123-4567
212-123-4567
Area: 212 Number: 123-4567




### Matching multiple options with pipe

In [82]:
hero_re = re.compile (r'Batman|Tina Fey')
print(hero_re.search('Batman and Tina Fey'))
print(hero_re.search('Batwoman and Tina Fey'))

<re.Match object; span=(0, 6), match='Batman'>
<re.Match object; span=(13, 21), match='Tina Fey'>


In [91]:
!powershell \
$Hero_re = [regex]::new('Batman^|tina Fey') ;\
$Hero_re.match('Batman and Tina Fey') ;\
$Hero_re.match('Batwoman and Tina Fey')



Groups   : {0}
Success  : True
Name     : 0
Captures : {0}
Index    : 0
Length   : 6
Value    : Batman

Groups   : {0}
Success  : True
Name     : 0
Captures : {0}
Index    : 13
Length   : 8
Value    : Tina Fey





### Find all

In [62]:
ssn = re.compile(r'\d\d\d-\d\d-\d\d\d\d')
ssn.findall('My social is 123-54-3592. Mom\'s is 549-23-1236')

['123-54-3592', '549-23-1236']

In [68]:
!powershell \
$Ssn = [regex]::new('\d\d\d-\d\d-\d\d\d\d') ;\
Write-Host $Ssn.Matches('My social is 123-54-3592. Mom''s is 549-23-1236').Value

123-54-3592 549-23-1236


### Using brackets for multiple character options

In [177]:
print(re.findall(r'[abcde]', 'hi there, how are you?'))
!powershell Write-Host ([regex]::new('[.]*').Matches('hi there, how are you?').Value)

['e', 'e', 'a', 'e']
                      


re.IGNORECASE, re.DOTALL, and re.VERBOSE

In [108]:
# ignore case
print(re.match(r'hello', 'Hello world!', re.IGNORECASE))
!powershell [regex]::new('(?i:hello)').Match('Hello world!')

<re.Match object; span=(0, 5), match='Hello'>


Groups   : {0}
Success  : True
Name     : 0
Captures : {0}
Index    : 0
Length   : 5
Value    : Hello





In [109]:
# Ignore whitespace
print(re.match(r'\d\d\d-\d\d     -\d\d\d\d', '123-45-6788', re.VERBOSE))
!powershell [regex]::new('(?x:\d\d\d-\d\d-     \d\d\d\d)').Match('123-45-6788')

<re.Match object; span=(0, 11), match='123-45-6788'>


Groups   : {0}
Success  : True
Name     : 0
Captures : {0}
Index    : 0
Length   : 11
Value    : 123-45-6788





In [134]:
with open('regex_data.txt', 'w') as f:
    f.write('''LINE 1
LINE 2
LINE 3''')

In [167]:
# Dotall includes new-lines; Powershell.. not sure how to get this one to work
with open('regex_data.txt') as f:
    data = f.read()
    print(re.search('.*', data))
    print(re.search('.*', data, re.DOTALL))
!powershell Write-Host ([regex]::new('.*').Match((Get-Content regex_data.txt -Raw))) ;\
Write-Host ([regex]::new('[.]*').Match((Get-Content regex_data.txt -Raw)))

<re.Match object; span=(0, 6), match='LINE 1'>
<re.Match object; span=(0, 20), match='LINE 1\nLINE 2\nLINE 3'>
LINE 1
                      


In [158]:
!powershell \
(Get-Content regex_data.txt).length ;\
(Get-Content regex_data.txt -Raw).length ;\
echo (Get-Content regex_data.txt -Raw)

3
22
LINE 1
LINE 2
LINE 3
