## Identifying phone numbers


### Pattern : 999-999-8766

In [1]:
def isPhoneNumber(number):
    """Placeholder for the phone-number check; not implemented yet.

    The argument is ignored and the function always returns ``None``.
    """
    return None
        

In [2]:
number = "999-999-9999"

g1, g2, g3 = number.split("-")

In [3]:
print(f"group 1 {g1}\ngroup 2 {g2}\ngroup 3 {g3}")

group 1 999
group 2 999
group 3 9999


In [4]:
"9999".isdecimal()

True

In [5]:
"al987".isalnum()

True

In [6]:
"999".isdigit()

True

In [7]:
def isPhoneNumber(number, marker="-"):
    """Return True if *number* looks like ``999-999-9999``.

    The string must consist of exactly three groups of digits, of lengths
    3, 3 and 4, separated by *marker*.

    Args:
        number: The candidate string to validate.
        marker: The separator expected between the digit groups.

    Returns:
        True when the layout matches, False otherwise. Malformed input
        (wrong number of groups, wrong group sizes, non-digit characters)
        yields False rather than raising.
    """
    # str.split raises ValueError on an empty separator; no number can be
    # separated by "nothing", so treat it as a non-match.
    if marker == "":
        return False

    groups = number.split(marker)
    # Checking the count first avoids the unpacking error that occurs when
    # the string does not contain exactly two markers.
    if len(groups) != 3:
        return False

    expected_sizes = (3, 3, 4)
    return all(
        len(group) == size and group.isdigit()
        for group, size in zip(groups, expected_sizes)
    )

In [8]:
string = "999-000-1234"
isPhoneNumber(string)

True

In [9]:
string = "+91 999*000*1234"
isPhoneNumber(string, "*")

False

In [10]:
string = f"My number is {string}"
print(string)

My number is +91 999*000*1234


##  Approach

1. Import the regex module with `import re`.

2. Create a Regex object with the `re.compile()` function. (Remember to use a raw string.)

3. Pass the string you want to search into the Regex object’s `search()` method. This returns a Match object.

4. Call the Match object’s `group()` method to return a string of the actual matched text.


Shorthand character classes and what each one represents:

\d: Any numeric digit from 0 to 9.

\D: Any character that is not a numeric digit from 0 to 9.

\w: Any letter, numeric digit, or the underscore character. (Think of this as matching “word” characters.)

\W: Any character that is not a letter, numeric digit, or the underscore character.

\s: Any space, tab, or newline character. (Think of this as matching “space” characters.)

\S: Any character that is not a space, tab, or newline.

In [11]:
import re

# 999-999-8766

pattern = r'\d\d\d-\d\d\d-\d\d\d\d'

# regular expression object
reg = re.compile(pattern)

In [12]:
string = "My number is 999-999-9999"

m = reg.search(string)

In [13]:
m.group()

'999-999-9999'

## Grouping with `()`

In [14]:
pattern = r"(\d\d\d)-(\d\d\d-\d\d\d\d)"

reg = re.compile(pattern)

In [15]:
m = reg.search(string)

In [16]:
m.group()

'999-999-9999'

In [17]:
m.group(1)

'999'

In [18]:
m.group(2)

'999-9999'

## Shorthand 

In [19]:
pattern = r"\d{3}-\d{3}-\d{4}"

reg = re.compile(pattern)

In [20]:
m = reg.search(string)

In [21]:
m.group()

'999-999-9999'

## Finding all

In [22]:
string = "My number is 999-999-9999, 888-888-8999"

In [23]:
pattern

'\\d{3}-\\d{3}-\\d{4}'

In [24]:
m = reg.findall(string)

In [25]:
m

['999-999-9999', '888-888-8999']

In [26]:
def sqr(x):
    """Return *x* raised to the second power."""
    squared = pow(x, 2)
    return squared

In [27]:
sqr(4)

16

In [28]:
# Anonymous-function counterpart of sqr(): squares its single argument.
x = lambda x: pow(x, 2)

In [29]:
x(2)

4

## Matching a Parenthesis

In [191]:
pattern = r'(\(\d\d\d\))-\d\d\d-\d\d\d\d'
# re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)')

In [192]:
reg = re.compile(pattern)

In [193]:
string = "(999)-999-9999"

In [194]:
m = reg.search(string)

In [195]:
m.group()

'(999)-999-9999'

In [196]:
m.group(1)

'(999)'

## Matching Multiple Groups with the Pipe(|)

In [50]:
pattern = r"Gopal|Singh"
reg = re.compile(pattern)

In [53]:
string = "My name is Singh."

In [54]:
reg.findall(string)

['Singh']

## Matching alternatives that share a common prefix

In [66]:
string = "Batman mobile lost a wheel"

pattern = r'Bat(man|mobile|copter|bat)'

In [67]:
reg = re.compile(pattern)

In [68]:
reg.findall(string)

['man']

In [69]:
m = reg.search(string)

In [70]:
m.group()

'Batman'

In [71]:
m.group(1)

'man'

## Optional Matching with the Question Mark

In [78]:
pattern = r'Spider(wo)?man'

reg = re.compile(pattern)

In [79]:
string = "The tales of Spiderman."
m = reg.search(string)

In [80]:
m.group()

'Spiderman'

In [81]:
string = "The tales of Spiderwoman."
m = reg.search(string)

In [82]:
m.group()

'Spiderwoman'

In [83]:
pattern = r'(\d\d\d)? (\d\d\d-\d\d\d\d)'

In [84]:
string = "My number is 999-9999"

In [85]:
reg = re.compile(pattern)
m = reg.search(string)

In [86]:
m.group()

' 999-9999'

In [84]:
string = "My number is 999 999-9999"

In [85]:
reg.search(string).group()

'999 999-9999'

## Matching Zero or More with the Star (*)

Meaning --> The group preceding the star is either completely absent or repeated over and over again.

In other words: zero or more repetitions.

In [100]:
pattern = r'Spider(wo)*man'
reg = re.compile(pattern)

In [101]:
string = "The adventures of Spiderman"

In [102]:
m = reg.search(string)

In [103]:
m.group()

'Spiderman'

In [104]:
string = "The adventures of Spiderwowowoman"
m = reg.search(string)

In [105]:
m.group()

'Spiderwowowoman'

## Matching One or More with the Plus (+)

In [106]:
pattern = r'Spider(wo)+man'
reg = re.compile(pattern)

In [109]:
string = "The adventures of Spiderman"

In [113]:
m = reg.search(string)
print(m)

None


In [114]:
m.group()

AttributeError: 'NoneType' object has no attribute 'group'

In [118]:
string = "The adventures of Spiderwowowowoman"

In [119]:
m = reg.search(string)

In [120]:
m.group()

'Spiderwowowowoman'

## Matching Specific Repetitions with Braces

In [127]:
pattern = r"(Ha){3}"

string = "HaHaHaHaHa"

reg = re.compile(pattern)

In [128]:
m = reg.search(string)

In [129]:
m.group()

'HaHaHa'

In [130]:
string = "HaHa"

In [131]:
m = reg.search(string)

In [132]:
m.group()

AttributeError: 'NoneType' object has no attribute 'group'

In [151]:
pattern = r"(Ha){3,}"

In [152]:
reg = re.compile(pattern)

In [153]:
string = "HaHaHa"

m = reg.search(string)

In [154]:
m.group()

'HaHaHa'

In [155]:
string = "HaHaHaHaHaHa"
m = reg.search(string)

In [156]:
m.group()

'HaHaHaHaHaHa'

## Making your own character classes

In [197]:
pattern = r'[aeiouAEIOU]'

In [198]:
vowelReg = re.compile(pattern)

In [199]:
string = "My name is Gopal Singh."

In [200]:
vowelReg.findall(string)

['a', 'e', 'i', 'o', 'a', 'i']

In [205]:
pattern = r'[a-zA-Z0-9\@\!\+]'

In [209]:
string = "My number is 9999@!" 

In [210]:
reg = re.compile(pattern)

In [211]:
reg.findall(string)

['M',
 'y',
 'n',
 'u',
 'm',
 'b',
 'e',
 'r',
 'i',
 's',
 '9',
 '9',
 '9',
 '9',
 '@',
 '!']

## The caret and dollar sign characters

`^` --> Caret (the match must begin at the start of the string; the pattern that follows it can be anything)

`$` --> Dollar (the match must end at the end of the string; the pattern that precedes it can be anything)

In [218]:
pattern = r'^Hello'

hello_reg = re.compile(pattern)

string = "Hello, My name is Gopal."

In [219]:
m = hello_reg.search(string)

In [220]:
m.group()

'Hello'

In [221]:
pattern = r'^HelloGopal.$'

hello_reg = re.compile(pattern)

string = "HelloGopal."

In [222]:
m = hello_reg.search(string)

In [223]:
m.group()

'HelloGopal.'

In [236]:
string = "My number is 999"

In [237]:
regNum = re.compile(r'\d{3}$')

In [238]:
m = regNum.search(string)

In [239]:
m.group()

'999'

## The WildCard Character (.)

In [263]:
string = "The cat in the hat doing aawhat ?"

In [270]:
atreg = re.compile(r'\w.at')

In [271]:
atreg.search(string)

<_sre.SRE_Match object; span=(27, 31), match='what'>

In [272]:
atreg.findall(string)

['what']

## Matching Everything with dot-star (.*)

In [273]:
reg = re.compile(r'First Name:(.*)Last Name:(.*)')

In [286]:
string = "First Name: Gopal Last Name:Singh"

In [287]:
reg.search(string)

<_sre.SRE_Match object; span=(0, 33), match='First Name: Gopal Last Name:Singh'>

In [288]:
reg.findall(string)

[(' Gopal ', 'Singh')]

## Case-Insensitive Matching

In [290]:
reg1 = re.compile(r'Gopal')
reg2 = re.compile(r'gopal')
reg3 = re.compile(r'GOPAL')

In [293]:
reg = re.compile(r'gopal', re.I)

In [294]:
string = "My name is GOPAL"

In [295]:
reg.findall(string)

['GOPAL']

In [296]:
reg.search(string)

<_sre.SRE_Match object; span=(11, 16), match='GOPAL'>