# Basic Regex Examples 

In [1]:
import re
import time

In [2]:
def match_example(text):
    '''
    Report whether `text` begins with a run of one or more digits.

    Uses re.match, so only the start of the string is examined.
    Prints 'Match' together with the text and the length of the digit
    run, or 'No Match.' together with the text. Returns None.
    '''
    leading_digits = re.match(r'\d+', text)

    # Guard clause: handle the failure case first and leave.
    if leading_digits is None:
        print('No Match.')
        print('Text: {}'.format(text))
        return

    run_length = len(leading_digits.group(0))
    print('Match')
    print('Text: {0}. Length: {1}\n'.format(text, run_length))

    
    

In [3]:
# Try one string that starts with digits and one that has none.
for sample in ('1234', 'ABC'):
    match_example(sample)

Match
Text: 1234. Length: 4

No Match.
Text: ABC


In [4]:
def search_example(text):
    '''
    Report whether `text` contains a run of one or more digits anywhere.

    Uses re.search, so the first digit run found at any position counts.
    Prints 'Match' together with the text and the length of that run,
    or 'No Match.' together with the text. Returns None.
    '''
    first_run = re.search(r'\d+', text)

    # Guard clause: handle the failure case first and leave.
    if first_run is None:
        print('No Match.')
        print('Text: {}'.format(text))
        return

    run_length = len(first_run.group(0))
    print('Match')
    print('Text: {0}. Length: {1}\n'.format(text, run_length))

In [5]:
# Digits at the end, in the middle, absent, and surrounded by spaces.
for sample in (
    'ABCD123',
    'PQRS456XYZ',
    'NoNumbers',
    'Numbers 123 and spaces',
):
    search_example(sample)

Match
Text: ABCD123. Length: 3

Match
Text: PQRS456XYZ. Length: 3

No Match.
Text: NoNumbers
Match
Text: Numbers 123 and spaces. Length: 3



In [6]:
def digits_only(string):
    '''
    Check if the entire string is made out of digits.

    Parameters:
        string: the text to test.

    Prints 'Match' with the text and its length when every character is
    a digit, otherwise 'No Match.' with the text. Returns None.
    '''
    # Operate on the argument itself rather than on a global that merely
    # happens to share the caller's loop-variable name.
    # re.fullmatch requires the whole string to match; '^\d+$' with
    # re.search would also accept a string ending in a newline.
    match = re.fullmatch(r'\d+', string)

    if match:
        print('Match')
        print('Text: {0}. Length: {1}'.format(string, len(match.group(0))))
    else:
        print('No Match.')
        print('Text: {}'.format(string))

In [7]:
# Every string here consists solely of digits.
positiveTest = ['123', '03081993', '456', '246810']
# Every string here contains at least one non-digit character.
negativeTest = ['ABCDEFG', 'AFIF', 'ABU', '123ABC']


print('Positive Test')
for text in positiveTest:
    digits_only(text)
# Blank line to separate the two groups of results.
print()
print('Negative Test')
for text in negativeTest:
    digits_only(text)

Positive Test
Match
Text: 123. Length: 3
Match
Text: 03081993. Length: 8
Match
Text: 456. Length: 3
Match
Text: 246810. Length: 6

Negative Test
No Match.
Text: ABCDEFG
No Match.
Text: AFIF
No Match.
Text: ABU
No Match.
Text: 123ABC


In [8]:
def postal_codes(text):
    '''
    Print every standalone five-digit number found in `text`.

    A candidate must be exactly five digits bounded by word boundaries,
    so longer digit runs are not reported. For each hit, prints 'Match'
    followed by the matched digits and their start index. Returns None.
    '''
    five_digit_code = re.compile(r'\b\d{5}\b')

    for hit in five_digit_code.finditer(text):
        print('Match')
        print('Text: {0}. Index: {1}'.format(hit.group(0), hit.start()))


In [9]:
# Sample sentence containing four standalone five-digit numbers.
string = (
    'Kuala Lumpur uses codes from 50000 to 60000. '
    'Putrajaya uses codes from 62300 to 62988.'
)

In [10]:
# Report each five-digit number in the sample sentence with its index.
postal_codes(string)

Match
Text: 50000. Index: 29
Match
Text: 60000. Index: 38
Match
Text: 62300. Index: 71
Match
Text: 62988. Index: 80


In [11]:
def timestamps(text):
    '''
    Print the named components of each compact timestamp found in `text`.

    A timestamp is four digits (year), two (month), two (day) and an
    optional further two (hour). For every occurrence, prints a summary
    line with the full input text, the start index and the matched
    length, then each component; a missing hour prints as None.
    Returns None.
    '''
    pattern = r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})(?P<hour>\d{2})?'

    # (printed label, named group) pairs, in display order.
    components = (
        ('Year: ', 'year'),
        ('Month: ', 'month'),
        ('Day: ', 'day'),
        ('Hour: ', 'hour'),
    )

    for found in re.finditer(pattern, text):
        begin, end = found.span()
        summary = 'Match. Text: {0} Index: {1} Length: {2}'.format(text, begin, end - begin)
        print(summary)
        print('Access by components:')
        for label, group_name in components:
            print(label, found.group(group_name))


In [12]:
# Only eight digits follow the '=', so the optional hour group is None.
timestamps('timestamp=20190719')

Match. Text: timestamp=20190719 Index: 10 Length: 8
Access by components:
Year:  2019
Month:  07
Day:  19
Hour:  None


In [13]:
def grouping_by_number(text):
    '''
    Print the numbered capture groups of each compact timestamp in `text`.

    Uses the same 4-2-2-(2) digit layout as a year/month/day/hour stamp,
    but with unnamed groups. For every occurrence, prints a summary line
    with the full input text, the start index and the matched length,
    then each group numbered from 1; an unmatched optional group prints
    as None. Returns None.
    '''
    timestamp_re = re.compile(r'(\d{4})(\d{2})(\d{2})(\d{2})?')

    for found in timestamp_re.finditer(text):
        whole = found.group(0)
        print('Match. Text: {0} Index: {1} Length: {2}'.format(text, found.start(), len(whole)))

        # Group numbering in regular expressions starts at 1.
        for number, value in enumerate(found.groups(), start=1):
            print('Group: {0}, Value: {1}'.format(number, value))

In [14]:
# Only eight digits follow the '=', so the optional fourth group is None.
grouping_by_number('timestamps=20190719')

Match. Text: timestamps=20190719 Index: 11 Length: 8
Group: 1, Value: 2019
Group: 2, Value: 07
Group: 3, Value: 19
Group: 4, Value: None


In [15]:
def substitute_currency(text):
    '''
    Rewrite amounts written as '<number> ringgit' into '**MYR <number>**'.

    The number may contain comma-separated thousands and a two-digit
    decimal part; the unit may be 'ringgit' or 'ringgits'. Prints the
    original text followed by the rewritten text. Returns None.
    '''
    amount_in_ringgit = re.compile(r'(?P<value>\d+(,\d{3})*(\.\d{2})?)\s+ringgit(s)?')

    print('Text:\n{0}\n'.format(text))

    # \g<value> re-inserts the captured number into the replacement.
    rewritten = amount_in_ringgit.sub(r'**MYR \g<value>**', text)

    print('New Text:\n{0}'.format(rewritten))

In [16]:
# Three-line sample receipt with amounts written in ringgit.
price = '\n'.join([
    'Unit Cost of Laptop: 2,500.50 ringgit',
    'Tax: 15.00 ringgit',
    'Total: 2,515.50 ringgit',
])

In [17]:
# Rewrite every ringgit amount in the sample receipt.
substitute_currency(price)

Text:
Unit Cost of Laptop: 2,500.50 ringgit
Tax: 15.00 ringgit
Total: 2,515.50 ringgit

New Text:
Unit Cost of Laptop: **MYR 2,500.50**
Tax: **MYR 15.00**
Total: **MYR 2,515.50**


In [18]:
def fahrenheit_to_celcius(match):
    '''
    Replacement callback: turn a matched Fahrenheit value into Celsius text.

    Parameters:
        match: a regex match object with a named group 'fahrenheit'
               holding the numeric value as text.

    Returns:
        The Celsius value rounded to the nearest integer, followed by '°C'.
    '''
    fahrenheit = float(match.group("fahrenheit"))
    celsius = (fahrenheit - 32.0) * 5.0/9.0
    rounded = round(celsius)
    return '{0}°C'.format(rounded)

In [19]:
def temperature_convert(text):
    '''
    Convert every Fahrenheit reading in `text` to Celsius and print both versions.

    A reading is a number immediately followed by '°F'. Decimal values
    (e.g. 98.6°F) and negative values (e.g. -40°F) are captured whole, so
    the complete number is converted rather than only its trailing digits.
    Each reading is replaced with the text returned by
    fahrenheit_to_celcius.

    Parameters:
        text: string to scan for Fahrenheit readings.

    Returns:
        None. The original and the converted text are printed.
    '''
    # The minus sign is taken as part of the number only when it is not
    # attached to a preceding word character or dot, so a range such as
    # '60-70°F' still converts the 70 as a positive value.
    pattern = r"(?P<fahrenheit>(?:(?<![\w.])-)?\d+(?:\.\d+)?)\u00B0F"

    print('Text: ', format(text))

    new_text = re.sub(pattern, fahrenheit_to_celcius, text)

    print('Text after: ',format(new_text))

In [20]:
# Sample sentence containing one whole-number Fahrenheit reading.
text = "Today's temperature is 70°F"

In [21]:
# Convert the Fahrenheit reading in the sample sentence to Celsius.
temperature_convert(text)

Text:  Today's temperature is 70°F
Text after:  Today's temperature is 21°C


In [22]:
def split_list(some_list):
    '''
    Break a numbered inline list into its individual entries and print them.

    The separator is a run of digits, a period, and any whitespace that
    follows (e.g. '1.' or '2. '). Text before the first separator is
    kept as the first entry. Prints the input, then one entry per line.
    Returns None.
    '''
    item_marker = r"\d+\.\s*"

    print("Before split: ", some_list)

    entries = re.split(item_marker, some_list)

    print("After split: ")
    for entry in entries:
        print(entry)

In [23]:
# Inline numbered list used as input for split_list.
some_list = "I like 1.coffee 2.kueytiow 3.chatime 4.maggi"

In [24]:
# Split the sample list at each 'N.' marker and print the entries.
split_list(some_list)

Before split:  I like 1.coffee 2.kueytiow 3.chatime 4.maggi
After split: 
I like 
coffee 
kueytiow 
chatime 
maggi
