# Examples with Regular Expressions

In [1]:
# Import package
import re  

In [2]:
# Acknowledgements:
# Uses content from - https://docs.python.org/3/library/re.html,
# https://www.w3schools.com/python/python_regex.asp,
# https://realpython.com/regex-python-part-2/

In [3]:
# String (data) to experiment with; the following regex cells reuse `data`
data = "The CSCE 771 course is taught at University this Fall!"
# Either 't' or 'T', followed by a literal 'h'
pattern = "[tT]h"
# re.search scans the string and returns a match object for the first hit
m = re.search(pattern, data)
# Called with no arguments, group() returns the whole matched text
m.group()

'Th'

In [4]:
# findall returns every non-overlapping match as a list of strings;
# `pattern` and `data` come from the previous cell
m = re.findall(pattern, data)
print(m)

['Th', 'th']


In [5]:
# Find "The" and "this": t/T, then 'h', then one or more 'e'/'i',
# then any one character (the '.' is why "The " keeps its trailing space)
pattern = "[tT]h[ei]+."
m = re.findall(pattern, data)
print(m)

['The ', 'this']


In [6]:
# One or more t/T followed by a single word character.
# Raw string: "\w" in a normal string literal is an invalid escape sequence
# (a warning today, an error in future Python versions); the value is unchanged.
pattern = r"[tT]+\w"
# List every match of `pattern` in `data` (the sentence defined in an earlier cell)
m = re.findall(pattern, data)
print(m)

['Th', 'ta', 'ty', 'th']


In [7]:
# Selective extraction
# (?<=-) is a positive lookbehind: \w+ matches a run of word characters only
# where it is immediately preceded by '-'; the hyphen is not part of the match
# See syntax: https://docs.python.org/3/library/re.html
m = re.search(r'(?<=-)\w+', 'self-taught person')
print(m)

<re.Match object; span=(5, 11), match='taught'>


In [8]:
# Example: extracting information of a known type (here, a first and last name)

In [9]:
# Two capture groups of word characters separated by a single space;
# re.match only matches at the beginning of the string
m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
m.group(0)       # The entire match

'Isaac Newton'

In [10]:
# `m` is the match object created in the previous cell
m.group(1)       # The first parenthesized subgroup.

'Isaac'

In [11]:
# Still using the match object `m` from the "Isaac Newton" cell
m.group(1, 2)    # Multiple arguments will return a tuple.

('Isaac', 'Newton')

In [12]:
# Cleaning a string with regex: drop the matched token from the address
email = "tony@tiremove_thisger.net"
m = re.search("remove_this", email)
# Stitch together the text before the match and the text after it
"".join((email[:m.start()], email[m.end():]))

'tony@tiger.net'

# Using WordNet

In [13]:
import nltk
# The WordNet corpus itself must be installed before nltk.corpus.wordnet can be
# queried; fetch it quietly first, then fetch the Open Multilingual Wordnet data.
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4')

[nltk_data] Downloading package omw-1.4 to /Users/biplavs/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [14]:
# Based on example at https://pythonprogramming.net/wordnet-nltk-tutorial/
from nltk.corpus import wordnet

In [15]:
# Word whose synsets are looked up in the following cells
# (this rebinds `data`, which held the regex sample sentence earlier)
data = "language"

In [16]:
# Find the synsets for the word, then print them
syns = wordnet.synsets(data)
print (syns)

[Synset('language.n.01'), Synset('speech.n.02'), Synset('lyric.n.01'), Synset('linguistic_process.n.02'), Synset('language.n.05'), Synset('terminology.n.01')]


In [17]:
# Show the name, definition and usage examples of the first synset
print(syns[0].name(), syns[0].definition(), syns[0].examples())

language.n.01 a systematic means of communicating by the use of sounds or conventional symbols ['he taught foreign languages', 'the language introduced is standard throughout the text', 'the speed with which a program can be executed depends on the language in which it is written']


In [18]:
def getWordNetInfo(word):
    """Print the synonyms and antonyms that WordNet lists for ``word``.

    Every lemma name of every synset returned by ``wordnet.synsets(word)``
    is treated as a synonym. For each lemma that has antonyms, only the
    first antonym is recorded. Both collections are printed as sets
    (duplicates removed); the function returns nothing.
    """
    # Flatten synsets -> lemmas once so both collections share the same walk
    lemmas = [
        lemma
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    ]

    synonyms = {lemma.name() for lemma in lemmas}
    # Lemmas without antonyms are skipped; otherwise keep just the first one
    antonyms = {lemma.antonyms()[0].name() for lemma in lemmas if lemma.antonyms()}

    print("Synonyms - " + str(synonyms))
    print("Antonyms - " + str(antonyms))

In [19]:
# Synonyms and antonyms for the word currently stored in `data`
getWordNetInfo(data)

Synonyms - {'words', 'linguistic_process', 'oral_communication', 'spoken_language', 'speech_communication', 'terminology', 'spoken_communication', 'linguistic_communication', 'lyric', 'nomenclature', 'speech', 'language', 'voice_communication'}
Antonyms - set()


In [20]:
# Same lookup for a different word, passed as a literal this time
getWordNetInfo("take")

Synonyms - {'consume', 'consider', 'shoot', 'conduct', 'accept', 'admit', 'issue', 'need', 'bring', 'take_away', 'subscribe', 'subscribe_to', 'have', 'read', 'remove', 'hold', 'ingest', 'necessitate', 'use_up', 'strike', 'train', 'assume', 'payoff', 'postulate', 'study', 'look_at', 'withdraw', 'takings', 'adopt', 'rent', 'proceeds', 'yield', 'occupy', 'deal', 'choose', 'call_for', 'lead', 'demand', 'exact', 'take', 'involve', 'film', 'aim', 'acquire', 'lease', 'hire', 'contain', 'take_on', 'pack', 'guide', 'carry', 'return', 'claim', 'charter', 'direct', 'engage', 'get', 'learn', 'select', 'fill', 'drive', 'make', 'take_up', 'contract', 'take_aim', 'get_hold_of', 'require', 'ask', 'submit', 'pick_out', 'take_in', 'convey'}
Antonyms - {'obviate', 'abstain', 'disclaim', 'give', 'refuse'}


In [21]:
# Find the synsets for "take", then print them
syns = wordnet.synsets("take")
print (syns)

[Synset('return.n.06'), Synset('take.n.02'), Synset('take.v.01'), Synset('take.v.02'), Synset('lead.v.01'), Synset('take.v.04'), Synset('assume.v.03'), Synset('take.v.06'), Synset('bring.v.01'), Synset('take.v.08'), Synset('take.v.09'), Synset('choose.v.01'), Synset('accept.v.02'), Synset('fill.v.04'), Synset('consider.v.03'), Synset('necessitate.v.01'), Synset('take.v.15'), Synset('film.v.01'), Synset('remove.v.01'), Synset('consume.v.02'), Synset('take.v.19'), Synset('take.v.20'), Synset('take.v.21'), Synset('assume.v.05'), Synset('accept.v.05'), Synset('take.v.24'), Synset('learn.v.04'), Synset('claim.v.05'), Synset('take.v.27'), Synset('aim.v.01'), Synset('take.v.29'), Synset('carry.v.02'), Synset('lease.v.04'), Synset('subscribe.v.05'), Synset('take.v.33'), Synset('take.v.34'), Synset('take.v.35'), Synset('claim.v.04'), Synset('accept.v.08'), Synset('contain.v.05'), Synset('take.v.39'), Synset('drive.v.16'), Synset('take.v.41'), Synset('contract.v.04')]


In [24]:
# Walk through every synset stored in `syns` by the previous cell
for i in syns:
    # See name, definition, examples
    print(i.name(), i.definition(), i.examples())

return.n.06 the income or profit arising from such transactions as the sale of land or other property ['the average return was about 5%']
take.n.02 the act of photographing a scene or part of a scene without interruption []
take.v.01 carry out ['take action', 'take steps', 'take vengeance']
take.v.02 require (time or space) ['It took three hours to get to work this morning', 'This event occupied a very short time']
lead.v.01 take somebody somewhere ['We lead him to our chief', 'can you take me to the main entrance?', 'He conducted us to the palace']
take.v.04 get into one's hands, take physically ['Take a cookie!', 'Can you take this bag, please']
assume.v.03 take on a certain form, attribute, or aspect ['His voice took on a sad tone', 'The story took a new turn', 'he adopted an air of superiority', 'She assumed strange manners', 'The gods assume human or animal form in these fables']
take.v.06 interpret something in a certain way; convey a particular meaning or impression ['I read thi