Create the function find_usernames that takes a list of email addresses and returns the username of each email address without the ‘@’ symbol and without its domain name.

In [1]:
## Course 1: Natural Language Processing
## Module 1: Intro to NLP in Python
## Coding Exercise 1

import re

emails = ['skateboard@bobaround.com', 'investigators@dangerous.com', 'vehicles@floatingvehicles.com', 'pipes@cars.com', 'engine@shutdown.com', 'water@power.com']

def find_usernames(emails):
  usernames = [re.findall('(\S+)@', email) for email in emails]
  return usernames

print(find_usernames(emails))

[['skateboard'], ['investigators'], ['vehicles'], ['pipes'], ['engine'], ['water']]


Create the function common_words that takes a list of strings and returns the four most common words used in the list.

In [2]:
## Course 1: Natural Language Processing
## Module 1: Intro to NLP in Python
## Coding Exercise 2

from nltk.tokenize import word_tokenize
from nltk import FreqDist

many_sentences = ["How many blankets do you want?", "There are many dogs here.", "How come there aren't many cats here?", "There are many people in the world."]

def common_words(data):
  tokens_by_sentence = [word_tokenize(sentence) for sentence in data]
  tokens = sum(tokens_by_sentence, [])
  lowercase_tokens = [token.lower() for token in tokens]
  return(FreqDist(lowercase_tokens).most_common(4))

print(common_words(many_sentences))


[('many', 4), ('there', 3), ('are', 3), ('how', 2)]


Create the function find_pronouns that takes a list of strings and returns a list of all the personal pronouns.

In [3]:
## Course 1: Natural Language Processing
## Module 1: Intro to NLP in Python
## Coding Exercise 3

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk import pos_tag

sentences = ['They are good at playing football.', 'He has many backpacks.', 'She has lots of pens in her bag.', 'I forgot my ID.', 'The dog stared at himself in the reflection.', 'She told me it had to be done by end of day.']

def find_pronouns(data):
  pronouns = []
  for sentence in data:
    tokens_no_sw = [token for token in word_tokenize(sentence) if token not in set(stopwords.words('english'))]
    tags = pos_tag(tokens_no_sw)
    for (token, tag) in tags:
      if tag == 'PRP': pronouns.append(token)
  return(pronouns)

print(find_pronouns(sentences))


['They', 'He', 'She', 'I', 'She']


Create the function find_names that takes a string and returns a set of all names used in the string.

In [None]:
## Course 1: Natural Language Processing
## Module 1: Intro to NLP in Python
## Coding Exercise 4

from nltk import pos_tag, ne_chunk
from nltk.tokenize import word_tokenize

text = "The various continuations of William of Tyre above mentioned represent the opinion of the native Franks (which is hostile to Richard I.); while in Nicetas, who wrote a history of the Eastern empire from 1118 to 1206, we have a Byzantine authority who, as Professor Bury remarks, 'differs from Anna and Cinnamus in his tone towards the crusaders, to whom he is surprisingly fair.'"

def find_names(data):
  tags = pos_tag(word_tokenize(data))
  labeled_chunks = ne_chunk(tags, binary=True)
  return set(
    " ".join(word[0] for word in chunked_word)
    for chunked_word in labeled_chunks
    if hasattr(chunked_word, "label") and chunked_word.label() == "NE")

print(find_names(text))


Create the function remove_verbs that takes a list of strings and returns all parts of speech which are chunked together except for verbs.

In [5]:
## Course 1: Natural Language Processing
## Module 1: Intro to NLP in Python
## Coding Exercise 5

from nltk.corpus import reuters
from nltk.tokenize import word_tokenize
from nltk import pos_tag, RegexpParser

sentences = ["He washed the car yesterday.", "I bought her a book and a coffee.", "She thinks going outside is healthy.", "The dog runs away."]

def remove_verbs(data):
  grammar = r"""ChinkAllVerbs: {<.*>+}
  }<VB.*>{"""
  parser = RegexpParser(grammar)
  for sentence in data:
    tokens_by_sentence = [word_tokenize(sentence) for sentence in data]
    tokens = sum(tokens_by_sentence, [])
    tags = pos_tag(tokens)
    chunked_data = parser.parse(tags)
  return(chunked_data)

print(remove_verbs(sentences))


(S
  (ChinkAllVerbs He/PRP)
  washed/VBD
  (ChinkAllVerbs the/DT car/NN yesterday/NN ./. I/PRP)
  bought/VBD
  (ChinkAllVerbs
    her/PRP
    a/DT
    book/NN
    and/CC
    a/DT
    coffee/NN
    ./.
    She/PRP)
  thinks/VBZ
  going/VBG
  (ChinkAllVerbs outside/JJ)
  is/VBZ
  (ChinkAllVerbs healthy/JJ ./. The/DT dog/NN)
  runs/VBZ
  (ChinkAllVerbs away/RB ./.))
