In [33]:
import nltk
from nltk import sent_tokenize, word_tokenize
from nltk.corpus import stopwords, wordnet
import re, collections
import string

In [None]:
corpus = ["The brown fox wasn't that quick and he couldn't win the race",
          "Hey that's a great deal! I just bought a phone for $199",
          "@@You'll (learn) a **lot** in the book. Python is an amazing language!@@"]

## Tokenizing text

In [None]:
def tokenize_text(text):
    '''
    Split the text into sentences and tokenize each sentence into words.
    Returns one list of word tokens per sentence.
    '''
    return list(map(word_tokenize, sent_tokenize(text)))

In [None]:
token_list = [tokenize_text(sentence)
              for sentence in corpus]
token_list

## Removing special characters

In [None]:
# string.punctuation holds the ASCII punctuation characters; PATTERN
# matches any single one of them.
PATTERN = re.compile('[{}]'.format(re.escape(string.punctuation)))

def remove_characters_after_tokenization(tokens):
    '''
    Strip every punctuation character from each token and drop the
    tokens that end up empty.
    '''
    filtered_tokens = []
    for token in tokens:
        cleaned = PATTERN.sub('', token)
        if cleaned:
            filtered_tokens.append(cleaned)
    return filtered_tokens

In [None]:
filtered_list_1 = [list(filter(None, [remove_characters_after_tokenization(tokens)
                                      for tokens in sentence_tokens]))
                   for sentence_tokens in token_list]
list(filtered_list_1)

In [None]:
def remove_characters_before_tokenization(sentence, keep_apostrophes=False):
    '''
    Trim the sentence and delete unwanted characters from it.

    With keep_apostrophes set, only a fixed set of symbols is removed,
    so apostrophes (and any character outside that set) survive.
    Otherwise everything except ASCII letters, digits and spaces is removed.
    '''
    if keep_apostrophes:
        unwanted = r'[?|$|&|*|%|@|(|)~]'
    else:
        # Only alpha-numeric characters and spaces are kept.
        unwanted = r'[^a-zA-Z0-9 ]'
    return re.sub(unwanted, r'', sentence.strip())

In [None]:
filtered_list_2 = [remove_characters_before_tokenization(sentence)
                   for sentence in corpus]
filtered_list_2

In [None]:
filtered_list_2 = [remove_characters_before_tokenization(sentence, keep_apostrophes=True)
                   for sentence in corpus]
filtered_list_2

## Expanding Contractions

In [None]:
# Maps an English contraction to its expanded form. Keys are lowercase,
# except for the first-person "I..." forms which are listed in both cases.
CONTRACTION_MAP = {
    "ain't": "is not",
    "aren't": "are not",
    "can't": "cannot",
    "can't've": "cannot have",
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "couldn't've": "could not have",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "hadn't": "had not",
    "hadn't've": "had not have",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he would",
    "he'd've": "he would have",
    "he'll": "he will",
    "he'll've": "he will have",
    "he's": "he is",
    "how'd": "how did",
    "how'd'y": "how do you",
    "how'll": "how will",
    "how's": "how is",
    "I'd": "I would",
    "I'd've": "I would have",
    "I'll": "I will",
    "I'll've": "I will have",
    "I'm": "I am",
    "I've": "I have",
    "i'd": "i would",
    "i'd've": "i would have",
    "i'll": "i will",
    "i'll've": "i will have",
    "i'm": "i am",
    "i've": "i have",
    "isn't": "is not",
    "it'd": "it would",
    "it'd've": "it would have",
    "it'll": "it will",
    "it'll've": "it will have",
    "it's": "it is",
    "let's": "let us",
    "ma'am": "madam",
    "mayn't": "may not",
    "might've": "might have",
    "mightn't": "might not",
    "mightn't've": "might not have",
    "must've": "must have",
    "mustn't": "must not",
    "mustn't've": "must not have",
    "needn't": "need not",
    "needn't've": "need not have",
    "o'clock": "of the clock",
    "oughtn't": "ought not",
    "oughtn't've": "ought not have",
    "shan't": "shall not",
    "sha'n't": "shall not",
    "shan't've": "shall not have",
    "she'd": "she would",
    "she'd've": "she would have",
    "she'll": "she will",
    "she'll've": "she will have",
    "she's": "she is",
    "should've": "should have",
    "shouldn't": "should not",
    "shouldn't've": "should not have",
    "so've": "so have",
    "so's": "so as",
    "that'd": "that would",
    "that'd've": "that would have",
    "that's": "that is",
    "there'd": "there would",
    "there'd've": "there would have",
    "there's": "there is",
    "they'd": "they would",
    "they'd've": "they would have",
    "they'll": "they will",
    "they'll've": "they will have",
    "they're": "they are",
    "they've": "they have",
    "to've": "to have",
    "wasn't": "was not",
    "we'd": "we would",
    "we'd've": "we would have",
    "we'll": "we will",
    "we'll've": "we will have",
    "we're": "we are",
    "we've": "we have",
    "weren't": "were not",
    "what'll": "what will",
    "what'll've": "what will have",
    "what're": "what are",
    "what's": "what is",
    "what've": "what have",
    "when's": "when is",
    "when've": "when have",
    "where'd": "where did",
    "where's": "where is",
    "where've": "where have",
    "who'll": "who will",
    "who'll've": "who will have",
    "who's": "who is",
    "who've": "who have",
    "why's": "why is",
    "why've": "why have",
    "will've": "will have",
    "won't": "will not",
    "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not",
    "wouldn't've": "would not have",
    "y'all": "you all",
    "y'all'd": "you all would",
    "y'all'd've": "you all would have",
    "y'all're": "you all are",
    "y'all've": "you all have",
    "you'd": "you would",
    "you'd've": "you would have",
    "you'll": "you will",
    "you'll've": "you will have",
    "you're": "you are",
    "you've": "you have"
}

In [None]:
def expand_contraction(sentence, contraction_mapping=CONTRACTION_MAP):
    '''
    Replace every contraction found in the sentence with its expansion.

    sentence: the text to process.
    contraction_mapping: dict of contraction -> expanded form. Matching is
        case-insensitive; an exact-case key is preferred when one exists.

    Returns the sentence with contractions expanded. When the matched text
    starts with a letter, that letter (and so its case) is kept as the
    first character of the expansion.
    '''
    if not contraction_mapping:
        # An empty alternation would match the empty string at every position.
        return sentence

    # Regex alternation is ordered, so longer contractions must come first;
    # otherwise "can't" would win over "can't've" and leave "'ve" behind.
    # Keys are escaped so they are always matched literally.
    ordered_keys = sorted(contraction_mapping, key=len, reverse=True)
    contraction_patterns = re.compile(
        '({})'.format('|'.join(re.escape(key) for key in ordered_keys)),
        flags=re.IGNORECASE)

    # Fallback lookup for matches whose case differs from the stored key.
    lowercase_mapping = {key.lower(): value
                         for key, value in contraction_mapping.items()}

    def expand_match(contraction):
        match = contraction.group(0)
        expanded_contraction = (contraction_mapping.get(match) or
                                lowercase_mapping.get(match.lower()))
        if not expanded_contraction:
            # Nothing usable to substitute; leave the text untouched.
            return match
        first_char = match[0]
        # Only graft the original first character when it is a letter;
        # for "'cause" it is an apostrophe and would yield "'ecause".
        if first_char.isalpha():
            expanded_contraction = first_char + expanded_contraction[1:]
        return expanded_contraction

    return contraction_patterns.sub(expand_match, sentence)

In [None]:
expanded_corpus = [expand_contraction(sentence, CONTRACTION_MAP)
                   for sentence in filtered_list_2]
expanded_corpus

## Removing stopwords

In [None]:
def remove_stopwords(tokens, stopword_list=None):
    '''
    Return the tokens that are not stopwords, in their original order.

    tokens: iterable of word tokens.
    stopword_list: optional iterable of stopwords to remove; when omitted,
        the NLTK English stopword list is used.

    The comparison is exact, so a token whose case differs from the
    stopword entry (e.g. 'The' vs 'the') is kept.
    '''
    if stopword_list is None:
        stopword_list = stopwords.words('english')
    # A set gives constant-time membership checks.
    stopword_set = set(stopword_list)
    return [token for token in tokens if token not in stopword_set]

In [None]:
expanded_corpus_tokens = [tokenize_text(text)
                          for text in expanded_corpus]
filtered_list_3 = [[remove_stopwords(tokens)
                    for tokens in sentence_tokens]
                   for sentence_tokens in expanded_corpus_tokens]
filtered_list_3

## Correcting words

- correcting repeated characters
- correcting spelling

In [27]:
old_word = 'finalllyyy'
repeat_pattern = re.compile(r'(\w*)(\w)\2(\w*)')
match_substitution = r'\1\2\3'
step = 1

# Drop one repeated character per pass until WordNet recognises the word
# or no repeated character is left to drop.
while not wordnet.synsets(old_word):
    new_word = repeat_pattern.sub(match_substitution, old_word)
    if new_word == old_word:
        # Nothing changed, so there is nothing more to collapse.
        print(f'Final word: {new_word}')
        break

    print(f'Step: {step} Word: {new_word}')
    step += 1

    # Continue from the last substituted state.
    old_word = new_word
else:
    # The loop condition failed: the word is known to WordNet.
    print(f'Final correct word: {old_word}')

Step: 1 Word: finalllyy
Step: 2 Word: finallly
Step: 3 Word: finally
Final correct word: finally


In [28]:
def remove_repeated_characters(tokens):
    '''
    Collapse repeated characters in each token, one at a time, stopping as
    soon as WordNet has an entry for the word or nothing is left to collapse.
    Returns the list of processed tokens.
    '''
    repeat_pattern = re.compile(r'(\w*)(\w)\2(\w*)')
    match_substitution = r'\1\2\3'

    def replace(old_word):
        current = old_word
        while not wordnet.synsets(current):
            shortened = repeat_pattern.sub(match_substitution, current)
            if shortened == current:
                break
            current = shortened
        return current

    return [replace(word) for word in tokens]

In [32]:
sample_sentence = 'My schoool is really amaaaziinngggg'
remove_repeated_characters(tokenize_text(sample_sentence)[0])

['My', 'school', 'is', 'really', 'amazing']

In [34]:
def tokens(text):
    '''Return every run of letters a-z in the lowercased text, in order.'''
    lowered = text.lower()
    return [hit.group() for hit in re.finditer('[a-z]+', lowered)]

In [37]:
# Read the corpus inside a context manager so the file handle is closed
# as soon as its contents have been tokenized.
with open('data/big.txt') as corpus_file:
    WORDS = tokens(corpus_file.read())
WORD_COUNTS = collections.Counter(WORDS)
WORD_COUNTS.most_common(10)

[('the', 80030),
 ('of', 40025),
 ('and', 38313),
 ('to', 28766),
 ('in', 22050),
 ('a', 21155),
 ('that', 12512),
 ('he', 12401),
 ('was', 11410),
 ('it', 10681)]

In [48]:
def edits0(word):
    '''
    Return the set of strings zero edits away from the input word,
    which is just the word itself.
    '''
    unchanged = set()
    unchanged.add(word)
    return unchanged

def edits1(word):
    '''
    Return the set of strings reachable from the input word with a single
    edit: deleting a character, swapping two adjacent characters,
    replacing a character, or inserting a lowercase letter.
    '''
    letters = 'abcdefghijklmnopqrstuvwxyz'
    candidates = set()
    # Walk over every (head, tail) split of the word, including the two
    # splits where one side is empty.
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]
        if tail:
            # Delete the first character of the tail, or replace it.
            candidates.add(head + tail[1:])
            candidates.update(head + letter + tail[1:] for letter in letters)
        if len(tail) > 1:
            # Swap the first two characters of the tail.
            candidates.add(head + tail[1] + tail[0] + tail[2:])
        # Insert a letter at the split point.
        candidates.update(head + letter + tail for letter in letters)
    return candidates
    
def edits2(word):
    '''
    Return the set of strings obtained by applying a single edit to every
    string that is one edit away from the input word.
    '''
    two_away = set()
    for one_away in edits1(word):
        two_away.update(edits1(one_away))
    return two_away

def known(words):
    '''
    Return the set of the given words that appear in the WORD_COUNTS
    dictionary.
    '''
    found = set()
    for candidate in words:
        if candidate in WORD_COUNTS:
            found.add(candidate)
    return found

In [49]:
word = 'fianlly'
edits0(word)

{'fianlly'}

In [52]:
word = 'fianlly'
edits1(word)

{'afianlly',
 'aianlly',
 'bfianlly',
 'bianlly',
 'cfianlly',
 'cianlly',
 'dfianlly',
 'dianlly',
 'efianlly',
 'eianlly',
 'faanlly',
 'faianlly',
 'fainlly',
 'fanlly',
 'fbanlly',
 'fbianlly',
 'fcanlly',
 'fcianlly',
 'fdanlly',
 'fdianlly',
 'feanlly',
 'feianlly',
 'ffanlly',
 'ffianlly',
 'fganlly',
 'fgianlly',
 'fhanlly',
 'fhianlly',
 'fiaally',
 'fiaanlly',
 'fiablly',
 'fiabnlly',
 'fiaclly',
 'fiacnlly',
 'fiadlly',
 'fiadnlly',
 'fiaelly',
 'fiaenlly',
 'fiaflly',
 'fiafnlly',
 'fiaglly',
 'fiagnlly',
 'fiahlly',
 'fiahnlly',
 'fiailly',
 'fiainlly',
 'fiajlly',
 'fiajnlly',
 'fiaklly',
 'fiaknlly',
 'fiallly',
 'fially',
 'fialnlly',
 'fialnly',
 'fiamlly',
 'fiamnlly',
 'fianally',
 'fianaly',
 'fianblly',
 'fianbly',
 'fianclly',
 'fiancly',
 'fiandlly',
 'fiandly',
 'fianelly',
 'fianely',
 'fianflly',
 'fianfly',
 'fianglly',
 'fiangly',
 'fianhlly',
 'fianhly',
 'fianilly',
 'fianily',
 'fianjlly',
 'fianjly',
 'fianklly',
 'fiankly',
 'fianlaly',
 'fianlay',
 'fi

In [53]:
known(edits1(word))

{'finally'}

In [55]:
known(edits2(word))

{'faintly', 'finally', 'finely', 'frankly'}

In [58]:
def correct(word):
    '''
    Return the most frequent known spelling closest to the input word.
    '''
    # Try edit distance 0 first, then 1, then 2; the first distance that
    # yields any known word decides the answer.
    for generate in (edits0, edits1, edits2):
        candidates = known(generate(word))
        if candidates:
            return max(candidates, key=WORD_COUNTS.get)
    # No known word within two edits: fall back to the input itself.
    return word

In [59]:
correct('fianlly')

'finally'

In [60]:
# correct() only handles lowercase input: an uppercase word is returned unchanged.
correct('FIANLLY')

'FIANLLY'

In [64]:
def correct_match(match):
    '''
    Spell-correct the word captured by a regex match and give the result
    the same casing (upper, lower or title) as the matched word.
    '''
    word = match.group()
    fixed = correct(word.lower())
    # Checks run in the same order of precedence: upper, lower, title.
    if word.isupper():
        return fixed.upper()
    if word.islower():
        return fixed.lower()
    if word.istitle():
        return fixed.title()
    # Mixed casing: return the correction as produced.
    return fixed

In [65]:
def correct_word_generic(text):
    '''
    Spell-correct every run of ASCII letters in the text and return
    the corrected text.
    '''
    word_pattern = re.compile('[a-zA-Z]+')
    return word_pattern.sub(correct_match, text)

In [66]:
correct_word_generic('fianlly')

'finally'

In [67]:
correct_word_generic('FIANLLY')

'FINALLY'

## Stemming

In [85]:
from nltk import PorterStemmer
from nltk.stem import LancasterStemmer, RegexpStemmer, SnowballStemmer

In [71]:
ps = PorterStemmer()

In [73]:
ps.stem('jumping'), ps.stem('jumps'), ps.stem('jumped')

('jump', 'jump', 'jump')

In [77]:
ps.stem('lying'), ps.stem('strange')

('lie', 'strang')

In [78]:
ls = LancasterStemmer()

In [79]:
ls.stem('jumping'), ls.stem('jumps'), ls.stem('jumped')

('jump', 'jump', 'jump')

In [80]:
ls.stem('lying'), ls.stem('strange')

('lying', 'strange')

In [82]:
rs = RegexpStemmer('ing$|s$|ed$', min=4)

In [83]:
rs.stem('jumping'), rs.stem('jumps'), rs.stem('jumped')

('jump', 'jump', 'jump')

In [84]:
rs.stem('lying'), rs.stem('strange')

('ly', 'strange')

In [86]:
ss = SnowballStemmer('german')

In [87]:
SnowballStemmer.languages

('arabic',
 'danish',
 'dutch',
 'english',
 'finnish',
 'french',
 'german',
 'hungarian',
 'italian',
 'norwegian',
 'porter',
 'portuguese',
 'romanian',
 'russian',
 'spanish',
 'swedish')

In [88]:
ss.stem('autobahnen')

'autobahn'

## Lemmatization

Similar to stemming - word affixes are removed to get the base form of the word.
For stemming, the base form is known as the root stem.

For lemmatization, the base form is known as the root word.
The root word is always present in the dictionary.

In [89]:
from nltk.stem import WordNetLemmatizer

In [90]:
wnl = WordNetLemmatizer()

In [91]:
# Lemmatize nouns.
wnl.lemmatize('cars', 'n')

'car'

In [92]:
wnl.lemmatize('men', 'n')

'men'

In [93]:
# Lemmatize verbs.
wnl.lemmatize('running', 'v')

'run'

In [94]:
wnl.lemmatize('ate', 'v')

'eat'

In [95]:
# Lemmatize adjectives.
wnl.lemmatize('saddest', 'a')

'sad'

In [96]:
wnl.lemmatize('fancier', 'a')

'fancy'

## Text syntax and structure
- parts of speech (POS) tagging
- shallow parsing
- dependency-based parsing
- constituency-based parsing

In [102]:
!pip3 install pattern



In [104]:
!pip3 install -U spacy
!python3 -m spacy download en_core_web_sm

Requirement already up-to-date: spacy in /usr/local/lib/python3.7/site-packages (2.2.1)
Collecting en_core_web_sm==2.2.0 from https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz#egg=en_core_web_sm==2.2.0
[?25l  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz (12.0MB)
[K     |████████████████████████████████| 12.0MB 4.4MB/s eta 0:00:01
Building wheels for collected packages: en-core-web-sm
  Building wheel for en-core-web-sm (setup.py) ... [?25ldone
[?25h  Stored in directory: /private/var/folders/7m/74_ct3hx33d878n626w1wxyc0000gn/T/pip-ephem-wheel-cache-szpn37f5/wheels/48/5c/1c/15f9d02afc8221a668d2172446dd8467b20cdb9aef80a172a4
Successfully built en-core-web-sm
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-2.2.0
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web

In [106]:
!pip3 install pydot-ng
!pip3 install graphviz



## Important machine learning concepts

- data preparation: usually consists of pre-processing the data before extracting features and training
- feature extraction: the process of extracting useful features from raw data that are used to train machine learning models
- features: various useful attributes of the data (examples could be age, weight, and so on for personal data)
- training data: a set of data points used to train a model
- testing/validation data: a set of data points on which a pre-trained model is tested and evaluated to see how well it performs
- model: built using a combination of data/features and a machine learning algorithm that could be supervised or unsupervised
- accuracy: how well the model predicts something (also has other detailed evaluation metrics like precision, recall and f1-score)

### Recommended POS Taggers

In [144]:
from pattern.en import tag
from nltk.corpus import treebank
from nltk.tag import DefaultTagger, RegexpTagger, UnigramTagger, BigramTagger, TrigramTagger
from nltk.classify import NaiveBayesClassifier
from nltk.tag.sequential import ClassifierBasedPOSTagger 
import spacy

spacy.load('en_core_web_sm')

<spacy.lang.en.English at 0x1645d5790>

In [115]:
sentence = 'The brown fox is quick and he is jumping over the lazy dog'

In [116]:
tokens = nltk.word_tokenize(sentence)
tagged_sent = nltk.pos_tag(tokens, tagset='universal')
tagged_sent

[('The', 'DET'),
 ('brown', 'ADJ'),
 ('fox', 'NOUN'),
 ('is', 'VERB'),
 ('quick', 'ADJ'),
 ('and', 'CONJ'),
 ('he', 'PRON'),
 ('is', 'VERB'),
 ('jumping', 'VERB'),
 ('over', 'ADP'),
 ('the', 'DET'),
 ('lazy', 'ADJ'),
 ('dog', 'NOUN')]

In [117]:
tagged_sent = tag(sentence)
tagged_sent

[('The', 'DT'),
 ('brown', 'JJ'),
 ('fox', 'NN'),
 ('is', 'VBZ'),
 ('quick', 'JJ'),
 ('and', 'CC'),
 ('he', 'PRP'),
 ('is', 'VBZ'),
 ('jumping', 'VBG'),
 ('over', 'IN'),
 ('the', 'DT'),
 ('lazy', 'JJ'),
 ('dog', 'NN')]

## Building your own POS taggers

In [120]:
data = treebank.tagged_sents()
train_data = data[:3500]
test_data = data[3500:]

In [122]:
# What the training data looks like.
train_data[0]

[('Pierre', 'NNP'),
 ('Vinken', 'NNP'),
 (',', ','),
 ('61', 'CD'),
 ('years', 'NNS'),
 ('old', 'JJ'),
 (',', ','),
 ('will', 'MD'),
 ('join', 'VB'),
 ('the', 'DT'),
 ('board', 'NN'),
 ('as', 'IN'),
 ('a', 'DT'),
 ('nonexecutive', 'JJ'),
 ('director', 'NN'),
 ('Nov.', 'NNP'),
 ('29', 'CD'),
 ('.', '.')]

In [123]:
tokens = nltk.word_tokenize(sentence)
tokens

['The',
 'brown',
 'fox',
 'is',
 'quick',
 'and',
 'he',
 'is',
 'jumping',
 'over',
 'the',
 'lazy',
 'dog']

In [125]:
dt = DefaultTagger('NN')

# Accuracy on test data.
dt.evaluate(test_data)

0.1454158195372253

In [127]:
# Tagging our sample sentence.
dt.tag(tokens)

[('The', 'NN'),
 ('brown', 'NN'),
 ('fox', 'NN'),
 ('is', 'NN'),
 ('quick', 'NN'),
 ('and', 'NN'),
 ('he', 'NN'),
 ('is', 'NN'),
 ('jumping', 'NN'),
 ('over', 'NN'),
 ('the', 'NN'),
 ('lazy', 'NN'),
 ('dog', 'NN')]

In [129]:
# (regex, tag) rules handed to RegexpTagger, listed from specific suffixes
# down to a catch-all that tags everything as a noun.
# NOTE(review): presumably the first matching rule wins, so order matters --
# confirm against the NLTK RegexpTagger documentation.
patterns = [
    (r'.*ing$', 'VBG'), # Gerunds
    (r'.*ed$', 'VBD'), # Simple past
    (r'.*es$', 'VBZ'), # 3rd singular present
    (r'.*ould$', 'MD'), # Modals
    (r'.*\'s$', 'NN$'), # Possessive nouns
    (r'.*s$', 'NNS'), # Plural nouns
    # NOTE(review): the '.' in the next pattern is unescaped, so it matches
    # any character (e.g. the comma in '1,000'), not only a decimal point --
    # confirm this is intended before changing it.
    (r'^-?[0-9]+(.[0-9]+)?$', 'CD'), # Cardinal numbers
    (r'.*', 'NN') # Nouns (default)
]
rt = RegexpTagger(patterns)

In [130]:
rt.evaluate(test_data)

0.24039113176493368

In [131]:
rt.tag(tokens)

[('The', 'NN'),
 ('brown', 'NN'),
 ('fox', 'NN'),
 ('is', 'NNS'),
 ('quick', 'NN'),
 ('and', 'NN'),
 ('he', 'NN'),
 ('is', 'NNS'),
 ('jumping', 'VBG'),
 ('over', 'NN'),
 ('the', 'NN'),
 ('lazy', 'NN'),
 ('dog', 'NN')]

In [133]:
ut = UnigramTagger(train_data)
bt = BigramTagger(train_data)
tt = TrigramTagger(train_data)

In [134]:
ut.evaluate(test_data)

0.8607803272340013

In [135]:
ut.tag(tokens)

[('The', 'DT'),
 ('brown', None),
 ('fox', None),
 ('is', 'VBZ'),
 ('quick', 'JJ'),
 ('and', 'CC'),
 ('he', 'PRP'),
 ('is', 'VBZ'),
 ('jumping', 'VBG'),
 ('over', 'IN'),
 ('the', 'DT'),
 ('lazy', None),
 ('dog', None)]

In [136]:
bt.evaluate(test_data)

0.13466937748087907

In [137]:
bt.tag(tokens)

[('The', 'DT'),
 ('brown', None),
 ('fox', None),
 ('is', None),
 ('quick', None),
 ('and', None),
 ('he', None),
 ('is', None),
 ('jumping', None),
 ('over', None),
 ('the', None),
 ('lazy', None),
 ('dog', None)]

In [138]:
tt.evaluate(test_data)

0.08064672281924679

In [139]:
tt.tag(tokens)

[('The', 'DT'),
 ('brown', None),
 ('fox', None),
 ('is', None),
 ('quick', None),
 ('and', None),
 ('he', None),
 ('is', None),
 ('jumping', None),
 ('over', None),
 ('the', None),
 ('lazy', None),
 ('dog', None)]

In [140]:
def combined_tagger(train_data, taggers, backoff=None):
    '''
    Build a chain of taggers in which each tagger falls back on the
    previously built one when it cannot tag the input tokens.

    Every class in taggers is instantiated on train_data, in order, with
    the tagger built before it (initially backoff) as its backoff.
    Returns the last tagger built, or backoff when taggers is empty.
    '''
    chain = backoff
    for tagger_class in taggers:
        chain = tagger_class(train_data, backoff=chain)
    return chain

In [141]:
ct = combined_tagger(train_data=train_data,
                     taggers=[UnigramTagger, BigramTagger, TrigramTagger],
                     backoff=rt)

In [142]:
ct.evaluate(test_data)

0.9094781682641108

In [143]:
ct.tag(tokens)

[('The', 'DT'),
 ('brown', 'NN'),
 ('fox', 'NN'),
 ('is', 'VBZ'),
 ('quick', 'JJ'),
 ('and', 'CC'),
 ('he', 'PRP'),
 ('is', 'VBZ'),
 ('jumping', 'VBG'),
 ('over', 'IN'),
 ('the', 'DT'),
 ('lazy', 'NN'),
 ('dog', 'NN')]

In [145]:
nbt = ClassifierBasedPOSTagger(train=train_data, 
                               classifier_builder=NaiveBayesClassifier.train)

In [146]:
nbt.evaluate(test_data)

0.9306806079969019

In [147]:
nbt.tag(tokens)

[('The', 'DT'),
 ('brown', 'JJ'),
 ('fox', 'NN'),
 ('is', 'VBZ'),
 ('quick', 'JJ'),
 ('and', 'CC'),
 ('he', 'PRP'),
 ('is', 'VBZ'),
 ('jumping', 'VBG'),
 ('over', 'IN'),
 ('the', 'DT'),
 ('lazy', 'JJ'),
 ('dog', 'VBG')]