## POS tagging using modified Viterbi

### Data Preparation

In [1]:
#Importing libraries
import nltk
from sklearn.model_selection import train_test_split

import random
import numpy as np
import pandas as pd
import pprint, time
import collections

from nltk.tokenize import word_tokenize

In [2]:
# reading the Treebank tagged sentences
nltk_data = list(nltk.corpus.treebank.tagged_sents(tagset='universal'))

In [3]:
nltk_data[:2]

[[('Pierre', 'NOUN'),
  ('Vinken', 'NOUN'),
  (',', '.'),
  ('61', 'NUM'),
  ('years', 'NOUN'),
  ('old', 'ADJ'),
  (',', '.'),
  ('will', 'VERB'),
  ('join', 'VERB'),
  ('the', 'DET'),
  ('board', 'NOUN'),
  ('as', 'ADP'),
  ('a', 'DET'),
  ('nonexecutive', 'ADJ'),
  ('director', 'NOUN'),
  ('Nov.', 'NOUN'),
  ('29', 'NUM'),
  ('.', '.')],
 [('Mr.', 'NOUN'),
  ('Vinken', 'NOUN'),
  ('is', 'VERB'),
  ('chairman', 'NOUN'),
  ('of', 'ADP'),
  ('Elsevier', 'NOUN'),
  ('N.V.', 'NOUN'),
  (',', '.'),
  ('the', 'DET'),
  ('Dutch', 'NOUN'),
  ('publishing', 'VERB'),
  ('group', 'NOUN'),
  ('.', '.')]]

### Split Treebank in Train and Test set 

In [4]:
# Split data into train and test
random.seed(123)
train_set, test_set = train_test_split(nltk_data,train_size= .95, test_size=0.05, random_state = 123)

In [5]:
train_set[0]

[('Reliance', 'NOUN'),
 ('acquired', 'VERB'),
 ('a', 'DET'),
 ('7', 'NUM'),
 ('%', 'NOUN'),
 ('UAL', 'NOUN'),
 ('stake', 'NOUN'),
 ('early', 'ADV'),
 ('this', 'DET'),
 ('year', 'NOUN'),
 ('at', 'ADP'),
 ('an', 'DET'),
 ('average', 'ADJ'),
 ('cost', 'NOUN'),
 ('of', 'ADP'),
 ('$', '.'),
 ('110', 'NUM'),
 ('*U*', 'X'),
 ('a', 'DET'),
 ('share', 'NOUN'),
 (',', '.'),
 ('and', 'CONJ'),
 ('reduced', 'VERB'),
 ('its', 'PRON'),
 ('stake', 'NOUN'),
 ('to', 'PRT'),
 ('4.7', 'NUM'),
 ('%', 'NOUN'),
 ('after', 'ADP'),
 ('UAL', 'NOUN'),
 ('accepted', 'VERB'),
 ('the', 'DET'),
 ('bid', 'NOUN'),
 ('at', 'ADP'),
 ('prices', 'NOUN'),
 ('higher', 'ADJ'),
 ('than', 'ADP'),
 ('$', '.'),
 ('282', 'NUM'),
 ('*U*', 'X'),
 ('a', 'DET'),
 ('share', 'NOUN'),
 ('.', '.')]

In [6]:
# Getting list of tagged words
train_tagged_words = [word for tup in train_set for word in tup]

test_tagged_words = [word for tup in test_set for word in tup]

train_tagged_words[:10]

[('Reliance', 'NOUN'),
 ('acquired', 'VERB'),
 ('a', 'DET'),
 ('7', 'NUM'),
 ('%', 'NOUN'),
 ('UAL', 'NOUN'),
 ('stake', 'NOUN'),
 ('early', 'ADV'),
 ('this', 'DET'),
 ('year', 'NOUN')]

In [7]:
# list of all words
train_words = [tags[0] for tags in train_tagged_words]
test_words = [tags[0] for tags in test_tagged_words]
print(len(train_words))

95656


In [8]:
# unique vocabulary
v_train = set(train_words)
v_test = set(test_words)
print(len(v_train))

12102


In [125]:
# check the differnece in vocabulary of train and test
(v_test.difference(v_train))

{"'82",
 '*-101',
 '*-109',
 '*-144',
 '*-147',
 '*-150',
 '*-151',
 '*-155',
 '*-156',
 '*T*-104',
 '*T*-105',
 '*T*-143',
 '*T*-144',
 '*T*-161',
 '*T*-168',
 '*T*-184',
 '*T*-232',
 '*T*-249',
 '*T*-250',
 '115',
 '154,240,000',
 '158,666',
 '176.1',
 '1925',
 '1934',
 '20.07',
 '26,956',
 '3.43',
 '352.7',
 '35500.64',
 '51-year-old',
 '576',
 '6.53',
 '608,413',
 '62.625',
 '63.79',
 '692',
 '7.20',
 '7.84',
 '80.8',
 '81.8',
 '84-year-old',
 '960',
 '967,809',
 'Alexander',
 'Ariail',
 'Assets',
 'Bennett',
 'Boeing',
 'Buckhead',
 'Carbide',
 'Carnegie-Mellon',
 'Carrier',
 'Cartons',
 'Cerf',
 'Competes',
 'Debt',
 'Default',
 'Destinations',
 'Determining',
 'Discos',
 'Doerflinger',
 'Drink',
 'Dunn',
 'Dynamics',
 'Editorials',
 'Emile',
 'Foreigners',
 'Fuentes',
 'Graham',
 'Green',
 'Gringo',
 'Hans',
 'Helsinki',
 'Henderson',
 'Herald-American',
 'Honolulu',
 'Ian',
 'Joni',
 'Kelli',
 'Kirkpatrick',
 'Leningrad',
 'Leon',
 'Level',
 'Lucille',
 'Manila',
 'McCabe',
 'M

#### Observation:
There is a difference in the volcabularies of train and test set. Hence there will be cases on unkown words while tagging test dataset. We shall try following methods for unknown words

<hr>

1. Tag unkown words with mode of available tags.
2. Apply Regex to find the closest tag from train set that can be aplied to unknwon words.

<hr>

In [10]:
# number of unique tags
T = set([tags[1] for tags in train_tagged_words])
len(T)

12

In [11]:
# Max tags
most_tag = collections.Counter([tags[1] for tags in train_tagged_words]).most_common(1)[0][0]
most_tag

'NOUN'

### Build the vanilla Viterbi based POS tagger

In [12]:
# Emission Probability
def word_given_tag(word, tag, train_bag = train_tagged_words):
    # find all words where the give tag is available
    tag_list = [i for i in train_bag if i[1] == tag]
    # from tag_list find out where the given word is available
    word_given_tag_list = [i[0] for i in tag_list if i[0] == word]
    
    count_tags = len(tag_list)
    count_words_given_tag = len(word_given_tag_list)
    
    return(count_words_given_tag/count_tags)

In [13]:
# Transition Probability
def t2_given_t1(t2, t1, train_bag = train_tagged_words):
    # find all tags
    all_tags = [i[1] for i in train_bag]
    
    # from all tags find all t1 tages
    count_t1 = len([i for i in all_tags if i == t1])
    
    # from all tags find where t1 is followed by t2
    count_t2_t1 = 0
    
    for index in range(len(all_tags)-1):
        if all_tags[index]==t1 and all_tags[index+1] == t2:
            count_t2_t1 += 1
       
    return(count_t2_t1/count_t1)

In [14]:
# Transition Probability Matrix of all combinations of tags t2 given tags t1
tags_matrix = np.zeros((len(T), len(T)), dtype='float32')

# column is t2, row is t1

for i, t1 in enumerate(list(T)):
    for j, t2 in enumerate(list(T)):
        tags_matrix[i,j] = t2_given_t1(t2,t1)

In [15]:
tags_matrix

array([[6.71592802e-02, 6.56814431e-04, 1.06732352e-02, 2.08538584e-02,
        4.92610829e-03, 6.99507415e-01, 7.79967159e-02, 2.08538584e-02,
        1.55993430e-02, 6.46962225e-02, 1.23152705e-02, 4.76190494e-03],
       [7.18146712e-02, 8.10810830e-03, 1.15830116e-02, 9.34362933e-02,
        9.65250935e-03, 2.08880305e-01, 2.35521235e-02, 6.17760606e-03,
        5.01930481e-03, 4.09266427e-02, 4.86872584e-01, 3.39768343e-02],
       [8.56954530e-02, 1.64798945e-02, 1.97758735e-03, 1.41727095e-02,
        1.02834545e-01, 2.45550424e-01, 2.07646675e-02, 5.63612394e-02,
        1.97758735e-03, 4.28477265e-02, 4.01450217e-01, 9.88793653e-03],
       [1.76386461e-02, 5.49817272e-02, 1.82583824e-01, 7.51628801e-02,
        5.51406331e-02, 6.13379963e-02, 1.44287303e-01, 2.70141428e-03,
        1.03289373e-02, 1.64150640e-01, 2.06102014e-01, 2.55839825e-02],
       [2.05684006e-01, 3.49229295e-03, 2.40847789e-04, 4.57610786e-02,
        5.41907502e-03, 6.38246655e-01, 9.27263964e-03, 2.16

In [16]:
# Convert Matrix to Data Frame for easy understanding
tags_df = pd.DataFrame(tags_matrix, columns = list(T), index=list(T))
tags_df

Unnamed: 0,ADJ,PRON,PRT,X,DET,NOUN,ADP,NUM,CONJ,.,VERB,ADV
ADJ,0.067159,0.000657,0.010673,0.020854,0.004926,0.699507,0.077997,0.020854,0.015599,0.064696,0.012315,0.004762
PRON,0.071815,0.008108,0.011583,0.093436,0.009653,0.20888,0.023552,0.006178,0.005019,0.040927,0.486873,0.033977
PRT,0.085695,0.01648,0.001978,0.014173,0.102835,0.24555,0.020765,0.056361,0.001978,0.042848,0.40145,0.009888
X,0.017639,0.054982,0.182584,0.075163,0.055141,0.061338,0.144287,0.002701,0.010329,0.164151,0.206102,0.025584
DET,0.205684,0.003492,0.000241,0.045761,0.005419,0.638247,0.009273,0.021676,0.000482,0.018064,0.038776,0.012885
NOUN,0.012093,0.004735,0.043417,0.029211,0.013185,0.263996,0.177381,0.009579,0.042615,0.23992,0.146713,0.017155
ADP,0.105986,0.068172,0.001491,0.034725,0.325309,0.322113,0.017043,0.063059,0.000852,0.039625,0.008628,0.012995
NUM,0.032641,0.001484,0.027596,0.209792,0.003264,0.354599,0.035015,0.184866,0.013947,0.116617,0.017211,0.002967
CONJ,0.11718,0.057423,0.004669,0.00887,0.119514,0.355275,0.052288,0.041083,0.000467,0.034547,0.155462,0.053221
.,0.045279,0.065942,0.002426,0.027311,0.17294,0.222622,0.091277,0.081215,0.057587,0.092804,0.088222,0.052286


In [17]:
# Viterbi Heuristic
def Viterbi(words, train_bag = train_tagged_words):
    state = []
    # Find all unique tags
    T = list(set([i[1] for i in train_bag]))
    
    # Find likelyhood of a word having all types of tags
    for key, word in enumerate(words):
        #initialise list of probability column for a given word/observation
        p = []
        
        for tag in T:
            if key == 0: # if the word is the first word in the sentence
                transition_p = tags_df.loc[".",tag]
            elif key == len(words): # if the word is the last word in the sentence
                transition_p = tags_df.loc[tag,"."]
            else:
                transition_p = tags_df.loc[state[-1],tag]
            
            # compute emission and state probabilities
            emission_p = word_given_tag(words[key], tag)
            state_probability = emission_p * transition_p
            
            p.append(state_probability)
        
        # Find the tag with maximum state probability
        pmax = max(p)
        
        # getting state for which probability is maximum
        state_max = T[p.index(pmax)] 
        state.append(state_max)
    
    return list(zip(words, state))
        

In [18]:
# Untagged test set
test_untagged_words = [i[0] for i in test_tagged_words]

In [85]:
# tagging the test sentences
start = time.time()
tagged_seq = Viterbi(test_untagged_words)
end = time.time()
difference = end-start

In [20]:
print("Time taken in seconds: ", difference)
#print(tagged_seq)

Time taken in seconds:  513.8241579532623


In [21]:
# accuracy
matched = [i for i,j in zip(tagged_seq,test_tagged_words) if i == j]

In [22]:
accuracy = len(matched)/len(tagged_seq)
accuracy

0.9205179282868526

In [23]:
unmatched= [i for i,j in zip(tagged_seq,test_tagged_words) if i != j]

In [24]:
# %Unmatched records
failure = len(unmatched)/len(tagged_seq)
failure

0.0794820717131474

In [124]:
tagged_seq

[('Worksheets', 'ADJ'),
 ('in', 'ADP'),
 ('a', 'DET'),
 ('test-practice', 'ADJ'),
 ('kit', 'ADJ'),
 ('called', 'VERB'),
 ('*', 'X'),
 ('Learning', 'NOUN'),
 ('Materials', 'NOUN'),
 (',', '.'),
 ('sold', 'VERB'),
 ('*', 'X'),
 ('to', 'PRT'),
 ('schools', 'NOUN'),
 ('across', 'ADP'),
 ('the', 'DET'),
 ('country', 'NOUN'),
 ('by', 'ADP'),
 ('Macmillan\\/McGraw-Hill', 'NOUN'),
 ('School', 'NOUN'),
 ('Publishing', 'NOUN'),
 ('Co.', 'NOUN'),
 (',', '.'),
 ('contain', 'VERB'),
 ('the', 'DET'),
 ('same', 'ADJ'),
 ('questions', 'NOUN'),
 ('.', '.'),
 ('Government', 'NOUN'),
 ('officials', 'NOUN'),
 (',', '.'),
 ('especially', 'ADV'),
 ('in', 'ADP'),
 ('Japan', 'NOUN'),
 (',', '.'),
 ('probably', 'ADV'),
 ('would', 'VERB'),
 ('resist', 'VERB'),
 ('any', 'DET'),
 ('onslaught', 'ADJ'),
 ('of', 'ADP'),
 ('program', 'NOUN'),
 ('trading', 'NOUN'),
 ('by', 'ADP'),
 ('players', 'NOUN'),
 ('trying', 'VERB'),
 ('*', 'X'),
 ('to', 'PRT'),
 ('shrug', 'ADJ'),
 ('off', 'ADP'),
 ('the', 'DET'),
 ('U.S.', 'NOU

#### Observations:
It can be noticed, lot of words are not predicted correctly. Like Worksheets etc are given the defauy tag of ADJ. Also, numeric fields are marked as default ADJ too. It is further noticed that the defaults are marked as ADJ for the unknown words.

### Solve the problem of unknown words

#### Model 1: Tagging unknow with mode of know tags from train

In [25]:
def Viterbi_model1(words, train_bag = train_tagged_words):
    state = []
    # Find all unique tags
    T = list(set([i[1] for i in train_bag]))
    
    # Find likelyhood of a word having all types of tags
    for key, word in enumerate(words):
        #initialise list of probability column for a given word/observation
        p = []
        
        for tag in T:
            if key == 0: # if the word is the first word in the sentence
                transition_p = tags_df.loc[".",tag]
            elif key == len(words): # if the word is the last word in the sentence
                transition_p = tags_df.loc[tag,"."]
            else:
                transition_p = tags_df.loc[state[-1],tag]
            
            # compute emission and state probabilities
            emission_p = word_given_tag(words[key], tag)
            state_probability = emission_p * transition_p
            
            p.append(state_probability)
        
        # Find the tag with maximum state probability
        pmax = max(p)
        
        # For unknown words the state probability will be zero
        if pmax == 0:
            state.append(most_tag)
        else:
            # getting state for which probability is maximum
            state_max = T[p.index(pmax)] 
            state.append(state_max)
    
    return list(zip(words, state))

In [26]:
# tagging the test sentences
start = time.time()
tagged_seq_model1 = Viterbi_model1(test_untagged_words)
end = time.time()
difference = end-start

In [27]:
print("Time taken in seconds: ", difference)
#print(tagged_seq)

Time taken in seconds:  516.9418249130249


In [28]:
# accuracy
matched_model1 = [i for i,j in zip(tagged_seq_model1,test_tagged_words) if i == j]

In [29]:
accuracy_model1 = len(matched_model1)/len(tagged_seq_model1)
accuracy_model1

0.9408366533864542

#### Model 2: Tagging unknown with using Morphology

In [32]:
def Viterbi_model2(words, train_bag = train_tagged_words):
    state = []
    state_bk = []
    # Find all unique tags
    T = list(set([i[1] for i in train_bag]))
    
    # Find likelyhood of a word having all types of tags
    for key, word in enumerate(words):
        #initialise list of probability column for a given word/observation
        p = []
        
        for tag in T:
            if key == 0: # if the word is the first word in the sentence
                transition_p = tags_df.loc[".",tag]
            elif key == len(words): # if the word is the last word in the sentence
                transition_p = tags_df.loc[tag,"."]
            else:
                transition_p = tags_df.loc[state[-1],tag]
            
            # compute emission and state probabilities
            emission_p = word_given_tag(words[key], tag)
            state_probability = emission_p * transition_p
            
            p.append(state_probability)
        
        # Find the tag with maximum state probability
        pmax = max(p)
        
        # For unknown words the state probability will be zero
        if pmax == 0:
            patterns = [
                (r'.*ing$', 'VERB'),              # gerund
                (r'.*ed$', 'VERB'),               # past tense
                (r'.*es$', 'VERB'),               # 3rd singular present
                (r'.*ould$', 'VERB'),              # modals
                (r'.*\'s$', 'NOUN'),              # possessive nouns
                (r'.*s$', 'NOUN'),                # plural nouns
                (r'^-?[0-9]+(.[0-9]+)?$', 'NUM'), # cardinal numbers
                (r'.*', 'NOUN')                    # nouns
                ]
            
            regexp_tagger = nltk.RegexpTagger(patterns)
            
            tags = regexp_tagger.tag(nltk.word_tokenize(word))
            
            state.append(tags[0][1])
                       
        else:
            # getting state for which probability is maximum
            state_max = T[p.index(pmax)] 
            state.append(state_max)
            
    return list(zip(words, state))

In [34]:
# tagging the test sentences
start = time.time()
tagged_seq_model2 = Viterbi_model2(test_untagged_words)
end = time.time()
difference = end-start

In [35]:
print("Time taken in seconds: ", difference)
#print(tagged_seq)

Time taken in seconds:  585.9888517856598


In [37]:
# accuracy
matched_model2 = [i for i,j in zip(tagged_seq_model2,test_tagged_words) if i == j]

In [38]:
accuracy_model2 = len(matched_model2)/len(tagged_seq_model2)
accuracy_model2

0.951195219123506

#### Evaluating tagging accuracy

### Compare the tagging accuracies of the modifications with the vanilla Viterbi algorithm

#### Observations:

1. The accuracy of vanilla Veterbi algorithm is 91% 
2. The accuracy of Veterbi algorithm modified to assign Mode of tags for unknown words has an accuracy of 94%.
3. The accuracy of Veterbi algorithm modified to assign tags using RegEx/Morphology for unknown words has an accuracy of 95%.

Hence, we shall use the "Viterbi_model2" for further prediction of the POS tags.

### List down cases which were incorrectly tagged by original POS tagger and got corrected by your modifications

### Incorrect Tags by Vanilla Veterbi

In [91]:
unmatched= [i for i,j in zip(tagged_seq,test_tagged_words) if i != j]

In [105]:
unmatched_words = set([i[0] for i in unmatched])
len(unmatched_words)

358

### Incorrect Tags post modifying Veterbi with Morphology

In [94]:
unmatched_model2= [i for i,j in zip(tagged_seq_model2,test_tagged_words) if i != j]

In [106]:
unmatched_words_model2 = set([i[0] for i in unmatched_model2])
len(unmatched_words_model2)

209

In [110]:
# Words which got predicted correctly post mophological changes Veterbi
unmatched_words.difference(unmatched_words_model2)

{'115',
 '158,666',
 '176.1',
 '1925',
 '1934',
 '20.07',
 '26,956',
 '3.43',
 '352.7',
 '35500.64',
 '576',
 '6.53',
 '608,413',
 '62.625',
 '63.79',
 '692',
 '7.20',
 '7.84',
 '80.8',
 '81.8',
 '960',
 '967,809',
 'Alexander',
 'Ariail',
 'Assets',
 'Bennett',
 'Buckhead',
 'Carbide',
 'Carnegie-Mellon',
 'Carrier',
 'Cartons',
 'Cerf',
 'Competes',
 'Debt',
 'Default',
 'Destinations',
 'Determining',
 'Discos',
 'Doerflinger',
 'Drink',
 'Dunn',
 'Dynamics',
 'Editorials',
 'Emile',
 'Foreigners',
 'Graham',
 'Green',
 'Gringo',
 'Hans',
 'Helsinki',
 'Henderson',
 'Herald-American',
 'Honolulu',
 'Ian',
 'Joni',
 'Kelli',
 'Kirkpatrick',
 'Leningrad',
 'Leon',
 'Level',
 'Lucille',
 'Manila',
 'McCabe',
 'McFarlan',
 'Metal',
 'Mexican',
 'Mutchin',
 'News-American',
 'Orlando',
 'Performing',
 'Phillip',
 'Rail',
 'Regarded',
 'Revolution',
 'Rock',
 'Schaefer',
 'Schwab',
 'Sherwin',
 'Skokie',
 'Sonny',
 'Squier',
 'Subcontractors',
 'Sumitomo',
 'Terrace',
 'Theodore',
 'Vice'

#### Observation:
The number of incorrect tags reduced from 399 to 245 post modifiations to Vanilla Veterbi.

### Predicting POS tags for the Test Sentences

In [118]:
with open("Test_sentences.txt") as f:
    contents = f.readlines()

In [119]:
sents = [word_tokenize(i.rstrip("\n")) for i in contents]

In [120]:
# Tagging using original Vetribi
start = time.time()
tagged_seq_vanilla = [Viterbi(words) for words in sents]
end = time.time()
difference = end-start

In [121]:
tagged_seq_vanilla

[[('Android', 'ADJ'),
  ('is', 'VERB'),
  ('a', 'DET'),
  ('mobile', 'ADJ'),
  ('operating', 'NOUN'),
  ('system', 'NOUN'),
  ('developed', 'VERB'),
  ('by', 'ADP'),
  ('Google', 'ADJ'),
  ('.', '.')],
 [('Android', 'ADJ'),
  ('has', 'VERB'),
  ('been', 'VERB'),
  ('the', 'DET'),
  ('best-selling', 'ADJ'),
  ('OS', 'ADJ'),
  ('worldwide', 'ADJ'),
  ('on', 'ADP'),
  ('smartphones', 'ADJ'),
  ('since', 'ADP'),
  ('2011', 'ADJ'),
  ('and', 'CONJ'),
  ('on', 'ADP'),
  ('tablets', 'NOUN'),
  ('since', 'ADP'),
  ('2013', 'ADJ'),
  ('.', '.')],
 [('Google', 'ADJ'),
  ('and', 'CONJ'),
  ('Twitter', 'ADJ'),
  ('made', 'VERB'),
  ('a', 'DET'),
  ('deal', 'NOUN'),
  ('in', 'ADP'),
  ('2015', 'ADJ'),
  ('that', 'ADP'),
  ('gave', 'VERB'),
  ('Google', 'ADJ'),
  ('access', 'NOUN'),
  ('to', 'PRT'),
  ('Twitter', 'ADJ'),
  ("'s", 'PRT'),
  ('firehose', 'ADJ'),
  ('.', '.')],
 [('Twitter', 'ADJ'),
  ('is', 'VERB'),
  ('an', 'DET'),
  ('online', 'ADJ'),
  ('news', 'NOUN'),
  ('and', 'CONJ'),
  ('socia

#### Observations:
It can be seen that vanilla Veterbi algorithm is not able to predict the unknow words correctly from the sample text. FOllowing are somre of the examples noticed:
1. Andriod, Google and Twitter are tagged as Adjectives. These should have been marked as Noun.
2. Numeric figures like 2011 are marked as Adjective. These should have been marked as Number.
3. Words like "domineering" etc are marked as Adjective. This shoud have been makred as verb.

In [113]:
# Tagging with modified Veterbi with Morphology
start = time.time()
tagged_seq_verify = [Viterbi_model2(words) for words in sents]
end = time.time()
difference = end-start

In [123]:
print(tagged_seq_verify)
print(difference)

[[('Android', 'NOUN'), ('is', 'VERB'), ('a', 'DET'), ('mobile', 'ADJ'), ('operating', 'NOUN'), ('system', 'NOUN'), ('developed', 'VERB'), ('by', 'ADP'), ('Google', 'NOUN'), ('.', '.')], [('Android', 'NOUN'), ('has', 'VERB'), ('been', 'VERB'), ('the', 'DET'), ('best-selling', 'ADJ'), ('OS', 'NOUN'), ('worldwide', 'NOUN'), ('on', 'ADP'), ('smartphones', 'VERB'), ('since', 'ADP'), ('2011', 'NUM'), ('and', 'CONJ'), ('on', 'ADP'), ('tablets', 'NOUN'), ('since', 'ADP'), ('2013', 'NUM'), ('.', '.')], [('Google', 'NOUN'), ('and', 'CONJ'), ('Twitter', 'NOUN'), ('made', 'VERB'), ('a', 'DET'), ('deal', 'NOUN'), ('in', 'ADP'), ('2015', 'NUM'), ('that', 'ADP'), ('gave', 'VERB'), ('Google', 'NOUN'), ('access', 'NOUN'), ('to', 'PRT'), ('Twitter', 'NOUN'), ("'s", 'PRT'), ('firehose', 'NOUN'), ('.', '.')], [('Twitter', 'NOUN'), ('is', 'VERB'), ('an', 'DET'), ('online', 'NOUN'), ('news', 'NOUN'), ('and', 'CONJ'), ('social', 'ADJ'), ('networking', 'NOUN'), ('service', 'NOUN'), ('on', 'ADP'), ('which', 'D

#### Observations:
It can be noticed that unkwown words like Google, Android, domineering and numeric values like 2011 etc which were marked as Adjective are not correctly tagged as Noun and Verbs.