<a href="https://colab.research.google.com/github/ammaarahmad1999/CS563-NLP-Lab/blob/main/LAB2/Assignment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Copy every file from the Drive folder CS563_LAB2_Dataset into the current working
# directory. Upload the folder to Colab or mount Google Drive at /content/drive first.
# Later cells open train.txt, dev.txt and test.txt by relative path.
%cp /content/drive/MyDrive/CS563_LAB2_Dataset/* ./

In [None]:
import json
import math
import re
import string
from collections import Counter, defaultdict

import numpy as np
import pandas as pd
from nltk.util import ngrams
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tabulate import tabulate
from tqdm import tqdm

In [None]:
def load_dataset(filename):
    """Read a two-column "token tag" file into sentences and tag sequences.

    Each line holding exactly two whitespace-separated fields contributes one
    token and its tag to the current sentence. Any other line (typically a
    blank line) ends the current sentence.

    Args:
        filename: Path of the text file to read (decoded as UTF-8).

    Returns:
        A tuple ``(text, tags, tag_set)`` where
        * ``text`` is a 1-D object array of sentences; every token is followed
          by a single space, so each non-empty sentence ends with a space,
        * ``tags`` is a 1-D object array whose i-th element is the list of tags
          of the i-th sentence,
        * ``tag_set`` is the set of all tags seen in the file.
    """
    def as_object_array(items):
        # Fill element by element: np.asarray(..., dtype=object) would build a
        # 2-D array whenever every tag list happens to have the same length.
        arr = np.empty(len(items), dtype=object)
        for i, item in enumerate(items):
            arr[i] = item
        return arr

    text, tags = [], []
    tag_set = set()
    words, labels = [], []

    def flush():
        # Store the sentence collected so far; separators that enclose no
        # tokens (leading or repeated blank lines) produce nothing.
        if labels:
            text.append("".join(word + " " for word in words))
            tags.append(list(labels))
            words.clear()
            labels.clear()

    with open(filename, encoding="utf-8") as f:
        for line in f:
            item = line.split()
            if len(item) == 2:
                words.append(item[0])
                labels.append(item[1])
                tag_set.add(item[1])
            else:
                flush()

    # The file may end without a trailing separator line; keep that last sentence.
    flush()

    return as_object_array(text), as_object_array(tags), tag_set

In [None]:
def generate_transition_matrix(y):
    """Estimate trigram and bigram tag-transition probabilities from tag sequences.

    Every tag sequence is wrapped in "START" padding and a final "STOP" marker
    before counting: three STARTs for trigrams, two for bigrams, one for unigrams.

    Args:
        y: Iterable of tag lists, one list per sentence.

    Returns:
        ``(tri_transition_matrix, bi_transition_matrix)``: two defaultdicts.
        The first maps ``(t1, t2, t3)`` to count(t1, t2, t3) / count(t1, t2);
        the second maps ``(t1, t2)`` to count(t1, t2) / count(t1).
        Keys that were never observed evaluate to 1e-10.
    """
    def wrap(tag_list, n_start):
        # Sentence boundary padding shared by all three counting passes.
        return ["START"] * n_start + tag_list + ["STOP"]

    trigram_count = Counter()
    for tag_list in y:
        seq = wrap(tag_list, 3)
        trigram_count.update(zip(seq, seq[1:], seq[2:]))

    bigram_count = Counter()
    for tag_list in y:
        seq = wrap(tag_list, 2)
        bigram_count.update(zip(seq, seq[1:]))

    unigram_count = Counter()
    for tag_list in y:
        unigram_count.update(wrap(tag_list, 1))

    unseen = 1e-10  # value returned for transitions absent from the training data

    tri_transition_matrix = defaultdict(lambda: unseen)
    for gram, seen in trigram_count.items():
        tri_transition_matrix[gram] = seen / bigram_count[gram[:2]]

    bi_transition_matrix = defaultdict(lambda: unseen)
    for gram, seen in bigram_count.items():
        bi_transition_matrix[gram] = seen / unigram_count[gram[0]]

    return tri_transition_matrix, bi_transition_matrix

In [None]:
def generate_emission_matrix(x, y):
    """Estimate word-emission probabilities, with and without previous-tag context.

    Args:
        x: Iterable of sentences (whitespace-separated tokens in one string).
        y: Iterable of tag lists aligned with ``x``.

    Returns:
        ``(emission_matrix, emission_context)``: two defaultdicts.
        ``emission_matrix[(word, tag)]`` is count(word, tag) / count(tag).
        ``emission_context[(word, prev_tag, tag)]`` is
        count(word, prev_tag, tag) / count(prev_tag, tag), where the tag before
        the first word is "START". Unseen keys evaluate to 1e-10.
    """
    unseen = 1e-10

    # --- context-free emissions: P(word | tag) -------------------------------
    pair_seen = defaultdict(int)
    tag_seen = defaultdict(int)
    for sentence, labels in zip(x, y):
        for token, label in zip(sentence.split(), labels):
            tag_seen[label] += 1
            pair_seen[(token, label)] += 1

    emission_matrix = defaultdict(lambda: unseen)
    for key, seen in pair_seen.items():
        emission_matrix[key] = seen / tag_seen[key[1]]

    # --- contextual emissions: P(word | previous tag, tag) --------------------
    triple_seen = defaultdict(int)
    context_seen = defaultdict(int)
    for sentence, labels in zip(x, y):
        padded = ['START'] + labels
        for idx, token in enumerate(sentence.split()):
            context = (padded[idx], padded[idx + 1])
            context_seen[context] += 1
            triple_seen[(token,) + context] += 1

    emission_context = defaultdict(lambda: unseen)
    for key, seen in triple_seen.items():
        emission_context[key] = seen / context_seen[key[1:]]

    return emission_matrix, emission_context

In [None]:
def collapse_to_3_tags(y_train, y_valid, y_test):
    """Reduce every tag to its first character (e.g. "B-person" -> "B").

    Args:
        y_train, y_valid, y_test: Iterables of tag sequences for each split.

    Returns:
        A 3-tuple of lists of lists holding the shortened tags, in the order
        (train, valid, test).
    """
    def first_letters(sequences):
        return [[label[0] for label in labels] for labels in sequences]

    return first_letters(y_train), first_letters(y_valid), first_letters(y_test)

In [None]:
def evaluate_accuracy_metrics(correct, total):
    """Print overall and per-tag accuracy and return them.

    Args:
        correct: Mapping tag -> number of correctly predicted tokens with that gold tag.
        total: Mapping tag -> number of tokens with that gold tag.

    Returns:
        ``(df, accuracy)``: a DataFrame indexed by tag (sorted) holding the
        per-tag accuracy, and the overall accuracy as a float.
    """
    n_correct = sum(correct.values())
    n_total = sum(total.values())
    accuracy = n_correct / n_total

    # Tags are reported in sorted order; the order also fixes the DataFrame index.
    classwise_accuracy = {tag: correct[tag] / total[tag] for tag in sorted(total.keys())}

    print(f'\nHMM Model Accuracy = {accuracy}\n')
    print('Class-wise Accuracies \n')
    table_rows = list(classwise_accuracy.items())
    print(tabulate(table_rows,
                   headers=['Class (Tag)', 'Accuracy'],
                   tablefmt='orgtbl'))

    df = pd.DataFrame.from_dict(classwise_accuracy, orient='index')
    return df, accuracy

In [None]:
def kappa(position, all_tags):
    """Return the tags allowed at ``position``.

    Positions 0 and -1 lie before the first word and admit only "START";
    every other position admits ``all_tags``.
    """
    if position == 0 or position == -1:
        return ['START']
    return all_tags

In [None]:
def trigram_viterbi_model(sentence, transition, emission, all_tags):
    """Decode the best tag sequence with a trigram HMM (Viterbi, log-space).

    Scores are sums of log-probabilities instead of products of probabilities,
    so long sentences cannot underflow to zero and leave the back-pointer table
    unfilled.

    Args:
        sentence: Whitespace-separated tokens in a single string.
        transition: Mapping ``(t1, t2, t3) -> P(t3 | t1, t2)``; it is indexed
            with ``[]`` and must supply a value for unseen keys (e.g. a defaultdict).
        emission: Mapping ``(word, tag) -> P(word | tag)``, indexed the same way.
        all_tags: Iterable of candidate tags. The boundary markers "START" and
            "STOP" are ignored if present, because they can never label a word.

    Returns:
        A list with exactly one tag per token (empty for an empty sentence).

    Raises:
        ValueError: If ``all_tags`` holds no tag other than the boundary markers.
    """
    words = sentence.split()
    n = len(words)
    if n == 0:
        return []

    tags = [tag for tag in all_tags if tag not in ('START', 'STOP')]
    if not tags:
        raise ValueError("all_tags must contain at least one tag besides START/STOP")

    neg_inf = float('-inf')

    def log_prob(p):
        # Non-positive probabilities map to -inf rather than raising.
        return math.log(p) if p > 0 else neg_inf

    def candidates(position):
        # Positions before the first word only admit the START marker.
        return tags if position > 0 else ['START']

    # pi[(k, u, v)]: best log-score of a tag sequence for words 1..k that ends in (u, v).
    # bp[(k, u, v)]: the tag preceding u on that best sequence.
    pi = {(0, 'START', 'START'): 0.0}
    bp = {}

    for k in range(1, n + 1):
        word = words[k - 1]
        w_set = candidates(k - 2)
        u_set = candidates(k - 1)
        for v in tags:
            emit = log_prob(emission[(word, v)])  # independent of u and w
            for u in u_set:
                best_w, best_score = None, neg_inf
                for w in w_set:
                    score = pi[(k - 1, w, u)] + log_prob(transition[(w, u, v)]) + emit
                    # The first candidate always seeds the cell so bp is never missing.
                    if best_w is None or score > best_score:
                        best_w, best_score = w, score
                pi[(k, u, v)] = best_score
                bp[(k, u, v)] = best_w

    # Termination: choose the final tag pair, including the transition to STOP.
    best_pair, best_score = None, neg_inf
    for u in candidates(n - 1):
        for v in tags:
            score = pi[(n, u, v)] + log_prob(transition[(u, v, 'STOP')])
            if best_pair is None or score > best_score:
                best_pair, best_score = (u, v), score

    # Follow back-pointers; the path is built from the last tag backwards.
    result_tags = [best_pair[1], best_pair[0]]
    for k in range(n - 2, 0, -1):
        result_tags.append(bp[(k + 2, result_tags[-1], result_tags[-2])])
    result_tags.reverse()

    # For a one-word sentence the pair is (START, tag); keep only the n real tags.
    return result_tags[-n:]

In [None]:
def bigram_viterbi_model(sentence, transition, emission, all_tags):
    """Decode the best tag sequence with a bigram HMM (Viterbi, log-space).

    Scores are sums of log-probabilities instead of products of probabilities,
    so long sentences cannot underflow to zero and leave the back-pointer table
    unfilled.

    Args:
        sentence: Whitespace-separated tokens in a single string.
        transition: Mapping ``(t1, t2) -> P(t2 | t1)``; it is indexed with ``[]``
            and must supply a value for unseen keys (e.g. a defaultdict).
        emission: Mapping ``(word, tag) -> P(word | tag)``, indexed the same way.
        all_tags: Iterable of candidate tags. The boundary markers "START" and
            "STOP" are ignored if present, because they can never label a word.

    Returns:
        A list with exactly one tag per token (empty for an empty sentence).

    Raises:
        ValueError: If ``all_tags`` holds no tag other than the boundary markers.
    """
    words = sentence.split()
    n = len(words)
    if n == 0:
        return []

    tags = [tag for tag in all_tags if tag not in ('START', 'STOP')]
    if not tags:
        raise ValueError("all_tags must contain at least one tag besides START/STOP")

    neg_inf = float('-inf')

    def log_prob(p):
        # Non-positive probabilities map to -inf rather than raising.
        return math.log(p) if p > 0 else neg_inf

    # pi[(k, v)]: best log-score of a tag sequence for words 1..k that ends in v.
    # bp[(k, v)]: the tag preceding v on that best sequence.
    pi = {(0, 'START'): 0.0}
    bp = {}

    for k in range(1, n + 1):
        word = words[k - 1]
        # Only START can precede the first word.
        u_set = tags if k > 1 else ['START']
        for v in tags:
            emit = log_prob(emission[(word, v)])  # independent of u
            best_u, best_score = None, neg_inf
            for u in u_set:
                score = pi[(k - 1, u)] + log_prob(transition[(u, v)]) + emit
                # The first candidate always seeds the cell so bp is never missing.
                if best_u is None or score > best_score:
                    best_u, best_score = u, score
            pi[(k, v)] = best_score
            bp[(k, v)] = best_u

    # Termination: choose the final tag, including the transition to STOP.
    last_tag, best_score = None, neg_inf
    for v in tags:
        score = pi[(n, v)] + log_prob(transition[(v, 'STOP')])
        if last_tag is None or score > best_score:
            last_tag, best_score = v, score

    # Follow back-pointers; the path is built from the last tag backwards.
    result_tags = [last_tag]
    for k in range(n - 1, 0, -1):
        result_tags.append(bp[(k + 1, result_tags[-1])])
    result_tags.reverse()

    return result_tags

In [None]:
def tricontext_viterbi_model(sentence, transition, emission, all_tags):
    """Decode with a trigram HMM whose emissions depend on the previous tag.

    Same search as the plain trigram decoder, but the emission term is looked
    up as ``emission[(word, previous_tag, tag)]``. Scores are accumulated in
    log-space so long sentences cannot underflow to zero.

    Args:
        sentence: Whitespace-separated tokens in a single string.
        transition: Mapping ``(t1, t2, t3) -> P(t3 | t1, t2)``; it is indexed
            with ``[]`` and must supply a value for unseen keys (e.g. a defaultdict).
        emission: Mapping ``(word, prev_tag, tag) -> P(word | prev_tag, tag)``,
            indexed the same way; the tag before the first word is "START".
        all_tags: Iterable of candidate tags. The boundary markers "START" and
            "STOP" are ignored if present, because they can never label a word.

    Returns:
        A list with exactly one tag per token (empty for an empty sentence).

    Raises:
        ValueError: If ``all_tags`` holds no tag other than the boundary markers.
    """
    words = sentence.split()
    n = len(words)
    if n == 0:
        return []

    tags = [tag for tag in all_tags if tag not in ('START', 'STOP')]
    if not tags:
        raise ValueError("all_tags must contain at least one tag besides START/STOP")

    neg_inf = float('-inf')

    def log_prob(p):
        # Non-positive probabilities map to -inf rather than raising.
        return math.log(p) if p > 0 else neg_inf

    def candidates(position):
        # Positions before the first word only admit the START marker.
        return tags if position > 0 else ['START']

    # pi[(k, u, v)]: best log-score of a tag sequence for words 1..k that ends in (u, v).
    # bp[(k, u, v)]: the tag preceding u on that best sequence.
    pi = {(0, 'START', 'START'): 0.0}
    bp = {}

    for k in range(1, n + 1):
        word = words[k - 1]
        w_set = candidates(k - 2)
        u_set = candidates(k - 1)
        for v in tags:
            for u in u_set:
                emit = log_prob(emission[(word, u, v)])  # independent of w
                best_w, best_score = None, neg_inf
                for w in w_set:
                    score = pi[(k - 1, w, u)] + log_prob(transition[(w, u, v)]) + emit
                    # The first candidate always seeds the cell so bp is never missing.
                    if best_w is None or score > best_score:
                        best_w, best_score = w, score
                pi[(k, u, v)] = best_score
                bp[(k, u, v)] = best_w

    # Termination: choose the final tag pair, including the transition to STOP.
    best_pair, best_score = None, neg_inf
    for u in candidates(n - 1):
        for v in tags:
            score = pi[(n, u, v)] + log_prob(transition[(u, v, 'STOP')])
            if best_pair is None or score > best_score:
                best_pair, best_score = (u, v), score

    # Follow back-pointers; the path is built from the last tag backwards.
    result_tags = [best_pair[1], best_pair[0]]
    for k in range(n - 2, 0, -1):
        result_tags.append(bp[(k + 2, result_tags[-1], result_tags[-2])])
    result_tags.reverse()

    # For a one-word sentence the pair is (START, tag); keep only the n real tags.
    return result_tags[-n:]

In [None]:
def bicontext_viterbi_model(sentence, transition, emission, all_tags):
    """Decode with a bigram HMM whose emissions depend on the previous tag.

    Same search as the plain bigram decoder, but the emission term is looked
    up as ``emission[(word, previous_tag, tag)]``. Scores are accumulated in
    log-space so long sentences cannot underflow to zero.

    Args:
        sentence: Whitespace-separated tokens in a single string.
        transition: Mapping ``(t1, t2) -> P(t2 | t1)``; it is indexed with ``[]``
            and must supply a value for unseen keys (e.g. a defaultdict).
        emission: Mapping ``(word, prev_tag, tag) -> P(word | prev_tag, tag)``,
            indexed the same way; the tag before the first word is "START".
        all_tags: Iterable of candidate tags. The boundary markers "START" and
            "STOP" are ignored if present, because they can never label a word.

    Returns:
        A list with exactly one tag per token (empty for an empty sentence).

    Raises:
        ValueError: If ``all_tags`` holds no tag other than the boundary markers.
    """
    words = sentence.split()
    n = len(words)
    if n == 0:
        return []

    tags = [tag for tag in all_tags if tag not in ('START', 'STOP')]
    if not tags:
        raise ValueError("all_tags must contain at least one tag besides START/STOP")

    neg_inf = float('-inf')

    def log_prob(p):
        # Non-positive probabilities map to -inf rather than raising.
        return math.log(p) if p > 0 else neg_inf

    # pi[(k, v)]: best log-score of a tag sequence for words 1..k that ends in v.
    # bp[(k, v)]: the tag preceding v on that best sequence.
    pi = {(0, 'START'): 0.0}
    bp = {}

    for k in range(1, n + 1):
        word = words[k - 1]
        # Only START can precede the first word.
        u_set = tags if k > 1 else ['START']
        for v in tags:
            best_u, best_score = None, neg_inf
            for u in u_set:
                score = (pi[(k - 1, u)]
                         + log_prob(transition[(u, v)])
                         + log_prob(emission[(word, u, v)]))
                # The first candidate always seeds the cell so bp is never missing.
                if best_u is None or score > best_score:
                    best_u, best_score = u, score
            pi[(k, v)] = best_score
            bp[(k, v)] = best_u

    # Termination: choose the final tag, including the transition to STOP.
    last_tag, best_score = None, neg_inf
    for v in tags:
        score = pi[(n, v)] + log_prob(transition[(v, 'STOP')])
        if last_tag is None or score > best_score:
            last_tag, best_score = v, score

    # Follow back-pointers; the path is built from the last tag backwards.
    result_tags = [last_tag]
    for k in range(n - 1, 0, -1):
        result_tags.append(bp[(k + 1, result_tags[-1])])
    result_tags.reverse()

    return result_tags

In [None]:
def test_and_evaluate(x, y, transition, emission, all_tags, order = "tri", context = "no"):
    correct_predictions = defaultdict(lambda: 0)
    tag_count = defaultdict(lambda: 0)

    print(f'Evaluating {len(x)} sentences.\n')

    labels = []
    predictions = []

    for sentence, actual_tag_sequence in tqdm(zip(x, y), total=len(x)):
        if (order == "tri" and context == "no") :
            pred_tag_sequence = trigram_viterbi_model(sentence, transition, emission, all_tags)
        elif (order == "bi" and context == "no") :
            pred_tag_sequence = bigram_viterbi_model(sentence, transition, emission, all_tags)
        elif (order == "tri" and context == "yes") :
            pred_tag_sequence = tricontext_viterbi_model(sentence, transition, emission, all_tags)
        else :
            pred_tag_sequence = bicontext_viterbi_model(sentence, transition, emission, all_tags)
        
        labels.extend(actual_tag_sequence)
        predictions.extend(pred_tag_sequence)
        
        for predicted, actual in zip(pred_tag_sequence, actual_tag_sequence):
            correct_predictions[actual] += predicted == actual
            tag_count[actual] += 1
    
    df, accuracy = evaluate_accuracy_metrics(correct_predictions, tag_count)   

    return df, accuracy, labels, predictions

In [None]:
def HMM(x_train, y_train, x_valid, y_valid, x_test, y_test, tag_set):
    """Train the HMM tables, pick the best decoder on validation data, score it on test data.

    Four configurations are evaluated on the validation split (trigram/bigram
    transitions, each with plain or previous-tag emissions). The configuration
    with the highest validation accuracy (the earliest one on ties) is then
    evaluated on the test split.

    Args:
        x_train, x_valid, x_test: Sentences of each split.
        y_train, y_valid, y_test: Gold tag lists of each split.
        tag_set: Set of tags the decoder may assign. When it has exactly three
            members, all gold tags are first reduced to their first character.

    Returns:
        The per-tag accuracy DataFrame of the selected configuration on the test split.
    """
    if len(tag_set) == 3:
        y_train, y_valid, y_test = collapse_to_3_tags(y_train, y_valid, y_test)

    # Only real labels are candidates for a word: the START/STOP boundary
    # markers are handled by the decoders themselves. Sorting makes the
    # candidate order (and therefore tie-breaking) identical on every run.
    all_tags = sorted(tag_set)

    emission_matrix, emission_context = generate_emission_matrix(x_train, y_train)
    tri_transition_matrix, bi_transition_matrix = generate_transition_matrix(y_train)

    # (title, transition table, emission table, order, context) in evaluation order.
    configurations = [
        ("Trigram Model with No Context", tri_transition_matrix, emission_matrix, "tri", "no"),
        ("Bigram Model with No Context", bi_transition_matrix, emission_matrix, "bi", "no"),
        ("Trigram Model with Context", tri_transition_matrix, emission_context, "tri", "yes"),
        ("Bigram Model with Context", bi_transition_matrix, emission_context, "bi", "yes"),
    ]

    best_accuracy, best_config = None, None
    for config in configurations:
        title, transition, emission, order, context = config
        print(f"\n\n{title} Validation Results")
        _, accuracy, _, _ = test_and_evaluate(x_valid, y_valid, transition, emission,
                                              all_tags, order = order, context = context)
        # Strict comparison keeps the earliest configuration when accuracies tie.
        if best_accuracy is None or accuracy > best_accuracy:
            best_accuracy, best_config = accuracy, config

    print("\n\nBest Model Test Dataset Results\n")
    _, transition, emission, order, context = best_config
    df, _, _, _ = test_and_evaluate(x_test, y_test, transition, emission,
                                    all_tags, order = order, context = context)

    return df

In [None]:
def main():
    """Run the HMM pipeline with the three-tag set {B, I, O} and save per-tag accuracy.

    Reads train.txt, dev.txt and test.txt from the working directory and writes
    the resulting table to HMM_3_results.csv.
    """
    # The tag set found in the files is not used here; the run is restricted to B/I/O.
    x_train, y_train, _ = load_dataset("train.txt")
    x_valid, y_valid, _ = load_dataset("dev.txt")
    x_test, y_test, _ = load_dataset("test.txt")

    separator = '-' * 80

    print(separator)
    print('HMM for 3 tags : ')
    results = HMM(x_train, y_train, x_valid, y_valid, x_test, y_test, {'B', 'I', 'O'})
    results.to_csv("HMM_3_results.csv")
    print(separator)

if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
HMM for 3 tags : 


Trigram Model with No Context Validation Results
Evaluating 1000 sentences.



100%|██████████| 1000/1000 [00:01<00:00, 520.54it/s]



HMM Model Accuracy = 0.9037574564909907

Class-wise Accuracies 

| Class (Tag)   |   Accuracy |
|---------------+------------|
| B             |   0.20121  |
| I             |   0.117773 |
| O             |   0.9587   |


Bigram Model with No Context Validation Results
Evaluating 1000 sentences.



100%|██████████| 1000/1000 [00:00<00:00, 2452.22it/s]



HMM Model Accuracy = 0.9015435705061189

Class-wise Accuracies 

| Class (Tag)   |   Accuracy |
|---------------+------------|
| B             |   0.2118   |
| I             |   0.100642 |
| O             |   0.956387 |


Trigram Model with Context Validation Results
Evaluating 1000 sentences.



100%|██████████| 1000/1000 [00:02<00:00, 454.28it/s]



HMM Model Accuracy = 0.912121025767173

Class-wise Accuracies 

| Class (Tag)   |   Accuracy |
|---------------+------------|
| B             |   0.192133 |
| I             |   0.130621 |
| O             |   0.967687 |


Bigram Model with Context Validation Results
Evaluating 1000 sentences.



100%|██████████| 1000/1000 [00:00<00:00, 1992.08it/s]



HMM Model Accuracy = 0.9114445606051288

Class-wise Accuracies 

| Class (Tag)   |   Accuracy |
|---------------+------------|
| B             |   0.193646 |
| I             |   0.122056 |
| O             |   0.967158 |


Best Model Test Dataset Results

Evaluating 3849 sentences.



100%|██████████| 3849/3849 [00:08<00:00, 466.42it/s]


HMM Model Accuracy = 0.8859377019516609

Class-wise Accuracies 

| Class (Tag)   |   Accuracy |
|---------------+------------|
| B             |   0.183703 |
| I             |   0.130137 |
| O             |   0.963068 |
61896 60033
--------------------------------------------------------------------------------





In [None]:
# Reload the table saved by the 3-tag run, name its two columns, save it
# without the index column, and display it.
df = pd.read_csv("HMM_3_results.csv")
# NOTE(review): the second column holds the per-tag accuracies written by the
# run above, so 'POS' / 'Probability' look like leftover labels — confirm.
df.columns = ['POS', 'Probability']
# NOTE(review): the output goes to "HMM_4_results.csv", not back to
# "HMM_3_results.csv" — confirm the file name is intended.
df.to_csv("HMM_4_results.csv", index = None)
df

Unnamed: 0,POS,Probability
0,B,0.183703
1,I,0.130137
2,O,0.963068


In [None]:
def main():
    """Run the HMM pipeline with every tag found in train.txt and save per-tag accuracy.

    Reads train.txt, dev.txt and test.txt from the working directory and writes
    the resulting table to HMM_21_results.csv.
    """
    # Only the tag set of the training file is used as the candidate tags.
    x_train, y_train, tag_set = load_dataset("train.txt")
    x_valid, y_valid, _ = load_dataset("dev.txt")
    x_test, y_test, _ = load_dataset("test.txt")

    separator = '-' * 80

    print(separator)
    print('HMM for 21 tags : ')
    results = HMM(x_train, y_train, x_valid, y_valid, x_test, y_test, tag_set)
    results.to_csv("HMM_21_results.csv")

    print(separator)

if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
HMM for 21 tags : 


Trigram Model with No Context Validation Results
Evaluating 1000 sentences.



100%|██████████| 1000/1000 [02:39<00:00,  6.28it/s]



HMM Model Accuracy = 0.9002521370149438

Class-wise Accuracies 

| Class (Tag)   |   Accuracy |
|---------------+------------|
| B-company     |  0.153846  |
| B-facility    |  0.0526316 |
| B-loc         |  0.284483  |
| B-movie       |  0         |
| B-musicartist |  0.0243902 |
| B-other       |  0.106061  |
| B-person      |  0.157895  |
| B-product     |  0.189189  |
| B-sportsteam  |  0.0285714 |
| B-tvshow      |  0         |
| I-company     |  0         |
| I-facility    |  0.102564  |
| I-loc         |  0.166667  |
| I-movie       |  0         |
| I-musicartist |  0.0285714 |
| I-other       |  0.0412371 |
| I-person      |  0.0842105 |
| I-product     |  0.0165289 |
| I-sportsteam  |  0.0769231 |
| O             |  0.959492  |


Bigram Model with No Context Validation Results
Evaluating 1000 sentences.



100%|██████████| 1000/1000 [00:06<00:00, 154.08it/s]



HMM Model Accuracy = 0.8979152573642457

Class-wise Accuracies 

| Class (Tag)   |   Accuracy |
|---------------+------------|
| B-company     | 0.153846   |
| B-facility    | 0.0526316  |
| B-loc         | 0.284483   |
| B-movie       | 0          |
| B-musicartist | 0.0243902  |
| B-other       | 0.106061   |
| B-person      | 0.175439   |
| B-product     | 0.189189   |
| B-sportsteam  | 0.0285714  |
| B-tvshow      | 0          |
| I-company     | 0          |
| I-facility    | 0.102564   |
| I-loc         | 0.166667   |
| I-movie       | 0          |
| I-musicartist | 0.0285714  |
| I-other       | 0.0412371  |
| I-person      | 0.0631579  |
| I-product     | 0.00826446 |
| I-sportsteam  | 0.0769231  |
| O             | 0.956981   |


Trigram Model with Context Validation Results
Evaluating 1000 sentences.



100%|██████████| 1000/1000 [02:50<00:00,  5.86it/s]



HMM Model Accuracy = 0.9095381587848226

Class-wise Accuracies 

| Class (Tag)   |   Accuracy |
|---------------+------------|
| B-company     | 0.153846   |
| B-facility    | 0.0526316  |
| B-loc         | 0.25       |
| B-movie       | 0          |
| B-musicartist | 0.0243902  |
| B-other       | 0.0833333  |
| B-person      | 0.128655   |
| B-product     | 0.108108   |
| B-sportsteam  | 0          |
| B-tvshow      | 0          |
| I-company     | 0          |
| I-facility    | 0.0769231  |
| I-loc         | 0.166667   |
| I-movie       | 0          |
| I-musicartist | 0          |
| I-other       | 0.0412371  |
| I-person      | 0.136842   |
| I-product     | 0.00826446 |
| I-sportsteam  | 0          |
| O             | 0.970528   |


Bigram Model with Context Validation Results
Evaluating 1000 sentences.



100%|██████████| 1000/1000 [00:09<00:00, 107.74it/s]



HMM Model Accuracy = 0.9094151651189963

Class-wise Accuracies 

| Class (Tag)   |   Accuracy |
|---------------+------------|
| B-company     |  0.153846  |
| B-facility    |  0.0263158 |
| B-loc         |  0.25      |
| B-movie       |  0         |
| B-musicartist |  0.0243902 |
| B-other       |  0.0833333 |
| B-person      |  0.140351  |
| B-product     |  0.108108  |
| B-sportsteam  |  0         |
| B-tvshow      |  0         |
| I-company     |  0         |
| I-facility    |  0.0512821 |
| I-loc         |  0.190476  |
| I-movie       |  0         |
| I-musicartist |  0         |
| I-other       |  0.0515464 |
| I-person      |  0.147368  |
| I-product     |  0         |
| I-sportsteam  |  0         |
| O             |  0.970264  |


Best Model Test Dataset Results

Evaluating 3849 sentences.



100%|██████████| 3849/3849 [11:26<00:00,  5.61it/s]



HMM Model Accuracy = 0.8806061781052087

Class-wise Accuracies 

| Class (Tag)   |   Accuracy |
|---------------+------------|
| B-company     |  0.0772947 |
| B-facility    |  0.0632411 |
| B-loc         |  0.226757  |
| B-movie       |  0         |
| B-musicartist |  0.0052356 |
| B-other       |  0.0273973 |
| B-person      |  0.136929  |
| B-product     |  0.0203252 |
| B-sportsteam  |  0.0204082 |
| B-tvshow      |  0         |
| I-company     |  0.045283  |
| I-facility    |  0.103825  |
| I-loc         |  0.0913242 |
| I-movie       |  0         |
| I-musicartist |  0.0142857 |
| I-other       |  0.0647482 |
| I-person      |  0.103333  |
| I-product     |  0.002     |
| I-sportsteam  |  0         |
| I-tvshow      |  0         |
| O             |  0.965499  |
--------------------------------------------------------------------------------


In [None]:
# Reload the table saved by the 21-tag run, name its two columns, write it
# back to the same file without the index column, and display it.
df = pd.read_csv("HMM_21_results.csv")
# NOTE(review): the second column holds the per-tag accuracies written by the
# run above, so 'POS' / 'Probability' look like leftover labels — confirm.
df.columns = ['POS', 'Probability']
# Overwrites its own input, so re-running this cell alone reads the already
# renamed two-column file.
df.to_csv("HMM_21_results.csv", index = None)
df