In [1]:
import string
import time
import random
from nltk.util import ngrams
from nltk.corpus import stopwords
import nltk
from collections import OrderedDict
from collections import Counter
from collections import defaultdict


## Preprocessing the tokens

In [2]:
def preprocess(tokenList):
    i=0
    for word1 in tokenList:
    #conversion into lowercase
        word1=word1.lower()
    #Takes Care of Multiple Punctuation Marks
        word1=word1.replace('.','').replace(',','').replace(':','').replace(';','').replace('!','').replace('?','').replace('(','').replace(')','').replace('-','').replace('_','').replace('\\',' ').replace('\"',' ').replace('\'',' ')      
    
        tokenList[i]=word1        
        i=i+1

## Creating unigram,bigram,trigram,quadgram dictionaries

In [3]:
def unigramize(unigrams):
    od=Counter()
    for item in unigrams:
        od[item]+=1
    return od

def bigramize(bigrams):
    od2=Counter()
    for item in bigrams:
        od2[item]+=1
    return od2

def trigramize(trigrams):
    od3=Counter()
    for item in trigrams:
        od3[item]+=1
    return od3

def quadgramize(quadgrams):
    od4=Counter()
    for item in quadgrams:
        od4[item]+=1
    return od4

## Training the language model
Performing necessary modifications and training the langugage model with the corpus

In [4]:
def i_unigrams(unigrams,unigram):
    for uni in unigrams:
        stri = ''.join(uni)
        if not unigram or stri not in unigram:
            unigram[stri] = 1;
        else:
            unigram[stri] = unigram[stri] + 1
            
def i_bigrams(bigrams,bigram):
    for bi in bigrams:
        stri = ' '.join(bi)
        if not bigram or stri not in bigram:
            bigram[stri] = 1;
        else:
            bigram[stri] = bigram[stri] + 1
            

def i_trigrams(trigrams,trigram):
    for tri in trigrams:
        stri = ' '.join(tri)
        if not trigram or stri not in trigram:
            trigram[stri] = 1;
        else:
            trigram[stri] = trigram[stri] + 1
            
def i_quadgrams(quadgrams,quadgram):
    for quad in quadgrams:
        stri = ' '.join(quad)
        if not quadgram or stri not in quadgram:
            quadgram[stri] = 1;
        else:
            quadgram[stri] = quadgram[stri] + 1
            
def create_token_list(tokens,vocab):
    newlist=[]
    for word in tokens:
        if word:
            if not vocab or word not in vocab: 
                vocab[word] = len(vocab)
            newlist.append(str(vocab[word]))
    return newlist

def remove_punc(text):
    for c in string.punctuation:
        if c!='\'':
            text=text.replace(c," ")
    text = text.replace("' "," ")
    text = text.replace("\n"," ")
    answer = text.lower()
    return answer

def tokenize(text, ngrams,vocab):
    clean_text = remove_punc(text)
    tokens = create_token_list(clean_text.split(),vocab)
    #print(tokens)
    return [tuple(tokens[i:i+ngrams]) for i in range(len(tokens)-ngrams+1)]

def train_data(unigram,bigram,trigram,quadgram,vocab):
    l = 0
    with open('training_corpus.txt',buffering=20000,encoding='latin1') as f:
        for line in f:
            data = tokenize(line,1,vocab)
            i_unigrams(data,unigram)
            l = l + len(data)
            data = tokenize(line,2,vocab)
            i_bigrams(data,bigram)
            data = tokenize(line,3,vocab)
            i_trigrams(data,trigram)
            data = tokenize(line,4,vocab)
            i_quadgrams(data,quadgram)
    f.close()
    return l

## Creating probability dictionaries

In [5]:
def bi_probability(unigram,bigram,bi_probab):
    #creating bigram probability table
    for bi in bigram:
        s = bi.split()
        w = s[1]
        stri = s[0]
        p = float(bigram[bi])/unigram[stri]
        if stri in bi_probab:
            if w not in bi_probab[stri]:
                bi_probab[stri][w] = p
                d = OrderedDict(sorted(bi_probab[stri].items(), key=lambda t: t[1]))
                bi_probab[stri] = d
        else:
            bi_probab[stri] = {}
            bi_probab[stri][w] = p

In [6]:
def tri_probability(bigram,trigram,tri_probab):
    #creating trigram probability table
    for tri in trigram:
        s = tri.split()
        w = s[-1]
        stri = ' '.join(s[0:2])
        p = float(trigram[tri])/bigram[stri]
        if stri in tri_probab:
            if w not in tri_probab[stri]:
                tri_probab[stri][w] = p
                d = OrderedDict(sorted(tri_probab[stri].items(), key=lambda t: t[1]))
                tri_probab[stri] = d
        else:
            tri_probab[stri] = {}
            tri_probab[stri][w] = p

In [7]:
def quad_probability(vocab,bi_probab,tri_probab,unigram,bigram,trigram,quadgram,probab,tokens):
    
    #getting the parameters of interpolation by performing grid search
    l = grid_search(vocab,bi_probab,tri_probab,unigram,bigram,trigram,quadgram,tokens)
    #lambda1 = 0.25
    #lambda2 = 0.25
    #lambda3 = 0.25
    #lambda4 = 0.25
    lambda1 = l[0]
    lambda2 = l[1]
    lambda3 = l[2]
    lambda4 = l[3]
    #quadgram probability to be found out after interpolation
    for quad in quadgram:
        s = quad.split()
        w = s[-1]
        stri = ' '.join(s[0:3])
        p = interpolation(quad,stri,s,quadgram,trigram,bigram,unigram,tokens,lambda1,lambda2,lambda3,lambda4)
        if stri in probab:
            if w not in probab[stri]:
                probab[stri][w] = p
                d = OrderedDict(sorted(probab[stri].items(), key=lambda t: t[1]))
                probab[stri] = d
        else:
            probab[stri] = {}
            probab[stri][w] = p

## Creating quadgram probability table and predicting the word

In [8]:
def quad_prob(trigrams,quadgrams):
    
    tri_freq=Counter(trigrams)
    quad_freq=Counter(quadgrams)
    #print (tri_freq.items())
    #creating the quadram probabilty table for each word given its trigram context
    for item in quad_freq:
        tri=item[0:3]
        quad_freq[item]=(quad_freq[item]/tri_freq[tri])
        
    return quad_freq

def pred_table(quad_prob_table):
    quad_pred_table=defaultdict(dict)
    #creating the prediction table with all the probable words after a given trigram
    for quad in quad_prob_table:
        prob=quad_prob_table[quad]
        tri=quad[0:3]
        token=quad[3]
        quad_pred_table[tri][token]=prob
    #sorting so tht we can find the most probable word in O(1) time     
    for tri in quad_pred_table:
        quad_pred_table[tri]=sorted(quad_pred_table[tri].items(), key=lambda x: x[1], reverse=True)
        
        return quad_pred_table
        


## Interpolation using given weights

In [9]:
def interpolation_table(od,od2,od3,od4):
    
    pole=defaultdict(dict)
    
    i=0
    s=0.0
    lambda1=0.25
    lambda2=0.25
    lambda3=0.25
    lambda4=0.25
    
    #Simple interpolation using given weights
    for item,value in od4.items():
        #if i>100:
         #   break    
        p=lambda1*float((value)/od3[item[0:3]]) + lambda2*float((od3[item[1:4]])/(od2[item[1:3]])) + lambda3*float((od2[item[2:4]])/(od[item[2]]+1)) +lambda4*(float((od[item[3]]+1)))
        pole[item[0:3]][item[3]]=p    
        
    for tri in pole:
        pole[tri]=sorted(pole[tri].items(), key=lambda x: x[1], reverse=True)
        
    return pole

## Interpolation using customised weights

In [10]:
def cus_interpolation_table(vocab,bi_probab,tri_probab,unigram,bigram,trigram,quadgram,tokens):

    pole=defaultdict(dict)
    pmax=0
    i=0
                         
    for item,value in od4.items(): 
        
        for i in range (10):
            #finding the weights randomly
            #l=weights(l)
            #finding the weights by tuning the parameters using grid search
            l=grid_search(vocab,bi_probab,tri_probab,unigram,bigram,trigram,quadgram,tokens)
            p=l[0]*float((value)/od3[item[0:3]]) + l[1]*float((od3[item[1:4]])/(od2[item[1:3]])) + l[2]*float((od2[item[2:4]])/(od[item[2]]+1)) +l[3]*(float((od[item[3]]+1)))
            #max probabilty case is considered
            if(p>pmax):
                pole[item[0:3]][item[3]]=p
                pmax=p
                                
    
    for tri in pole:
        pole[tri]=sorted(pole[tri].items(), key=lambda x: x[1], reverse=True)
        
    return pole

## Partitioning testing corpus to create the held-out corpus

In [11]:
#loads the testing corpus and partitions it into equal halves to create testing corpus and held-out corpus
def partitionFile(file_path,word_count):
    token = []
    word_count = int(word_count*0.5)
      
    pos = 0
    word_len = 0

    #open the corpus file and read it line by line
    file = open(file_path,'r')
    test_file = open('testing_corpus.txt','w')
    held_file = open('heldset_corpus.txt','w') 

    line = file.readline()

    while line:
        #split the line into tokens
        token = line.split()

        #write the line to the training file
        test_file.write(line)

        word_len = word_len + len(token)  
            
        #quit training when 90% of the corpus has been read
        if word_len >= word_count:
                pos = file.tell()
                break;
            
        line = file.readline()

    #Prepare the testing data
    if word_count <= word_len:
        file.seek(pos)
        test_data = file.read();
        held_file.write(test_data)
            
    file.close()
    test_file.close()
    held_file.close()

## Finding the weights using grid search

We find the weights of interpolation using grid search. For each set of lambda values, we find the score using our held out corpus. The set of lambda values giving us the maximised score is the one which we use for our interpolation.


In [12]:
def interpolation(quad,stri,s,quadgram,trigram,bigram,unigram,tokens,lambda1,lambda2,lambda3,lambda4):
    
    #computing the interpolated probability using all n-grams
    p = ( lambda1 * (quadgram[quad]/trigram[stri])) 
    + (lambda2 * (trigram[' '.join(s[1:4])]/bigram[' '.join(s[1:3])])) 
    + (lambda3 * (bigram[' '.join(s[2:4])]/unigram[str(s[2])])) 
    + (lambda4 * (unigram[str(s[3])]/tokens))
    
    return p

def grid_search(vocab,bi_probab,tri_probab,unigram,bigram,trigram,quadgram,tokens):
    
    lambda1 = 0
    lambda2 = 0
    lambda3 = 0
    max_score = 0
    i=0
    while i<=1:
        j=0
        while j<=1:
            k=0
            while k<=1:
                if (i+j+k)<=1:
                    probab_dict = {}
                    for quad in quadgram:
                        s = quad.split()
                        w = s[-1]
                        stri = ' '.join(s[0:3])
                        #considering i,j,k,(1-i-j-k) as our lambda values, we find interpolation probabilty
                        p = interpolation(quad,stri,s,quadgram,trigram,bigram,unigram,tokens,i,j,k,(1-i-j-k))
                        if stri in probab_dict:
                            if w not in probab_dict[stri] :
                                probab_dict[stri][w] = p
                                d = OrderedDict(sorted(probab_dict[stri].items(), key=lambda t: t[1]))
                                probab_dict[stri] = d
                        else:
                            probab_dict[stri] = {}
                            probab_dict[stri][w] = p
                    #we find out the set of lambda for which the interpolation score is maximun for the held out corpus
                    score = interpol_score('held_out_corpus.txt',bi_probab,tri_probab,probab_dict,vocab)
                    #print(i,j,k,score,max_score)
                    if score > max_score:
                        lambda1 = i
                        lambda2 = j
                        lambda3 = k
                        max_score = score
                k = k + 0.1
            j = j + 0.1
        i = i + 0.1
        
    l=[]
    l.append(lambda1)
    l.append(lambda2)
    l.append(lambda3)
    l.append(1-lambda1-lambda2-lambda3)
    return (l)


def interpol_score(file_name,bi_probab,tri_probab,probab,vocab):
    score = 0
    l = list(vocab.keys())
    #opening the file, here held out corpus, with which we want to tune our interpolation parameters
    with open(file_name,buffering=20000,encoding='latin1') as f:
        for line in f:
            text = remove_punc(line)
            tokens = text.split()
            data = [tuple(tokens[i:i+4]) for i in range(len(tokens)-4+1)]            
            for quad in data:
                counts = []
                flag = 0
                for ele in quad:
                    if ele in vocab:
                        counts.append(str(vocab[ele]))
                    else:
                        flag = 1
                        break
                if flag == 0:
                    tri = ' '.join(counts).split()
                    w = tri[-1]
                    del tri[-1]
                    s = ' '.join(tri)
                    #if predicted word is same as the word in corpus, we add 1 to our score
                    predict = predictWord(s,bi_probab,tri_probab,probab,vocab)
                    if w == predict:
                        score = score + 1
    #file closed                    
    f.close()
    return score

def predictWord(s,bi_probab,tri_probab,probab,vocab):
    predict = ""
    l = s.split()
    tri = ' '.join(s[1:3])
    bi = s[2]
    #backoff principle used for prediction here
    #if string exists in quadgram probability dictionary
    if s in probab:
        predict = list(probab[s].keys())[-1]
    #backing off to trigram probability dictionary
    elif tri in tri_probab:
        predict = list(tri_probab[tri].keys())[-1]
    #backing off to bigram probability dictionary
    elif bi in bi_probab:
        predict = list(bi_probab[bi].keys())[-1]
    return predict


## Finding the weights using Random Search

In [47]:
def weights(l):
    
    #finding lambda1,lambda2 and lambda3 as any random value between 0.01-0.33
    i=float("%0.2f" %(random.uniform(0.01,0.33)))
    j=float("%0.2f" %(random.uniform(0.01,0.33)))
    k=float("%0.2f" %(random.uniform(0.01,0.33)))
    m=1-(i+j+k)
    l.append(i)
    l.append(j)
    l.append(k)
    l.append(m)
    
                            
    return l
                            

## Calculating score of the language model

In [None]:
def scoreCalc(quad,tri,tokenList2):
    score=0
    scorepred=OrderedDict()
    scorepred=OrderedDict.fromkeys(tokenList2,0)

    #if predicted word is present in the vocabulary, we add 1 to our score
    for item in quad:
        if item[0:3] in tri:
            scorepred[item[3]]+=1
        v=list(scorepred.values())
        k=list(scorepred.keys())
        if (k[v.index(max(v))]==item[3]):
            score+=1

    return score

## Add-K smoothing
Simply adding k to the numerator (count of words) and kV to the denominator (count of words) while computing probability.
THe value of k is taken as user input.

In [15]:
def trismoothk(tokens,tri,trigramSet2,smooth_tri,k):
    
    #Add k Smoothing for trigram model
    
    i=len(trigramSet2)
    for item in trigramSet2:
        smooth_tri[item]=tri.count(item) + k/float((tokens.count(item[0:2])+ i))
        
def quadsmoothk(tokens,quad,quadgramSet2,smooth_quad,k):
    
    #Add k Smoothing for quadgram model
    
    i=len(quadgramSet2)
    for item in quadgramSet2:
        smooth_quad[item]=quad.count(item) + k/float((tokens.count(item[0:3])+ i))
          
        

## Good Turing Smoothing
Here we use the count of words we have seen once to predict the words we have not even seen once. Thus for a word which has occured 'c' times, we use the count of words which has occured 'c+1' times, and so on.

In [17]:
def goodturing(quadgramSet2,quadgramcount,wcount):
    
    c=0
    c_star=0
    n=len(quadgramSet2)
    
    wprob=defaultdict(dict)
    
    freq = {}
    #storing the frequencies of count of ngrams
    for item in wcount:
        freq[wcount[item]]=0
    for item in wcount:
        freq[wcount[item]]=freq[wcount[item]]+1
    #storing frequencies of count of ngrams in the test set        
    for quad in quadgramcount:
        if quadgramcount[quad] not in freq:
            val = 0
            for four in quadgramcount:
                if quadgramcount[quad] == quadgramcount[four]:
                    val = val +1
            freq[quadgramcount[quad]] = val
            
    for item in wcount:
        if (wcount[item]==0):
            wcount[item]=float(freq[1]/n)
    for item in freq:
        if(freq[item]==0):
            freq[item]=1
    
    #computing revised count for each word given its trigram context
    for item in quadgramSet2:
        c=wcount[item[3]]
        if c not in freq.keys():
            freq[c]=1
        if (c+1) not in freq.keys():
            freq[c+1]=freq[c]+1
            #print (freq[c])
            #print (freq[c+1])
        c_star=(c+1)*freq[c+1]/freq[c]
        tri=item[0:3]
        prob=c_star/n
        wprob[tri][item[3]]=prob
    
    #sorting the smoothed dictionary    
    for tri in wprob:
        wprob[tri]=sorted(wprob[tri].items(), key=lambda x: x[1], reverse=True)
        
            
    return wprob
            
            
            
        

## Kneser Ney Smoothing
Here we put two ideas into use -- one, we subtract a specific given discount from our n-gram counts and two, we use the continuation probability of the word for the given context.
Weight for the continuation probability is calculated from the discount and continuation counts.

In [41]:
def kney(trigramCount,quadgramCount):
    
    kneser=defaultdict(dict)
    d=0.75
    lambda1=0
    tri_context_count=''
    word_count=''
    
    for item,val in quadgramCount.items():
        if trigramCount[item[0:3]]==0 :
            trigramCount[item[0:3]]=1
        if quadgramCount[item]==0 :
            quadgramCount[item]=1
        #calculation of discounted probability
        p_disc=float((quadgramCount[item]-d)/trigramCount[item[0:3]])
        #for tri,val2 in od3.items():
         #   if (tri[2]==item[3]):
        tri_context_count=trigramCount[item[0:3]]
        word_count=quadgramCount[item]
        lambda1=float(d/tri_context_count)
        #calculation of continuation probability
        p_cont=float(lambda1*tri_context_count/word_count)
        prob=p_disc+p_cont
        #print (item[0])
        kneser[item[0:3]][item[3]]=prob
     
    #sorting the smoothed dictionary
    for tri in kneser:
        kneser[tri]=sorted(kneser[tri].items(), key=lambda x: x[1], reverse=True)
        
    return kneser
        
    

## Perplexity
A measure to judge how effective the language model is after performing smoothing.

In [19]:
def quadperp(tokenList2,smooth_quad,smooth_tri,quadgramSet2):
    
    #computing quadgram perplexity
    n= len (tokenList2)
    perplexity4=1.0
    
    for item in quadgramSet2:
        perplexity4=perplexity4*(((1/float(smooth_quad[item]))*smooth_tri[item[0:3]])**(1./n))
        
    print ("Quadgram Perplexity = %f" %(perplexity4))


## Back-Off
Trigrams from testing corpus have been taken to predict the most probable word following each trigram using backoff. Probability with quadgram, trigram or bigram is displayed correspondingly.

In [20]:
def backoff(tokens,test_quadgrams,train_quadgrams,train_trigrams,train_bigrams,train_unigrams):
    
    for item in test_quadgrams:
        print (item)
        if(train_quadgrams[item]>0):
            print ("probability with quadgram %f"
                  %((train_quadgrams[item])/float(train_trigrams[item[0:3]])))
        
        elif(train_trigrams[item[0:3]]>0):
            print ("probability with trigram %f"
                  %((train_trigrams[item[1:4]])/float(train_bigrams[item[1:3]])))
            
        elif(train_bigrams[item[1:3]]>0):
            print ("probability with bigram %f"
                  %((train_bigrams[item[2:4]])/float(train_unigrams[item[2]]+1)))
        elif(train_bigrams[item[2:4]]<=0):
            print ("Probability with unigram %f" %((train_unigrams[item[3]])/float(len(tokens))))
                    
                
        

## Main function calling all modules

In [45]:
def main():
    
    #opening the training corpus
    f=open('Data/LanguageModels/training_corpus.txt','r',encoding='latin1')
    content=f.read()
    token=content.split()

    #splitting into tokens
    tokenList=list(token)
    tokenSet=set(tokenList)
    vocab = {}
    probab = {}
    tri_probab = {}
    bi_probab = {}
    unigram = {}
    bigram = {}
    trigram = {}
    quadgram = {}
    time1=time.time()
    n = train_data(unigram,bigram,trigram,quadgram,vocab)
    bi_probability(unigram,bigram,bi_probab)
    tri_probability(bigram,trigram,tri_probab)
    quad_probability(vocab,bi_probab,tri_probab,unigram,bigram,trigram,quadgram,probab,n)
    print ("Time taken for training the model and parameter tuning: ", time.time()-time1)

    #listing the tokens into n-grams
    unigrams=list(ngrams(token,1))
    bigrams=list(ngrams(token,2))
    trigrams=list(ngrams(token,3))
    quadgrams=list(ngrams(token,4))
    
    #Preprocessing the training set
    preprocess(tokenList)
    od=Counter()
    od=unigramize(unigrams)
    od2=Counter()
    od2=bigramize(bigrams)
    od3=Counter()
    od3=trigramize(trigrams)
    od4=Counter()
    od4=quadgramize(quadgrams)
    
    #Taking our input string
    sent=input("Enter your test string: ")
    list2=sent.split()
    sent_tri=list(ngrams(list2,3))
    x=len(sent_tri)
    test_tri=sent_tri[x-1]
    
    #Normal prediction of the most probable word
    time1=time.time()
    quad_prob_table=Counter(od4)
    quad_prob_table=quad_prob(od3,od4)
    quad_pred_table=defaultdict(dict)
    quad_pred_table=pred_table(quad_prob_table)
    word=quad_pred_table[test_tri]
    word=sorted(word.items(), key=lambda x: x[1], reverse=True)
    print ("The next word could be: ")    
    print (word[0][0])
    print ("Time taken for prediction: ", time.time()-time1)
    
    #Prediction of the word after interpolation using weights
    time1=time.time()
    quad_pole_table=defaultdict(dict)
    quad_pole_table=interpolation_table(od,od2,od3,od4)
    word=quad_pole_table[test_tri]
    word=sorted(word, key=lambda x: x[1], reverse=True)
    print ("After interpolation, the most probable word could be: ")
    print (word[0][0])
    print ("Time taken for prediction: ", time.time()-time1)
    
    
    with open('Data/LanguageModels/testing_corpus.txt','r',encoding='latin1') as f:
            contents=f.read()
            tokens=contents.split()
            tokenList2=list(tokens)
            
    #creating the held-out set for interpolation
    partitionFile('Data/LanguageModels/testing_corpus.txt',len(tokens))
    
    
    with open('Data/LanguageModels/heldset_corpus.txt','r',encoding='latin1') as fheld:
            hcontents=fheld.read()
            htokens=contents.split()
            htokenList=list(htokens)
     
    #Preprocessing the test set and held-out set
    preprocess(tokens)
    preprocess(htokens)
    bi=list(ngrams(tokenList2,2))
    tri=list(ngrams(tokenList2,3))
    quad=list(ngrams(tokenList2,4))
    hbi=list(ngrams(htokenList,2))
    htri=list(ngrams(htokenList,3))
    hquad=list(ngrams(htokenList,4))
    bigramSet2=set(bi)
    trigramSet2=set(tri)
    quadgramSet2=set(quad)
    hod=Counter()
    hod=unigramize(htokens)
    hod2=Counter()
    hod2=bigramize(hbi)
    hod3=Counter()
    hod3=trigramize(htri)
    hod4=Counter()
    hod4=quadgramize(hquad)
   
    
    #Prediction of the word after interpolation using customised weights
    #time1=time.time()
    #quad_cpole_table=defaultdict(dict)
    #l=[]
    #quad_cpole_table=cus_interpolation_table(hod,hod2,hod3,hod4,l)
    #word=quad_cpole_table[test_tri]
    #word=sorted(word, key=lambda x: x[1], reverse=True)
    #print ("After interpolation, the most probable word could be: ")
    #print (quad_cpole_table.items())
    #print (word[0][0])
    #print (l)
    #print ("Time taken for prediction: ", time.time()-time1)
    
    
    #Add K Smoothing
    smooth_tri=OrderedDict()
    smooth_tri=OrderedDict.fromkeys(trigramSet2,0)
    smooth_quad=OrderedDict()
    smooth_quad=OrderedDict.fromkeys(quadgramSet2,0)
    k=int(input("We will perform Add k smoothing now. Enter value of k: "))
    time1=time.time()
    trismoothk(tokens,tri,trigramSet2,smooth_tri,k)
    quadsmoothk(tokens,quad,quadgramSet2,smooth_quad,k)
    print ("Time taken for Add k Smoothing: ", time.time()-time1)
    
    #Good Turing Smoothing
    od=unigramize(tokens) 
    wprob=defaultdict(dict)
    time1=time.time()
    wprob=goodturing(quadgramSet2,od4,od)
    word=wprob[test_tri]
    word=sorted(word, key=lambda x: x[1], reverse=True)
    print ("After Good Turing Smoothing, the most probable word could be: ")
    print (word[0][0])
    print ("Time taken for Good Turing Smoothing: ", time.time()-time1)
    
    #Kneser Ney Smoothing
    wprob=defaultdict(dict)
    time1=time.time()
    wprob=kney(od3,od4)
    word=wprob[test_tri]
    word=sorted(word, key=lambda x: x[1], reverse=True)
    print ("After Kneser Ney Smoothing, the most probable word could be: ")
    print (word[0][0])
    print ("Time taken for Kneser Ney Smoothing: ", time.time()-time1)
    
    #Calculating Score of the language model
    time1=time.time()
    print ("Score of the language model is: ")
    print (scoreCalc(quad,tri,tokenList2))
    print ("Time taken for calculating Score: ", time.time()-time1)
    
    #Perplexity
    time1=time.time()
    quadperp(tokenList2,smooth_quad,smooth_tri,quadgramSet2)
    print ("Time taken to compute perplexity: ", time.time()-time1)
    
    #Backoff
    time1=time.time()
    #backoff(tokens,quad,od4,od3,od2,od)
    #print ("Time taken to compute Backoff: ", time.time()-time1)
    
    
    
    

In [46]:
if __name__ == "__main__":
    main()

Time taken for training the model and parameter tuning:  189.94470596313477
Enter your test string: I could not
The next word could be: 
have
Time taken for prediction:  0.40525007247924805
After interpolation, the most probable word could be: 
be
Time taken for prediction:  1.124948263168335
We will perform Add k smoothing now. Enter value of k: 4
Time taken for Add k Smoothing:  33.70561981201172
After Good Turing Smoothing, the most probable word could be: 
hope
Time taken for Good Turing Smoothing:  0.11780452728271484
After Kneser Ney Smoothing, the most probable word could be: 
walk
Time taken for Kneser Ney Smoothing:  1.0115365982055664
Score of the language model is: 
549
Time taken for calculating Score:  11.336305141448975
Quadgram Perplexity = 1.052211
Time taken to compute perplexity:  0.018884897232055664
