# Neural Approach

In [0]:
from nltk.tokenize import sent_tokenize
import string
from keras.preprocessing.text import Tokenizer
import numpy as np
import pandas as pd
from nltk.tokenize import word_tokenize
import random
import re
import math
import nltk

from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding

In [0]:
fp = open("/content/drive/My Drive/Theory_of_Computation/speeches.txt", 'r', encoding="utf-8-sig", errors='ignore')
data = fp.read()

data = data.replace("\n", "")
data = data.replace("...", ". ")
data = re.sub(r"SPEECH [0-9]","", data)
data = re.sub(r"[:;]",".",data)

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [0]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [0]:
sent_tokenize_list = sent_tokenize(data)

In [0]:
lines = list()
for i in range(len(sent_tokenize_list)):
  if(len(sent_tokenize_list[i])>1):
    lines.append("<s> " + sent_tokenize_list[i][:-1] + " </s>")

In [0]:
random.shuffle(lines)
cut = int(0.8*len(lines))
train = np.array(lines[:cut])
test = np.array(lines[cut:])

In [0]:
train

array(['<s> Oh boy </s>',
       '<s> Somebody else would come, they’d have like 50 people and they wouldn’t need this room </s>',
       '<s> Where’s Sharon </s>', ...,
       '<s> He borrowed a million dollars at an interest rate that everybody in this room would be proud to have – a very low interest rate </s>',
       '<s> We love you </s>', '<s> But beautiful, beautiful bikes </s>'],
      dtype='<U1761')

In [0]:
def clean_doc(doc):
  table = str.maketrans('', '', string.punctuation)
  text = doc.translate(table)
  tokens = word_tokenize(text)
  tokens = [word for word in tokens if word.isalpha()]
  tokens = [word.lower() for word in tokens]
  return tokens

In [0]:
sequences = list()
seq_length = 2

for i in range(len(train)):
#   tokens = ["<start>"]
  tokens = (clean_doc(train[i]))
#   tokens.append("<end>")
  if(len(tokens)>seq_length):
    for k in range(0,len(tokens)-seq_length):
      sequences.append(tokens[k:k+seq_length+1])
#   titles[i] = ' '.join(clean_doc(titles[i]))

In [0]:
sequences[:10]

[['s', 'oh', 'boy'],
 ['oh', 'boy', 's'],
 ['s', 'somebody', 'else'],
 ['somebody', 'else', 'would'],
 ['else', 'would', 'come'],
 ['would', 'come', 'they'],
 ['come', 'they', 'd'],
 ['they', 'd', 'have'],
 ['d', 'have', 'like'],
 ['have', 'like', 'people']]

In [0]:
# integer encode sequences of words
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sequences)
sequences_intEncoded = tokenizer.texts_to_sequences(sequences)

In [0]:
# vocabulary size
vocab_size = len(tokenizer.word_index) + 1
print(vocab_size)

7494


In [0]:
sequences_intEncoded[:5]

[[1, 178, 864], [178, 864, 1], [1, 193, 269], [193, 269, 90], [269, 90, 112]]

In [0]:
sequences_intEncoded = np.array(sequences_intEncoded)
np.shape(sequences)

(134740, 3)

In [0]:
X = sequences_intEncoded[:,:-1]
y = sequences_intEncoded[:,-1]

In [0]:
X[:5]

array([[  1, 178],
       [178, 864],
       [  1, 193],
       [193, 269],
       [269,  90]])

In [0]:
y = to_categorical(y, num_classes=vocab_size)

In [0]:
###### VANILLA RECURRENT NEURAL NETWORK MODEL ######
from keras.layers import SimpleRNN

model1 = Sequential()
model1.add(Embedding(vocab_size, 50, input_length=seq_length))
model1.add(SimpleRNN(25, return_sequences=True))
model1.add(SimpleRNN(25))
model1.add(Dense(50, activation='relu'))
model1.add(Dense(vocab_size, activation='softmax'))

model1.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 2, 50)             374700    
_________________________________________________________________
simple_rnn_3 (SimpleRNN)     (None, 2, 25)             1900      
_________________________________________________________________
simple_rnn_4 (SimpleRNN)     (None, 25)                1275      
_________________________________________________________________
dense_5 (Dense)              (None, 50)                1300      
_________________________________________________________________
dense_6 (Dense)              (None, 7494)              382194    
Total params: 761,369
Trainable params: 761,369
Non-trainable params: 0
_________________________________________________________________


In [0]:
###### LONG SHORT TERM MEMORY NEURAL NETWORK MODEL ######
model = Sequential()
model.add(Embedding(vocab_size, 50, input_length=seq_length))
model.add(LSTM(25, return_sequences=True))
model.add(LSTM(25))
model.add(Dense(50, activation='relu'))
model.add(Dense(vocab_size, activation='softmax'))

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 2, 50)             375950    
_________________________________________________________________
lstm_1 (LSTM)                (None, 2, 25)             7600      
_________________________________________________________________
lstm_2 (LSTM)                (None, 25)                5100      
_________________________________________________________________
dense_3 (Dense)              (None, 50)                1300      
_________________________________________________________________
dense_4 (Dense)              (None, 7519)              383469    
Total params: 773,419
Trainable params: 773,419
Non-trainable params: 0
_________________________________________________________________


In [0]:
###### TRAINING LONG SHORT TERM MEMORY NEURAL NETWORK MODEL ######

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, batch_size=128, epochs=40)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fdc83bdeb00>

In [0]:
###### TRAINING VANILLA RECURRENT NEURAL NETWORK MODEL ######

model1.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model1.fit(X, y, batch_size=128, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fdc82486390>

**Random text of 5 sentences**

In [127]:
###### LSTM OUTPUT WORDS ######

total_score = 0
total_count = 0
generated_test_lstm = []
count = 0

for i in range(len(test)):
  result = list()
  text = clean_doc(test[i])
  
  if(len(text)>2):
    result = text[:seq_length]
    r = -1*seq_length
    for _ in range(2,len(text)):
      encoded = tokenizer.texts_to_sequences([' '.join(result[r:])])[0]
      if(len(encoded)<seq_length):
        for j in range(len(encoded),seq_length):
          encoded.append(0)
      encoded = np.array([encoded])
      yhat = model.predict_classes(encoded, verbose=0)
      out_word = ''
      for word, index in tokenizer.word_index.items():
        if index == yhat:
          out_word = word
          break
      result.append(out_word)
    
    generated_test_lstm.append(result)
    #print(test[i])

    if(count < 5): ### to print 5 sentences only
      print(result)
    count+=1

['s', 'so', 'going', 'to', 'he', 'a', 'on', 'china', 's']
['s', 'thats', 'about', 'some']
['s', 'thats', 'about', 'some', 'be', 's', 'a', 'now', 'of', 'be']
['s', 'ive', 'is', 'what', 's', 'going', 'to', 'he', 'a', 'on', 'china', 's', 'a']
['s', 'all', 'going', 'to']


In [128]:
##### VANILLA RNN OUTPUT WORDS #######

total_score = 0
total_count = 0
generated_test_rnn = []
count = 0

for i in range(len(test)):
  result = list()
  text = clean_doc(test[i])
  
  if(len(text)>2):
    result = text[:seq_length]
    r = -1*seq_length
    for _ in range(2,len(text)):
      encoded = tokenizer.texts_to_sequences([' '.join(result[r:])])[0]
      if(len(encoded)<seq_length):
        for j in range(len(encoded),seq_length):
          encoded.append(0)
      encoded = np.array([encoded])
      yhat = model1.predict_classes(encoded, verbose=0)
      out_word = ''
      for word, index in tokenizer.word_index.items():
        if index == yhat:
          out_word = word
          break
      result.append(out_word)
    
    generated_test_rnn.append(result)
    #print(test[i])

    if(count < 5): ### to print 5 sentences only
      print(result)
    count+=1

['s', 'so', 'i', 'think', 'it', 's', 'a', 'great', 'company']
['s', 'thats', 'the', 'way']
['s', 'thats', 'the', 'way', 's', 'historyso', 'many', 'people', 's', 'like']
['s', 'ive', 'admired', 'the', 'work', 's', 'been', 'a', 'great', 'company', 's', 'a', 'great']
['s', 'all', 'of', 'the']


**Calculation of Perplexity**

In [0]:
file1 = open("/content/drive/My Drive/Theory_of_Computation/speeches.txt", 'r', encoding="utf-8-sig", errors='ignore')
train_1 = file1.read()
train_1 = trainstr.lower()

In [0]:
t1 = sent_tokenize(train_1)
for i in range (len(t1)):
  t1[i] = re.sub(r'[^A-Za-z\s\']+', "", t1[i])
  t1[i] = '<s> '+t1[i]+' </s>'
train = t1[:13000] # 80% of dataset is train
test = t1[13000:] # 20% of dataset is test
print(len(t1))

16401


In [0]:
from collections import Counter
fdist1 = {} #bigram
for i in range (len(train)):
  tokens = train[i].split()
  bigrams = nltk.bigrams(tokens)
  fdist = dict(nltk.FreqDist(bigrams))
  fdist1 = dict(Counter(fdist)+Counter(fdist1))

In [0]:
trainstr = re.sub(r'[^A-Za-z\s\']+', "", trainstr)
ls = trainstr.split()
d = {}
for i in range (len(ls)):
  if ls[i] not in d:
    d[ls[i]] = 1
  else:
    d[ls[i]] += 1
Vocab = len(d)
Token = sum(d.values())

In [0]:
def MLEbigram(w1,w2):
  if (w1+" "+w2) not in fdist1:
    if w1 in d.keys():
      return 1/(d[w1]+Vocab) # Add 1 Smoothing
    else:
      return 1/(Vocab) # Add 1 Smoothing
  else:
    if w1 in d.keys():
      return (fdist1(w1+" "+w2)+1)/(d[w1]+Vocab) # Add 1 Smoothing
    else:
      return (fdist1(w1+" "+w2)+1)/(Vocab) # Add 1 Smoothing

In [0]:
def bigramsentenceprob(sentence):
  bigram_sentence_probability_log_sum = 0
  previous_word = None
  for word in sentence:
    if previous_word!=None:
      x = MLEbigram(previous_word,word)
      bigram_sentence_probability_log_sum += math.log(x,2)
    previous_word = word
  return math.pow(2, bigram_sentence_probability_log_sum)

In [0]:
def calculate_number_of_bigrams(sentences):
  bigram_count = 0
  for sentence in sentences:
    bigram_count += len(sentence) - 1
  return bigram_count

In [0]:
def calculate_bigram_perplexity(sentences):
  bigram_count = calculate_number_of_bigrams(sentences)
  sentence_probability_log_sum = 0
  for sentence in sentences:
    try:
      sentence_probability_log_sum -= math.log(bigramsentenceprob(sentence), 2)
    except:
      sentence_probability_log_sum -= 0
  return math.pow(2, sentence_probability_log_sum / bigram_count)

In [129]:
print("Perplexity of test corpus with respect to bigram model is:",end = " ")
print(calculate_bigram_perplexity(test))

print("Perplexity of test corpus generated by LSTM architecture with respect to bigram model is:",end = " ")
print(calculate_bigram_perplexity(generated_test_lstm))

Perplexity of test corpus with respect to bigram model is: 147.40810983034604
Perplexity of test corpus generated by LSTM architecture with respect to bigram model is: 7622.6088372200265


In [130]:
print("Perplexity of test corpus with respect to bigram model is:",end = " ")
print(calculate_bigram_perplexity(test))

print("Perplexity of test corpus generated by Baseline RNN architecture with respect to bigram model is:",end = " ")
print(calculate_bigram_perplexity(generated_test_rnn))

Perplexity of test corpus with respect to bigram model is: 147.40810983034604
Perplexity of test corpus generated by Baseline RNN architecture with respect to bigram model is: 7361.914927290586


# Classical Approach

In [0]:
from nltk.tokenize import sent_tokenize, word_tokenize 
from nltk.util import ngrams
from collections import Counter
import re
import numpy as np
import random
import math
import operator
import string

In [0]:
file2 = open("/content/drive/My Drive/Theory_of_Computation/speeches.txt", 'r', encoding="utf-8-sig", errors='ignore')
trainstr = file2.read()
trainstr = trainstr.lower()

In [0]:
tnew = sent_tokenize(trainstr)
for i in range (len(tnew)):
  tnew[i] = re.sub(r'[^A-Za-z\s\']+', "", tnew[i])
  tnew[i] = '<s> '+tnew[i]+' </s>'
train = tnew[:13000] # 80% of dataset is train
test = tnew[13000:] # 20% of dataset is test
print(len(tnew))

16401


In [0]:
fdist1 = {} #bigram
for i in range (len(train)):
  tokens = train[i].split()
  bigrams = nltk.bigrams(tokens)
  fdist = dict(nltk.FreqDist(bigrams))
  fdist1 = dict(Counter(fdist)+Counter(fdist1))

In [0]:
fdist2 = {} #trigram
for i in range (len(train)):
  tokens = train[i].split()
  trigrams = nltk.trigrams(tokens)
  fdist = dict(nltk.FreqDist(trigrams))
  fdist2 = dict(Counter(fdist)+Counter(fdist2))

In [0]:
from nltk import everygrams
fdist3 = {} #quadgram
for i in range (len(train)):
  tokens = train[i].split()
  fourgrams = list(everygrams(tokens,4,4)) 
  fdist = dict(nltk.FreqDist(fourgrams))
  fdist3 = dict(Counter(fdist)+Counter(fdist3))

In [0]:
trainstr = re.sub(r'[^A-Za-z\s\']+', "", trainstr)
ls = trainstr.split()
d = {}
for i in range (len(ls)):
  if ls[i] not in d:
    d[ls[i]] = 1
  else:
    d[ls[i]] += 1
Vocab = len(d)
Token = sum(d.values())

In [0]:
def MLEunigram(w1):
  if w1 not in d:
    return 1/(Vocab+Token) # Add 1 Smoothing
  else:
    return (d[w1]+1)/(Vocab+Token) # Add 1 Smoothing

In [0]:
def MLEbigram(w1,w2):
  if (w1+" "+w2) not in fdist1:
    if w1 in d.keys():
      return 1/(d[w1]+Vocab) # Add 1 Smoothing
    else:
      return 1/(Vocab) # Add 1 Smoothing
  else:
    if w1 in d.keys():
      return (fdist1(w1+" "+w2)+1)/(d[w1]+Vocab) # Add 1 Smoothing
    else:
      return (fdist1(w1+" "+w2)+1)/(Vocab) # Add 1 Smoothing

In [0]:
def MLEtrigram(w1,w2,w3):
  if (w1+" "+w2+" "+w3) not in fdist2:
    if w1+" "+w2 in fdist1.keys():
      return 1/(fdist1[w1+" "+w2]+Vocab) # Add 1 Smoothing
    else:
      return 1/(Vocab) # Add 1 Smoothing
  else:
    if w1+" "+w2 in fdist1.keys():
      return (fdist2(w1+" "+w2+" "+w3)+1)/(fdist1[w1+" "+w2]+Vocab) # Add 1 Smoothing
    else:
      return (fdist2(w1+" "+w2+" "+w3)+1)/(Vocab) # Add 1 Smoothing

In [0]:
def MLEquadgram(w1,w2,w3,w4):
  if (w1+" "+w2+" "+w3+" "+w4) not in fdist3:
    if w1+" "+w2 + " "+w3 in fdist2.keys():
      return 1/(fdist2[w1+" "+w2+" "+w3]+Vocab) # Add 1 Smoothing
    else:
      return 1/(Vocab) # Add 1 Smoothing
  else:
    if w1+" "+w2 + " "+w3 in fdlist2.keys():
      return (fdist3(w1+" "+w2+" "+w3+" "+w4)+1)/(fdist2[w1+" "+w2+" "+w3]+Vocab) # Add 1 Smoothing
    else:
      return (fdist3(w1+" "+w2+" "+w3+" "+w4)+1)/(Vocab) # Add 1 Smoothing

In [0]:
def unigramsentenceprob(sentence):
  sentence_probability_log_sum = 0
  for word in sentence:
    x = MLEunigram(word)
    sentence_probability_log_sum += math.log(x,2)
  return math.pow(2, sentence_probability_log_sum)

In [0]:
def bigramsentenceprob(sentence):
  bigram_sentence_probability_log_sum = 0
  previous_word = None
  for word in sentence:
    if previous_word!=None:
      x = MLEbigram(previous_word,word)
      bigram_sentence_probability_log_sum += math.log(x,2)
    previous_word = word
  return math.pow(2, bigram_sentence_probability_log_sum)

In [0]:
def trigramsentenceprob(sentence):
  trigram_sentence_probability_log_sum = 0
  previous_word = None
  previous_previous_word = None
  for word in sentence:
    if previous_word!=None and previous_previous_word!=None:
      x = MLEtrigram(previous_previous_word,previous_word,word)
      trigram_sentence_probability_log_sum += math.log(x,2)
    previous_previous_word = previous_word
    previous_word = word
  return math.pow(2, trigram_sentence_probability_log_sum)

In [0]:
def quadgramsentenceprob(sentence):
  quadgram_sentence_probability_log_sum = 0
  previous_word = None
  previous_previous_word = None
  previous_previous_previous_word = None
  for word in sentence:
    if previous_word!=None and previous_previous_word!=None and previous_previous_previous_word!=None :
      x = MLEquadgram(previous_previous_previous_previous_previous_word,previous_word,word)
      quadgram_sentence_probability_log_sum += math.log(x,2)
    previous_previous_previous_word = previous_previous_word
    previous_previous_word = previous_word
    previous_word = word
  return math.pow(2, quadgram_sentence_probability_log_sum)

In [0]:
def calculate_number_of_unigrams(sentences):
  unigram_count = 0
  for sentence in sentences:
    # remove two for <s> and </s>
    unigram_count += len(sentence) - 2
  return unigram_count
def calculate_number_of_bigrams(sentences):
  bigram_count = 0
  for sentence in sentences:
    bigram_count += len(sentence) - 1
  return bigram_count
def calculate_number_of_trigrams(sentences):
  trigram_count = 0
  for sentence in sentences:
    trigram_count += len(sentence) - 2
  return trigram_count
def calculate_number_of_quadgrams(sentences):
  quadgram_count = 0
  for sentence in sentences:
    quadgram_count += len(sentence) - 3
  return quadgram_count

In [0]:
def calculate_unigram_perplexity(sentences):
  unigram_count = calculate_number_of_unigrams(sentences)
  sentence_probability_log_sum = 0
  for sentence in sentences:
    try:
      sentence_probability_log_sum -= math.log(unigramsentenceprob(sentence), 2)
    except:
      sentence_probability_log_sum -= 0
  return math.pow(2, sentence_probability_log_sum / unigram_count)

In [0]:
def calculate_bigram_perplexity(sentences):
  bigram_count = calculate_number_of_bigrams(sentences)
  sentence_probability_log_sum = 0
  for sentence in sentences:
    try:
      sentence_probability_log_sum -= math.log(bigramsentenceprob(sentence), 2)
    except:
      sentence_probability_log_sum -= 0
  return math.pow(2, sentence_probability_log_sum / bigram_count)

In [0]:
def calculate_trigram_perplexity(sentences):
  trigram_count = calculate_number_of_bigrams(sentences)
  sentence_probability_log_sum = 0
  for sentence in sentences:
    try:
      sentence_probability_log_sum -= math.log(trigramsentenceprob(sentence), 2)
    except:
      sentence_probability_log_sum -= 0
  return math.pow(2, sentence_probability_log_sum / trigram_count)

In [0]:
def calculate_quadgram_perplexity(sentences):
  quadgram_count = calculate_number_of_quadgrams(sentences)
  sentence_probability_log_sum = 0
  for sentence in sentences:
    try:
      sentence_probability_log_sum -= math.log(quadgramsentenceprob(sentence), 2)
    except:
      sentence_probability_log_sum -= 0
  return math.pow(2, sentence_probability_log_sum / quadgram_count)

**Calculation of Perplexity**

In [122]:
import math
print("Perplexity of test corpus with respect to unigram model is:",end = " ")
print(calculate_unigram_perplexity(test))
print("Perplexity of test corpus with respect to bigram model is:",end = " ")
print(calculate_bigram_perplexity(test))
print("Perplexity of test corpus with respect to trigram model is:",end = " ")
print(calculate_trigram_perplexity(test))
print("Perplexity of test corpus with respect to quadgram model is:",end = " ")
print(calculate_quadgram_perplexity(test))


Perplexity of test corpus with respect to unigram model is: 186.64101133266124
Perplexity of test corpus with respect to bigram model is: 147.40810983034604
Perplexity of test corpus with respect to trigram model is: 135.92047675682477
Perplexity of test corpus with respect to quadgram model is: 1.0


In [0]:
#Classical generator to generate new words using a n_gram model
def MLE_Generator(n_gram, initial_sequence):
  sentence = ["<s>"]
  if(n_gram==1):
    for i in range(20):
      max_prob = 0
      max_prob_list = list()
      for j in d.keys():
        k = (MLEunigram(j))
        if(k>max_prob):
          max_prob = k
          max_prob_list = [j]
        elif(k==max_prob):
          max_prob_list.append(j)
      samples = np.random.multinomial(20,[max_prob]*len(max_prob_list),size=1)
      index, value = max(enumerate(samples), key = operator.itemgetter(1))
      sentence.append(max_prob_list[index])
  else:
    sentence.extend(initial_sequence)
    i = len(initial_sequence)
    while(sentence[-1]!="</s>" and i<20):
      max_prob = 0
      max_prob_list = list()
      for j in d.keys():
        word_list = sentence[-n_gram+1:]
        word_list.append(j)
        if(n_gram==2):
          k = MLEbigram(word_list[0],word_list[1])
        elif(n_gram==3):
          k = MLEtrigram(word_list[0],word_list[1],word_list[2])
        elif(n_gram==4):
          k = MLEquadgram(word_list[0],word_list[1],word_list[2],word_list[3])
        if(k>max_prob):
          max_prob = k
          max_prob_list = [j]
        elif(k==max_prob):
          max_prob_list.append(j)
      samples = np.random.multinomial(20,[max_prob]*len(max_prob_list),size=1)
      index, value = max(enumerate(samples), key = operator.itemgetter(1))
      sentence.append(max_prob_list[index])
      i+=1
    
  return sentence

In [125]:
print(" ".join(MLE_Generator(3,["it", "should", "not", "be"])))


<s> it should not be speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech


**Random text generation of 5 sentences**

In [126]:
# GENERATING RANDOM TEXT:
# UNIGRAMS
print("UNIGRAM: ", end = " ")
print(" ".join(MLE_Generator(1,[])))
# BIGRAM
print("BIGRAM: ", end = " ")
print(" ".join(MLE_Generator(2,["it", "is"])))
# TRIGRAM
print("TRIGRAM: ", end = " ")
print(" ".join(MLE_Generator(3,["there", "is", "something"])))
# QUADGRAM
print("QUADGRAM1: ", end = " ")
print(" ".join(MLE_Generator(4,["but", "there", "is", "no"])))
# ANOTHER QuADGRAM
print("QUADGRAM2: ", end = " ")
print(" ".join(MLE_Generator(4,["it", "is", "to", "be"])))

UNIGRAM:  <s> the the the the the the the the the the the the the the the the the the the the
BIGRAM:  <s> it is speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech
TRIGRAM:  <s> there is something speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech
QUADGRAM1:  <s> but there is no speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech
QUADGRAM2:  <s> it is to be speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech speech
