# Topic Modelling

Creating a system which, given the turns of a conversation so far and a new turn, returns the most important topic(s) the conversation is revolving around.

Note: This will be fed into the knowledge retrieval phases to provide the core context, in addition to the latest turn of the conversation.

## Sample Data Definition

In [None]:
# Six hand-written conversation turns, tiled so the list ends up holding
# eight copies of them (48 documents in total).
docs = [
    "Hey! How are you doing?",
    "This COVID thing has been crazy hasn't it",
    "I heard the vaccines aren't all that effective",
    "I heard Pfizer had something to do with the vaccines",
    "and what about Moderna? I'm pretty sure they were involved too",
    "I'm not sure social distancing is useful in stopping the spread",
] * 8

# from sklearn.datasets import fetch_20newsgroups
# docs = fetch_20newsgroups(subset='all',  remove=('headers', 'footers', 'quotes'))['data']

### Imports

In [None]:
from bertopic import BERTopic
import numpy as np

In [None]:
# Build the BERTopic model for English text. Per-topic probabilities are
# requested, progress logging is on, and the embedding model is given as a
# relative local path.
topic_model = BERTopic(language="english", calculate_probabilities=True, verbose=True, embedding_model="../../models/bert-base-cased-squad2")

In [None]:
# Fit the model on the sample turns and keep both values it returns.
topics, probs = topic_model.fit_transform(docs)

In [None]:
# Topic summary table; the bare expression on the last line is what the
# notebook displays (first five rows).
freq = topic_model.get_topic_info()
freq.head(5)

In [None]:
# Display what the fitted model holds for topic id 0.
topic_model.get_topic(0)  # Select the most frequent topic

## Using gensim

### Imports

In [None]:
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string

### Pre-process text

In [None]:
# Shared resources for clean(): the English stopword set, the set of ASCII
# punctuation characters to strip, and a WordNet lemmatizer.
stop = set(stopwords.words('english'))
exclude = set(string.punctuation)
lemmatizer = WordNetLemmatizer()

def clean(doc):
    """Normalise one document into a space-separated string of lemmas.

    The text is lowercased and split on whitespace, stopwords are dropped,
    punctuation characters are removed, and each remaining word is
    lemmatised. Uses the module-level ``stop``, ``exclude`` and
    ``lemmatizer`` objects.
    """
    kept_words = [token for token in doc.lower().split() if token not in stop]
    without_stops = " ".join(kept_words)
    # Punctuation is stripped character by character, after stopword removal.
    without_punct = "".join(filter(lambda char: char not in exclude, without_stops))
    lemmas = [lemmatizer.lemmatize(token) for token in without_punct.split()]
    return " ".join(lemmas)

# Tokenised corpus: one list of cleaned words per document in `docs`.
clean_corpus = [clean(doc).split() for doc in docs]
print(clean_corpus)

In [None]:
from gensim import corpora
# Map every token of the cleaned corpus to an integer id, then encode each
# document as a bag-of-words list of (token_id, count) pairs.
dictionary = corpora.Dictionary(clean_corpus)
corpus = [dictionary.doc2bow(text) for text in clean_corpus]

## Manually

This is where the approach used in the MUKALMA model is built.

In [None]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import time
import numpy as np
from nltk.tokenize import word_tokenize, sent_tokenize
import nltk

In [None]:
# Sentence-embedding model used by calc_sentence_similarity(); loaded from a
# relative local path and pinned to the CPU.
model = SentenceTransformer('../../models/all-MiniLM-L6-v2', device='cpu')

In [None]:
def calc_sentence_similarity(msg, candidates):
    """Return the cosine similarity between ``msg`` and each candidate.

    Both sides are embedded with the module-level ``model``. The result is a
    flat array with one score per entry of ``candidates``, in the same order.
    """
    query_vec = model.encode([msg])
    candidate_vecs = model.encode(candidates)
    # One query row against all candidate rows, flattened to one dimension.
    similarity_row = cosine_similarity(query_vec, candidate_vecs)
    return similarity_row.flatten()

def has_topic_changed(msg, prev_msg, control_msg, error_threshold=0.02):
    """Decide whether ``msg`` starts a new topic relative to ``prev_msg``.

    ``msg`` is scored against the previous message and against a control
    message. The topic counts as changed unless the previous message beats
    the control by at least ``error_threshold``.
    """
    similarities = calc_sentence_similarity(msg, [prev_msg, control_msg])
    print(f"topic_change_dist: {similarities}")
    to_prev, to_control = similarities[0], similarities[1]
    return to_control - to_prev > -error_threshold

In [None]:
def tag_sentence(message):
    """Extract noun and number tokens from ``message``.

    The message is split into sentences; each sentence is word-tokenised and
    POS-tagged with NLTK. Tokens whose tag starts with ``NN`` or ``CD`` are
    kept, except the greetings "hi" and "hey" (compared case-insensitively).
    """
    wanted_tag_prefixes = ['NN', 'CD']
    ignored_words = ['hi', 'hey']
    nouns = []
    for sentence in sent_tokenize(message):
        for word, pos in nltk.pos_tag(word_tokenize(sentence)):
            if pos[:2] in wanted_tag_prefixes and word.lower() not in ignored_words:
                nouns.append(word)
    return nouns

### Experimenting with knowledge transition

In [None]:
# Interactive scratch loop: tags every typed message and remembers the tags
# as the "previous" ones. `cur_k` and `topic_k` are initialised but not used
# anywhere in this cell.
prev_m_k = []
m_k = []
cur_k = []
topic_k = []
while True:
    m = input('User: ')
    # Typing "exit" ends the session.
    if m == 'exit':
        break
    m_k = tag_sentence(m)
    prev_m_k = m_k
    

### Clustering cosine similarity

In [None]:
from scipy.cluster.vq import kmeans

def list_sorted_args(l, reverse=False):
    """Return the indices of ``l`` ordered by the values they point at.

    Ascending by default; ``reverse=True`` gives descending order. Ties keep
    their original relative order in both directions.
    """
    indexed = sorted(enumerate(l), key=lambda pair: pair[1], reverse=reverse)
    return [position for position, _ in indexed]

def find_highest_similarity_scores(scores, n=3):
    """Return the indices of the top-scoring cluster of ``scores``.

    The scores are ranked in ascending order and the (rank, score) points are
    clustered with k-means. Every rank is then assigned to the centroid whose
    rank coordinate is closest, and the contiguous run of top ranks that
    shares the best score's cluster is returned.

    Args:
        scores: Sequence (list or 1-D array) of similarity scores.
        n: Number of k-means clusters to request. Clamped to ``len(scores)``
            because k-means cannot pick more initial centroids than there
            are observations.

    Returns:
        Indices into ``scores``, highest score first. Empty for empty input.
        k-means starts from random centroids, so the cut-off can vary between
        calls; the index of the maximum score is always included.
    """
    s_len = len(scores)
    if s_len == 0:
        return []

    # Stable ascending argsort: ties keep their original relative order.
    s_idxs = np.argsort(np.asarray(scores, dtype=float), kind="stable").tolist()
    ranks = np.arange(s_len, dtype=float)
    ranked_scores = np.asarray([scores[i] for i in s_idxs], dtype=float)

    # Plain (s_len, 2) ndarray of observations; np.matrix is no longer
    # recommended by NumPy.
    observations = np.column_stack((ranks, ranked_scores))
    codebook, _ = kmeans(observations, min(n, s_len))

    # Cluster membership is decided on the rank coordinate only.
    assigned_clusters = [int(np.abs(codebook[:, 0] - rank).argmin()) for rank in ranks]
    print(assigned_clusters)

    # Walk down from the best rank until the cluster label changes.
    highest_cluster = assigned_clusters[-1]
    highest_idxs = []
    for rank in range(s_len - 1, -1, -1):
        if assigned_clusters[rank] != highest_cluster:
            break
        highest_idxs.append(s_idxs[rank])
    return highest_idxs

#t = [0.12140948, 0.426371, 0.11862079, 0.44534147, 0.17006755, 0.55, 0.00, 0.00, 0.00, 0.00]
# Quick check: display the scores that end up in the top cluster.
t = [0, 0.2, 0.5, 1]
[t[i] for i in find_highest_similarity_scores(t)]

In [None]:
# Interactive topic-tracking loop (first version).
#
# State carried between turns:
#   keywords                 - nouns of the previous turn
#   c_keywords               - nouns of the current message
#   cur_topic_desc_keywords  - keywords retained from the turn that changed the topic
#   t_keywords               - combined keyword list reported for the current turn
# `topics` and `prev_keywords` are initialised but not read in this cell.
topics = []
keywords = []
c_keywords = []
t_keywords = []
# The control message doubles as the "previous message" for the first turn.
prev_msg = control_msg = "Hey! How are you doing?"
prev_keywords = []

# Keeps track of whether the topic changed in the previous turn
topic_changed = False
cur_topic_desc_keywords = []
while True:
    msg = input('\nUser: ')
    t1 = time.time()
    # Typing "exit" ends the session.
    if msg == 'exit':
        break
    c_keywords = tag_sentence(msg)

    if has_topic_changed(msg, prev_msg, control_msg):
        # New topic: start over with only this message's nouns.
        keywords = c_keywords
        print(f"topic has changed to {keywords}")
        topic_changed = True
    else:
        if topic_changed:
            # First turn after a topic change.
            topic_changed = False

            # Compare the current sentence against the keywords in the previous turn and keep the most relevant ones
            # until the topic changes
            if len(keywords) > 3:
                scores = calc_sentence_similarity(msg, keywords)
                cur_topic_desc_keywords = [keywords[i] for i in find_highest_similarity_scores(scores)]
            else:
                # Three or fewer keywords: keep them all as topic descriptors.
                cur_topic_desc_keywords = keywords
                keywords = []
            t_keywords = list(set(c_keywords) | set(cur_topic_desc_keywords) | set(keywords))
        else:
            # It's been >2 turns since the topic changed
            t_keywords = list(set(c_keywords) | set(cur_topic_desc_keywords) | set(keywords))
            if len(t_keywords) > 0:
                # Order the merged keywords from most to least similar to the message.
                s_scores = calc_sentence_similarity(msg, t_keywords)
                print(f"scores: {s_scores}")
                s_idxs = list_sorted_args(s_scores, reverse=True)
                print(f"s_idxs: {s_idxs}")
                t_keywords = [t_keywords[i] for i in s_idxs]

        # `keywords` is never set to None in this cell, so this guard has no effect here.
        if keywords is None:
            keywords = []
        print(f"topic has not changed, keywords: {t_keywords}")
        # The current nouns become the "previous turn" keywords.
        keywords = c_keywords

    print(f"time elapsed: {time.time() - t1}")
    prev_msg = msg

### backup

In [None]:
# Backup of an earlier topic-tracking loop. It prints the plain concatenation
# of the current nouns, the retained topic descriptors and the previous
# turn's nouns, without de-duplicating or ordering them.
# `topics` and `prev_keywords` are initialised but not read in this cell.
topics = []
keywords = []
c_keywords = []
# The control message doubles as the "previous message" for the first turn.
prev_msg = control_msg = "Hey! How are you doing?"
prev_keywords = []

# Keeps track of whether the topic changed in the previous turn
topic_changed = False
cur_topic_desc_keywords = []
while True:
    msg = input('User: ')
    t1 = time.time()
    # Typing "exit" ends the session.
    if msg == 'exit':
        break
    c_keywords = tag_sentence(msg)

    if has_topic_changed(msg, prev_msg, control_msg):
        # New topic: start over with only this message's nouns.
        keywords = c_keywords
        print(f"topic has changed to {keywords}")
        topic_changed = True
    else:
        if topic_changed:
            topic_changed = False

            # Compare the current sentence against the keywords in the previous turn and keep the most relevant ones
            # until the topic changes
            if len(keywords) > 3:
                scores = calc_sentence_similarity(msg, keywords)
                cur_topic_desc_keywords = [keywords[i] for i in find_highest_similarity_scores(scores)]
            else:
                # Three or fewer keywords: keep them all as topic descriptors.
                cur_topic_desc_keywords = keywords
                # None marks "already moved into the descriptors"; reset to [] just below.
                keywords = None
        #print("um", keywords)
        if keywords is None:
            keywords = []
        print(f"topic has not changed, keywords: {c_keywords + cur_topic_desc_keywords + keywords}")
        # The current nouns become the "previous turn" keywords.
        keywords = c_keywords

    print(f"time elapsed: {time.time() - t1}")
    prev_msg = msg

## Nabeel's Section

#### Imports

In [1]:
import nltk
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import word_tokenize, sent_tokenize
from scipy.cluster.vq import kmeans

# For Utilities
import math
import time
import numpy as np

# For NER
import spacy
import en_core_web_sm
from spacy import displacy
from collections import Counter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sentence-embedding model used by calc_sentence_similarity(); loaded from a
# relative local path and pinned to the CPU.
model = SentenceTransformer('../../models/all-MiniLM-L6-v2', device='cpu')

#### Testing NER

In [3]:
# Load the spaCy `en_core_web_sm` pipeline; its `doc.ents` are used in this
# section for named-entity recognition.
nlp = en_core_web_sm.load()

In [4]:
# Smoke test: print (text, label) for each entity found in a sample sentence.
doc = nlp("I recently travelled to Paris")
print ([(X.text, X.label_) for X in doc.ents])

[('Paris', 'GPE')]


#### Functions

In [10]:
from scipy.cluster.vq import kmeans

def list_sorted_args(l, reverse=False):
    """Return the positions of ``l`` sorted by the value stored at each one.

    The order is ascending unless ``reverse`` is true. Equal values keep
    their original relative order.
    """
    ranked_pairs = sorted(enumerate(l), key=lambda item: item[1], reverse=reverse)
    return [index for index, _value in ranked_pairs]

def find_highest_similarity_scores(scores, n=3):
    """Return the indices of the top-scoring cluster of ``scores``.

    The scores are ranked in ascending order and the (rank, score) points are
    clustered with k-means. Every rank is then assigned to the centroid whose
    rank coordinate is closest, and the contiguous run of top ranks that
    shares the best score's cluster is returned.

    Args:
        scores: Sequence (list or 1-D array) of similarity scores.
        n: Number of k-means clusters to request. Clamped to ``len(scores)``
            because k-means cannot pick more initial centroids than there
            are observations.

    Returns:
        Indices into ``scores``, highest score first. Empty for empty input.
        k-means starts from random centroids, so the cut-off can vary between
        calls; the index of the maximum score is always included.
    """
    s_len = len(scores)
    if s_len == 0:
        return []

    # Stable ascending argsort: ties keep their original relative order.
    s_idxs = np.argsort(np.asarray(scores, dtype=float), kind="stable").tolist()
    ranks = np.arange(s_len, dtype=float)
    ranked_scores = np.asarray([scores[i] for i in s_idxs], dtype=float)

    # Plain (s_len, 2) ndarray of observations; np.matrix is no longer
    # recommended by NumPy.
    observations = np.column_stack((ranks, ranked_scores))
    codebook, _ = kmeans(observations, min(n, s_len))

    # Cluster membership is decided on the rank coordinate only.
    assigned_clusters = [int(np.abs(codebook[:, 0] - rank).argmin()) for rank in ranks]
    print(assigned_clusters)

    # Walk down from the best rank until the cluster label changes.
    highest_cluster = assigned_clusters[-1]
    highest_idxs = []
    for rank in range(s_len - 1, -1, -1):
        if assigned_clusters[rank] != highest_cluster:
            break
        highest_idxs.append(s_idxs[rank])
    return highest_idxs

#t = [0.12140948, 0.426371, 0.11862079, 0.44534147, 0.17006755, 0.55, 0.00, 0.00, 0.00, 0.00]
# Quick check: display the scores that end up in the top cluster.
t = [0, 0.2, 0.5, 1]
[t[i] for i in find_highest_similarity_scores(t)]

[2, 2, 0, 1]


[1]

In [37]:
def calc_sentence_similarity(msg, candidates):
    """Score every candidate by its cosine similarity to ``msg``.

    ``msg`` and ``candidates`` are embedded with the module-level ``model``;
    the returned flat array holds one similarity per candidate, in order.
    """
    msg_vector = model.encode([msg])
    candidate_vectors = model.encode(candidates)
    # A single query row against all candidates, flattened to one dimension.
    return cosine_similarity(msg_vector, candidate_vectors).flatten()

def has_topic_changed(msg, prev_msg, control_msg, error_threshold=0.02):
    """Decide whether ``msg`` starts a new topic relative to ``prev_msg``.

    ``msg`` is scored against the previous message and a control message.
    The difference (control minus previous) is normalised by the sum of the
    two scores, and the topic counts as changed when that relative
    difference exceeds ``-error_threshold``.

    Passing a negative ``error_threshold`` makes a change harder to trigger,
    because the relative difference then has to exceed a positive value.
    """
    distances = calc_sentence_similarity(msg, [prev_msg, control_msg])
    to_prev = distances[0]
    to_control = distances[1]
    p_dif = (to_control - to_prev) / (to_prev + to_control)

    print(f"topic_change_dist: {distances}\tdistance: {p_dif}")

    changed = p_dif > -error_threshold
    return changed

def topic_change_score(msg, prev_msg, prev_topic_msg, control_msg, alpha=0.375, beta=42.825):
    """Combine message-level and topic-level evidence into a yes/no decision.

    ``msg`` is scored against ``prev_msg`` and against ``prev_topic_msg``,
    each time alongside ``control_msg``. For each pair the difference
    "control minus reference" is taken; the two differences are mixed as
    ``alpha * message - beta * topic`` and passed through
    ``1 / (1 + exp(score))``.

    Returns:
        True when the squashed score is within 0.1 of 1, otherwise False.
    """
    vs_prev_msg = calc_sentence_similarity(msg, [prev_msg, control_msg])
    vs_prev_topic = calc_sentence_similarity(msg, [prev_topic_msg, control_msg])

    message_delta = vs_prev_msg[1] - vs_prev_msg[0]
    topic_delta = vs_prev_topic[1] - vs_prev_topic[0]

    score = (alpha * message_delta) - (beta * topic_delta)
    print (f"score = {score}")

    sigmoid_score = (1.0 / (1.0 + math.exp(score)))
    print (f"sigmoid_score = {sigmoid_score}")

    is_close_to_one = abs(1 - sigmoid_score) <= 0.1
    return is_close_to_one

def tag_sentence(message):
    """Return the noun and number tokens found in ``message``.

    Each sentence of the message is word-tokenised and POS-tagged with NLTK.
    A token is kept when its tag starts with ``NN`` or ``CD`` and it is not
    the greeting "hi" or "hey" (case-insensitive).
    """
    def is_keyword(word, pos):
        return pos[:2] in ['NN', 'CD'] and word.lower() not in ['hi', 'hey']

    nouns = []
    for sentence in sent_tokenize(message):
        tagged_words = nltk.pos_tag(word_tokenize(sentence))
        nouns += [word for word, pos in tagged_words if is_keyword(word, pos)]
    return nouns

def get_keywords(message):
    """Collect keywords from ``message``: named entities first, then nouns.

    Named entities come from the module-level spaCy pipeline ``nlp``. The
    message is then split into sentences, entity words are removed from each
    sentence, and the remaining tokens are POS-tagged with NLTK. Tokens whose
    tag starts with ``NN`` or ``CD`` are appended, except the greetings
    "hi" and "hey".

    Returns:
        A list of strings: entity texts followed by the extra noun and
        number tokens, in sentence order.
    """
    doc = nlp(message)
    ner_tokens = [ent.text for ent in doc.ents]

    # Filter on the individual words of each entity as well as its full
    # text, so the parts of a multi-word entity are not re-added as nouns.
    ner_words = set(ner_tokens)
    for ent_text in ner_tokens:
        ner_words.update(word_tokenize(ent_text))

    # Work on a copy so nouns appended below never grow the entity filter
    # that is applied to later sentences.
    keywords = list(ner_tokens)

    for sentence in sent_tokenize(message):
        non_entity_words = [w for w in word_tokenize(sentence) if w not in ner_words]
        tagged = nltk.pos_tag(non_entity_words)
        keywords.extend(
            word for word, pos in tagged
            if pos[:2] in ('NN', 'CD') and word.lower() not in ('hi', 'hey')
        )

    return keywords

In [38]:
# Example call; arguments in order: current message, previous message,
# message that started the current topic, control message.
score = topic_change_score("is there anything special about it?", 
                           "it's in Pakistan", 
                           "where is Karachi located?",
                          "Hey! How are you doing?")

# topic_change_score returns a boolean, so this prints True or False.
print (score)

score = -0.00048185242339968126
sigmoid_score = 0.5001204631035191
False


#### Main function

In [39]:
# Interactive topic-tracking loop. A topic change is declared when the
# message differs enough from EITHER the previous message OR the message
# that started the current topic.
#
# State carried between turns:
#   keywords                 - nouns of the previous turn
#   c_keywords               - nouns of the current message
#   cur_topic_desc_keywords  - keywords retained from the turn that changed the topic
#   t_keywords               - combined keyword list reported for the current turn
# `topics` and `prev_keywords` are initialised but not read in this cell.
topics = []
keywords = []
c_keywords = []
t_keywords = []
# The control message doubles as the "previous message" for the first turn.
prev_msg = control_msg = "Hey! How are you doing?"
prev_keywords = []

# Keeps track of whether the topic changed in the previous turn
topic_changed = False
cur_topic_desc_keywords = []

# Keeping track of the sentence that changed the topic
# (starts empty, so the first turn is compared against an empty string)
sent_changed_topic = ""

# Main loop
while True:
    msg = input('\nUser: ')
    t1 = time.time()

    # Breaking
    if msg == 'exit':
        break

    # Tagging the sentence
    c_keywords = tag_sentence(msg)

    # Calculating topic changes
    topic_change_from_prev_msg = has_topic_changed(msg, prev_msg, control_msg)
    topic_change_from_prev_topic = has_topic_changed(msg, sent_changed_topic, control_msg, error_threshold=0.05)

    if topic_change_from_prev_msg:
        print ("topic changed from previous message")

    if topic_change_from_prev_topic:
        print ("topic changed from previous topic message")

    # If topic changes
    if topic_change_from_prev_msg or topic_change_from_prev_topic:
#     if topic_change_score(msg, prev_msg, sent_changed_topic, control_msg):
        # New topic: start over with only this message's nouns.
        keywords = c_keywords
        print(f"topic has changed to {keywords}")
        topic_changed = True
        sent_changed_topic = msg

    else:
        if topic_changed:
            # First turn after a topic change.
            topic_changed = False

            # Compare the current sentence against the keywords in the previous turn and keep the most relevant ones
            # until the topic changes
            if len(keywords) > 3:
                scores = calc_sentence_similarity(msg, keywords)
                cur_topic_desc_keywords = [keywords[i] for i in find_highest_similarity_scores(scores)]
            else:
                # Three or fewer keywords: keep them all as topic descriptors.
                cur_topic_desc_keywords = keywords
                keywords = []

            t_keywords = list(set(c_keywords) | set(cur_topic_desc_keywords) | set(keywords))
        else:
            # It's been >2 turns since the topic changed
            t_keywords = list(set(c_keywords) | set(cur_topic_desc_keywords) | set(keywords))

            # If we have unique keywords remaining
            if len(t_keywords) > 0:
                # Order the merged keywords from most to least similar to the message.
                s_scores = calc_sentence_similarity(msg, t_keywords)
                print(f"scores: {s_scores}")
                s_idxs = list_sorted_args(s_scores, reverse=True)
                print(f"s_idxs: {s_idxs}")
                t_keywords = [t_keywords[i] for i in s_idxs]
            # End if
        # End else

        # `keywords` is never set to None in this cell, so this guard has no effect here.
        if keywords is None:
            keywords = []

        print(f"topic has not changed, keywords: {t_keywords}")
        # The current nouns become the "previous turn" keywords.
        keywords = c_keywords

    # End if-else

    print(f"time elapsed: {time.time() - t1}")
    prev_msg = msg

KeyboardInterrupt: Interrupted by user

#### Keyword Pass through

In [40]:
def order_keywords_by_similarity(msg, keywords):
    """Return ``keywords`` reordered from most to least similar to ``msg``.

    The similarity scores and the resulting index order are printed for
    inspection before the reordered list is returned.
    """
    similarity = calc_sentence_similarity(msg, keywords)
    print(f"scores: {similarity}")
    ranking = list_sorted_args(similarity, reverse=True)
    print(f"s_idxs: {ranking}")
    return list(map(keywords.__getitem__, ranking))

In [21]:
# Topic-tracking loop with an added "pass-through" diagnostic: before the
# usual processing, the previous turn's reported keywords are scored against
# the new message and the best-matching ones are printed. In this cell
# `pass_through` is only printed and is not merged into `t_keywords`.
# `topics` is initialised but not read in this cell.
topics = []
keywords = []
c_keywords = []
t_keywords = []
# The control message doubles as the "previous message" for the first turn.
prev_msg = control_msg = "Hey! How are you doing?"
prev_keywords = []

# Keeps track of whether the topic changed in the previous turn
topic_changed = False
cur_topic_desc_keywords = []

# Keeping track of the sentence that changed the topic
# (starts empty, so the first turn is compared against an empty string)
sent_changed_topic = ""

# Main loop
while True:
    msg = input('\nUser: ')
    t1 = time.time()

    # Reset the reported keywords for this turn.
    t_keywords = []

    # Breaking
    if msg == 'exit':
        break

    # Comparing the current message to the previous keywords
    if len(prev_keywords) > 0:
        print ("---------------------------------------")
        # `ordered_keywords` is not used later in this cell; the call still prints its scores.
        ordered_keywords = order_keywords_by_similarity(msg, prev_keywords)
        s_scores = calc_sentence_similarity(msg, prev_keywords)

        pass_through = []
        if len(prev_keywords) >= 3:
            # Request 2 clusters for exactly three keywords, otherwise 3.
            pass_through = [prev_keywords[i] for i in find_highest_similarity_scores(s_scores, 2 if len(prev_keywords) <= 3 else 3)]
        else:
            pass_through = prev_keywords

        print (f"pass_through: {pass_through}")
        print ("---------------------------------------")

    # Tagging the sentence
    c_keywords = tag_sentence(msg)

    # Calculating topic changes
    topic_change_from_prev_msg = has_topic_changed(msg, prev_msg, control_msg)
    topic_change_from_prev_topic = has_topic_changed(msg, sent_changed_topic, control_msg, error_threshold=0.05)

    if topic_change_from_prev_msg:
        print ("topic changed from previous message")

    if topic_change_from_prev_topic:
        print ("topic changed from previous topic message")

    # If topic changes
    if topic_change_from_prev_msg or topic_change_from_prev_topic:
        # Both names are bound to the same list object as c_keywords.
        keywords = t_keywords = c_keywords
        print(f"topic has changed to {keywords}")
        topic_changed = True
        sent_changed_topic = msg

    else:
        if topic_changed:
            # First turn after a topic change.
            topic_changed = False

            # Compare the current sentence against the keywords in the previous turn and keep the most relevant ones
            # until the topic changes
            if len(keywords) > 3:
                scores = calc_sentence_similarity(msg, keywords)
                cur_topic_desc_keywords = [keywords[i] for i in find_highest_similarity_scores(scores)]
            else:
                # Three or fewer keywords: keep them all as topic descriptors.
                cur_topic_desc_keywords = keywords
                keywords = []

            t_keywords = list(set(c_keywords) | set(cur_topic_desc_keywords) | set(keywords))
        else:
            # It's been >2 turns since the topic changed
            t_keywords = list(set(c_keywords) | set(cur_topic_desc_keywords) | set(keywords))

            # If we have unique keywords remaining
            if len(t_keywords) > 0:
                t_keywords = order_keywords_by_similarity(msg, t_keywords)
            # End if
        # End else

        # `keywords` is never set to None in this cell, so this guard has no effect here.
        if keywords is None:
            keywords = []

        print(f"topic has not changed, keywords: {t_keywords}")
        # The current nouns become the "previous turn" keywords.
        keywords = c_keywords

    # End if-else
    print(f"time elapsed: {time.time() - t1}")
    prev_msg = msg
    # Keywords reported this turn feed the pass-through check on the next turn.
    prev_keywords = t_keywords


User: what are you doing?
topic_change_dist: [0.60658103 0.60658103]	distance: 0.0
topic_change_dist: [0.22907016 0.60658103]	distance: 0.3775108754634857
topic changed from previous message
topic changed from previous topic message
topic has changed to []
time elapsed: 0.10200309753417969

User: just enjoying the weather
topic_change_dist: [0.41421357 0.4227327 ]	distance: 0.008519142866134644
topic_change_dist: [0.41421357 0.4227327 ]	distance: 0.008519142866134644
topic changed from previous message
topic changed from previous topic message
topic has changed to ['weather']
time elapsed: 0.09202218055725098

User: yeah, it's a great day to play football
---------------------------------------
scores: [0.16020462]
s_idxs: [0]
pass_through: ['weather']
---------------------------------------
topic_change_dist: [0.37693125 0.27387968]	distance: -0.1030515730381012
topic_change_dist: [0.37693125 0.27387968]	distance: -0.1030515730381012
topic has not changed, keywords: ['football', 'wea

KeyboardInterrupt: Interrupted by user

#### Main function v2

In [46]:
# Interactive keyword-tracking loop (v2).
#
# Each turn: tag the message's nouns, carry over the previous turn's keywords
# that are still relevant ("pass-through"), detect topic changes for
# bookkeeping, then report the combined keywords ordered by similarity to the
# message.
#
# `topics`, `keywords`, `topic_changed` and `cur_topic_desc_keywords` are reset
# here but not used by this loop.
topics = []
keywords = []
c_keywords = []
t_keywords = []
# The control message doubles as the "previous message" for the first turn.
prev_msg = control_msg = "name place animal thing?"# "Hey! How are you doing?"

# Keeping track of the sentence that changed the topic
sent_changed_topic = prev_msg
prev_keywords = []

# Keeps track of whether the topic changed in the previous turn
topic_changed = False
cur_topic_desc_keywords = []

# Chitchat (never updated inside the loop, so the chit-chat thresholds always apply)
is_chit_chat= True

# Main loop
while True:

    # Taking user input
    msg = input('\nUser: ')
    t1 = time.time()

    # Breaking if user enters 'exit'
    if msg == 'exit':
        break

    # Tagging the sentence
    c_keywords = tag_sentence(msg)

    # Removing duplicates
    c_keywords_set = set(c_keywords)
    c_keywords = list(c_keywords_set)

    # Keeping track of the pass through from the previous turns
    pass_through = []

    # Comparing the current message to the previous keywords
    if len(prev_keywords) > 0:
        print ("---------------------------------------")

        # Keywords repeated in this message are already in c_keywords; only
        # the remaining ones are candidates to carry over.
        prev_keywords = list(set(prev_keywords).difference(c_keywords_set))

        # Every previous keyword may have been repeated, leaving nothing to
        # score. The similarity helper needs at least one candidate, so skip
        # it in that case instead of failing.
        s_scores = []
        if prev_keywords:
            s_scores = calc_sentence_similarity(msg, prev_keywords)

            if len(prev_keywords) >= 3:
                # Request 2 clusters for exactly three keywords, otherwise 3.
                n_clusters = 2 if len(prev_keywords) == 3 else 3
                pass_through = [prev_keywords[i] for i in find_highest_similarity_scores(s_scores, n_clusters)]
            else:
                pass_through = prev_keywords

        print (f"pass_through: {pass_through}")
        print (f"s_scores: {s_scores}")
        print ("---------------------------------------")
    # End if

    # Calculating topic changes (negative thresholds make a change harder to trigger)
    topic_change_from_prev_msg = has_topic_changed(msg, prev_msg, control_msg, error_threshold=-0.04 if is_chit_chat else 0.02)
    topic_change_from_prev_topic = has_topic_changed(msg, sent_changed_topic, control_msg, error_threshold = -0.09 if is_chit_chat else 0.05)

    if topic_change_from_prev_msg:
        print ("topic changed from previous message")

    if topic_change_from_prev_topic:
        print ("topic changed from previous topic message")

    # If topic changes
    if topic_change_from_prev_msg or topic_change_from_prev_topic:
        sent_changed_topic = msg

    # Adding new keywords
    t_keywords = c_keywords + pass_through

    if len(t_keywords) > 0:
        t_keywords = order_keywords_by_similarity(msg, t_keywords)
    print (f"t_keywords: {t_keywords}")

    # Updating previous messages and state for the next turn
    print(f"time elapsed: {time.time() - t1}")
    prev_msg = msg
    prev_keywords = t_keywords
    print ("#" * 100)
# End while


User: Hi there, what's up?
topic_change_dist: [0.14649999 0.14650002]	distance: 1.0171440578687907e-07
topic_change_dist: [0.14649999 0.14650002]	distance: 1.0171440578687907e-07
t_keywords: []
time elapsed: 0.08899521827697754
####################################################################################################

User: nothing much, just enjoying the weather
topic_change_dist: [0.37994462 0.14813232]	distance: -0.4389744699001312
topic_change_dist: [0.14813235 0.14813238]	distance: 1.0059355304292694e-07
scores: [0.47469684 0.5217279 ]
s_idxs: [1, 0]
t_keywords: ['nothing', 'weather']
time elapsed: 0.13300037384033203
####################################################################################################

User: I love rain
---------------------------------------
pass_through: ['weather', 'nothing']
s_scores: [0.39766064 0.10055146]
---------------------------------------
topic_change_dist: [0.29025584 0.12285186]	distance: -0.40523087978363037
topic_change_

KeyboardInterrupt: Interrupted by user

### Backup for the current version of the approach

Remember to adapt the code below so that the algorithm uses the percentage difference.

In [None]:
# Backup snapshot of the keyword-tracking loop. It calls has_topic_changed()
# with that function's default threshold for the previous message and 0.05
# for the topic-starting message.
#
# `topics`, `keywords`, `topic_changed` and `cur_topic_desc_keywords` are reset
# here but not used by this loop.
topics = []
keywords = []
c_keywords = []
t_keywords = []
# The control message doubles as the "previous message" for the first turn.
prev_msg = control_msg = "name place animal thing?"# "Hey! How are you doing?"

# Keeping track of the sentence that changed the topic
sent_changed_topic = prev_msg
prev_keywords = []

# Keeps track of whether the topic changed in the previous turn
topic_changed = False
cur_topic_desc_keywords = []

# Main loop
while True:

    # Taking user input
    msg = input('\nUser: ')
    t1 = time.time()

    # Breaking if user enters 'exit'
    if msg == 'exit':
        break

    # Tagging the sentence
    c_keywords = tag_sentence(msg)

    # Removing duplicates
    c_keywords_set = set(c_keywords)
    c_keywords = list(c_keywords_set)

    # Keeping track of the pass through from the previous turns
    pass_through = []

    # Comparing the current message to the previous keywords
    if len(prev_keywords) > 0:
        print ("---------------------------------------")

        # Drop keywords repeated in this message; they are already in c_keywords.
        # NOTE(review): this can leave prev_keywords empty, and the similarity
        # call below presumably fails on an empty candidate list - confirm.
        prev_keywords = list(set(prev_keywords).difference(c_keywords_set)) 
        s_scores = calc_sentence_similarity(msg, prev_keywords)

        if len(prev_keywords) >= 3:
            # Request 2 clusters for exactly three keywords, otherwise 3.
            pass_through = [prev_keywords[i] for i in find_highest_similarity_scores(s_scores, 2 if len(prev_keywords) <= 3 else 3)]
        else:
            pass_through = prev_keywords

        print (f"pass_through: {pass_through}")
        print (f"s_scores: {s_scores}")
        print ("---------------------------------------")
    # End if

    # Calculating topic changes
    topic_change_from_prev_msg = has_topic_changed(msg, prev_msg, control_msg)
    topic_change_from_prev_topic = has_topic_changed(msg, sent_changed_topic, control_msg, error_threshold=0.05)

    if topic_change_from_prev_msg:
        print ("topic changed from previous message")

    if topic_change_from_prev_topic:
        print ("topic changed from previous topic message")

    # If topic changes
    if topic_change_from_prev_msg or topic_change_from_prev_topic:    
        sent_changed_topic = msg

    # Adding new keywords
    t_keywords = c_keywords + pass_through

    if len(t_keywords) > 0:
        t_keywords = order_keywords_by_similarity(msg, t_keywords)
    print (f"t_keywords: {t_keywords}")

    # Updating previous messages and state for the next turn
    print(f"time elapsed: {time.time() - t1}")
    prev_msg = msg
    prev_keywords = t_keywords
    print ("#" * 100)
# End while