In [1]:
import random
import string  # for punctuation removal
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
import nltk

In [3]:

# Fetch the NLTK resources this notebook relies on.
# 'punkt' / 'punkt_tab': tokenizer data used by nltk.sent_tokenize / nltk.word_tokenize below.
nltk.download('punkt')
nltk.download('punkt_tab')
# 'wordnet' / 'omw-1.4': WordNet data used by the WordNetLemmatizer below.
nltk.download('wordnet')
# Last bare expression in the cell: its return value is what the notebook displays.
nltk.download('omw-1.4')

[nltk_data] Downloading package punkt to /home/atwal/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/atwal/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to /home/atwal/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/atwal/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [5]:
# Load the knowledge-base text and lower-case it so later matching is
# case-insensitive.  The encoding is pinned to UTF-8 so the decoded text does
# not depend on the platform's default locale; errors='ignore' still drops any
# bytes that cannot be decoded.
with open("panjabdcsa.txt", 'r', encoding='utf-8', errors='ignore') as f:
    raw = f.read().lower()

# Sentence list is the corpus searched by response(); word list is only used
# for the summary count printed below.
sent_tokens = nltk.sent_tokenize(raw)
word_tokens = nltk.word_tokenize(raw)

print("Total sentences:", len(sent_tokens))
print("Total words:", len(word_tokens))


Total sentences: 54
Total words: 1475


In [6]:
from nltk.stem import WordNetLemmatizer

# Single shared lemmatizer instance, used by LemTokens.
lemmer = WordNetLemmatizer()

def LemTokens(tokens):
    """Return a list with each token in `tokens` passed through `lemmer.lemmatize`."""
    lemmas = []
    for tok in tokens:
        lemmas.append(lemmer.lemmatize(tok))
    return lemmas

# Translation table for str.translate: every ASCII punctuation code point maps
# to None, which deletes that character.
remove_punct_dict = dict.fromkeys(map(ord, string.punctuation))

def LemNormalize(text):
    """Lower-case `text`, delete punctuation, tokenize it and lemmatize the tokens.

    Passed to TfidfVectorizer as its `tokenizer` in response().
    """
    cleaned = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(cleaned)
    return LemTokens(words)


In [7]:
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up", "hey")
GREETING_RESPONSES = ["hi", "hey", "hello", "I am glad you are talking to me!"]


def greeting(sentence):
    """Return a random greeting reply if `sentence` contains a greeting, else None.

    Matching is case-insensitive and works on whole words: punctuation attached
    to the edges of a word ("hello!", "hi,") is ignored, and multi-word entries
    of GREETING_INPUTS such as "what's up" are matched as a phrase.
    """
    # Strip edge punctuation from each word; internal characters such as the
    # apostrophe in "what's" are kept.
    words = [w.strip(string.punctuation) for w in sentence.lower().split()]
    # Pad with spaces so a phrase only matches on word boundaries
    # (e.g. "hi" must not match inside "this").
    normalized = " " + " ".join(w for w in words if w) + " "
    for phrase in GREETING_INPUTS:
        if " " + phrase + " " in normalized:
            return random.choice(GREETING_RESPONSES)
    return None


In [8]:
# Index into sent_tokens of the sentence most recently returned by response();
# None when the last query had no match.  Read and advanced by tell_me_more().
last_idx = None


def response(user_response):
    """Return the corpus sentence most similar to `user_response`.

    The query is vectorised together with the sentences in `sent_tokens`
    using TF-IDF (tokenised by LemNormalize, English stop words removed) and
    compared to every corpus sentence by cosine similarity.

    Side effects: sets the global `last_idx` to the index of the returned
    sentence, or to None when nothing matched.  `sent_tokens` is not modified.

    Returns:
        "Here’s what I found: <sentence>" for the best match, or a fallback
        message when no corpus sentence shares any term with the query.
    """
    global last_idx

    fallback = "Hmm... I am not sure about that. Could you ask differently?"
    if not sent_tokens:
        # Nothing to search; also avoids argmax over an empty array.
        last_idx = None
        return fallback

    # Work on a new list so the shared corpus is never mutated, even if
    # vectorisation raises or the query equals an existing sentence.
    documents = sent_tokens + [user_response]

    # TF-IDF
    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(documents)

    # Compare the query (last row) against corpus rows only, so the query can
    # never be selected as its own best match.
    scores = cosine_similarity(tfidf[-1], tfidf[:-1]).ravel()
    idx = int(scores.argmax())

    if scores[idx] <= 0:
        last_idx = None
        return fallback

    last_idx = idx
    return "Here’s what I found: " + sent_tokens[idx]

In [9]:
def tell_me_more():
    """Return the sentence that follows the one last returned by response().

    Advances the global `last_idx` by one on success.  When there is no
    previous match (`last_idx` is None) or the last match was the final
    sentence in `sent_tokens`, returns an apology and leaves `last_idx` as is.
    """
    global last_idx
    nothing_more = "I don’t have more details on that right now."

    if last_idx is None:
        return nothing_more

    following = last_idx + 1
    if following >= len(sent_tokens):
        return nothing_more

    last_idx = following
    return "Sure! Here's more: " + sent_tokens[last_idx]


In [10]:
# Interactive chat loop: reads one line per turn and dispatches it to the
# matching handler until the user types 'bye'.
flag = True
print("Chatbot: Hi! I am UniBot 🤖. I can answer your queries about Panjab University.\nType 'bye' to exit.")

while flag:
    # Strip surrounding whitespace so exact-match commands such as 'bye' and
    # 'thanks' still work when the user adds a stray space.
    user_response = input("You: ").strip().lower()

    if user_response == 'bye':
        flag = False
        print("Chatbot: Goodbye! Take care.")
    elif user_response in ('thanks', 'thank you'):
        print("Chatbot: You're welcome!")
    elif "tell me more" in user_response:
        print("Chatbot:", tell_me_more())
    else:
        # Call greeting() once and reuse the result instead of calling it a
        # second time just to print it.
        greeting_reply = greeting(user_response)
        if greeting_reply is not None:
            print("Chatbot:", greeting_reply)
        else:
            print("Chatbot:", response(user_response))

Chatbot: Hi! I am UniBot 🤖. I can answer your queries about Panjab University.
Type 'bye' to exit.




Chatbot: Here’s what I found: panjab university | brief history and present infrastructure
-------------------------------------------------------------
- one of the oldest universities in india, the panjab university (pu) initiated at lahore in 1882, has a long tradition of pursuing excellence in teaching and research in science and technology, humanities, social sciences, performing arts and sports.




Chatbot: Sure! Here's more: the university supports excellence and innovation in academic programmes, promotes excellence in research, scholarship and teaching.
Chatbot: Here’s what I found: department of computer science & applications (dcsa) | overview
---------------------------------------------------------------
- the computer culture at the panjab university dates back to 1966. an independent centre for computer science and applications (now a full fledged department) was set-up in 1983. the department aims at ingraining the spirit of ingenuity, innovativeness and technical competence in the students through rigorous competition and regular guidance.
Chatbot: Here’s what I found: master of computer applications (mca) (2 years full time course) in self financed mode and master of science (computer science) under the framework of the hon's school (02 years full time course).
Chatbot: Here’s what I found: - research lab with specialized computing systems.
Chatbot: Here’s what I foun