In [23]:
# Install the medium English spaCy model that a later cell loads via spacy.load('en_core_web_md').
!python -m spacy download en_core_web_md

# Mount Google Drive so files under /content/drive are readable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Collecting en-core-web-md==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
# Folder on the mounted Google Drive that holds the chatbot's data files.
data_root='/content/drive/My Drive/wiki'

# Read the raw intents JSON text. The context manager closes the file handle
# right away instead of leaving it open until garbage collection, and the
# explicit UTF-8 encoding avoids depending on the runtime's locale default.
with open(data_root+'/intents.json', encoding='utf-8') as intents_file:
    data_file = intents_file.read()

In [25]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Fetch the NLTK resources used by preprocess_text: tokenizer models, the
# English stop-word list, and the WordNet data used for lemmatization.
# 'punkt_tab' is the tokenizer resource that word_tokenize loads on
# NLTK >= 3.8.2; 'punkt' is kept for older releases.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(nltk_resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [26]:
def preprocess_text(text):
    """Normalize raw text into a space-separated string of lemmas.

    The text is lower-cased and split with NLTK's ``word_tokenize``; tokens
    found in NLTK's English stop-word list are dropped, and each remaining
    token is passed through ``WordNetLemmatizer.lemmatize``.

    Args:
        text: The raw input string.

    Returns:
        The surviving lemmatized tokens joined by single spaces (an empty
        string when no token survives).
    """
    raw_tokens = word_tokenize(text.lower())
    english_stop_words = set(stopwords.words('english'))
    kept_tokens = [token for token in raw_tokens if token not in english_stop_words]

    wordnet_lemmatizer = WordNetLemmatizer()
    return ' '.join(wordnet_lemmatizer.lemmatize(token) for token in kept_tokens)

def calculate_similarity(text1, text2):
    """Return the TF-IDF cosine similarity between two texts.

    Both texts are normalized with ``preprocess_text``, vectorized together
    by a fresh ``TfidfVectorizer`` fitted on just these two documents, and
    compared with ``cosine_similarity``.

    Args:
        text1: First raw text.
        text2: Second raw text.

    Returns:
        The similarity score as a float. 0.0 is returned when either text has
        no tokens the vectorizer can use (for example blank input or text made
        up only of stop words and punctuation).
    """
    processed_texts = [preprocess_text(text1), preprocess_text(text2)]

    vectorizer = TfidfVectorizer()

    # A document without analyzable tokens has an all-zero TF-IDF vector, so
    # its similarity to anything is 0. When *both* documents are token-free,
    # fit_transform raises ValueError ("empty vocabulary"), so short-circuit
    # here instead of crashing the caller.
    analyzer = vectorizer.build_analyzer()
    if not all(analyzer(document) for document in processed_texts):
        return 0.0

    tfidf_matrix = vectorizer.fit_transform(processed_texts)
    similarity_matrix = cosine_similarity(tfidf_matrix)

    # Row 0 / column 1 is the similarity between the first and second text.
    return float(similarity_matrix[0][1])


In [27]:
import json
import spacy

# Parse the raw JSON text held in data_file; the functions below read its 'intents' list.
intents = json.loads(data_file)
# Load the spaCy pipeline that weighted_similarity uses to build documents.
nlp = spacy.load('en_core_web_md')

def weighted_similarity(text1, text2):
    """Return the spaCy similarity of two texts, scaled by their length ratio.

    The score from ``Doc.similarity`` is multiplied by
    ``min(len(text1), len(text2)) / max(len(text1), len(text2))``, so texts of
    very different lengths are penalized.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The length-weighted similarity; 0.0 when either text is empty.
    """
    shorter_length = min(len(text1), len(text2))
    longer_length = max(len(text1), len(text2))

    # An empty text makes the length ratio 0, and when both texts are empty
    # the ratio would be 0 / 0 and raise ZeroDivisionError; return 0.0 for
    # both cases without running the model.
    if shorter_length == 0:
        return 0.0

    similarity = nlp(text1).similarity(nlp(text2))
    return similarity * shorter_length / longer_length

def get_intent(user_input):
    """Pick the intent tag whose pattern is most similar to the user's text.

    Every pattern of every entry in ``intents['intents']`` is scored against
    the lower-cased input with ``calculate_similarity``. The first pattern
    that reaches the highest score strictly above zero decides the tag.

    Args:
        user_input: The text typed by the user.

    Returns:
        The winning intent's ``'tag'`` value, or ``None`` when no pattern
        scores above zero.
    """
    candidates = (
        (entry, phrase)
        for entry in intents['intents']
        for phrase in entry['patterns']
    )

    best_tag, best_score = None, 0
    for entry, phrase in candidates:
        score = calculate_similarity(user_input.lower(), phrase.lower())
        # Strict comparison keeps the earliest pattern on ties.
        if score > best_score:
            best_score, best_tag = score, entry['tag']

    return best_tag

def get_response(intent_tag):
    """Look up the reply for an intent tag.

    Args:
        intent_tag: The tag to search for in ``intents['intents']``.

    Returns:
        The first entry of ``'responses'`` for the first intent whose
        ``'tag'`` equals ``intent_tag``, or ``None`` when no intent matches.
    """
    matching_intent = next(
        (entry for entry in intents['intents'] if entry['tag'] == intent_tag),
        None,
    )
    if matching_intent is None:
        return None
    return matching_intent['responses'][0]

def chat():
    """Run the interactive console loop of the support chatbot.

    Prints a greeting, then repeatedly reads a line from the user. Typing
    ``exit`` (any casing) prints a farewell and ends the loop. Blank input is
    ignored and the prompt is shown again. Any other message is mapped to an
    intent with ``get_intent`` and answered with ``get_response``; when no
    reply is found, a fallback message is printed instead of ``None``.
    """
    print("Hello! Welcome to the Wikipedia editing support chatbot.")
    while True:
        user_input = input("You: ").strip()
        if user_input.lower() == 'exit':
            print("Goodbye! Don't hesitate to return if you need further assistance.")
            break
        if not user_input:
            # Nothing to match against; ask again instead of querying the matcher.
            continue

        intent_tag = get_intent(user_input)
        response = get_response(intent_tag)
        if response is None:
            # No intent matched (or the tag has no entry): avoid printing "Bot: None".
            response = "Sorry, I didn't understand that. Could you rephrase your question?"
        print("Bot:", response)

# Start the interactive chat loop when this code runs as the main program
# (running this cell in the notebook launches the chat immediately).
if __name__ == "__main__":
    chat()


Hello! Welcome to the Wikipedia editing support chatbot.
You: hello
Bot: Hello! Welcome to the Wikipedia editing support chatbot. How can I assist you today?
You: how to edit pages
Bot: To edit the whole page, click the 'edit' tab at the top of the page.
You: why should i add citations
Bot: To show content is verifiable, provide an inline citation when adding content. See WP:Citing sources for instructions or ask for help at the Help desk.
You: how to add citations
Bot: To show content is verifiable, provide an inline citation when adding content. See WP:Citing sources for instructions or ask for help at the Help desk.
You: exit
Goodbye! Don't hesitate to return if you need further assistance.


In [28]:
# Quick manual check of calculate_similarity on two similarly worded requests.
text1 = "how do i develop a chatbot"
text2 = "help me develop a chatbot"
similarity = calculate_similarity(text1, text2)
print("Similarity Score:", similarity)

Similarity Score: 0.7092972666062739
