In [4]:
!pip install pyspellchecker


Collecting pyspellchecker
  Downloading pyspellchecker-0.8.2-py3-none-any.whl.metadata (9.4 kB)
Downloading pyspellchecker-0.8.2-py3-none-any.whl (7.1 MB)
   ---------------------------------------- 0.0/7.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/7.1 MB ? eta -:--:--
   - -------------------------------------- 0.3/7.1 MB ? eta -:--:--
   -- ------------------------------------- 0.5/7.1 MB 1.4 MB/s eta 0:00:05
   ---- ----------------------------------- 0.8/7.1 MB 1.2 MB/s eta 0:00:06
   ----- ---------------------------------- 1.0/7.1 MB 1.2 MB/s eta 0:00:06
   ------- -------------------------------- 1.3/7.1 MB 1.1 MB/s eta 0:00:06
   -------- ------------------------------- 1.6/7.1 MB 1.1 MB/s eta 0:00:05
   ---------- ----------------------------- 1.8/7.1 MB 1.2 MB/s eta 0:00:05
   ----------- ---------------------------- 2.1/7.1 MB 1.2 MB/s eta 0:00:05
   -------------- ------------------------- 2.6/7.1 MB 1.3 MB/s eta 0:00:04
   ---------------- ----------

In [None]:
import random
import string
import time
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from spellchecker import SpellChecker

# Download required NLTK data
nltk.download('punkt', quiet=True)
# Recent NLTK releases load the tokenizer models used by word_tokenize from
# 'punkt_tab' rather than the pickled 'punkt' package, so fetch both.
nltk.download('punkt_tab', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('stopwords', quiet=True)

class HighAccuracyChatBot:
    """Retrieval chatbot that answers from ``User:``/``Bot:`` pairs stored in a text file.

    Stored questions are vectorized with TF-IDF after spell correction,
    stop-word removal and lemmatization. A user message is answered with the
    reply of the most similar stored question when the cosine similarity
    reaches a length-dependent threshold. Greetings, farewells and thanks are
    detected by whole-word keyword matching before any similarity lookup.
    """

    # Maps every ASCII punctuation character to a space so that inputs such as
    # "bye!" or "hi," split into clean words.
    _PUNCT_TO_SPACE = str.maketrans(string.punctuation, " " * len(string.punctuation))

    def __init__(self, filepath):
        """Load QA pairs from ``filepath`` and fit the TF-IDF model on the questions.

        Args:
            filepath: path of a UTF-8 text file with ``User:`` / ``Bot:`` lines.

        Raises:
            OSError: propagated from ``open`` when the file cannot be read.
            ValueError: if the file contains no ``User:``/``Bot:`` pair.
        """
        # Initialize NLP components
        self.lemmer = WordNetLemmatizer()
        self.stop_words = set(stopwords.words('english'))
        self.spell = SpellChecker()

        # Load and process QA pairs
        self.qa_pairs = self._load_data(filepath)
        if not self.qa_pairs:
            # Fail with an actionable message instead of the vectorizer's
            # generic error about an empty vocabulary.
            raise ValueError(
                f"No 'User:'/'Bot:' question-answer pairs found in {filepath!r}"
            )
        self.questions = [q.lower() for q, _ in self.qa_pairs]

        # Initialize vectorizer and create TF-IDF matrix
        self.vectorizer = TfidfVectorizer(tokenizer=self._normalize_text)
        self.tfidf_matrix = self.vectorizer.fit_transform(self.questions)

        # Conversation parameters
        self.greeting_inputs = ("hello", "hi", "greetings", "hey", "hola")
        self.greeting_responses = ["Hello!", "Hi there!", "Greetings!", "Hi! How can I help?"]
        self.exit_commands = ['bye', 'goodbye', 'exit', 'quit']
        self.thanks_responses = ["You're welcome!", "Happy to help!", "My pleasure!"]
        self.fallback_responses = [
            "I'm not sure I understand. Could you rephrase that?",
            "I didn't quite get that. Can you ask differently?",
            "I'm still learning. Could you try another way to ask that?"
        ]

    def _load_data(self, filepath):
        """Load and parse QA pairs from file.

        Blank lines are dropped first; a pair is then a line starting with
        ``user:`` directly followed by a line starting with ``bot:`` (prefixes
        are matched case-insensitively).

        Returns:
            list of ``(question, answer)`` tuples in file order, with the
            prefixes and surrounding whitespace removed.
        """
        with open(filepath, "r", encoding="utf-8") as f:
            lines = [line.strip() for line in f if line.strip()]

        pairs = []
        for current, following in zip(lines, lines[1:]):
            if current.lower().startswith("user:") and following.lower().startswith("bot:"):
                # 5 == len("user:"), 4 == len("bot:")
                pairs.append((current[5:].strip(), following[4:].strip()))
        return pairs

    def _normalize_text(self, text):
        """Advanced text normalization with spell correction.

        Used as the TF-IDF tokenizer: lowercases, deletes punctuation,
        tokenizes, spell-corrects each token, then drops stop words and
        non-alphabetic tokens and lemmatizes what remains.

        Returns:
            list of normalized tokens.
        """
        # Remove punctuation and lowercase
        text = text.lower().translate(str.maketrans('', '', string.punctuation))

        # Tokenize and correct spelling; keep the original word when the
        # spell checker has no suggestion (falsy result).
        words = nltk.word_tokenize(text)
        corrected_words = [self.spell.correction(word) or word for word in words]

        # Lemmatize and remove stopwords
        return [self.lemmer.lemmatize(word) for word in corrected_words
                if word not in self.stop_words and word.isalpha()]

    def _get_best_match(self, user_input):
        """Find best matching question with similarity score.

        Returns:
            ``(index, score)`` where ``index`` points into ``self.qa_pairs``
            and ``score`` is the cosine similarity of that question.
        """
        input_tfidf = self.vectorizer.transform([user_input])
        similarity_scores = cosine_similarity(input_tfidf, self.tfidf_matrix)
        # Only one query row exists, so the flat argmax is the column index.
        best_match_idx = np.argmax(similarity_scores)
        best_score = similarity_scores[0, best_match_idx]
        return best_match_idx, best_score

    def _split_words(self, text):
        """Lowercase ``text`` and split it into words, treating punctuation as spaces."""
        return text.lower().translate(self._PUNCT_TO_SPACE).split()

    def _mentions_any(self, user_input, phrases):
        """Return True if any of ``phrases`` occurs in ``user_input`` as whole word(s).

        Matching on word boundaries prevents keywords from firing inside
        longer words ("hi" in "this", "quit" in "quite").
        """
        padded_input = f" {' '.join(self._split_words(user_input))} "
        for phrase in phrases:
            phrase_words = self._split_words(phrase)
            if phrase_words and f" {' '.join(phrase_words)} " in padded_input:
                return True
        return False

    def respond(self, user_input):
        """Generate response to user input.

        Checks, in order: greeting keywords, exit commands, thanks, and finally
        the best TF-IDF match. Returns a random fallback message when the best
        match scores below the threshold.
        """
        user_input = user_input.strip().lower()

        # Check for greetings
        if self._mentions_any(user_input, self.greeting_inputs):
            return random.choice(self.greeting_responses)

        # Check for exit commands
        if self._mentions_any(user_input, self.exit_commands):
            return f"Goodbye! Have a great day! [{time.strftime('%I:%M %p')}]"

        # Check for thanks ("thank", "thanks", "thankful", ...)
        if any(word.startswith("thank") for word in self._split_words(user_input)):
            return random.choice(self.thanks_responses)

        # Find best matching question
        idx, score = self._get_best_match(user_input)

        # Dynamic threshold - longer queries get higher thresholds.
        # The value starts at 0.5 and is capped at 0.7; the 0.4 floor is never
        # reached because the base is already 0.5.
        threshold = min(0.7, max(0.4, 0.5 + len(user_input.split())*0.015))

        if score >= threshold:
            return self.qa_pairs[idx][1]
        return random.choice(self.fallback_responses)

# Main program: interactive console loop around HighAccuracyChatBot.
if __name__ == "__main__":
    print("Initializing high-accuracy chatbot...")
    # NOTE(review): machine-specific absolute path - the notebook only runs
    # where this file exists; consider a configurable/relative location.
    bot = HighAccuracyChatBot("C://Users//ALWAYSRAMESH//Downloads//chatbot_copy.txt")

    # Punctuation becomes whitespace so "bye!" still yields the word "bye".
    punct_to_space = str.maketrans(string.punctuation, " " * len(string.punctuation))

    print("\nChatbot ready! Type your message or 'bye' to exit.")
    while True:
        try:
            user_input = input("> ").strip()
            if not user_input:
                continue

            response = bot.respond(user_input)
            print(response)

            # Leave only when an exit command was typed as a whole word, so
            # that e.g. "quite" does not end the session via "quit".
            typed_words = user_input.lower().translate(punct_to_space).split()
            if any(cmd in typed_words for cmd in bot.exit_commands):
                break

        except (KeyboardInterrupt, EOFError):
            # EOFError: stdin was closed; retrying input() would fail forever.
            print("\nChatbot session ended.")
            break
        except Exception as exc:
            print("Sorry, I encountered an error. Please try again.")
            # Surface the cause instead of silently discarding it.
            print(f"({type(exc).__name__}: {exc})")

Initializing high-accuracy chatbot...




In [None]:
from flask import Flask, render_template, request
import string
import warnings
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.metrics.pairwise import cosine_similarity 

# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')


nltk.download('popular', quiet=True)
nltk.download('punkt')
# Recent NLTK releases load the sentence/word tokenizer models from
# 'punkt_tab' rather than the pickled 'punkt' package, so fetch both.
nltk.download('punkt_tab', quiet=True)
nltk.download('wordnet')

app = Flask(__name__)

# Load chatbot data.
# An explicit encoding keeps decoding independent of the platform default;
# undecodable bytes are still skipped via errors='ignore'.
with open("chatbot_copy.txt", 'r', encoding='utf-8', errors='ignore') as f:
    raw = f.read().lower()

# Corpus split into sentences (the candidate replies) and into words.
sent_tokens = nltk.sent_tokenize(raw)
word_tokens = nltk.word_tokenize(raw)

# Initialize Lemmatizer
lemmer = nltk.stem.WordNetLemmatizer()


def LemTokens(tokens):
    """Lemmatize every token with the module-level ``lemmer`` and return them as a list."""
    lemmas = []
    for tok in tokens:
        lemmas.append(lemmer.lemmatize(tok))
    return lemmas


# Translation table for str.translate that deletes every ASCII punctuation character.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}


def LemNormalize(text):
    """Lowercase ``text``, strip punctuation, tokenize with NLTK and lemmatize the tokens."""
    cleaned = text.lower().translate(remove_punct_dict)
    tokens = nltk.word_tokenize(cleaned)
    return LemTokens(tokens)


def response(user_response):
    """Return the corpus sentence most similar to ``user_response``.

    The corpus sentences plus the user's text are vectorized with TF-IDF and
    the user's vector is compared against every corpus sentence by cosine
    similarity.

    Args:
        user_response: the text typed by the user.

    Returns:
        The best-matching sentence from ``sent_tokens``, or an apology string
        when the corpus is empty or nothing has a non-zero similarity.
    """
    fallback = "I am sorry! I don't understand you."
    if not sent_tokens:
        return fallback

    # Build a per-call list instead of appending to / popping from the shared
    # module-level corpus: concurrent requests cannot interfere, and a failure
    # while vectorizing cannot leave the user's text stuck in the corpus.
    corpus = sent_tokens + [user_response]
    Tfidfvec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = Tfidfvec.fit_transform(corpus)

    # Compare the user's row (last) against the corpus rows only, so the
    # user's own text can never be picked as the answer, even on a tie.
    vals = cosine_similarity(tfidf[-1], tfidf[:-1]).ravel()
    idx = int(vals.argmax())

    if vals[idx] == 0:
        return fallback
    return sent_tokens[idx]


@app.route('/')
def home():
    """Serve the root URL by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page


@app.route('/get', methods=['POST'])
def chatbot_response():
    """Answer a POST to ``/get`` with the bot's reply to the ``msg`` form field."""
    return response(request.form['msg'])


# Start the Flask app (debug mode off) only when this code runs as the main module.
if __name__ == '__main__':
    app.run(debug=False)
