In [None]:
import nltk
import numpy as np
import random
import string
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

# Download the NLTK resources this module relies on (tokenizer models and
# WordNet data for the lemmatizer). Recent NLTK releases load 'punkt_tab'
# for word_tokenize, while older ones use 'punkt', so both are requested.
# quiet=True suppresses the downloader's progress output.
for _resource in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(_resource, quiet=True)

class NLPChatbot:
    """Rule-based chatbot with a TF-IDF retrieval fallback.

    A reply is chosen, in order, from: canned greetings, a farewell for exit
    phrases, a few fixed question patterns, name-introduction handling, an
    occasional personalized filler, and finally the corpus sentence most
    similar to the user's message.
    """

    # Reply sent whenever the user asks to end the conversation.
    FAREWELL = "Goodbye! Have a nice day!"

    # Minimum cosine similarity for a corpus sentence to count as an answer.
    SIMILARITY_THRESHOLD = 0.1

    # All patterns are applied to the lower-cased user input.
    _EXIT_PATTERN = re.compile(r'\b(bye|goodbye|exit|quit)\b')
    _IDENTITY_PATTERN = re.compile(r'\bwho are you\b')
    _HELP_PATTERN = re.compile(r'\b(what can you do|help me)\b')
    _NAME_PATTERN = re.compile(r'my name is (\w+)')

    def __init__(self):
        self.name = "ChatBot"
        # Per-conversation memory; currently only holds 'user_name'.
        self.context = {}
        self.greeting_inputs = ["hello", "hi", "greetings", "sup", "what's up", "hey"]
        self.greeting_responses = ["Hi", "Hey", "Hello there", "Hi there", "Hello! How can I help you today?"]

        # Initialize lemmatizer
        self.lemmatizer = nltk.stem.WordNetLemmatizer()

        # Load corpus (can be replaced with a larger dataset)
        self.corpus = self.load_corpus()

    def load_corpus(self):
        """
        Load the knowledge corpus for the chatbot. This can be replaced with a larger dataset.

        Returns:
            A list of sentences that retrieval-based answers are drawn from.
        """
        return [
            "A chatbot is a software application used to conduct an online chat conversation.",
            "Natural language processing is a subfield of linguistics, computer science, and artificial intelligence.",
            "NLP is used to help computers understand, interpret and manipulate human language.",
            "Machine learning algorithms use computational methods to learn information directly from data.",
            "Python is a high-level, interpreted programming language.",
            "NLTK is a leading platform for building Python programs to work with human language data.",
            "AI chatbots use artificial intelligence to simulate human conversation.",
            "Chatbots can be used for customer service, information delivery, and entertainment.",
            "The Turing Test is a test of a machine's ability to exhibit intelligent behavior.",
            "Deep learning is a subset of machine learning based on artificial neural networks.",
            "Intent recognition helps chatbots understand the purpose of user messages.",
            "Entity extraction identifies key pieces of information from text input.",
            "Conversational AI systems can maintain context across multiple exchanges.",
            "Dialog management systems track the state of a conversation.",
            "Language generation creates natural-sounding responses for chatbots."
        ]

    def lemmatize_tokens(self, tokens):
        """Lemmatize tokens to their root form."""
        return [self.lemmatizer.lemmatize(token) for token in tokens]

    def preprocess_text(self, text):
        """
        Preprocess text by tokenizing, removing punctuation, and lemmatizing.

        Args:
            text: Raw text; it is lower-cased before tokenizing.

        Returns:
            A list of lemmatized tokens with punctuation-only tokens removed.
        """
        tokens = nltk.word_tokenize(text.lower())

        # Drop tokens made up entirely of punctuation. A per-character check
        # is needed because the tokenizer can emit multi-character tokens
        # such as "..." that a substring test against string.punctuation
        # would let through.
        punctuation = set(string.punctuation)
        tokens = [
            token for token in tokens
            if not all(char in punctuation for char in token)
        ]

        return self.lemmatize_tokens(tokens)

    def _is_exit_request(self, user_input):
        """Return True if the input contains one of the exit words."""
        return self._EXIT_PATTERN.search(user_input.lower()) is not None

    def _is_greeting(self, text):
        """Return True if lower-cased *text* is one of the known greetings.

        Surrounding whitespace and punctuation are ignored, so "Hello!" and
        " hi " are recognised as well as the bare word.
        """
        if text in self.greeting_inputs:
            return True
        trimmed = text.strip(string.punctuation + string.whitespace)
        return trimmed in self.greeting_inputs

    def _answer_from_corpus(self, user_input):
        """Return the templated best-matching corpus sentence, or a fallback."""
        # With nothing to compare against, similarity cannot be computed.
        if not self.corpus:
            return self.get_fallback_response()

        # The vectorizer is fit on the corpus plus the query on every call,
        # so the query's own terms contribute to the vocabulary and weights.
        tfidf_matrix = TfidfVectorizer().fit_transform(self.corpus + [user_input])

        # Last row is the query; all earlier rows are corpus sentences.
        similarity_scores = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1])
        best_idx = int(np.argmax(similarity_scores))

        if similarity_scores[0, best_idx] < self.SIMILARITY_THRESHOLD:
            return self.get_fallback_response()

        return self.generate_response_from_similar_sentence(self.corpus[best_idx])

    def generate_response(self, user_input):
        """
        Generate a response based on user input using various NLP techniques.

        Args:
            user_input: The raw message typed by the user.

        Returns:
            The reply text. As a side effect, a message of the form
            "my name is X" stores the capitalized name in
            ``self.context['user_name']``.
        """
        text = user_input.lower()

        # Nothing to analyse in a blank message.
        if not text.strip():
            return self.get_fallback_response()

        if self._is_greeting(text):
            return random.choice(self.greeting_responses)

        if self._EXIT_PATTERN.search(text):
            return self.FAREWELL

        if self._IDENTITY_PATTERN.search(text):
            return f"I am {self.name}, an NLP-powered chatbot designed to demonstrate natural language processing capabilities."

        if self._HELP_PATTERN.search(text):
            return "I can answer questions about chatbots, NLP, and AI. I can also have a general conversation with you. Just ask me anything!"

        # Remember the user's name when they introduce themselves.
        name_match = self._NAME_PATTERN.search(text)
        if name_match:
            self.context['user_name'] = name_match.group(1).capitalize()
            return f"Nice to meet you, {self.context['user_name']}! How can I help you today?"

        # Once the name is known, occasionally reply with a personalized
        # filler instead of an answer (30% gate, then 50%: about 15% overall).
        if 'user_name' in self.context and random.random() < 0.3:
            personalized_responses = [
                f"I'm thinking about that, {self.context['user_name']}...",
                f"That's an interesting question, {self.context['user_name']}!",
                f"Let me help you with that, {self.context['user_name']}."
            ]
            if random.random() < 0.5:
                return random.choice(personalized_responses)

        # For general queries, use TF-IDF and cosine similarity.
        return self._answer_from_corpus(user_input)

    def get_fallback_response(self):
        """
        Return a fallback response when no good match is found.
        """
        fallbacks = [
            "I'm not sure I understand. Could you rephrase that?",
            "That's an interesting point. Could you tell me more?",
            "I'm still learning. Can you elaborate on that?",
            "I don't have enough information about that yet. Could you explain more?",
            "That's beyond my current knowledge. Is there something else I can help with?"
        ]
        return random.choice(fallbacks)

    def generate_response_from_similar_sentence(self, similar_sentence):
        """
        Generate a response based on the most similar sentence found in the corpus.

        Args:
            similar_sentence: The corpus sentence to wrap in a random template.
        """
        # Simple templates for responses
        templates = [
            "According to my knowledge, {}",
            "I understand that {}",
            "From what I know, {}",
            "I believe that {}",
            "{}"
        ]

        return random.choice(templates).format(similar_sentence)

    def chat(self):
        """
        Main chat function to interact with users.

        Reads lines from standard input until the user sends an exit phrase
        (bye, goodbye, exit, quit) or the input stream is closed.
        """
        print(f"{self.name}: Hello! I'm {self.name}, an NLP-powered chatbot. How can I help you today? (Type 'bye' to exit)")

        while True:
            try:
                user_input = input("You: ")
            except EOFError:
                # Input stream closed: end the session instead of crashing.
                print(f"\n{self.name}: {self.FAREWELL}")
                break

            # Leave on every phrase that generate_response answers with the
            # farewell, so the bot never says goodbye and then keeps going.
            if self._is_exit_request(user_input):
                print(f"{self.name}: {self.FAREWELL}")
                break

            response = self.generate_response(user_input)
            print(f"{self.name}: {response}")


# Enhanced version with more sophisticated NLP capabilities
class AdvancedNLPChatbot(NLPChatbot):
    """Chatbot that layers regex intent detection on top of NLPChatbot.

    Recognised intents (weather, time, joke, define, how_to) get a dedicated
    reply; everything else falls back to the base class response. During a
    chat session, replies are occasionally extended with a follow-up phrase.
    """

    def __init__(self):
        super().__init__()
        self.name = "AdvancedBot"

        # Intent name -> regex patterns, tried in insertion order against the
        # lower-cased input. The first capture group becomes the entity.
        self.intent_patterns = {
            'weather': [r'\b(weather|temperature|forecast)\b'],
            'time': [r'\b(time|hour|clock)\b'],
            'joke': [r'\b(joke|funny|laugh)\b'],
            'define': [r'\bwhat is(?: a| an)? (.+)[\?]?\b', r'\bdefine (.+)[\?]?\b'],
            'how_to': [r'\bhow do (?:i|you) (.+)[\?]?\b', r'\bhow to (.+)[\?]?\b']
        }

        # Enhanced corpus with categories
        self.enhanced_corpus = {
            'general': self.corpus,
            'jokes': [
                "Why don't scientists trust atoms? Because they make up everything!",
                "What do you call a fake noodle? An impasta!",
                "Why did the chatbot go to school? To improve its AI-Q!",
                "How many programmers does it take to change a light bulb? None, that's a hardware problem!"
            ],
            'definitions': {
                'chatbot': "A chatbot is a software application used to conduct online chat conversations via text or voice.",
                'nlp': "Natural Language Processing (NLP) is a field of AI that enables computers to understand, interpret, and respond to human language.",
                'ai': "Artificial Intelligence (AI) is the simulation of human intelligence in machines programmed to think and learn like humans.",
                'machine learning': "Machine Learning is a subset of AI that enables systems to learn and improve from experience without being explicitly programmed."
            }
        }

        # Load additional context phrases
        self.context_phrases = self.load_context_phrases()

    def load_context_phrases(self):
        """
        Load phrases to help maintain context in conversations.

        Returns:
            A dict with 'follow_up' and 'transition' lists of phrases.
        """
        return {
            'follow_up': [
                "Is there anything else you'd like to know about that?",
                "Would you like me to elaborate on any part of that?",
                "Do you have any follow-up questions?",
                "Is that what you were looking for?",
                "Does that answer your question?"
            ],
            'transition': [
                "Now, what else can I help you with?",
                "Is there anything else you'd like to discuss?",
                "What other questions do you have?",
                "Is there something else on your mind?"
            ]
        }

    def detect_intent(self, user_input):
        """
        Detect the user's intent based on pattern matching.

        Args:
            user_input: The raw message typed by the user.

        Returns:
            An ``(intent, entity)`` tuple. ``entity`` is the stripped text of
            the pattern's first capture group (for the keyword intents this
            is the matched keyword itself), or None when the pattern has no
            group. ``(None, None)`` is returned when nothing matches.
        """
        text = user_input.lower()

        for intent, patterns in self.intent_patterns.items():
            for pattern in patterns:
                match = re.search(pattern, text)
                if match is None:
                    continue
                if match.groups():
                    captured = match.group(1)
                    # A group that did not participate in the match is None.
                    return intent, captured.strip() if captured else None
                return intent, None

        return None, None

    def _lookup_definition(self, entity):
        """Return the stored definition that best fits *entity*, or None.

        Matching is done on whole words so that a stray letter such as "c"
        cannot hit "chatbot". Tried in order: exact key, a key appearing as
        a whole phrase inside the entity ("the ai"), then an entity whose
        words all belong to a key ("machine" -> "machine learning").
        """
        definitions = self.enhanced_corpus['definitions']
        term = entity.lower().strip()

        if term in definitions:
            return definitions[term]

        for key, definition in definitions.items():
            if re.search(r'\b' + re.escape(key) + r'\b', term):
                return definition

        term_words = set(re.findall(r'\w+', term))
        if term_words:
            for key, definition in definitions.items():
                if term_words <= set(key.split()):
                    return definition

        return None

    @staticmethod
    def _is_farewell(response):
        """Return True if *response* is the base class's goodbye reply."""
        return "Goodbye" in response

    def generate_response(self, user_input):
        """
        Generate a response based on detected intent and context.

        Args:
            user_input: The raw message typed by the user.

        Returns:
            The intent-specific reply when an intent is recognised, otherwise
            the base class response.
        """
        # The base class handles greetings, exits and name capture, and
        # supplies the retrieval-based answer used when no intent applies.
        basic_response = super().generate_response(user_input)

        # Greetings and farewells always win over intent handling.
        if basic_response in self.greeting_responses or self._is_farewell(basic_response):
            return basic_response

        intent, entity = self.detect_intent(user_input)

        if intent == 'joke':
            return random.choice(self.enhanced_corpus['jokes'])

        if intent == 'define' and entity:
            definition = self._lookup_definition(entity)
            if definition is not None:
                return definition
            return f"I don't have a definition for '{entity}' in my database yet."

        if intent == 'weather':
            return "I'm sorry, I don't have access to real-time weather data. You might want to check a weather service or app for the current forecast."

        if intent == 'time':
            return "I don't have access to your local time. You can check the time on your device."

        if intent == 'how_to' and entity:
            return f"To {entity}, you would typically: \n1. Start by researching the basics\n2. Practice regularly\n3. Learn from experts in the field\n\nWould you like more specific information?"

        # No intent (or an intent without the entity it needs).
        return basic_response

    def add_context_extension(self, response):
        """
        Extend response with context-maintaining phrases occasionally.

        Args:
            response: The reply about to be shown to the user.

        Returns:
            The reply, possibly followed by a follow-up or transition phrase.
            Replies that already end with a question are returned unchanged,
            because every extension phrase is itself a question.
        """
        if response.rstrip().endswith('?'):
            return response

        if random.random() < 0.3:  # 30% chance to add a follow-up
            return f"{response} {random.choice(self.context_phrases['follow_up'])}"

        if random.random() < 0.2:  # 20% chance to add a transition
            return f"{response} {random.choice(self.context_phrases['transition'])}"

        return response

    def chat(self):
        """
        Enhanced chat function with improved context handling.

        Reads lines from standard input until the bot answers with its
        farewell (bye, goodbye, exit, quit) or the input stream is closed.
        """
        print(f"{self.name}: Hello! I'm {self.name}, an advanced NLP-powered chatbot. How can I help you today? (Type 'bye' to exit)")

        conversation_turn = 0

        while True:
            try:
                user_input = input("You: ")
            except EOFError:
                # Input stream closed: end the session instead of crashing.
                print(f"\n{self.name}: Goodbye! Have a nice day!")
                break
            conversation_turn += 1

            response = self.generate_response(user_input)

            # Stop whenever the bot has said goodbye, and never decorate the
            # farewell with a follow-up question.
            if self._is_farewell(response):
                print(f"{self.name}: {response}")
                break

            # Add context extensions after the first few turns
            if conversation_turn > 2:
                response = self.add_context_extension(response)

            print(f"{self.name}: {response}")


# Script entry point: start an interactive session on the console.
if __name__ == "__main__":
    # Two implementations are available:
    #   NLPChatbot()          - basic version
    #   AdvancedNLPChatbot()  - advanced version (used here)
    bot = AdvancedNLPChatbot()
    bot.chat()

AdvancedBot: Hello! I'm AdvancedBot, an advanced NLP-powered chatbot. How can I help you today? (Type 'bye' to exit)
You: My name is Vaibhav Sharma
AdvancedBot: Nice to meet you, Vaibhav! How can I help you today?
You: Whats the time right now
AdvancedBot: I don't have access to your local time. You can check the time on your device.
You: ok, who developed you
AdvancedBot: That's an interesting point. Could you tell me more?
You: I developed you
AdvancedBot: I'm thinking about that, Vaibhav...
You: Who developed you?
AdvancedBot: I'm not sure I understand. Could you rephrase that? Does that answer your question?
You: what is AI?
AdvancedBot: Artificial Intelligence (AI) is the simulation of human intelligence in machines programmed to think and learn like humans. Is that what you were looking for?
