In [None]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import string
import sqlite3
import pickle
import json
from datetime import datetime
import os
import warnings
warnings.filterwarnings('ignore')

# Download required NLTK data.
# 'punkt_tab' is the tokenizer resource that nltk.word_tokenize loads on newer
# NLTK releases (3.9+); 'punkt' is still requested for older installations.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('punkt_tab')

class AdvancedUniversityFAQChatbot:
    def __init__(self):
        """Build the FAQ corpus, open the history database and fit the matcher."""
        # In-memory record of this session's exchanges.
        self.chat_history = []

        # Text-normalisation helpers applied to FAQ questions and user queries.
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()

        # Unigram + bigram TF-IDF features, capped at 5000 terms.
        self.vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 2))

        # The FAQ table has to exist before the vectorizer can be fitted on it.
        self.faq_data = self.create_comprehensive_faq_data()
        self.setup_database()
        self.preprocess_and_train()
        
    def setup_database(self):
        """Setup SQLite database with proper table structure"""
        self.conn = sqlite3.connect('chatbot_history.db', check_same_thread=False)
        cursor = self.conn.cursor()
        
        # Check if table exists and has the correct structure
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS chat_history (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT,
                user_query TEXT,
                bot_response TEXT,
                similarity_score REAL,
                category TEXT
            )
        ''')
        
        # Check if category column exists, if not add it
        try:
            cursor.execute("SELECT category FROM chat_history LIMIT 1")
        except sqlite3.OperationalError:
            # Category column doesn't exist, add it
            cursor.execute('ALTER TABLE chat_history ADD COLUMN category TEXT')
        
        self.conn.commit()
    
    def log_conversation(self, query, response, score, category):
        """Log conversation to database with error handling"""
        try:
            cursor = self.conn.cursor()
            cursor.execute('''
                INSERT INTO chat_history (timestamp, user_query, bot_response, similarity_score, category)
                VALUES (?, ?, ?, ?, ?)
            ''', (datetime.now().isoformat(), query, response, score, category))
            self.conn.commit()
        except sqlite3.Error as e:
            print(f"Database error: {e}")
            # Continue without logging if there's a database issue

    def create_comprehensive_faq_data(self):
        """Create extensive FAQ dataset with categorized questions"""
        data = {
            'Question': [
                # Admission Related
                'How much is the admission fee?',
                'What is the admission process?',
                'What documents are required for admission?',
                'What is the last date for admission?',
                'Is there an entrance exam for admission?',
                'What are the eligibility criteria?',
                'How can I check my admission status?',
                
                # Fees and Payments
                'How to pay tuition fees?',
                'What is the fee structure?',
                'When is the last date for fee payment?',
                'Are there any installment plans?',
                'What is the refund policy?',
                'How to get fee concession?',
                
                # Hostel and Accommodation
                'How can I apply for a hostel?',
                'What is the hostel fee?',
                'What facilities are available in hostel?',
                'Is hostel accommodation compulsory?',
                'How to get hostel refund?',
                
                # Academic and Exams
                'When will exams start?',
                'How to check exam results?',
                'What is the exam pattern?',
                'How to apply for revaluation?',
                'What is the attendance requirement?',
                'How to get hall ticket?',
                
                # Library
                'Where is the library located?',
                'What are the library timings?',
                'How many books can I borrow?',
                'Is there digital library access?',
                
                # Administrative
                'How to get a student ID card?',
                'What is the procedure for semester registration?',
                'How to contact the administration office?',
                'How to apply for leave?',
                'How to change my course?',
                'How to get a bonafide certificate?',
                
                # Campus Facilities
                'What are the canteen timings?',
                'How to get a bus pass?',
                'What is the dress code?',
                'Is there a gym facility?',
                'What are the sports facilities?',
                
                # Support Services
                'How to report an issue?',
                'Is there career counseling?',
                'How to access mental health services?',
                'What is the grievance redressal process?'
            ],
            'Answer': [
                # Admission Answers
                'Admission fee is ‚Çπ5000 for all courses payable during application.',
                'Admission process involves online application, document verification, and fee payment.',
                'Required documents: 10th/12th marksheet, transfer certificate, ID proof, and 4 passport photos.',
                'The last date for admission is 31st July for the current academic year.',
                'Yes, there is a university entrance exam for most courses. Check the prospectus for details.',
                'Eligibility requires minimum 60% in previous qualification. Specific criteria vary by course.',
                'Admission status can be checked on the university portal using your application number.',
                
                # Fees Answers
                'You can pay tuition fees online through the student portal or at the finance office.',
                'Complete fee structure is available on the university website. Contact finance office for details.',
                'The last date for fee payment is 15th August for the current semester.',
                'Yes, installment plans are available. Contact accounts department for approval.',
                'Refund policy allows 90% refund before classes start and 50% refund within first 15 days.',
                'Fee concession is available for economically weaker sections. Apply with relevant documents.',
                
                # Hostel Answers
                'Fill the hostel application form online at hostel.university.edu or visit hostel office.',
                'Hostel fee is ‚Çπ25,000 per semester including mess charges and basic amenities.',
                'Hostel facilities include WiFi, laundry, common room, and 24/7 security.',
                'Hostel accommodation is optional for local students but recommended for outstation students.',
                'Hostel refund requests must be submitted to warden office with proper justification.',
                
                # Academic Answers
                'Exams will begin in December for odd semester and May for even semester.',
                'Exam results can be checked on university website under student portal section.',
                'Exam pattern includes theory papers, practical exams, and internal assessments.',
                'Revaluation applications can be submitted within 15 days of result declaration.',
                'Minimum 75% attendance is required to appear for semester examinations.',
                'Hall tickets can be downloaded from student portal 15 days before exams.',
                
                # Library Answers
                'The central library is located in the main academic building, ground floor.',
                'Library timings are 8:00 AM to 8:00 PM on weekdays and 9:00 AM to 5:00 PM on Saturdays.',
                'Students can borrow up to 4 books for 15 days. Fine applicable for late returns.',
                'Yes, digital library access is available through LMS with your student credentials.',
                
                # Administrative Answers
                'Student ID cards are issued at administration office after completing admission formalities.',
                'Semester registration is done online through student portal during specified periods.',
                'Contact administration at admin@university.edu or call 0123-4567890 during office hours.',
                'Leave applications must be submitted through class coordinator with supporting documents.',
                'Course changes can be requested during first week of semester through academic advisor.',
                'Bonafide certificates are issued at student service center with ID proof.',
                
                # Campus Answers
                'Canteen timings are 8:00 AM to 6:00 PM on all working days.',
                'Bus passes are available at transport office with valid student ID and passport photo.',
                'Formal dress code is mandatory on weekdays. Casual wear allowed on weekends.',
                'Yes, fully equipped gym is available for students from 6 AM to 8 PM.',
                'Sports facilities include basketball court, football ground, and indoor games.',
                
                # Support Answers
                'Issues can be reported through grievance portal or by visiting student help desk.',
                'Career counseling services are available at placement cell. Book appointment online.',
                'Mental health counseling is available at wellness center. All services are confidential.',
                'Grievances can be submitted online or in writing. Resolution within 7 working days.'
            ],
            'Category': [
                'Admission', 'Admission', 'Admission', 'Admission', 'Admission', 'Admission', 'Admission',
                'Fees', 'Fees', 'Fees', 'Fees', 'Fees', 'Fees',
                'Hostel', 'Hostel', 'Hostel', 'Hostel', 'Hostel',
                'Academics', 'Academics', 'Academics', 'Academics', 'Academics', 'Academics',
                'Library', 'Library', 'Library', 'Library',
                'Administrative', 'Administrative', 'Administrative', 'Administrative', 'Administrative', 'Administrative',
                'Campus', 'Campus', 'Campus', 'Campus', 'Campus',
                'Support', 'Support', 'Support', 'Support'
            ]
        }
        return pd.DataFrame(data)
    
    def advanced_preprocess_text(self, text):
        """Normalise raw text into the token string fed to the TF-IDF model.

        Steps: lowercase, replace punctuation with spaces, tokenize, drop
        English stop words and tokens shorter than three characters,
        lemmatize, then expand common abbreviations.

        Args:
            text: Raw FAQ question or user query.

        Returns:
            str: The surviving tokens joined by single spaces (may be empty).
        """
        # Keyed by the *lemmatized* token so that singular and plural spellings
        # expand identically ('exam' and 'exams' both become 'examination').
        # 'fee'/'fees' need no entry: lemmatization already folds them together.
        abbreviation_map = {
            'dept': 'department',
            'admin': 'administration',
            'exam': 'examination',
            'info': 'information',
            'appn': 'application',
            'doc': 'document',
            'reg': 'registration',
        }

        text = text.lower()

        # Replace punctuation and special characters with spaces.
        text = re.sub(r'[^\w\s]', ' ', text)

        processed_tokens = []
        for token in nltk.word_tokenize(text):
            if token in self.stop_words or len(token) <= 2:
                continue
            lemma = self.lemmatizer.lemmatize(token)
            processed_tokens.append(abbreviation_map.get(lemma, lemma))

        return ' '.join(processed_tokens)
    
    def preprocess_and_train(self):
        """Fit the TF-IDF model on the normalised FAQ questions.

        Stores the cleaned questions and their TF-IDF matrix on the instance,
        then tries to pickle the fitted artefacts to ``tfidf_model.pkl``.  A
        failure to save is reported as a warning and does not abort.
        """
        self.processed_questions = list(
            map(self.advanced_preprocess_text, self.faq_data['Question'])
        )

        self.tfidf_matrix = self.vectorizer.fit_transform(self.processed_questions)

        snapshot = {
            'vectorizer': self.vectorizer,
            'tfidf_matrix': self.tfidf_matrix,
            'processed_questions': self.processed_questions
        }
        # Saving is best effort: the in-memory model is what answers queries.
        try:
            with open('tfidf_model.pkl', 'wb') as model_file:
                pickle.dump(snapshot, model_file)
        except Exception as save_error:
            print(f"Model saving warning: {save_error}")
    
    def get_enhanced_response(self, user_query):
        """Answer a query with the closest FAQ entry plus related questions.

        The query is normalised, vectorised and compared with every FAQ
        question by cosine similarity.  A best score above 0.3 returns that
        entry's answer together with any of the next two matches scoring
        above 0.2; otherwise a fallback message is returned.  Every call is
        written to the history database.

        Returns:
            dict: keys ``answer``, ``confidence`` (best score as a
            percentage), ``category`` and ``similar_questions``.
        """
        query_vector = self.vectorizer.transform(
            [self.advanced_preprocess_text(user_query)]
        )
        scores = cosine_similarity(query_vector, self.tfidf_matrix)[0]

        # Three highest-scoring FAQ rows, best first.
        ranked = scores.argsort()[::-1][:3]
        ranked_scores = scores[ranked]
        best_idx, best_score = ranked[0], ranked_scores[0]

        questions = self.faq_data['Question']
        answers = self.faq_data['Answer']

        if best_score > 0.3:
            related = [
                {
                    'question': questions.iloc[idx],
                    'answer': answers.iloc[idx],
                    'score': round(score * 100, 2)
                }
                for idx, score in zip(ranked[1:], ranked_scores[1:])
                if score > 0.2
            ]
            response = {
                'answer': answers.iloc[best_idx],
                'confidence': round(best_score * 100, 2),
                'category': self.faq_data['Category'].iloc[best_idx],
                'similar_questions': related
            }
        else:
            # Nothing close enough: point the user to a human contact.
            response = {
                'answer': "I'm sorry, I couldn't find a specific answer to your question. Please contact the administration office at admin@university.edu or call 0123-4567890 for further assistance.",
                'confidence': round(best_score * 100, 2),
                'category': 'Unknown',
                'similar_questions': []
            }

        self.log_conversation(user_query, response['answer'], best_score, response['category'])
        return response
    
    def get_chat_statistics(self):
        """Get chat statistics from database"""
        try:
            cursor = self.conn.cursor()
            cursor.execute('SELECT COUNT(*) FROM chat_history')
            total_chats = cursor.fetchone()[0]
            
            cursor.execute('SELECT AVG(similarity_score) FROM chat_history WHERE similarity_score > 0')
            avg_confidence = cursor.fetchone()[0] or 0
            
            cursor.execute('''
                SELECT category, COUNT(*) as count 
                FROM chat_history 
                WHERE category != 'Unknown' 
                GROUP BY category 
                ORDER BY count DESC
            ''')
            category_stats = cursor.fetchall()
            
            return {
                'total_conversations': total_chats,
                'average_confidence': round(avg_confidence * 100, 2),
                'category_distribution': category_stats
            }
        except sqlite3.Error as e:
            return {
                'total_conversations': 0,
                'average_confidence': 0,
                'category_distribution': []
            }
    
    def display_chat_interface(self):
        """Display interactive chat interface in console"""
        print("=" * 70)
        print("üéì ADVANCED UNIVERSITY FAQ CHATBOT")
        print("=" * 70)
        print("Type 'quit' to exit, 'stats' for statistics, 'help' for commands")
        print("\nCategories available: Admission, Fees, Hostel, Academics, Library, Administrative, Campus, Support")
        print("-" * 70)
        
        while True:
            try:
                user_input = input("\nüë§ You: ").strip()
                
                if user_input.lower() in ['quit', 'exit', 'bye']:
                    print("\nü§ñ Bot: Thank you for using University FAQ Chatbot. Have a great day!")
                    break
                
                elif user_input.lower() == 'stats':
                    self.show_statistics()
                    continue
                    
                elif user_input.lower() == 'help':
                    self.show_help()
                    continue
                    
                elif user_input.lower() == 'history':
                    self.show_recent_history()
                    continue
                    
                elif user_input.lower().startswith('category '):
                    category = user_input[9:].strip()
                    self.show_category_questions(category)
                    continue
                
                if user_input:
                    print("üîç Searching for best answer...")
                    response = self.get_enhanced_response(user_input)
                    
                    # Add to chat history
                    self.chat_history.append({
                        'query': user_input,
                        'response': response,
                        'timestamp': datetime.now()
                    })
                    
                    # Display response
                    print(f"\nü§ñ Bot: {response['answer']}")
                    print(f"   üìä Confidence: {response['confidence']}% | Category: {response['category']}")
                    
                    # Show similar questions if available
                    if response['similar_questions']:
                        print(f"\n   üîó Related Questions:")
                        for i, similar in enumerate(response['similar_questions'], 1):
                            print(f"      {i}. {similar['question']} (Match: {similar['score']}%)")
                else:
                    print("ü§ñ Bot: Please enter a question.")
                    
            except KeyboardInterrupt:
                print("\n\nü§ñ Bot: Session ended. Goodbye!")
                break
            except Exception as e:
                print(f"\nü§ñ Bot: Sorry, something went wrong. Error: {e}")
    
    def show_statistics(self):
        """Display chat statistics"""
        stats = self.get_chat_statistics()
        print("\n" + "=" * 50)
        print("üìà CHATBOT STATISTICS")
        print("=" * 50)
        print(f"Total Conversations: {stats['total_conversations']}")
        print(f"Average Confidence: {stats['average_confidence']}%")
        if stats['category_distribution']:
            print("\nCategory Distribution:")
            for category, count in stats['category_distribution']:
                print(f"  {category}: {count} queries")
        else:
            print("\nNo category data available yet.")
        print("=" * 50)
    
    def show_help(self):
        """Print the list of console commands and some example categories."""
        print("\n" + "=" * 50)
        print("🆘 HELP - AVAILABLE COMMANDS")
        print("=" * 50)
        print("• Ask any university-related question")
        print("• 'stats' - Show chatbot statistics")
        print("• 'history' - Show recent chat history")
        print("• 'category <name>' - Show questions from specific category")
        print("• 'quit' - Exit the chatbot")
        print("\nExample categories: admission, fees, hostel, academics, library")
        print("=" * 50)
    
    def show_recent_history(self):
        """Display recent chat history"""
        print("\n" + "=" * 60)
        print("üìù RECENT CHAT HISTORY")
        print("=" * 60)
        if not self.chat_history:
            print("No recent conversations.")
            return
            
        for i, chat in enumerate(self.chat_history[-5:], 1):  # Last 5 conversations
            print(f"\n{i}. Q: {chat['query']}")
            print(f"   A: {chat['response']['answer'][:100]}...")
            print(f"   üìä Confidence: {chat['response']['confidence']}% | Category: {chat['response']['category']}")
        print("=" * 60)
    
    def show_category_questions(self, category):
        """Show questions from a specific category"""
        category = category.title()
        available_categories = self.faq_data['Category'].unique()
        if category not in available_categories:
            print(f"\n‚ùå Category '{category}' not found. Available categories: {', '.join(available_categories)}")
            return
            
        category_questions = self.faq_data[self.faq_data['Category'] == category]
        print(f"\n" + "=" * 60)
        print(f"üìÇ QUESTIONS IN CATEGORY: {category.upper()}")
        print("=" * 60)
        for i, (_, row) in enumerate(category_questions.iterrows(), 1):
            print(f"{i}. {row['Question']}")
        print("=" * 60)

# Simple version without database for quick testing
class SimpleUniversityChatbot:
    """Minimal TF-IDF FAQ matcher that keeps everything in memory.

    It answers from the same FAQ table as ``AdvancedUniversityFAQChatbot``
    but opens no database and writes no files.
    """

    def __init__(self):
        """Load the FAQ table and fit the TF-IDF model on its questions."""
        self.lemmatizer = WordNetLemmatizer()
        self.stop_words = set(stopwords.words('english'))
        self.vectorizer = TfidfVectorizer(ngram_range=(1, 2))
        # Reuse only the FAQ table builder.  Instantiating the advanced chatbot
        # here would open its SQLite database, fit and pickle a second model,
        # and raise again whenever that class cannot be constructed.  The
        # builder never touches its instance, so this object can stand in
        # for ``self``.
        self.faq_data = AdvancedUniversityFAQChatbot.create_comprehensive_faq_data(self)
        self.preprocess_and_train()

    def preprocess_text(self, text):
        """Lowercase, strip punctuation, drop stop words/short tokens, lemmatize.

        Returns:
            str: The surviving tokens joined by single spaces.
        """
        text = text.lower()
        text = re.sub(r'[^\w\s]', ' ', text)
        tokens = nltk.word_tokenize(text)
        processed_tokens = [
            self.lemmatizer.lemmatize(token)
            for token in tokens
            if token not in self.stop_words and len(token) > 2
        ]
        return ' '.join(processed_tokens)

    def preprocess_and_train(self):
        """Normalise every FAQ question and fit the TF-IDF vectorizer on them."""
        self.processed_questions = [
            self.preprocess_text(question)
            for question in self.faq_data['Question']
        ]
        self.tfidf_matrix = self.vectorizer.fit_transform(self.processed_questions)

    def get_response(self, user_query):
        """Return the best-matching FAQ answer for ``user_query``.

        Returns:
            dict: keys ``answer``, ``confidence`` (best cosine similarity as a
            percentage) and ``category``.  When the best similarity is not
            above 0.3 a fallback answer with category ``'Unknown'`` is
            returned.
        """
        processed_query = self.preprocess_text(user_query)
        query_vector = self.vectorizer.transform([processed_query])
        similarities = cosine_similarity(query_vector, self.tfidf_matrix)
        # The similarity matrix has a single row, so the flat argmax is the
        # column (FAQ row) index.
        best_match_idx = similarities.argmax()
        best_similarity = similarities[0, best_match_idx]

        if best_similarity > 0.3:
            return {
                'answer': self.faq_data['Answer'].iloc[best_match_idx],
                'confidence': round(best_similarity * 100, 2),
                'category': self.faq_data['Category'].iloc[best_match_idx]
            }
        return {
            'answer': "I'm sorry, I couldn't find a specific answer. Please contact admin@university.edu for help.",
            'confidence': round(best_similarity * 100, 2),
            'category': 'Unknown'
        }

def run_demo():
    """Run sample queries through the chatbot, then start an interactive chat.

    ``AdvancedUniversityFAQChatbot`` is preferred; when it cannot be
    constructed the error is printed and ``SimpleUniversityChatbot`` is used
    instead.  The interactive part uses the chatbot's own console interface
    when it has one, otherwise a minimal loop that ends on ``quit``/``exit``,
    Ctrl+C or end of input.
    """
    print("🚀 INITIALIZING UNIVERSITY FAQ CHATBOT...")
    print("⏳ Loading data and training model...")

    try:
        chatbot = AdvancedUniversityFAQChatbot()
        print("✅ Advanced chatbot loaded successfully!")
    except Exception as e:
        print(f"⚠️  Advanced chatbot failed: {e}")
        print("🔄 Loading simple version...")
        chatbot = SimpleUniversityChatbot()
        print("✅ Simple chatbot loaded successfully!")

    # The two chatbot classes expose differently named query methods.
    if hasattr(chatbot, 'get_enhanced_response'):
        answer_query = chatbot.get_enhanced_response
    else:
        answer_query = chatbot.get_response

    # Test some sample queries
    test_queries = [
        "What is the admission fee?",
        "How to apply for hostel?",
        "When do exams start?",
        "How to pay fees online?",
        "What are library timings?"
    ]

    print("\n" + "=" * 60)
    print("🧪 SAMPLE QUERIES TEST")
    print("=" * 60)

    for query in test_queries:
        response = answer_query(query)
        print(f"\nQ: {query}")
        print(f"A: {response['answer']}")
        print(f"Confidence: {response['confidence']}% | Category: {response.get('category', 'N/A')}")
        print("-" * 50)

    # Start interactive chat
    if hasattr(chatbot, 'display_chat_interface'):
        chatbot.display_chat_interface()
        return

    print("\n" + "=" * 50)
    print("💬 CHAT MODE - Type 'quit' to exit")
    print("=" * 50)
    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (KeyboardInterrupt, EOFError):
            # Ctrl+C or exhausted input ends the chat instead of crashing.
            break
        if user_input.lower() in ['quit', 'exit']:
            break
        if user_input:
            response = chatbot.get_response(user_input)
            print(f"Bot: {response['answer']}")
            print(f"Confidence: {response['confidence']}%")

# Entry point: start the demo when this code is executed as the main module
# (a script run directly), not when it is imported.
if __name__ == "__main__":
    run_demo()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Gungun\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Gungun\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Gungun\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


🚀 INITIALIZING UNIVERSITY FAQ CHATBOT...
⏳ Loading data and training model...
✅ Advanced chatbot loaded successfully!

🧪 SAMPLE QUERIES TEST

Q: What is the admission fee?
A: Admission fee is ₹5000 for all courses payable during application.
Confidence: 70.38% | Category: Admission
--------------------------------------------------

Q: How to apply for hostel?
A: Fill the hostel application form online at hostel.university.edu or visit hostel office.
Confidence: 100.0% | Category: Hostel
--------------------------------------------------

Q: When do exams start?
A: Exams will begin in December for odd semester and May for even semester.
Confidence: 100.0% | Category: Academics
--------------------------------------------------

Q: How to pay fees online?
A: You can pay tuition fees online through the student portal or at the finance office.
Confidence: 57.49% | Category: Fees
--------------------------------------------------

Q: What are library timings?
A: Library timings


👤 You:  help



🆘 HELP - AVAILABLE COMMANDS
• Ask any university-related question
• 'stats' - Show chatbot statistics
• 'history' - Show recent chat history
• 'category <name>' - Show questions from specific category
• 'quit' - Exit the chatbot

Example categories: admission, fees, hostel, academics, library



👤 You:  What is college fee?


🔍 Searching for best answer...

🤖 Bot: Hostel fee is ₹25,000 per semester including mess charges and basic amenities.
   📊 Confidence: 48.86% | Category: Hostel

   🔗 Related Questions:
      1. What is the fee structure? (Match: 44.04%)
      2. How much is the admission fee? (Match: 34.85%)
