In [1]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install faiss-gpu
!pip  install langchain-community
! pip install torch transformers sentence-transformers langchain faiss-cpu datasets pandas streamlit textblob

Looking in indexes: https://download.pytorch.org/whl/cu118
INFO: pip is looking at multiple versions of torch to determine which version is compatible with other requirements. This could take a while.
Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.7.1%2Bcu118-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (28 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading https://download.pytorch.org/whl/sympy-1.13.3-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.8.89 (from torch)
  Downloading https://download.pytorch.org/whl/cu118/nvidia_cuda_nvrtc_cu11-11.8.89-py3-none-manylinux1_x86_64.whl (23.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.2/23.2 MB[0m [31m63.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting nvidia-cuda-runtime-cu11==11.8.89 (from torch)
  Downloading https://download.pytorch.org/whl/cu118/nvidia_cuda_runtime_cu11-11.8.89-py3-none-manylinux1_x86_64.whl (875 kB)
[2K     [9

In [3]:
!pip install huggingface_hub



In [4]:
!python -c "import nltk; nltk.download('punkt'); nltk.download('vader_lexicon')"

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /usr/share/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [5]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [6]:
import streamlit as st

with open("customer_support_rag.py", "w") as f:
    f.write(""" 
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import sqlite3
import json


# Page config
st.set_page_config(
    page_title="AI Customer Support",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for better styling
st.markdown(\"""
    <style>
    .main-title {
        font-size: 2.5rem;
        font-weight: bold;
        text-align: center;
        color: #2E86C1;
    }
    .sub-title {
        font-size: 1.5rem;
        text-align: center;
        color: #555555;
    }
    .footer {
        font-size: 0.9rem;
        text-align: center;
        margin-top: 30px;
        color: gray;
    }
    .sentiment-positive { color: #28a745; }
    .sentiment-negative { color: #dc3545; }
    .sentiment-neutral { color: #6c757d; }
    .sentiment-frustrated { color: #fd7e14; }
    .escalation-alert {
        background-color: #f8d7da;
        border: 1px solid #f5c6cb;
        color: #721c24;
        padding: 1rem;
        border-radius: 5px;
        margin: 1rem 0;
    }
</style>
\""", unsafe_allow_html=True)
     """)

# Initialize session state
def initialize_session_state():
    if 'support_bot' not in st.session_state:
        st.session_state.support_bot = None
    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []
    if 'current_user' not in st.session_state:
        st.session_state.current_user = "demo_user"
    if 'system_initialized' not in st.session_state:
        st.session_state.system_initialized = False
    if 'performance_metrics' not in st.session_state:
        st.session_state.performance_metrics = {}

@st.cache_resource
def load_support_system(use_lightweight: bool = True):
    """Load and initialize the support system"""
    try:
        # Load data
        data_loader = CustomerSupportDataLoader()
        documents = data_loader.load_all_available_datasets()
        
        if not documents:
            documents = data_loader.create_synthetic_support_data(100)
        
        # Initialize system
        if use_lightweight:
            support_bot = LightweightCustomerSupportRAG()
        else:
            support_bot = CustomerSupportRAG()
        
        # Load knowledge base
        support_bot.load_knowledge_base(documents)
        
        # Setup demo data
        setup_realistic_demo_data(support_bot)
        
        return support_bot, len(documents)
        
    except Exception as e:
        st.error(f"Error initializing system: {e}")
        return None, 0

def main():
    initialize_session_state()
    
    st.markdown('<h1 class="main-header">🤖 AI Customer Support System</h1>', unsafe_allow_html=True)
    
    # Sidebar
    with st.sidebar:
        st.header("⚙️ System Configuration")
        
        # Model selection
        use_lightweight = st.checkbox("Use Lightweight Models", value=True, 
                                    help="Recommended for faster response times")
        
        # Initialize system
        if st.button("🚀 Initialize System", type="primary"):
            with st.spinner("Loading AI models and data..."):
                support_bot, doc_count = load_support_system(use_lightweight)
                if support_bot:
                    st.session_state.support_bot = support_bot
                    st.session_state.system_initialized = True
                    st.success(f"✅ System initialized with {doc_count} documents!")
                else:
                    st.error("❌ Failed to initialize system")
        
        st.divider()
        
        # User selection
        st.header("👤 User Profile")
        user_profiles = {
            "demo_user": "Demo User",
            "premium_user": "Premium Customer",
            "new_user": "New Customer", 
            "frequent_user": "Frequent User",
            "enterprise_user": "Enterprise Customer"
        }
        
        selected_user = st.selectbox(
            "Select User Profile",
            options=list(user_profiles.keys()),
            format_func=lambda x: user_profiles[x],
            index=0
        )
        st.session_state.current_user = selected_user
        
        # Display user context if system is initialized
        if st.session_state.system_initialized and st.session_state.support_bot:
            user_context = st.session_state.support_bot.get_user_context(selected_user)
            
            if user_context["recent_purchases"]:
                st.subheader("🛍️ Recent Purchases")
                for purchase in user_context["recent_purchases"][:3]:
                    st.write(f"• {purchase[0]} ({purchase[1]})")
        
        st.divider()
        
        # System status
        st.header("📊 System Status")
        if st.session_state.system_initialized:
            st.success("✅ AI System Online")
            if st.session_state.support_bot:
                # Show some basic metrics
                db_path = st.session_state.support_bot.db_path
                if db_path:
                    conn = sqlite3.connect(db_path)
                    total_conversations = pd.read_sql_query(
                        "SELECT COUNT(*) as count FROM conversations", conn
                    ).iloc[0]['count']
                    conn.close()
                    st.metric("Total Conversations", total_conversations)
        else:
            st.warning("⚠️ System Not Initialized")

    # Main content area
    col1, col2 = st.columns([2, 1])
    
    with col1:
        st.header("💬 Customer Support Chat")
        
        if not st.session_state.system_initialized:
            st.info("👈 Please initialize the system using the sidebar first")
            return
        
        # Chat interface
        chat_container = st.container()
        
        with chat_container:
            # Display chat history
            for i, (user_msg, bot_response, metadata) in enumerate(st.session_state.chat_history):
                st.markdown(f'<div class="user-message">👤 {user_msg}</div>', unsafe_allow_html=True)
                
                sentiment_class = f"sentiment-{metadata.get('sentiment', 'neutral')}"
                st.markdown(f'<div class="bot-message">🤖 {bot_response}</div>', unsafe_allow_html=True)
                
                # Show sentiment and escalation info
                col_a, col_b, col_c = st.columns(3)
                with col_a:
                    sentiment = metadata.get('sentiment', 'neutral')
                    st.markdown(f'<span class="{sentiment_class}">Sentiment: {sentiment.title()}</span>', unsafe_allow_html=True)
                with col_b:
                    score = metadata.get('sentiment_score', 0)
                    st.text(f"Score: {score:.3f}")
                with col_c:
                    if metadata.get('escalate', False):
                        st.markdown('🚨 <span style="color: red;">Escalated</span>', unsafe_allow_html=True)
                
                st.divider()
        
        # Input area
        with st.form("chat_form", clear_on_submit=True):
            user_input = st.text_area("Enter your message:", placeholder="How can I help you today?", height=100)
            col_submit, col_clear = st.columns([1, 1])
            
            with col_submit:
                submit_button = st.form_submit_button("Send Message", type="primary", use_container_width=True)
            
            with col_clear:
                clear_button = st.form_submit_button("Clear Chat", use_container_width=True)
        
        if submit_button and user_input and st.session_state.support_bot:
            with st.spinner("AI is thinking..."):
                try:
                    # Get response from the bot
                    response = st.session_state.support_bot.chat(st.session_state.current_user, user_input)
                    
                    # Add to chat history
                    metadata = {
                        'sentiment': response['sentiment'],
                        'sentiment_score': response['sentiment_score'],
                        'escalate': response['escalate'],
                        'timestamp': datetime.now()
                    }
                    
                    st.session_state.chat_history.append((user_input, response['answer'], metadata))
                    
                    # Show escalation alert if needed
                    if response['escalate']:
                        st.markdown("""
                        <div class="escalation-alert">
                            🚨 <strong>Escalation Alert:</strong> This conversation has been flagged for human agent review.
                        </div>
                        """, unsafe_allow_html=True)
                    
                    st.rerun()
                    
                except Exception as e:
                    st.error(f"Error getting response: {e}")
        
        if clear_button:
            st.session_state.chat_history = []
            st.rerun()

    with col2:
        st.header("📈 Analytics Dashboard")
        
        if st.session_state.chat_history:
            # Sentiment analysis over time
            sentiments = [metadata['sentiment'] for _, _, metadata in st.session_state.chat_history]
            sentiment_counts = pd.Series(sentiments).value_counts()
            
            # Sentiment pie chart
            fig_pie = px.pie(
                values=sentiment_counts.values,
                names=sentiment_counts.index,
                title="Conversation Sentiment Distribution",
                color_discrete_map={
                    'positive': '#28a745',
                    'negative': '#dc3545', 
                    'neutral': '#6c757d',
                    'frustrated': '#fd7e14'
                }
            )
            st.plotly_chart(fig_pie, use_container_width=True)
            
            # Sentiment timeline
            if len(st.session_state.chat_history) > 1:
                timeline_data = []
                for i, (_, _, metadata) in enumerate(st.session_state.chat_history):
                    timeline_data.append({
                        'Message': i + 1,
                        'Sentiment Score': metadata['sentiment_score'],
                        'Sentiment': metadata['sentiment']
                    })
                
                df_timeline = pd.DataFrame(timeline_data)
                fig_line = px.line(
                    df_timeline, 
                    x='Message', 
                    y='Sentiment Score',
                    color='Sentiment',
                    title="Sentiment Trend Over Conversation",
                    markers=True
                )
                st.plotly_chart(fig_line, use_container_width=True)
            
            # Escalation metrics
            escalations = sum(1 for _, _, metadata in st.session_state.chat_history if metadata.get('escalate', False))
            total_messages = len(st.session_state.chat_history)
            escalation_rate = (escalations / total_messages) * 100 if total_messages > 0 else 0
            
            st.metric(
                "Escalation Rate",
                f"{escalation_rate:.1f}%",
                delta=f"{escalations} of {total_messages} messages"
            )
            
        else:
            st.info("Start a conversation to see analytics!")
        
        st.divider()
        
        # System performance metrics
        if st.session_state.system_initialized and st.session_state.support_bot:
            st.subheader("⚙️ System Performance")
            
            # Database metrics
            try:
                conn = sqlite3.connect(st.session_state.support_bot.db_path)
                
                # Total conversations
                total_convs = pd.read_sql_query(
                    "SELECT COUNT(*) as count FROM conversations", conn
                ).iloc[0]['count']
                
                # Average sentiment
                avg_sentiment = pd.read_sql_query("""
                    SELECT AVG(
                        CASE 
                            WHEN sentiment_score > 0.3 THEN 1
                            WHEN sentiment_score < -0.3 THEN -1
                            ELSE 0
                        END
                    ) as avg_sentiment
                    FROM support_tickets 
                    WHERE created_at >= datetime('now', '-7 days')
                """, conn).iloc[0]['avg_sentiment']
                
                conn.close()
                
                col_m1, col_m2 = st.columns(2)
                with col_m1:
                    st.metric("Total Conversations", total_convs)
                with col_m2:
                    if avg_sentiment is not None:
                        sentiment_label = "Positive" if avg_sentiment > 0 else "Negative" if avg_sentiment < 0 else "Neutral"
                        st.metric("7-Day Avg Sentiment", sentiment_label)
                
            except Exception as e:
                st.error(f"Error loading metrics: {e}")

# Additional features
def show_admin_dashboard():
    """Admin dashboard for system management"""
    st.header("🛠️ Admin Dashboard")
    
    if not st.session_state.system_initialized:
        st.warning("System must be initialized first")
        return
    
    tab1, tab2, tab3 = st.tabs(["📊 Analytics", "👥 Users", "⚙️ Settings"])
    
    with tab1:
        st.subheader("System Analytics")
        
        # Load conversation data
        try:
            conn = sqlite3.connect(st.session_state.support_bot.db_path)
            
            # Conversations over time
            conv_data = pd.read_sql_query("""
                SELECT DATE(timestamp) as date, COUNT(*) as conversations
                FROM conversations
                GROUP BY DATE(timestamp)
                ORDER BY date
            """, conn)
            
            if not conv_data.empty:
                fig = px.bar(conv_data, x='date', y='conversations', 
                           title="Daily Conversation Volume")
                st.plotly_chart(fig, use_container_width=True)
            
            # User engagement
            user_data = pd.read_sql_query("""
                SELECT user_id, COUNT(*) as message_count
                FROM conversations
                GROUP BY user_id
                ORDER BY message_count DESC
            """, conn)
            
            if not user_data.empty:
                fig2 = px.bar(user_data.head(10), x='user_id', y='message_count',
                            title="Top 10 Most Active Users")
                st.plotly_chart(fig2, use_container_width=True)
            
            conn.close()
            
        except Exception as e:
            st.error(f"Error loading analytics: {e}")
    
    with tab2:
        st.subheader("User Management")
        
        # User creation form
        with st.form("add_user_form"):
            st.write("Add New User")
            new_user_id = st.text_input("User ID")
            new_user_email = st.text_input("Email")
            new_user_name = st.text_input("Name")
            
            if st.form_submit_button("Add User"):
                if new_user_id and new_user_email and new_user_name:
                    st.session_state.support_bot.add_user(new_user_id, new_user_email, new_user_name)
                    st.success(f"User {new_user_name} added successfully!")
                else:
                    st.error("Please fill in all fields")
        
        # Show existing users
        try:
            conn = sqlite3.connect(st.session_state.support_bot.db_path)
            users_df = pd.read_sql_query("SELECT * FROM users", conn)
            conn.close()
            
            if not users_df.empty:
                st.dataframe(users_df, use_container_width=True)
            else:
                st.info("No users in system")
                
        except Exception as e:
            st.error(f"Error loading users: {e}")
    
    with tab3:
        st.subheader("System Settings")
        
        # Model configuration
        st.write("Model Configuration")
        current_model = "Lightweight" if isinstance(st.session_state.support_bot, LightweightCustomerSupportRAG) else "Full"
        st.info(f"Current Model: {current_model}")
        
        # Database management
        st.write("Database Management")
        if st.button("🗑️ Clear Chat History", type="secondary"):
            if st.checkbox("I understand this will delete all chat history"):
                try:
                    conn = sqlite3.connect(st.session_state.support_bot.db_path)
                    conn.execute("DELETE FROM conversations")
                    conn.commit()
                    conn.close()
                    st.session_state.chat_history = []
                    st.success("Chat history cleared!")
                except Exception as e:
                    st.error(f"Error clearing history: {e}")

# Main app navigation
def app():
    """Main app function with navigation"""
    
    # Navigation menu
    page = st.sidebar.selectbox(
        "Navigation",
        ["💬 Customer Chat", "🛠️ Admin Dashboard", "📚 Documentation"]
    )
    
    if page == "💬 Customer Chat":
        main()
    elif page == "🛠️ Admin Dashboard":
        show_admin_dashboard()
    elif page == "📚 Documentation":
        show_documentation()

def show_documentation():
    """Show system documentation"""
    st.header("📚 System Documentation")
    
    tab1, tab2, tab3 = st.tabs(["🚀 Quick Start", "🔧 Features", "❓ FAQ"])
    
    with tab1:
        st.markdown("""
        ## Quick Start Guide
        
        ### 1. Initialize the System
        - Click "Initialize System" in the sidebar
        - The system will automatically load AI models and customer support data
        - Wait for the "System initialized" message
        
        ### 2. Select User Profile
        - Choose from different customer types in the sidebar
        - Each profile has different purchase history and context
        
        ### 3. Start Chatting
        - Type your customer support question in the message box
        - The AI will provide contextual responses based on user history
        - Watch for sentiment analysis and escalation alerts
        
        ### 4. Monitor Analytics
        - View real-time sentiment analysis in the right panel
        - Track conversation trends and escalation rates
        - Use admin dashboard for deeper insights
        """)
    
    with tab2:
        st.markdown("""
        ## Key Features
        
        ### 🧠 Contextual AI Responses
        - Uses user purchase history and past interactions
        - Powered by Hugging Face transformers
        - Real-time sentiment analysis
        
        ### 📊 Sentiment Detection
        - Automatically detects frustrated customers
        - Triggers escalation for complex issues
        - Tracks sentiment trends over time
        
        ### 🚨 Smart Escalation
        - Automatically escalates based on:
          - Negative sentiment
          - Multiple unresolved tickets
          - Complex technical issues
          - High-value customer issues
        
        ### 📈 Analytics Dashboard
        - Real-time conversation analytics
        - Sentiment distribution tracking
        - User engagement metrics
        - Performance monitoring
        
        ### ⚡ Performance Options
        - Lightweight models for fast response
        - Full models for better accuracy
        - GPU acceleration support
        """)
    
    with tab3:
        st.markdown("""
        ## Frequently Asked Questions
        
        ### Q: Why use Hugging Face instead of OpenAI?
        **A:** Hugging Face models run locally, providing:
        - No API costs
        - Better privacy (data stays local)
        - No rate limits
        - Offline capability
        
        ### Q: How accurate is the sentiment analysis?
        **A:** The system uses state-of-the-art transformer models with ~85-90% accuracy on customer support text.
        
        ### Q: Can I add my own data?
        **A:** Yes! The system can load:
        - Custom CSV files
        - Kaggle datasets
        - Hugging Face Hub datasets
        - Your own support ticket exports
        
        ### Q: What if the system is slow?
        **A:** Try these solutions:
        - Use "Lightweight Models" option
        - Reduce chunk size in settings
        - Use GPU acceleration if available
        
        ### Q: How does user context work?
        **A:** The system stores:
        - Purchase history
        - Previous support tickets
        - Conversation sentiment
        - Resolution success rates
        
        This context helps provide personalized responses.
        """)

if __name__ == "__main__":
    app()



2025-08-19 14:39:41.260 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


In [7]:
with open("data_loader_script", "w") as f:
    f.write(""" 
import os
import sqlite3
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Tuple
import json
import random

# Additional missing classes

class CustomerSupportDataLoader:
    \"\"\"
    Data loader for customer support datasets
    \"\"\"
    
    def __init__(self):
        self.available_datasets = []
    
    def load_from_csv(self, file_path: str) -> List[str]:
        \"\"\"Load customer support data from CSV file\"\"\"
        try:
            df = pd.read_csv(file_path)
            documents = []
            
            # Try different column name combinations
            question_cols = ['question', 'query', 'customer_message', 'issue']
            answer_cols = ['answer', 'response', 'solution', 'resolution']
            
            q_col = None
            a_col = None
            
            for col in question_cols:
                if col in df.columns:
                    q_col = col
                    break
            
            for col in answer_cols:
                if col in df.columns:
                    a_col = col
                    break
            
            if q_col and a_col:
                for _, row in df.iterrows():
                    if pd.notna(row[q_col]) and pd.notna(row[a_col]):
                        doc = f"Question: {row[q_col]}\\nAnswer: {row[a_col]}"
                        documents.append(doc)
            
            return documents
            
        except Exception as e:
            print(f"Error loading CSV: {e}")
            return []
    
    def create_synthetic_support_data(self, num_samples: int = 50) -> List[str]:
        \"\"\"Create synthetic customer support data for testing\"\"\"
        
        categories = {
            "Billing": [
                ("I was charged twice for my subscription", "I apologize for the billing error. I can see the duplicate charge on your account. I've initiated a refund for the extra charge, which will appear in your account within 3-5 business days."),
                ("How do I cancel my subscription?", "You can cancel your subscription by going to Account Settings > Billing > Cancel Subscription. You'll retain access until the end of your current billing period."),
                ("Can I get a refund for last month?", "Our refund policy allows refunds within 30 days of purchase. Since you're within that window, I can process a full refund for you. Would you like me to proceed?")
            ],
            "Technical Support": [
                ("The app keeps crashing", "I'm sorry you're experiencing crashes. Let's troubleshoot this: 1) Force close the app, 2) Restart your device, 3) Update to the latest app version. If issues persist, please send us your device logs."),
                ("I can't log into my account", "For login issues, try: 1) Clear your browser cache and cookies, 2) Reset your password, 3) Try incognito mode. If you're still having trouble, I can verify your account details."),
                ("My data isn't syncing", "Sync issues are usually network-related. Please check your internet connection and try manually syncing by pulling down on the main screen. Your data should update within a few seconds.")
            ],
            "Product Information": [
                ("What features are included in the premium plan?", "Premium includes: unlimited storage, priority support, advanced analytics, custom integrations, and team collaboration tools. You also get early access to new features."),
                ("Is there a free trial available?", "Yes! We offer a 14-day free trial with full access to premium features. No credit card required. You can start your trial from our website or mobile app."),
                ("What's the difference between plans?", "Basic includes core features for personal use. Premium adds advanced tools and integrations. Enterprise includes everything plus custom solutions and dedicated support.")
            ]
        }
        
        documents = []
        
        for category, qa_pairs in categories.items():
            for question, answer in qa_pairs:
                doc = f"Category: {category}\\n\\nCustomer: {question}\\nSupport: {answer}"
                documents.append(doc)
        
        # Add some general policies
        policy_docs = [
            \"\"\"
            Return Policy:
            - Items can be returned within 30 days of purchase
            - Original receipt and packaging required
            - Digital products are non-refundable after download
            - Refunds processed within 5-7 business days
            - Shipping costs are customer\\'s responsibility unless item is defective

            \"\"\",
            
            \"\"\"
            Privacy Policy Summary:
            - We collect minimal personal information
            - Data is never sold to third parties
            - You can request data deletion at any time
            - All data is encrypted in transit and at rest
            - We comply with GDPR and CCPA regulations
            \"\"\",
            
            \"\"\"
            Account Management:
            - Account verification required for security
            - Password must be at least 8 characters
            - Two-factor authentication is strongly recommended
            - Suspicious activity triggers automatic account protection
            - Account recovery typically takes 24-48 hours
            \"\"\"
        ]
        
        documents.extend(policy_docs)
        
        return documents
    
    def load_all_available_datasets(self) -> List[str]:
        \"\"\"Try to load data from various sources\"\"\"
        all_documents = []
        
        # Try to load from common file locations
        common_paths = [
            'customer_support_data.csv',
            'support_tickets.csv',
            'faq_data.csv'
        ]
        
        for path in common_paths:
            if os.path.exists(path):
                docs = self.load_from_csv(path)
                all_documents.extend(docs)
        
        # If no external data found, use synthetic data
        if not all_documents:
            print("No external datasets found, using synthetic data...")
            all_documents = self.create_synthetic_support_data(100)
        
        return all_documents

def setup_realistic_demo_data(support_bot):
    \"\"\"Setup realistic demo data for different user profiles\"\"\"
    
    # Demo users with different profiles
    users = [
        ("demo_user", "demo@example.com", "Demo User"),
        ("premium_user", "premium@example.com", "Premium Customer"),
        ("new_user", "newbie@example.com", "New Customer"),
        ("frequent_user", "frequent@example.com", "Frequent User"),
        ("enterprise_user", "enterprise@example.com", "Enterprise Customer")
    ]
    
    for user_id, email, name in users:
        support_bot.add_user(user_id, email, name)
    
    # Add realistic purchase history
    purchases = [
        # Demo user purchases
        ("demo_user", "ord_001", "Premium Subscription", "Subscription", 29.99, "active"),
        ("demo_user", "ord_002", "Mobile App Pro", "Software", 9.99, "completed"),
        
        # Premium user purchases  
        ("premium_user", "ord_003", "Enterprise License", "Software", 299.99, "active"),
        ("premium_user", "ord_004", "Priority Support", "Service", 49.99, "active"),
        ("premium_user", "ord_005", "Custom Integration", "Service", 199.99, "completed"),
        
        # New user purchases
        ("new_user", "ord_006", "Basic Plan", "Subscription", 9.99, "trial"),
        
        # Frequent user purchases
        ("frequent_user", "ord_007", "Pro Plan", "Subscription", 19.99, "active"),
        ("frequent_user", "ord_008", "Add-on Package", "Software", 14.99, "completed"),
        ("frequent_user", "ord_009", "Storage Upgrade", "Service", 5.99, "active"),
        
        # Enterprise user purchases
        ("enterprise_user", "ord_010", "Enterprise Suite", "Software", 999.99, "active"),
        ("enterprise_user", "ord_011", "Dedicated Support", "Service", 299.99, "active"),
    ]
    
    for user_id, purchase_id, product_name, category, amount, status in purchases:
        support_bot.add_purchase(user_id, purchase_id, product_name, category, amount, status)
    
    # Add some historical support tickets to create context
    sample_tickets = [
        ("premium_user", "I need help setting up the enterprise features", -0.2, False),
        ("frequent_user", "The app is running slowly lately", -0.1, True),
        ("demo_user", "How do I upgrade my subscription?", 0.3, True),
    ]
    
    conn = sqlite3.connect(support_bot.db_path)
    cursor = conn.cursor()
    
    for user_id, query, sentiment, resolved in sample_tickets:
        ticket_id = f"tick_{user_id}_{len(sample_tickets)}"
        cursor.execute(\"\"\"
            INSERT INTO support_tickets 
            (ticket_id, user_id, query, sentiment_score, resolved, created_at)
            VALUES (?, ?, ?, ?, ?, ?)
        \"\"\", (ticket_id, user_id, query, sentiment, resolved, datetime.now()))
    
    conn.commit()
    conn.close()

# Test and demo functions

def run_basic_demo():
    \"\"\"Run a basic demo of the customer support system\"\"\"
    
    print("🚀 Starting Customer Support RAG Demo")
    print("=" * 50)
    
    try:
        # Initialize system
        print("Loading models and data...")
        data_loader = CustomerSupportDataLoader()
        documents = data_loader.create_synthetic_support_data(50)
        
        # Use lightweight model for demo
        support_bot = LightweightCustomerSupportRAG()
        support_bot.load_knowledge_base(documents)
        
        # Setup demo data
        setup_realistic_demo_data(support_bot)
        
        print(f"✅ System ready with {len(documents)} knowledge base documents!")
        
        # Interactive demo
        demo_queries = [
            ("demo_user", "I want to upgrade to premium"),
            ("premium_user", "I'm having trouble with billing"),
            ("new_user", "How does the free trial work?"),
            ("frequent_user", "This is frustrating! My app keeps crashing!"),
        ]
        
        print("\\n🎯 Demo Conversations:")
        print("-" * 30)
        
        for user_id, query in demo_queries:
            print(f"\\n👤 {user_id}: {query}")
            
            response = support_bot.chat(user_id, query)
            
            print(f"🤖 Bot: {response['answer']}")
            print(f"📊 Sentiment: {response['sentiment']} ({response['sentiment_score']:.3f})")
            
            if response['escalate']:
                print("🚨 ESCALATED TO HUMAN AGENT")
            
            print("-" * 30)
        
        print("\\n✅ Demo completed successfully!")
        
    except Exception as e:
        print(f"❌ Demo failed: {e}")
        print("Make sure all dependencies are installed")

def test_system_components():
    \"\"\"Test individual system components\"\"\"
    
    print("🧪 Testing System Components")
    print("=" * 40)
    
    # Test 1: Data loading
    print("Test 1: Data Loading")
    loader = CustomerSupportDataLoader()
    docs = loader.create_synthetic_support_data(10)
    print(f"✅ Generated {len(docs)} synthetic documents")
    
    # Test 2: Database operations
    print("\\nTest 2: Database Operations")
    try:
        support_bot = LightweightCustomerSupportRAG(db_path="test_support.db")
        support_bot.add_user("test_user", "test@example.com", "Test User")
        support_bot.add_purchase("test_user", "test_order", "Test Product", "Test", 99.99)
        
        user_context = support_bot.get_user_context("test_user")
        print(f"✅ User context loaded: {len(user_context['recent_purchases'])} purchases")
        
    except Exception as e:
        print(f"❌ Database test failed: {e}")
    
    # Test 3: Sentiment analysis
    print("\\nTest 3: Sentiment Analysis")
    try:
        test_messages = [
            "I love this service!",
            "This is terrible, I want a refund!",
            "Can you help me with my account?",
            "I'm so frustrated with this app!"
        ]
        
        if 'support_bot' in locals():
            for msg in test_messages:
                sentiment_score, sentiment_label = support_bot.analyze_sentiment(msg)
                print(f"  '{msg}' -> {sentiment_label} ({sentiment_score:.3f})")
        
        print("✅ Sentiment analysis working")
        
    except Exception as e:
        print(f"❌ Sentiment test failed: {e}")
    
    print("\\n🎉 Component testing completed!")

# Utility functions for notebook usage

def quick_start():
    \"\"\"Quick start function for easy notebook usage\"\"\"
    print("🚀 Quick Start: Customer Support RAG Chatbot")
    print("=" * 50)
    print("Run the following commands to get started:")
    print()
    print("1. Basic Demo:")
    print("   run_basic_demo()")
    print()
    print("2. Test Components:")
    print("   test_system_components()")
    print()
    print("3. Interactive Chat:")
    print("   # Initialize system first")
    print("   loader = CustomerSupportDataLoader()")
    print("   docs = loader.create_synthetic_support_data()")
    print("   bot = LightweightCustomerSupportRAG()")
    print("   bot.load_knowledge_base(docs)")
    print("   setup_realistic_demo_data(bot)")
    print()
    print("   # Then chat")
    print("   response = bot.chat('demo_user', 'I need help with billing')")
    print("   print(response['answer'])")

def create_sample_dataset(filename: str = "sample_support_data.csv"):
    \"\"\"Create a sample CSV dataset for testing\"\"\"
    
    data = []
    
    # Generate sample Q&A pairs
    qa_pairs = [
        ("How do I reset my password?", "Go to the login page and click 'Forgot Password'. Enter your email and follow the instructions sent to you."),
        ("What is your refund policy?", "We offer full refunds within 30 days of purchase. Contact support with your order number to process a refund."),
        ("How do I cancel my subscription?", "You can cancel anytime in your account settings under 'Billing'. Your access continues until the end of the current period."),
        ("The app is not working", "Please try restarting the app and your device. If issues persist, check for app updates or contact technical support."),
        ("I was charged twice", "I apologize for the billing error. Please provide your transaction details and I'll process a refund for the duplicate charge."),
        ("How do I upgrade my plan?", "You can upgrade your plan in Account Settings > Subscription. Changes take effect immediately with prorated billing."),
        ("Is my data secure?", "Yes, we use bank-level encryption and never share your data with third parties. Your privacy is our top priority."),
        ("Do you offer customer support?", "Yes! We provide 24/7 email support and live chat during business hours. Premium users get priority support."),
        ("What features are in the premium plan?", "Premium includes unlimited storage, advanced analytics, priority support, and access to beta features."),
        ("How do I contact support?", "You can reach us via email at support@example.com, live chat on our website, or through the in-app help section.")
    ]
    
    categories = ["Account", "Billing", "Technical", "General", "Features"]
    
    for i, (question, answer) in enumerate(qa_pairs):
        data.append({
            'id': i + 1,
            'question': question,
            'answer': answer,
            'category': random.choice(categories),
            'timestamp': datetime.now() - timedelta(days=random.randint(0, 30))
        })
    
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    print(f"✅ Sample dataset created: {filename}")
    print(f"   Contains {len(data)} Q&A pairs")
    
    return filename

# Main execution
if __name__ == "__main__":
    # Show quick start guide
    quick_start()
    
    print("\\n" + "=" * 50)
    print("Ready to run! Try: run_basic_demo()")
 """)

In [8]:
!pip uninstall -y torch torchvision torchaudio


Found existing installation: torch 2.6.0+cu118
Uninstalling torch-2.6.0+cu118:
  Successfully uninstalled torch-2.6.0+cu118
Found existing installation: torchvision 0.21.0+cu124
Uninstalling torchvision-0.21.0+cu124:
  Successfully uninstalled torchvision-0.21.0+cu124
Found existing installation: torchaudio 2.6.0+cu124
Uninstalling torchaudio-2.6.0+cu124:
  Successfully uninstalled torchaudio-2.6.0+cu124


In [9]:
!pip install torch==2.3.1+cu118 torchvision==0.18.1+cu118 torchaudio==2.3.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html


Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==2.3.1+cu118
  Downloading https://download.pytorch.org/whl/cu118/torch-2.3.1%2Bcu118-cp311-cp311-linux_x86_64.whl (839.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m839.7/839.7 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torchvision==0.18.1+cu118
  Downloading https://download.pytorch.org/whl/cu118/torchvision-0.18.1%2Bcu118-cp311-cp311-linux_x86_64.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m102.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting torchaudio==2.3.1+cu118
  Downloading https://download.pytorch.org/whl/cu118/torchaudio-2.3.1%2Bcu118-cp311-cp311-linux_x86_64.whl (3.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m91.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Collecting nvidia-cudnn-cu11==8.7.0.84 (from torch==2.3.1+cu118)
  Downlo

In [10]:
!pip install --upgrade sentence-transformers


Collecting sentence-transformers
  Downloading sentence_transformers-5.1.0-py3-none-any.whl.metadata (16 kB)
Downloading sentence_transformers-5.1.0-py3-none-any.whl (483 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m483.4/483.4 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
  Attempting uninstall: sentence-transformers
    Found existing installation: sentence-transformers 4.1.0
    Uninstalling sentence-transformers-4.1.0:
      Successfully uninstalled sentence-transformers-4.1.0
Successfully installed sentence-transformers-5.1.0


In [11]:
import os
import sqlite3
import json
import random
import re
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
import math
from collections import Counter

class SimpleTextSimilarity:
    """Simple text similarity using TF-IDF and cosine similarity"""
    
    def __init__(self):
        self.documents = []
        self.vocab = set()
        self.doc_vectors = []
        
    def preprocess_text(self, text):
        """Simple text preprocessing"""
        # Convert to lowercase and remove special characters
        text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text.lower())
        # Split into words and remove empty strings
        words = [word for word in text.split() if word and len(word) > 2]
        return words
    
    def compute_tf(self, words):
        """Compute term frequency"""
        word_count = len(words)
        tf = Counter(words)
        # Normalize by document length
        for word in tf:
            tf[word] = tf[word] / word_count
        return tf
    
    def compute_idf(self, documents_words):
        """Compute inverse document frequency"""
        N = len(documents_words)
        idf = {}
        all_words = set(word for doc in documents_words for word in doc)
        
        for word in all_words:
            containing_docs = sum(1 for doc in documents_words if word in doc)
            idf[word] = math.log(N / containing_docs) if containing_docs > 0 else 0
        
        return idf
    
    def compute_tfidf(self, tf, idf):
        """Compute TF-IDF vector"""
        tfidf = {}
        for word, tf_val in tf.items():
            tfidf[word] = tf_val * idf.get(word, 0)
        return tfidf
    
    def cosine_similarity(self, vec1, vec2):
        """Compute cosine similarity between two vectors"""
        # Get all unique words
        all_words = set(vec1.keys()) | set(vec2.keys())
        
        # Convert to vectors
        v1 = [vec1.get(word, 0) for word in all_words]
        v2 = [vec2.get(word, 0) for word in all_words]
        
        # Compute dot product
        dot_product = sum(a * b for a, b in zip(v1, v2))
        
        # Compute magnitudes
        mag1 = math.sqrt(sum(a * a for a in v1))
        mag2 = math.sqrt(sum(a * a for a in v2))
        
        # Avoid division by zero
        if mag1 == 0 or mag2 == 0:
            return 0
        
        return dot_product / (mag1 * mag2)
    
    def fit(self, documents):
        """Fit the similarity model on documents"""
        self.documents = documents
        
        # Preprocess all documents
        documents_words = [self.preprocess_text(doc) for doc in documents]
        
        # Compute IDF
        idf = self.compute_idf(documents_words)
        
        # Compute TF-IDF for each document
        self.doc_vectors = []
        for words in documents_words:
            tf = self.compute_tf(words)
            tfidf = self.compute_tfidf(tf, idf)
            self.doc_vectors.append(tfidf)
        
        print(f"Fitted similarity model on {len(documents)} documents")
    
    def search(self, query, top_k=3, min_similarity=0.1):
        """Search for similar documents"""
        if not self.doc_vectors:
            return []
        
        # Preprocess query
        query_words = self.preprocess_text(query)
        if not query_words:
            return []
        
        # Compute query TF
        query_tf = self.compute_tf(query_words)
        
        # Create query vector (using same vocab as documents)
        query_vector = {}
        for word in query_tf:
            query_vector[word] = query_tf[word]
        
        # Compute similarities
        similarities = []
        for i, doc_vector in enumerate(self.doc_vectors):
            sim = self.cosine_similarity(query_vector, doc_vector)
            if sim >= min_similarity:
                similarities.append((i, sim))
        
        # Sort by similarity and return top results
        similarities.sort(key=lambda x: x[1], reverse=True)
        
        results = []
        for i, (doc_idx, sim) in enumerate(similarities[:top_k]):
            results.append({
                'document': self.documents[doc_idx],
                'score': sim,
                'index': doc_idx
            })
        
        return results

class CustomerSupportDataLoader:
    """Generate synthetic customer support data"""
    
    def create_synthetic_support_data(self, num_docs=30):
        """Create synthetic customer support documents"""
        base_faqs = [
            "How do I reset my password? Go to the login page and click 'Forgot Password', then follow the email instructions.",
            "What are your business hours? We are open Monday-Friday 9AM-6PM EST, and Saturday 10AM-4PM EST.",
            "How do I cancel my subscription? Go to Account Settings > Billing > Manage Subscription > Cancel.",
            "How do I contact support? Email us at support@company.com, call 1-800-SUPPORT, or use live chat.",
            "What payment methods do you accept? We accept all major credit cards, PayPal, and bank transfers.",
            "How do I update my billing information? Go to Account Settings > Billing > Update Payment Method.",
            "Is there a mobile app? Yes, download our app from the App Store or Google Play Store.",
            "How do I change my email address? Go to Account Settings > Profile > Edit Email Address.",
            "What is your refund policy? We offer full refunds within 30 days of purchase for most products.",
            "How do I delete my account? Contact support to request account deletion - we'll process it within 48 hours.",
            "How do I upgrade my plan? Go to Account Settings > Billing > Change Plan to see upgrade options.",
            "What happens if I exceed my usage limits? You'll receive notifications and can upgrade or purchase additional credits.",
            "How do I download my data? Go to Account Settings > Privacy > Export Data to download your information.",
            "Can I share my account with team members? Yes, our Team and Enterprise plans support multiple users.",
            "How do I set up two-factor authentication? Go to Account Settings > Security > Enable 2FA.",
            "What browsers do you support? We support Chrome, Firefox, Safari, and Edge (latest versions).",
            "How do I integrate with third-party tools? Check our API documentation or contact our integration team.",
            "What are your data security practices? We use enterprise-grade encryption and SOC 2 compliance.",
            "How do I report a bug? Use the 'Report Issue' button in the app or email bugs@company.com.",
            "Can I customize my dashboard? Yes, drag and drop widgets to personalize your dashboard layout.",
            "How do I export my data? Go to Settings > Data Export > Choose format and download your data.",
            "What happens to my data when I cancel? Your data is retained for 90 days then permanently deleted.",
            "How do I add team members? Go to Team Settings > Add Members and send invitation emails.",
            "Can I use your API? Yes, check our developer documentation for API keys and endpoints.",
            "How do I change my plan? Go to Billing > Change Plan to upgrade or downgrade your subscription.",
            "What is your uptime guarantee? We maintain 99.9% uptime with automatic failover systems.",
            "How do I enable notifications? Go to Settings > Notifications > Configure your preferences.",
            "Can I use custom domains? Yes, custom domains are available on Business and Enterprise plans.",
            "How do I backup my data? We automatically backup all data daily with 30-day retention.",
            "What support channels do you offer? We offer email, live chat, phone support, and help documentation."
        ]
        
        return base_faqs[:num_docs]

class SimpleCustomerSupportRAG:
    """Simple RAG system for customer support without heavy dependencies"""
    
    def __init__(self, db_path="customer_support.db"):
        self.similarity_model = SimpleTextSimilarity()
        self.documents = []
        self.db_path = db_path
        self.init_database()
        
    def init_database(self):
        """Initialize SQLite database for user data"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        
        # Users table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS users (
                user_id TEXT PRIMARY KEY,
                email TEXT,
                name TEXT,
                signup_date TEXT,
                plan_type TEXT DEFAULT 'free'
            )
        ''')
        
        # Purchase history table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS purchases (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id TEXT,
                order_id TEXT,
                product_name TEXT,
                category TEXT,
                amount REAL,
                purchase_date TEXT,
                FOREIGN KEY (user_id) REFERENCES users (user_id)
            )
        ''')
        
        # Conversation history table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS conversations (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id TEXT,
                message TEXT,
                response TEXT,
                sentiment TEXT,
                sentiment_score REAL,
                escalated BOOLEAN,
                timestamp TEXT,
                FOREIGN KEY (user_id) REFERENCES users (user_id)
            )
        ''')
        
        conn.commit()
        conn.close()
    
    def load_knowledge_base(self, documents):
        """Load knowledge base from documents"""
        self.documents = documents
        print(f"Processing {len(documents)} documents...")
        
        # Fit the similarity model
        self.similarity_model.fit(documents)
        
        print(f"Knowledge base created with {len(documents)} documents using simple text similarity")
    
    def add_user(self, user_id, email, name, plan_type='free'):
        """Add a new user to the database"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        
        cursor.execute('''
            INSERT OR REPLACE INTO users (user_id, email, name, signup_date, plan_type)
            VALUES (?, ?, ?, ?, ?)
        ''', (user_id, email, name, datetime.now().isoformat(), plan_type))
        
        conn.commit()
        conn.close()
    
    def add_purchase(self, user_id, order_id, product_name, category, amount):
        """Add a purchase record"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        
        cursor.execute('''
            INSERT INTO purchases (user_id, order_id, product_name, category, amount, purchase_date)
            VALUES (?, ?, ?, ?, ?, ?)
        ''', (user_id, order_id, product_name, category, amount, datetime.now().isoformat()))
        
        conn.commit()
        conn.close()
    
    def get_user_context(self, user_id):
        """Get user context including purchase history"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        
        # Get user info
        cursor.execute('SELECT * FROM users WHERE user_id = ?', (user_id,))
        user = cursor.fetchone()
        
        # Get recent purchases
        cursor.execute('''
            SELECT * FROM purchases WHERE user_id = ?
            ORDER BY purchase_date DESC LIMIT 5
        ''', (user_id,))
        purchases = cursor.fetchall()
        
        # Get recent conversations
        cursor.execute('''
            SELECT * FROM conversations WHERE user_id = ?
            ORDER BY timestamp DESC LIMIT 3
        ''', (user_id,))
        conversations = cursor.fetchall()
        
        conn.close()
        
        return {
            'user_info': user,
            'recent_purchases': purchases,
            'recent_conversations': conversations
        }
    
    def analyze_sentiment(self, text):
        """Simple sentiment analysis"""
        negative_words = ['angry', 'frustrated', 'terrible', 'awful', 'hate', 'worst', 'broken', 'useless', 'ridiculous', 'disappointed', 'horrible', 'annoying', 'stupid', 'crap']
        positive_words = ['great', 'awesome', 'love', 'excellent', 'amazing', 'perfect', 'wonderful', 'fantastic', 'satisfied', 'happy', 'good', 'thanks', 'helpful']
        
        text_lower = text.lower()
        
        negative_score = sum(1 for word in negative_words if word in text_lower)
        positive_score = sum(1 for word in positive_words if word in text_lower)
        
        if negative_score > positive_score:
            return 'negative', -0.5 - (negative_score * 0.2)
        elif positive_score > negative_score:
            return 'positive', 0.5 + (positive_score * 0.2)
        else:
            return 'neutral', 0.0
    
    def should_escalate(self, message, sentiment, sentiment_score, user_context):
        """Determine if conversation should be escalated"""
        escalation_triggers = [
            sentiment == 'negative' and abs(sentiment_score) > 0.7,
            'manager' in message.lower(),
            'supervisor' in message.lower(),
            'legal' in message.lower(),
            'lawsuit' in message.lower(),
            'lawyer' in message.lower(),
            'cancel' in message.lower() and ('everything' in message.lower() or 'all' in message.lower()),
            len(user_context.get('recent_conversations', [])) > 2,  # Frequent contact
            'refund' in message.lower() and 'immediately' in message.lower()
        ]
        
        return any(escalation_triggers)
    
    def search_knowledge_base(self, query, top_k=3):
        """Search for relevant documents"""
        return self.similarity_model.search(query, top_k=top_k, min_similarity=0.1)
    
    def generate_response(self, message, user_context, knowledge_results):
        """Generate response based on context and knowledge"""
        if not knowledge_results:
            return "I'm sorry, I couldn't find specific information about that. Let me connect you with a human agent who can better assist you."
        
        # Use the best matching document
        best_match = knowledge_results[0]['document']
        confidence = knowledge_results[0]['score']
        
        # Customize response based on user context
        user_info = user_context.get('user_info')
        if user_info and len(user_info) > 4:  # Has plan_type
            plan_type = user_info[4]
            if plan_type in ['premium', 'enterprise']:
                prefix = f"As a {plan_type} customer, I'm happy to help you with priority support. "
            elif plan_type == 'basic':
                prefix = "I'm here to help you. "
            else:
                prefix = ""
        else:
            prefix = ""
        
        # Add confidence-based messaging
        if confidence > 0.5:
            return f"{prefix}{best_match}"
        elif confidence > 0.3:
            return f"{prefix}Based on your question, here's what I found: {best_match}\n\nIf this doesn't fully answer your question, please let me know and I can connect you with a specialist."
        else:
            return f"{prefix}I found some related information: {best_match}\n\nHowever, for the most accurate help with your specific question, I'd recommend connecting with one of our human agents."
    
    def chat(self, user_id, message):
        """Main chat function"""
        # Get user context
        user_context = self.get_user_context(user_id)
        
        # Analyze sentiment
        sentiment, sentiment_score = self.analyze_sentiment(message)
        
        # Search knowledge base
        knowledge_results = self.search_knowledge_base(message)
        
        # Generate response
        response = self.generate_response(message, user_context, knowledge_results)
        
        # Check if escalation is needed
        escalate = self.should_escalate(message, sentiment, sentiment_score, user_context)
        
        if escalate:
            response += "\n\n🚨 I'm connecting you with one of our human agents who can provide more personalized assistance."
        
        # Save conversation
        self.save_conversation(user_id, message, response, sentiment, sentiment_score, escalate)
        
        return {
            'answer': response,
            'sentiment': sentiment,
            'sentiment_score': sentiment_score,
            'escalate': escalate,
            'user_context': user_context,
            'knowledge_matches': len(knowledge_results),
            'best_match_score': knowledge_results[0]['score'] if knowledge_results else 0
        }
    
    def save_conversation(self, user_id, message, response, sentiment, sentiment_score, escalated):
        """Save conversation to database"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        
        cursor.execute('''
            INSERT INTO conversations (user_id, message, response, sentiment, sentiment_score, escalated, timestamp)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        ''', (user_id, message, response, sentiment, sentiment_score, escalated, datetime.now().isoformat()))
        
        conn.commit()
        conn.close()

def setup_realistic_demo_data(support_system):
    """Set up realistic demo data"""
    # Add demo users
    support_system.add_user('demo_user', 'demo@example.com', 'Demo User', 'premium')
    support_system.add_user('new_user', 'newbie@example.com', 'New User', 'free')
    support_system.add_user('frequent_user', 'frequent@example.com', 'Frequent User', 'basic')
    support_system.add_user('enterprise_user', 'enterprise@company.com', 'Enterprise User', 'enterprise')
    
    # Add some purchase history
    support_system.add_purchase('demo_user', 'ORD-001', 'Premium Plan', 'subscription', 29.99)
    support_system.add_purchase('demo_user', 'ORD-002', 'Extra Storage', 'addon', 9.99)
    support_system.add_purchase('enterprise_user', 'ORD-003', 'Enterprise License', 'subscription', 299.99)

def test_system_components():
    """Test individual system components"""
    print("🔧 Testing system components...")
    
    try:
        # Test data loader
        loader = CustomerSupportDataLoader()
        docs = loader.create_synthetic_support_data(5)
        print(f"✅ Data loader: Generated {len(docs)} documents")
        
        # Test RAG system initialization
        rag = SimpleCustomerSupportRAG(db_path="test_support.db")
        print("✅ RAG system: Initialized successfully")
        
        # Test knowledge base
        rag.load_knowledge_base(docs)
        print("✅ Knowledge base: Created successfully")
        
        # Test similarity search
        results = rag.search_knowledge_base("reset password")
        print(f"✅ Search: Found {len(results)} results for 'reset password'")
        
        print("🎉 All components working!")
        
    except Exception as e:
        print(f"❌ Component test failed: {e}")
        import traceback
        traceback.print_exc()

def run_basic_demo():
    """Run a basic demo of the RAG system"""
    try:
        print("🎯 Starting Customer Support RAG Demo (Simple Version)")
        print("Loading data...")
        
        # Create data
        loader = CustomerSupportDataLoader()
        documents = loader.create_synthetic_support_data(20)
        
        # Initialize RAG system
        rag = SimpleCustomerSupportRAG(db_path="demo_simple.db")
        rag.load_knowledge_base(documents)
        
        print("\n" + "="*50)
        print("✅ RAG SYSTEM READY!")
        print("="*50)
        
        # Add a demo user
        rag.add_user('demo_user', 'demo@example.com', 'Demo User')
        
        # Demo queries
        test_questions = [
            "How can I reset my password?",
            "What are your business hours?", 
            "This is terrible! Nothing works!",
            "I want to speak to a manager right now!",
            "How do I cancel my account?"
        ]
        
        for question in test_questions:
            print(f"\n❓ Q: {question}")
            response = rag.chat('demo_user', question)
            print(f"🤖 A: {response['answer']}")
            print(f"📊 Sentiment: {response['sentiment']} ({response['sentiment_score']:.2f})")
            print(f"🎯 Match Score: {response['best_match_score']:.3f}")
            if response['escalate']:
                print("🚨 ESCALATED TO HUMAN AGENT")
        
        print("\n" + "="*50)
        print("✅ Demo completed successfully!")
        print("="*50)
        
    except Exception as e:
        print(f"❌ Demo failed: {e}")
        import traceback
        traceback.print_exc()

# Main execution
if __name__ == "__main__":
    print("🎯 Testing the Simple RAG Customer Support System")
    print("=" * 55)
    
    try:
        run_basic_demo()
    except Exception as e:
        print(f"Demo error: {e}")
        print("Let's try component testing instead...")
        test_system_components()
    
    print("\n" + "="*55)
    print("🎮 Interactive Example - Try the system yourself!")
    print("="*55)
    
    try:
        # Initialize the system
        print("Setting up interactive example...")
        loader = CustomerSupportDataLoader()
        documents = loader.create_synthetic_support_data(30)
        
        # Use simple system (no heavy dependencies)
        support_system = SimpleCustomerSupportRAG(db_path="demo_support_simple.db")
        support_system.load_knowledge_base(documents)
        
        # Add demo data
        setup_realistic_demo_data(support_system)
        
        print(f"✅ System ready with {len(documents)} knowledge base documents!")
        
        # Example interactions
        test_cases = [
            {
                "user": "demo_user",
                "message": "I need help with my premium subscription billing",
                "expected": "Should recognize premium customer"
            },
            {
                "user": "new_user", 
                "message": "How does your free trial work?",
                "expected": "Should provide trial information"
            },
            {
                "user": "frequent_user",
                "message": "This is ridiculous! The app crashes every time I try to sync!",
                "expected": "Should detect frustration and escalate"
            },
            {
                "user": "enterprise_user",
                "message": "We need custom integration support for our team",
                "expected": "Should recognize enterprise customer"
            }
        ]
        
        print("\n🗣️ Example Conversations:")
        print("-" * 40)
        
        for i, test_case in enumerate(test_cases, 1):
            print(f"\n💬 Conversation {i}:")
            print(f"👤 User ({test_case['user']}): {test_case['message']}")
            
            # Get AI response
            response = support_system.chat(test_case['user'], test_case['message'])
            
            print(f"🤖 AI Support: {response['answer']}")
            
            # Show analysis
            print(f"📊 Analysis:")
            print(f"   • Sentiment: {response['sentiment']} (score: {response['sentiment_score']:.3f})")
            print(f"   • Match confidence: {response['best_match_score']:.3f}")
            print(f"   • Escalation needed: {'Yes' if response['escalate'] else 'No'}")
            
            if response['escalate']:
                print("   🚨 This conversation was flagged for human agent review!")
            
            print(f"   • Expected: {test_case['expected']}")
            print("-" * 40)
        
        print("\n✅ Interactive example completed successfully!")
        print("\n🎯 How to use this system:")
        print("response = support_system.chat('user_id', 'message')")
        
    except Exception as e:
        print(f"❌ Interactive example failed: {e}")
        import traceback
        traceback.print_exc()
    
    print("\n🎉 System demonstration complete!")
    print("The simple RAG customer support chatbot is ready to use!")

🎯 Testing the Simple RAG Customer Support System
🎯 Starting Customer Support RAG Demo (Simple Version)
Loading data...
Processing 20 documents...
Fitted similarity model on 20 documents
Knowledge base created with 20 documents using simple text similarity

✅ RAG SYSTEM READY!

❓ Q: How can I reset my password?
🤖 A: Based on your question, here's what I found: How do I reset my password? Go to the login page and click 'Forgot Password', then follow the email instructions.

If this doesn't fully answer your question, please let me know and I can connect you with a specialist.
📊 Sentiment: neutral (0.00)
🎯 Match Score: 0.427

❓ Q: What are your business hours?
🤖 A: Based on your question, here's what I found: What are your business hours? We are open Monday-Friday 9AM-6PM EST, and Saturday 10AM-4PM EST.

If this doesn't fully answer your question, please let me know and I can connect you with a specialist.
📊 Sentiment: neutral (0.00)
🎯 Match Score: 0.460

❓ Q: This is terrible! Nothing wo

In [12]:
import os
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle

class CustomerSupportRAG:
    def __init__(self):
        self.device = "cpu"  # Force CPU usage for compatibility
        self.encoder = None
        self.index = None
        self.documents = []
        self.embeddings = None
        
    def load_models(self):
        """Load the sentence transformer model"""
        print("Loading lightweight models...")
        print(f"Device set to use {self.device.upper()}")
        
        # Use a lightweight model that works well on CPU
        self.encoder = SentenceTransformer('all-MiniLM-L6-v2', device=self.device)
        print("Lightweight models loaded!")
        
    def load_knowledge_base(self, documents=None):
        """Load or create knowledge base from documents"""
        if documents is None:
            # Default sample documents if none provided
            documents = [
                "How do I reset my password? Go to the login page and click 'Forgot Password'.",
                "What are your business hours? We are open Monday-Friday 9AM-5PM EST.",
                "How do I cancel my subscription? Go to Account Settings > Billing > Cancel Subscription.",
                "How do I contact support? Email us at support@company.com or call 1-800-SUPPORT.",
                "What payment methods do you accept? We accept all major credit cards and PayPal.",
                "How do I update my billing information? Go to Account Settings > Billing > Update Payment Method.",
                "Is there a mobile app? Yes, download our app from the App Store or Google Play.",
                "How do I change my email address? Go to Account Settings > Profile > Email Address.",
                "What is your refund policy? We offer full refunds within 30 days of purchase.",
                "How do I delete my account? Contact support to request account deletion."
            ]
        
        self.documents = documents
        print(f"Processing {len(documents)} documents...")
        
        # Create embeddings
        self.embeddings = self.encoder.encode(documents)
        
        # Create FAISS index
        dimension = self.embeddings.shape[1]
        self.index = faiss.IndexFlatIP(dimension)  # Inner product for cosine similarity
        
        # Normalize embeddings for cosine similarity
        faiss.normalize_L2(self.embeddings)
        self.index.add(self.embeddings.astype('float32'))
        
        print(f"Knowledge base created with {len(documents)} documents")
        
    def search(self, query, top_k=3):
        """Search for relevant documents"""
        if self.encoder is None or self.index is None:
            return ["Please load models and knowledge base first."]
            
        # Encode query
        query_embedding = self.encoder.encode([query])
        faiss.normalize_L2(query_embedding)
        
        # Search
        scores, indices = self.index.search(query_embedding.astype('float32'), top_k)
        
        # Return relevant documents
        results = []
        for i, idx in enumerate(indices[0]):
            if idx != -1:  # Valid index
                results.append({
                    'document': self.documents[idx],
                    'score': float(scores[0][i])
                })
        
        return results
    
    def answer_question(self, question):
        """Generate answer based on retrieved documents"""
        results = self.search(question, top_k=2)
        
        if not results:
            return "I'm sorry, I couldn't find relevant information for your question."
        
        # Simple answer generation using the top result
        best_match = results[0]['document']
        
        return f"Based on our knowledge base: {best_match}"

def run_basic_demo():
    """Run a basic demo of the RAG system"""
    try:
        print("Starting Customer Support RAG Demo")
        print("Loading models and data...")
        
        # Initialize RAG system
        rag = CustomerSupportRAG()
        
        # Load models
        rag.load_models()
        
        # Load knowledge base (now with default documents)
        rag.load_knowledge_base()  # No arguments needed - uses defaults
        
        print("\n" + "="*50)
        print("RAG SYSTEM READY!")
        print("="*50)
        
        # Demo queries
        test_questions = [
            "How can I reset my password?",
            "What are your business hours?",
            "How do I cancel my account?",
            "What payment methods do you accept?"
        ]
        
        for question in test_questions:
            print(f"\nQ: {question}")
            answer = rag.answer_question(question)
            print(f"A: {answer}")
        
        print("\n" + "="*50)
        print("Demo completed successfully!")
        print("="*50)
        
    except Exception as e:
        print(f"Demo failed: {e}")
        print("Make sure all dependencies are installed:")
        print("pip install sentence-transformers faiss-cpu numpy")

if __name__ == "__main__":
    run_basic_demo()

2025-08-19 14:42:59.516177: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755614579.695665      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755614579.749069      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Starting Customer Support RAG Demo
Loading models and data...
Loading lightweight models...
Device set to use CPU


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Lightweight models loaded!
Processing 10 documents...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Knowledge base created with 10 documents

RAG SYSTEM READY!

Q: How can I reset my password?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

A: Based on our knowledge base: How do I reset my password? Go to the login page and click 'Forgot Password'.

Q: What are your business hours?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

A: Based on our knowledge base: What are your business hours? We are open Monday-Friday 9AM-5PM EST.

Q: How do I cancel my account?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

A: Based on our knowledge base: How do I cancel my subscription? Go to Account Settings > Billing > Cancel Subscription.

Q: What payment methods do you accept?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

A: Based on our knowledge base: What payment methods do you accept? We accept all major credit cards and PayPal.

Demo completed successfully!
