SETUP & LIBRARIES

In [104]:
# !pip install --upgrade google-generativeai pandas scikit-learn

import google.generativeai as genai
from google.colab import userdata
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json
import uuid
import datetime
import re
import warnings

# Suppress warnings
# NOTE: this filter is global -- every warning category is hidden for the
# rest of the session, not only the noisy ones.
warnings.filterwarnings('ignore')

# 1. Setup API
# Reads the API key from Colab Secrets, configures the Gemini client and
# creates the module-level `model` used by the NLU and generation helpers.
# Any failure is reported on stdout instead of aborting the cell.
try:
    GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")
    # Reject a blank key up front so the success banner below is never
    # printed for a key that cannot work; the error is routed to the
    # except-branch like every other setup failure.
    if not GOOGLE_API_KEY:
        raise ValueError("GOOGLE_API_KEY secret is empty")
    genai.configure(api_key=GOOGLE_API_KEY)

    # Using the standard flash model
    model = genai.GenerativeModel('gemini-2.5-flash')
    print("‚úÖ System Initialized: Google Gemini Connected.")

except Exception as e:
    print(f"‚ùå CRITICAL SETUP ERROR: {e}")
    print("Please set 'GOOGLE_API_KEY' in the Colab Secrets (Key icon on the left).")

✅ System Initialized: Google Gemini Connected.


DATA INFRASTRUCTURE

In [105]:
def nlu_service(message: str):
    """
    Robust NLU that handles JSON errors.

    Sends `message` to the module-level Gemini `model` and decodes the JSON
    object found in the reply.

    Args:
        message: Raw user text; it is interpolated verbatim into the prompt.

    Returns:
        The decoded dict (expected keys: "intent", "confidence",
        "escalate_reason"). When a "confidence" key is present it is coerced
        to a float clamped to [0.0, 1.0], so callers can compare it
        numerically. On any failure (API error, no JSON in the reply,
        malformed JSON) the neutral fallback
        {"intent": "technical", "confidence": 0.5, "escalate_reason": ""}
        is returned instead of raising.
    """
    prompt = f"""
    Analyze this support message: "{message}"

    Return a valid JSON object with keys:
    - "intent": ["technical", "billing", "account", "human_escalate", "greeting"]
    - "confidence": float (0.0 to 1.0)
    - "escalate_reason": string (if intent is human_escalate)

    Escalate ONLY for threats, lawsuits, or explicit requests for a human.
    """
    try:
        response = model.generate_content(prompt)
        text = response.text

        # Smart JSON Extraction (Finds { ... })
        # Greedy on purpose: spans from the first "{" to the last "}" so any
        # text wrapped around the object is ignored.
        match = re.search(r"\{.*\}", text, re.DOTALL)
        if match is None:
            raise ValueError("No JSON found")
        result = json.loads(match.group(0))

        # The model may answer with "0.9", null or an out-of-range number.
        # Normalise here so a numeric comparison downstream cannot raise
        # TypeError; the other fields (notably "intent") are kept as-is.
        if "confidence" in result:
            try:
                score = float(result["confidence"])
            except (TypeError, ValueError, OverflowError):
                score = 0.5
            result["confidence"] = min(1.0, max(0.0, score))
        return result

    except Exception:
        # Fallback: Treat as technical so we try to answer it first.
        # A fresh dict is built per call so callers can mutate it safely.
        return {"intent": "technical", "confidence": 0.5, "escalate_reason": ""}

def response_generator(user_text, nlu_data, context_docs, history_text):
    """
    Ask the module-level Gemini `model` for the reply shown to the user.

    Args:
        user_text: The current user message.
        nlu_data: Dict from the NLU step; only its "intent" entry is read.
        context_docs: Retrieved knowledge-base text, or a falsy value when
            nothing matched (the prompt then says so explicitly).
        history_text: Recent conversation turns, already formatted as text.

    Returns:
        The model's reply text, or a fixed "overloaded" apology when the
        model call fails.
    """
    system_prompt = f"""
    You are CyberCare AI.

    CONTEXT FROM DATABASE:
    {context_docs if context_docs else "No specific database match."}

    USER INTENT: {nlu_data.get('intent')}
    HISTORY: {history_text}
    CURRENT MESSAGE: "{user_text}"

    Instructions:
    1. If 'CONTEXT FROM DATABASE' has a fix, USE IT and paraphrase politely.
    2. If no context, provide general helpful advice.
    3. Be concise.
    """
    try:
        return model.generate_content(system_prompt).text
    except Exception:
        # Best-effort by design, but only for ordinary errors: a bare
        # `except:` would also swallow KeyboardInterrupt / SystemExit and
        # make the cell impossible to interrupt.
        return "I am currently overloaded. Please try again in a moment."


KNOWLEDGE RETRIEVER

In [106]:
class KnowledgeRetriever:
    """
    Hybrid Retriever:
    - Loads 'QnA.json' for specific NordVPN context (High Priority).
    - Loads 'tech_support_dataset.csv' for general tech support training (Low Priority).
    - Uses TF-IDF to find the best match across both datasets.

    JSON rows are appended before CSV rows and equal scores are ranked in
    row order, so a JSON entry wins a tie against a CSV entry.

    Attributes:
        df: DataFrame with at least the columns "question" and "answer".
        vectorizer: Fitted TfidfVectorizer, or None when nothing could be
            indexed.
        tfidf_matrix: TF-IDF matrix of `df["question"]`, or None when
            nothing could be indexed.
    """
    def __init__(self, json_path=None, csv_path=None):
        """
        Load the optional data files and build the search index.

        Args:
            json_path: Path to a JSON file shaped like {"support": [...]}.
            csv_path: Path to a CSV file with question/answer columns.

        Load problems are printed, never raised; an empty knowledge base
        simply makes `search` return None.
        """
        self.df = pd.DataFrame(columns=["question", "answer"])
        # Defined up front so both attributes exist on every code path.
        self.vectorizer = None
        self.tfidf_matrix = None

        # 1. Load Specific Context (JSON)
        if json_path:
            self._load_json(json_path)

        # 2. Load General Training (CSV)
        if csv_path:
            self._load_csv(csv_path)

        # 3. Build Search Index
        self._build_index()

    def _load_json(self, json_path):
        """Append the records stored under the top-level "support" key."""
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Check structure: {"support": [...]}
            if "support" in data:
                json_df = pd.DataFrame(data['support'])
                self.df = pd.concat([self.df, json_df], ignore_index=True)
                print(f"‚úÖ Knowledge Base: Loaded {len(json_df)} specific NordVPN rules from {json_path}.")
            else:
                print("‚ö†Ô∏è JSON format error: Key 'support' not found.")
        except FileNotFoundError:
            print(f"‚ö†Ô∏è Specific Context file '{json_path}' not found.")
        except Exception as e:
            print(f"‚ö†Ô∏è Error loading JSON: {e}")

    def _load_csv(self, csv_path):
        """Append the question/answer columns of a CSV file."""
        try:
            csv_df = pd.read_csv(csv_path, on_bad_lines='skip')

            # Map CSV columns to 'question' and 'answer'.
            # 'Customer_Issue' / 'Tech_Response' are preferred; otherwise the
            # first two columns are used.
            q_col = 'Customer_Issue' if 'Customer_Issue' in csv_df.columns else csv_df.columns[0]
            a_col = 'Tech_Response' if 'Tech_Response' in csv_df.columns else csv_df.columns[1]

            # Normalize and Append
            csv_df = csv_df[[q_col, a_col]].rename(columns={q_col: "question", a_col: "answer"})
            self.df = pd.concat([self.df, csv_df], ignore_index=True)
            print(f"‚úÖ Knowledge Base: Augmented with {len(csv_df)} general tech support examples.")
        except FileNotFoundError:
            print(f"‚ö†Ô∏è General Training file '{csv_path}' not found.")
        except Exception as e:
            print(f"‚ö†Ô∏è Error loading CSV: {e}")

    def _build_index(self):
        """Fit the TF-IDF index over `df["question"]`; leave it None on failure."""
        if self.df.empty:
            print("‚ùå CRITICAL: Knowledge Base is empty. RAG will not work.")
            return

        # Cast to str so numeric or mixed-type cells cannot break tokenising.
        self.df['question'] = self.df['question'].fillna('').astype(str)
        self.df['answer'] = self.df['answer'].fillna('No detailed solution provided.')

        vectorizer = TfidfVectorizer(stop_words='english')
        try:
            matrix = vectorizer.fit_transform(self.df['question'])
        except ValueError as e:
            # Raised when no usable term is left (e.g. every question is
            # blank or consists of stop words only).
            print(f"CRITICAL: Knowledge Base could not be indexed: {e}")
            return

        self.vectorizer = vectorizer
        self.tfidf_matrix = matrix

    def search(self, query: str, top_k=2, min_score=0.15):
        """
        Return the best-matching Q/A pairs for `query` as one text block.

        Args:
            query: Free-text user question.
            top_k: Maximum number of entries to return; values <= 0 yield
                None.
            min_score: Minimum cosine similarity an entry needs to be
                returned. Matches below the default 0.15 are likely
                irrelevant noise.

        Returns:
            "Q: ...\\nA: ..." blocks joined by "\\n---\\n", best match first,
            or None when the index is unavailable or nothing reaches
            `min_score`.
        """
        if self.vectorizer is None or self.df.empty:
            return None

        # Create vector for user query
        query_vec = self.vectorizer.transform([query])

        # Calculate similarity against ALL docs (JSON + CSV)
        similarities = cosine_similarity(query_vec, self.tfidf_matrix).flatten()

        # Descending order; the stable sort keeps equal scores in row order
        # so the JSON rows, which were loaded first, win ties.
        ranked = (-similarities).argsort(kind="stable")[:max(top_k, 0)]

        # Apply the threshold to every hit, not only the best one, so a weak
        # runner-up is never passed to the LLM as "context".
        results = []
        for idx in ranked:
            if similarities[idx] < min_score:
                break
            row = self.df.iloc[idx]
            results.append(f"Q: {row['question']}\nA: {row['answer']}")

        if not results:
            return None
        return "\n---\n".join(results)

AI CORE (NLU & Generation)

In [112]:
def nlu_service(message: str):
    """
    Analyzes Intent + Sentiment + Profanity.
    Designed to be tolerant of bad grammar and typos.

    Sends `message` to the module-level Gemini `model` and decodes the JSON
    object found in the reply.

    Args:
        message: Raw user text; it is interpolated verbatim into the prompt.

    Returns:
        The decoded dict (expected keys: "intent", "confidence",
        "escalate_reason"). When a "confidence" key is present it is coerced
        to a float clamped to [0.0, 1.0], so callers can compare it
        numerically. On any failure (API error, no JSON in the reply,
        malformed JSON) the neutral fallback
        {"intent": "technical", "confidence": 0.5, "escalate_reason": ""}
        is returned instead of raising.
    """
    prompt = f"""
    You are the NLU module for CyberCare. Analyze this user message: "{message}"

    Return a valid JSON object with keys:
    - "intent": ["technical", "billing", "account", "human_escalate", "greeting"]
    - "confidence": float (0.0 to 1.0)
    - "escalate_reason": string (if escalating, explain why)

    GUIDELINES FOR ANALYSIS:
    1. ROBUSTNESS: Users will have typos, bad grammar, and slang. (e.g., "wifi no worky" -> technical). Do NOT lower confidence for bad grammar.
    2. FRUSTRATION VS HOSTILITY:
       - "This app is garbage, it never connects!" -> Intent: "technical" (User is frustrated with the product. Do NOT escalate. Help them.)
       - "You are stupid, let me talk to a person." -> Intent: "human_escalate" (User is attacking the bot or asking for a human).

    ESCALATION RULES (Set intent to "human_escalate"):
    1. User explicitly asks for a human/agent.
    2. User mentions "lawsuit", "legal action", "fraud", "police".
    3. EXTREME PROFANITY/ABUSE: Direct insults towards the support team.

    Otherwise, categorize as technical/billing/account.
    """
    try:
        response = model.generate_content(prompt)
        text = response.text

        # Smart JSON Extraction
        # Greedy on purpose: spans from the first "{" to the last "}" so any
        # text wrapped around the object is ignored.
        match = re.search(r"\{.*\}", text, re.DOTALL)
        if match is None:
            raise ValueError("No JSON found")
        result = json.loads(match.group(0))

        # The model may answer with "0.9", null or an out-of-range number.
        # Normalise here so a numeric comparison downstream cannot raise
        # TypeError; the other fields (notably "intent") are kept as-is.
        if "confidence" in result:
            try:
                score = float(result["confidence"])
            except (TypeError, ValueError, OverflowError):
                score = 0.5
            result["confidence"] = min(1.0, max(0.0, score))
        return result

    except Exception:
        # Fallback: Treat as technical so we try to answer it first.
        # A fresh dict is built per call so callers can mutate it safely.
        return {"intent": "technical", "confidence": 0.5, "escalate_reason": ""}

def response_generator(user_text, nlu_data, context_docs, history_text):
    """
    Ask the module-level Gemini `model` for the reply shown to the user.

    The prompt tells the model to expect non-technical users and broken
    English, and to use the retrieved context when it contains a fix.

    Args:
        user_text: The current user message.
        nlu_data: Dict from the NLU step; only its "intent" entry is read.
        context_docs: Retrieved knowledge-base text, or a falsy value when
            nothing matched (the prompt then says so explicitly).
        history_text: Recent conversation turns, already formatted as text.

    Returns:
        The model's reply text, or a fixed "overloaded" apology when the
        model call fails.
    """
    system_prompt = f"""
    You are CyberCare AI. You are talking to a user who might be non-technical or typing in broken English.

    CONTEXT FROM DATABASE:
    {context_docs if context_docs else "No specific database match."}

    USER INTENT: {nlu_data.get('intent')}
    HISTORY: {history_text}
    CURRENT MESSAGE: "{user_text}"

    Instructions:
    1. Understand the user's core issue, even if grammar is bad.
    2. If 'CONTEXT FROM DATABASE' has a fix, USE IT and paraphrase politely and simply.
    3. Do not mention their grammar errors. Just help them.
    4. Be concise.
    """
    try:
        return model.generate_content(system_prompt).text
    except Exception:
        # Best-effort by design, but only for ordinary errors: a bare
        # `except:` would also swallow KeyboardInterrupt / SystemExit and
        # make the cell impossible to interrupt.
        return "I am currently overloaded. Please try again in a moment."

DIALOG MANAGER

In [108]:
class DialogManager:
    """
    Orchestrates one chat turn: log -> NLU -> escalate or retrieve -> reply.

    Collaborators (all injected):
        db:  message store exposing `log_message(convo_id, sender, text)`
             and a `messages` sequence of dicts with "sender" / "text".
        kb:  knowledge retriever exposing `search(text)`.
        crm: ticketing backend exposing `create_ticket(user_id, text, reason)`.
    """
    def __init__(self, db, kb, crm):
        self.db, self.kb, self.crm = db, kb, crm

    def process_message(self, user_id, convo_id, text):
        """
        Handle a single user message and return the bot's reply text.

        The user message and the reply are both written to `db`. A ticket is
        opened (and its number returned in the reply) when the NLU intent is
        "human_escalate" or the reported confidence is below 0.4.
        """
        # Record the incoming message before doing anything else.
        self.db.log_message(convo_id, "user", text)

        # Classify the message.
        analysis = nlu_service(text)

        # Hand over to a human on explicit escalation or low confidence.
        # The confidence is only inspected when the intent is not already
        # an escalation.
        wants_human = analysis.get("intent") == "human_escalate"
        if wants_human or analysis.get("confidence", 0) < 0.4:
            ticket = self.crm.create_ticket(
                user_id, text, analysis.get("escalate_reason")
            )
            handoff = f"I've escalated this to a human agent. Ticket #{ticket}"
            self.db.log_message(convo_id, "bot", handoff)
            return handoff

        # Look up supporting knowledge-base entries.
        retrieved = self.kb.search(text)

        # Last four logged messages, one "sender: text" line each.
        recent_lines = []
        for entry in self.db.messages[-4:]:
            recent_lines.append(f"{entry['sender']}: {entry['text']}")
        transcript = "\n".join(recent_lines)

        # Generate the answer, store it, and hand it back.
        answer = response_generator(text, analysis, retrieved, transcript)
        self.db.log_message(convo_id, "bot", answer)
        return answer

INITIALIZATION & TESTING (The Simulation)

In [110]:
# Scripted smoke test: wires the components together, opens one
# conversation and pushes a fixed list of queries through the bot.

# 1. Initialize Components
print("\n--- üöÄ STARTING CYBERCARE SYSTEM (NORDVPN EDITION) ---")
# NOTE(review): MockDatabase and MockCRM are not defined in the cells shown
# here -- presumably they come from another cell; confirm it has been run.
db = MockDatabase()

# LOAD BOTH DATASETS HERE
# QnA.json = Specific Rules (Priority)
# tech_support_dataset.csv = General Training (Fallback)
# Both paths are relative to the notebook's working directory.
kb = KnowledgeRetriever(json_path="QnA.json", csv_path="tech_support_dataset.csv")

crm = MockCRM()
bot = DialogManager(db, kb, crm)

# 2. User Setup
# One user and one conversation are shared by every query below.
uid = db.create_user("customer@nordvpn.com", "VPN User")
cid = db.start_conversation(uid)

print(f"\n‚úÖ Simulation Ready! Knowledge Base size: {len(kb.df)} entries.")

# 3. Test Loop
# We mix specific NordVPN questions with generic tech support issues
queries = [
    "I forgot my password",             # Generic (exists in both, will likely pick JSON if query matches better)
    "How do I use Meshnet?",            # Specific to NordVPN (JSON)
    "I want to cancel auto-renewal",    # Specific to NordVPN (JSON)
    "My printer is not responding",     # Generic Tech Support (CSV)
    "Can I watch Netflix?",             # Specific to NordVPN (JSON)
    "You dirty clanker"                # Escalation Trigger
]

# Each query goes through the full pipeline; the reply is printed as-is.
for q in queries:
    print(f"\nüë§ User: {q}")
    ans = bot.process_message(uid, cid, q)
    print(f"ü§ñ Bot: {ans}")


--- 🚀 STARTING CYBERCARE SYSTEM (NORDVPN EDITION) ---
✅ Knowledge Base: Loaded 15 specific NordVPN rules from QnA.json.
✅ Knowledge Base: Augmented with 1896 general tech support examples.

✅ Simulation Ready! Knowledge Base size: 1911 entries.

👤 User: I forgot my password
🤖 Bot: To reset your password, please look for a "Forgot password" or "Reset password" link on the login page. This will guide you through the process, often by sending a reset link to your registered email.

👤 User: How do I use Meshnet?
🤖 Bot: To use Meshnet:
1. Turn on Meshnet in the NordVPN app on all devices you wish to connect.
2. Link these devices by logging into the same Nord Account or by sending an invitation to another user.
3. Then, use the assigned Nord name or IP address to access the device remotely.

👤 User: I want to cancel auto-renewal
🤖 Bot: To cancel your auto-renewal, please log in to your Nord Account, go to the Billing tab, and then click 'Cancel' next to Auto-rene

In [111]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Interactive chat cell, part 1: builds a fresh set of components and the
# widgets used by the click handler defined further down.

# 1. Initialize System Components
print("--- ‚öôÔ∏è INITIALIZING SYSTEM ---")
# NOTE(review): MockDatabase and MockCRM are not defined in the cells shown
# here -- presumably they come from another cell; confirm it has been run.
db = MockDatabase()
# Load your specific NordVPN rules + General Tech Support CSV
kb = KnowledgeRetriever(json_path="QnA.json", csv_path="tech_support_dataset.csv")
crm = MockCRM()
bot = DialogManager(db, kb, crm)

# 2. Create Session
# A single user / conversation pair is reused for the whole chat session.
uid = db.create_user("tester@example.com", "Beta Tester")
cid = db.start_conversation(uid)

print(f"‚úÖ System Ready! Knowledge Base Size: {len(kb.df)} entries.")
print("üëá INTERACTIVE CHAT LOADING BELOW üëá\n")

# --- UI WIDGETS ---

# Output area for the conversation history
# (fixed 400px height with vertical scrolling)
chat_history = widgets.Output(layout={'border': '1px solid #ccc', 'height': '400px', 'overflow_y': 'scroll', 'padding': '10px'})

# Input text box
user_input = widgets.Text(
    placeholder='Type your issue here (e.g., "How do I use Meshnet?")',
    layout=widgets.Layout(width='80%')
)

# Send Button
send_btn = widgets.Button(
    description='Send',
    button_style='primary',
    layout=widgets.Layout(width='18%')
)

# Container for input + button (laid out side by side)
input_box = widgets.HBox([user_input, send_btn])

# Function to handle sending messages
def on_send_click(b):
    """
    Send the text currently in `user_input` to the bot and show the exchange.

    Used as the callback for both the Send button and the text box, so `b`
    is whichever widget fired the event; it is not used. Blank input is
    ignored. Everything printed here lands in the `chat_history` output
    widget.
    """
    typed = user_input.value.strip()
    if not typed:
        return

    # Empty the box right away so the next message can be typed.
    user_input.value = ''

    divider = "-" * 40
    with chat_history:
        # Echo what the user wrote.
        print(f"üë§ YOU: {typed}")

        # Keep the UI alive even if the pipeline raises: the error is shown
        # in the chat area instead of propagating.
        try:
            reply = bot.process_message(uid, cid, typed)

            # Escalation replies are recognised purely by their text
            # (they contain "Ticket #") and get a different label.
            label = "üö® SYSTEM: " if "Ticket #" in reply else "ü§ñ BOT: "

            print(f"{label}{reply}")
            print(divider)
        except Exception as err:
            print(f"‚ùå SYSTEM ERROR: {err}")
            print(divider)

# Bind events
# The same handler serves the button click and the text box submit.
send_btn.on_click(on_send_click)
# NOTE(review): newer ipywidgets releases are believed to mark
# Text.on_submit as deprecated -- confirm against the installed version.
user_input.on_submit(on_send_click) # Allow pressing 'Enter' to send

# Display the UI
# (conversation area first, input row underneath)
display(chat_history, input_box)

# NOTE(review): the suggested phrase looks like it exercises the
# legal-threat escalation rule of the NLU prompt rather than a profanity
# check -- consider rewording the tip.
print("\nüí° TIP: Type 'I want to sue you' to test the Profanity/Hostility filter.")

--- ⚙️ INITIALIZING SYSTEM ---
✅ Knowledge Base: Loaded 15 specific NordVPN rules from QnA.json.
✅ Knowledge Base: Augmented with 1896 general tech support examples.
✅ System Ready! Knowledge Base Size: 1911 entries.
👇 INTERACTIVE CHAT LOADING BELOW 👇



Output(layout=Layout(border='1px solid #ccc', height='400px', overflow_y='scroll', padding='10px'))

HBox(children=(Text(value='', layout=Layout(width='80%'), placeholder='Type your issue here (e.g., "How do I u…


💡 TIP: Type 'I want to sue you' to test the Profanity/Hostility filter.
