In [14]:
!pip install chromadb langchain langchain-community langchain_google_genai sentence_transformers langdetect

Collecting langchain_google_genai
  Downloading langchain_google_genai-2.1.5-py3-none-any.whl.metadata (5.2 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain_google_genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.18 (from langchain_google_genai)
  Downloading google_ai_generativelanguage-0.6.18-py3-none-any.whl.metadata (9.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-

In [16]:
#Import Libraries

import pandas as pd
import numpy as np
import chromadb
from sentence_transformers import SentenceTransformer
from langdetect import detect
from typing import List, Dict
from dataclasses import dataclass
from datetime import datetime
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
import os

In [17]:
# Define Data Structure for Output

@dataclass
class TicketResponse:
    """Result bundle returned by ``MultilingualRAGSystem.query``."""
    # Answer text produced by the LLM, or the canned fallback message when no
    # tickets were retrieved or the LLM call failed.
    answer: str
    # Heuristic score derived from the retrieved tickets' similarity scores;
    # 0.0 when nothing was retrieved, never above 1.0.
    confidence: float
    # Metadata dicts of the retrieved tickets, each with an added
    # 'similarity_score' entry.
    source_tickets: List[Dict]
    # Language code the query was answered in (caller-supplied or detected).
    language: str
    # One-sentence summary of how many tickets were found and their score range.
    reasoning: str

In [18]:
# Initialize the LLM

# NOTE(review): ChatOllama and HumanMessage are not referenced by any cell
# visible in this notebook, and `os` is already imported in the import cell.
from langchain_community.chat_models import ChatOllama
from langchain.schema import HumanMessage
import os

# Colab secrets store: keeps the API key out of the notebook source.
from google.colab import userdata

# Export the key as an environment variable before building the client
# (presumably ChatGoogleGenerativeAI reads GOOGLE_API_KEY from the environment
# — confirm against the langchain_google_genai docs).
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')
from langchain_google_genai import ChatGoogleGenerativeAI
# Shared chat model handed to MultilingualRAGSystem below.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")


In [25]:
# Define Multilingual class

class MultilingualRAGSystem:
    """Retrieval-augmented answering over resolved support tickets.

    Workflow: ``load_datasets`` reads one CSV per language plus a combined CSV,
    ``create_vector_stores`` embeds the ``rag_content`` column of each dataset
    into its own Chroma collection, and ``query`` detects the query language,
    retrieves similar tickets, asks the LLM to synthesize an answer and wraps
    everything in a ``TicketResponse``.

    Similarity scores handled by this class are *relevance* scores (higher is
    more similar), not raw vector distances.
    """

    # Per-language datasets with fewer rows than this are skipped; queries in
    # that language then fall back to the 'combined' store.
    MIN_TICKETS_PER_LANGUAGE = 50

    def __init__(self, embedding_model_name: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", llm=None):
        """Set up the embedding model, language tables and the LLM handle.

        Args:
            embedding_model_name: Sentence-transformers model used for embeddings.
            llm: Object exposing ``invoke(prompt)``; its result's ``content``
                attribute (or the result itself) is used as the answer text.
        """
        # Remembered so create_vector_stores embeds with the same model.
        self.embedding_model_name = embedding_model_name
        # Kept for callers that use the raw SentenceTransformer directly; the
        # vector stores use a LangChain wrapper around the same model name.
        self.embedding_model = SentenceTransformer(embedding_model_name)
        self.vector_stores = {}
        self.ticket_databases = {}
        self.language_mappings = {
            'en': 'english',
            'de': 'german',
            'fr': 'french',
            'es': 'spanish'
        }
        self.supported_languages = list(self.language_mappings)
        self.llm = llm

    def load_datasets(self, data_path: str = "./"):
        """Load ``rag_ready_<language>_tickets.csv`` files and the combined CSV.

        Missing files and per-language datasets smaller than
        ``MIN_TICKETS_PER_LANGUAGE`` are skipped with a message instead of
        silently. Loaded frames are stored in ``self.ticket_databases`` keyed
        by language code (plus ``'combined'``).

        Args:
            data_path: Directory holding the CSV files; a trailing slash is
                optional.
        """
        for lang_code, lang_name in self.language_mappings.items():
            csv_path = os.path.join(data_path, f"rag_ready_{lang_name}_tickets.csv")
            try:
                df = pd.read_csv(csv_path)
            except FileNotFoundError:
                print(f"⚠️ No dataset for '{lang_code}' at {csv_path}")
                continue
            if len(df) < self.MIN_TICKETS_PER_LANGUAGE:
                print(f"⚠️ Skipping '{lang_code}': only {len(df)} tickets (minimum {self.MIN_TICKETS_PER_LANGUAGE})")
                continue
            self.ticket_databases[lang_code] = df

        combined_path = os.path.join(data_path, "rag_ready_combined_tickets.csv")
        try:
            self.ticket_databases['combined'] = pd.read_csv(combined_path)
        except FileNotFoundError:
            print(f"⚠️ No combined dataset at {combined_path}")

    def create_vector_stores(self, persist_directory: str = "./chroma_db"):
        """
        Create Chroma vector stores for each dataset (per language).

        Each collection is created with cosine distance so that relevance
        scores returned at query time are meaningful, and with deterministic
        document ids so re-running this method upserts instead of appending
        duplicates.

        NOTE(review): a collection that already exists on disk presumably keeps
        the distance metric it was created with — delete ``persist_directory``
        when switching from a store built by an older version of this method.

        Args:
            persist_directory: Root directory; each dataset gets a sub-directory
                named after its language code.
        """
        os.makedirs(persist_directory, exist_ok=True)

        # LangChain-compatible wrapper around the model chosen in __init__.
        embedding_model = HuggingFaceEmbeddings(model_name=self.embedding_model_name)

        for lang_code, df in self.ticket_databases.items():
            print(f"📊 Building ChromaDB store for: {lang_code}")

            if 'rag_content' not in df.columns:
                print(f"❌ Skipping {lang_code} — 'rag_content' missing")
                continue
            if df.empty:
                print(f"❌ Skipping {lang_code} — dataset is empty")
                continue

            texts = df['rag_content'].fillna('').astype(str).tolist()
            metadata = df.to_dict('records')

            documents = [
                Document(page_content=text, metadata=meta)
                for text, meta in zip(texts, metadata)
            ]

            vectordb = Chroma.from_documents(
                documents=documents,
                embedding=embedding_model,
                # Stable ids make a rebuild overwrite rows rather than duplicate them.
                ids=[f"{lang_code}-{row}" for row in range(len(documents))],
                persist_directory=os.path.join(persist_directory, lang_code),
                collection_name=f"support_{lang_code}",
                collection_metadata={"hnsw:space": "cosine"},
            )
            # Older Chroma wrappers need an explicit persist(); newer ones
            # persist automatically and may not expose the method at all.
            if hasattr(vectordb, "persist"):
                vectordb.persist()

            self.vector_stores[lang_code] = vectordb

        print("✅ All Chroma vector stores created.")

    def detect_query_language(self, query: str) -> str:
        """Return the detected language code, or 'en' if unsupported or undetectable."""
        try:
            lang = detect(query)
        except Exception:
            # Detection fails on empty or non-linguistic input; default to
            # English without swallowing KeyboardInterrupt/SystemExit.
            return 'en'
        return lang if lang in self.supported_languages else 'en'

    def retrieve_similar_tickets(self, query: str, language: str = None, top_k: int = 5, similarity_threshold: float = 0.5) -> List[Dict]:
        """Return metadata of the tickets most similar to ``query``.

        Args:
            query: User query text.
            language: Language code selecting the store; detected when None.
                Falls back to the 'combined' store if no store exists for it.
            top_k: Maximum number of candidates fetched from the store.
            similarity_threshold: Minimum relevance score (higher is more
                similar) a candidate must reach to be returned.

        Returns:
            A list of ticket metadata dicts, each with a 'similarity_score'
            entry; empty when no store is available or nothing passes the
            threshold.
        """
        if language is None:
            language = self.detect_query_language(query)

        if language not in self.vector_stores:
            print(f"⚠️ No vector store for language '{language}', falling back to 'combined'")
            language = 'combined'

        vectordb = self.vector_stores.get(language)
        # Explicit None check: a store object may define __len__, so plain
        # truthiness would treat an empty store as missing.
        if vectordb is None:
            print("❌ No Chroma store found.")
            return []

        # Relevance scores grow with similarity. The plain *_with_score variant
        # returns distances (lower is better), which must not be compared
        # against a similarity threshold with >=.
        results = vectordb.similarity_search_with_relevance_scores(query, k=top_k)

        tickets = []
        for doc, score in results:
            score = float(score)
            if score >= similarity_threshold:
                meta = dict(doc.metadata)
                meta['similarity_score'] = score
                tickets.append(meta)

        return tickets

    @staticmethod
    def _as_text(value) -> str:
        """Convert a metadata value to text, mapping None and NaN to ''."""
        if value is None:
            return ""
        if isinstance(value, float) and value != value:  # NaN from an empty CSV cell
            return ""
        return str(value)

    def generate_response(self, query: str, similar_tickets: List[Dict], language: str = 'en') -> str:
        """Ask the LLM for an answer grounded in the retrieved tickets.

        Returns the localized fallback message when there are no tickets or
        the LLM call raises.
        """
        if not similar_tickets:
            return self._generate_fallback_response(language)

        # Format context; fields are truncated to keep the prompt bounded.
        context = ""
        for i, ticket in enumerate(similar_tickets, 1):
            context += (
                f"\n--- TICKET {i} ---\n"
                f"Subject: {self._as_text(ticket.get('subject_enhanced'))}\n"
                f"Problem: {self._as_text(ticket.get('body'))[:300]}\n"
                f"Resolution: {self._as_text(ticket.get('answer'))[:400]}\n"
            )

        # Use more assertive prompt instructions
        prompt = f"""
      You are an experienced IT support assistant. A user has submitted the following problem:

      USER QUERY:
      {query}

      You have access to {len(similar_tickets)} resolved support tickets that are similar to the user's issue.

      SIMILAR RESOLVED TICKETS:
      {context}

      TASK:
      - Analyze the user's problem.
      - Synthesize a precise, helpful, step-by-step solution using information from the similar tickets.
      - Do NOT just restate what is in the tickets. Combine insights into one concise answer.
      - Write in the same language as the user's query: {language.upper()}.

      RESPONSE:"""

        try:
            response = self.llm.invoke(prompt)
            # Chat models return a message object; plain LLMs return a string.
            return getattr(response, "content", response)
        except Exception as e:
            print(f"❌ LLM call failed: {e}")
            return self._generate_fallback_response(language)

    def _generate_fallback_response(self, language: str) -> str:
        """Return the canned 'nothing found' message, in English for unknown codes."""
        fallback = {
            'en': "No similar tickets found. Please contact support.",
            'de': "Keine ähnlichen Tickets gefunden. Bitte wenden Sie sich an den Support.",
            'fr': "Aucun ticket similaire trouvé. Veuillez contacter le support.",
            'es': "No se encontraron tickets similares. Contacte al soporte."
        }
        return fallback.get(language, fallback['en'])

    def query(self, user_query: str, language: str = None, top_k: int = 5, similarity_threshold: float = 0.5) -> "TicketResponse":
        """Run the full pipeline for one query and return a ``TicketResponse``.

        Args:
            user_query: The user's problem description.
            language: Language code; detected from the query when None.
            top_k: Maximum number of tickets to retrieve.
            similarity_threshold: Minimum relevance score for a ticket to count.
        """
        language = self.detect_query_language(user_query) if language is None else language
        similar_tickets = self.retrieve_similar_tickets(user_query, language, top_k, similarity_threshold)
        answer = self.generate_response(user_query, similar_tickets, language)
        confidence = self._calculate_confidence(similar_tickets)
        reasoning = self._create_reasoning(similar_tickets, language)
        return TicketResponse(answer=answer, confidence=confidence, source_tickets=similar_tickets, language=language, reasoning=reasoning)

    def _calculate_confidence(self, similar_tickets: List[Dict]) -> float:
        """Best relevance score plus 0.1 per ticket above 0.7 (boost capped at 0.3), clamped to [0, 1]."""
        if not similar_tickets:
            return 0.0
        max_score = max(t['similarity_score'] for t in similar_tickets)
        boost = min(sum(1 for t in similar_tickets if t['similarity_score'] > 0.7) * 0.1, 0.3)
        return max(0.0, min(max_score + boost, 1.0))

    def _create_reasoning(self, similar_tickets: List[Dict], language: str) -> str:
        """Summarize how many tickets were found and their score range (always in English)."""
        if not similar_tickets:
            return "No similar tickets found."
        scores = [t['similarity_score'] for t in similar_tickets]
        return f"Found {len(similar_tickets)} similar tickets with similarity scores between {min(scores):.2f} and {max(scores):.2f}."


In [20]:
# Build the RAG pipeline around the chat model created in the LLM cell.
# NOTE(review): the class cell carries a later execution count (In [25]) than
# this cell (In [20]); re-run this cell after editing the class so the instance
# uses the current definition.
rag_system = MultilingualRAGSystem(llm=llm)

In [21]:
# Read the per-language and combined ticket CSVs from the working directory.
rag_system.load_datasets("./")

In [22]:
# Embed every loaded dataset into its own Chroma collection (default
# persist directory: ./chroma_db).
rag_system.create_vector_stores()

📊 Building ChromaDB store for: en
📊 Building ChromaDB store for: de
📊 Building ChromaDB store for: combined
✅ All Chroma vector stores created.


In [23]:
# Show which stores exist; languages without one fall back to 'combined'.
print("Available vector stores:", list(rag_system.vector_stores))

Available vector stores: ['en', 'de', 'combined']


In [26]:
# Smoke-test queries as (text, expected language code) pairs. The expected
# code is informational only: query() is called without a language so the
# detector is exercised.
queries = [
    ("My computer is freezing and slow", "en"),
    ("Mon ordinateur est lent", "fr"),
    ("Mi PC va muy lenta", "es"),
    ("Mein Computer reagiert nicht", "de"),
]

for query, lang in queries:
    # Header goes out before the call because query() may print warnings itself.
    print("=" * 50, f"Query: {query}", sep="\n")
    result = rag_system.query(query, top_k=3)
    print(
        f"Language Detected: {result.language}",
        f"Confidence: {result.confidence:.2f}",
        f"Answer:\n{result.answer}",
        f"Reasoning: {result.reasoning}",
        sep="\n",
    )

Query: My computer is freezing and slow
Language Detected: en
Confidence: 1.00
Answer:
Okay, I understand your computer is freezing and running slowly. Here's a breakdown of potential causes and troubleshooting steps, based on similar issues:

**Possible Causes:**

*   **Resource Overload:** Too many programs running at once.
*   **Integration Issues:** Problems with software integrations.
*   **Overheating:** Especially if you're running resource-intensive applications.
*   **Software Issues:** Bugs or needed updates

**Troubleshooting Steps:**

1.  **Restart Your Computer:** This is the simplest and often most effective first step.

2.  **Close Unnecessary Programs:** Close any applications you aren't actively using. Check the system tray (bottom right corner of your screen) for running programs.

3.  **Check Resource Usage (Task Manager/Activity Monitor):**
    *   **Windows:** Press `Ctrl + Shift + Esc` to open Task Manager. Check the "Processes" tab and sort by CPU, Memory, and Di