In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.tools import tool
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.chains import RetrievalQA
from pathlib import Path
import gradio as gr
from langchain.tools import Tool
import os

In [None]:
class ProjectRootDetector:
    """Guess which directory is the root of the project the notebook runs in.

    The guess is made by walking upwards from the current working directory
    and stopping at the first directory that contains a well-known project
    marker (VCS folder, build manifest, README, ...).
    """

    def __init__(self):
        # Names (files or directories) whose presence marks a project root.
        self.root_indicators = [
            '.git',
            'package.json',
            'requirements.txt',
            'pyproject.toml',
            'Cargo.toml',
            'go.mod',
            'pom.xml',
            'build.gradle',
            'Dockerfile',
            'docker-compose.yml',
            'README.md',
            'setup.py',
            'CMakeLists.txt',
        ]

    def find_root(self, use_file_path_as_root=False, root_path=None):
        """Return the project root.

        Args:
            use_file_path_as_root: When true, skip detection and use the
                current working directory as the root.
            root_path: Explicit root. When truthy it is returned unchanged
                and takes precedence over every other option.

        Returns:
            ``root_path`` as given, or the detected root as a string. Falls
            back to the current working directory when no marker is found.
        """
        if root_path:
            return root_path

        here = Path.cwd()
        if use_file_path_as_root:
            return str(here)

        for candidate in (here, *here.parents):
            # The filesystem root (its own parent) is never inspected.
            if candidate == candidate.parent:
                break
            if any((candidate / marker).exists() for marker in self.root_indicators):
                return str(candidate)

        return str(here)

In [None]:
class RepoIntelligentAssistant:
    """Conversational agent that answers questions about a local repository.

    The agent has two tools: a retrieval-QA chain over a FAISS index built
    from the repository's text/source files, and a file fetcher that returns
    the content of one file looked up by name or path. The index and the
    agent are built lazily on the first call to :meth:`run`.
    """

    system_prompt = """
    You are an intelligent software engineer and codebase assistant with deep expertise in all programming domains (software engineering, data science, web development, etc.).

    BEHAVIOR GUIDELINES:

    1. QUESTION EVALUATION:
    - First determine if the question requires repository context or general coding knowledge
    - For repo-specific questions: Use tools to fetch relevant code/files
    - For general coding questions: Use your expertise to answer directly

    2. CODE EXPLANATION:
    - When explaining code: Break it down line-by-line
    - Show the actual code and explain what each line does
    - Focus on the specific implementation in the repository
    - Explain the purpose and context within the codebase

    3. CODE IMPROVEMENTS:
    - When suggesting improvements: Analyze the repository structure first
    - If suggesting new functions/classes: Write complete, working code
    - Base suggestions on the existing codebase patterns and architecture
    - Show the user the complete code implementation

    4. CODE MODIFICATION:
    - NEVER modify files directly
    - Always show the user the updated code
    - Let the user decide whether to implement changes
    - Provide clear instructions on where/how to apply changes

    5. RESPONSE SEQUENCE:
    a) Evaluate question type (repo-specific vs general)
    b) If repo-specific: Fetch relevant context using tools
    c) Provide detailed explanation with code examples
    d) If suggesting improvements: Show complete code implementation
    e) Always explain the reasoning behind suggestions

    Your tone: Precise, technical, and helpful - like a senior developer mentoring a colleague.
    """

    MODEL = "gpt-4o-mini"

    # Number of chunks the retriever hands to the QA chain per query.
    RETRIEVER_K = 30

    # Directories that are never descended into when looking a file up by
    # name: VCS metadata and dependency/virtualenv trees only add noise.
    _SEARCH_SKIP_DIRS = frozenset({
        '.git', '.svn', '.hg', 'node_modules', '__pycache__', '.venv', 'venv',
    })

    # Directory *names* pruned while collecting folders to index. Entries are
    # compared against basenames, so a nested path such as 'docs/_build' could
    # never match; '_build' is listed instead.
    _INDEX_SKIP_DIRS = frozenset({
        '.git', '.svn', '.hg', '.bzr', 'node_modules', 'bower_components', 'vendor', '.venv',
        '__pycache__', '.pytest_cache', '.coverage', 'venv', 'env', '.env',
        'dist', 'build', 'target', 'out', 'bin', 'obj', '.vscode', '.idea', '.vs', '.eclipse',
        '.DS_Store', 'Thumbs.db', '.Trash', 'temp', 'tmp', 'cache', 'logs', '.cache', '.tmp',
        'packages', 'lib', 'libs', 'dependencies', 'generated', 'auto-generated', '.generated',
        'backup', 'backups', '.backup', 'coverage', '.nyc_output', 'htmlcov', '_build', 'site', '_site',
    })

    # Whitelist of file patterns to index. The patterns are deliberately NOT
    # recursive: create_doc() visits every folder on its own, so a '**/'
    # prefix would index nested files once per ancestor folder and would pull
    # pruned folders back in. Rust and Kotlin use the '.rs' / '.kt' suffixes.
    _INDEX_GLOBS = (
        "*.py", "*.ipynb", "*.md", "*.txt", "*.json", "*.csv", "*.html", "*.css",
        "*.js", "*.ts", "*.java", "*.c", "*.cpp", "*.go", "*.rs", "*.scala",
        "*.swift", "*.kt",
    )

    def __init__(self, use_file_path_as_root = False, root_path=None):
        """Create the LLM client and resolve the repository root.

        Args:
            use_file_path_as_root: Forwarded to ``ProjectRootDetector.find_root``.
            root_path: Explicit repository root, forwarded to
                ``ProjectRootDetector.find_root``.
        """
        self.llm = ChatOpenAI(
            temperature=0.7,
            callbacks=[StreamingStdOutCallbackHandler()],
            model_name=self.MODEL
        )
        self.root_folder = ProjectRootDetector().find_root(root_path=root_path, use_file_path_as_root=use_file_path_as_root)
        # Built on first use by run(); indexing the repository is expensive.
        self.agent = None

    @staticmethod
    def _comparable_path(path):
        """Return ``path`` normalised, '/'-separated and lower-cased for comparison."""
        return os.path.normpath(path).replace(os.sep, "/").lower()

    def get_file(self, file_name: str):
        """Return the content of one repository file, or a message explaining why not.

        ``file_name`` may be a bare name with extension (``utils.py``), a bare
        name without extension (``utils``), a path relative to any folder in
        the repository (``pkg/utils.py``) or an absolute path as printed in
        the "multiple files" listing. Matching is case-insensitive. Only
        files found while walking ``self.root_folder`` can be returned, so a
        path outside the repository never resolves.

        Returns:
            A string: the file content with a short header, a list of
            candidate paths when the name is ambiguous or only partially
            matches, or an error/not-found message.
        """
        file_name = (file_name or "").strip()
        if not file_name:
            return "No file name provided."

        query = file_name.replace("\\", "/")
        is_path = "/" in query
        absolute = is_path and os.path.isabs(query)
        if is_path:
            wanted = self._comparable_path(os.path.abspath(query) if absolute else query)
        else:
            wanted = query.lower()
        # Last path component, used for the "did you mean" suggestions.
        needle = query.rstrip("/").rsplit("/", 1)[-1].lower()
        has_extension = "." in file_name

        matches = []
        fuzzy_matches = []
        # Single walk: exact and partial matches are collected together.
        for root, dirs, files in os.walk(self.root_folder):
            dirs[:] = [d for d in dirs if d not in self._SEARCH_SKIP_DIRS]
            for f in files:
                full_path = os.path.join(root, f)
                lowered = f.lower()
                if is_path:
                    candidate = self._comparable_path(os.path.abspath(full_path))
                    hit = candidate == wanted or (
                        not absolute and candidate.endswith("/" + wanted)
                    )
                elif has_extension:
                    hit = lowered == wanted
                else:
                    # Compare against the part of the name before the first dot.
                    hit = lowered.split(".", 1)[0] == wanted

                if hit:
                    matches.append(full_path)
                elif needle and needle in lowered:
                    fuzzy_matches.append(full_path)

        # os.walk order is filesystem dependent; sort for stable output.
        matches.sort()
        fuzzy_matches.sort()

        if not matches:
            if fuzzy_matches:
                listed = "\n".join(fuzzy_matches[:10])  # Limit to 10 results
                return (
                    f"No exact match for '{file_name}' found. "
                    f"Did you mean one of these?\n\n{listed}"
                )
            return f"No file named '{file_name}' found in the repository."

        if len(matches) > 1:
            listed = "\n".join(matches)
            return (
                f"There are multiple files named '{file_name}'. "
                f"Please specify the full path.\n\n{listed}"
            )

        file_path = matches[0]
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                content = file.read()
        except (OSError, UnicodeDecodeError) as e:
            return f"Error reading file '{file_path}': {e}"
        return f"Content of '{file_name}' (from {file_path}):\n\n{content}"

    def _get_all_folders_recursive(self, root_path):
        """Return ``root_path`` and every folder below it that should be indexed.

        Folders whose name is in ``_INDEX_SKIP_DIRS`` are pruned together
        with everything beneath them. ``root_path`` itself is always kept,
        even if its own name is in the skip list, because the caller asked
        for it explicitly. Folders are not discarded for containing binary
        files: the extension whitelist used by ``create_doc`` already keeps
        such files out of the index.
        """
        all_folders = []
        for root, dirs, _files in os.walk(root_path):
            # In-place edit so os.walk does not descend into pruned folders.
            dirs[:] = [d for d in dirs if d not in self._INDEX_SKIP_DIRS]
            all_folders.append(root)
        return all_folders

    def create_doc(self):
        """Load every whitelisted file of the repository as a document.

        Each folder is loaded on its own with non-recursive patterns, so a
        file is loaded exactly once and ``doc_type`` is the name of the
        folder that directly contains it.

        Returns:
            A list of loaded documents with ``metadata["doc_type"]`` set.
        """
        documents = []
        for folder in self._get_all_folders_recursive(self.root_folder):
            doc_type = os.path.basename(folder)
            loader = DirectoryLoader(
                folder,
                glob=list(self._INDEX_GLOBS),
                loader_cls=TextLoader,
                loader_kwargs={"encoding": "utf-8"},
                # One undecodable file should not abort indexing the repo.
                silent_errors=True,
            )
            for doc in loader.load():
                doc.metadata["doc_type"] = doc_type
                documents.append(doc)
        return documents

    def create_retriever(self):
        """Build a FAISS index over the repository and return a retriever on it.

        Raises:
            ValueError: If no indexable content was found under the root.
        """
        documents = self.create_doc()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = text_splitter.split_documents(documents)
        if not chunks:
            # Fail with a readable message instead of an opaque FAISS error.
            raise ValueError(
                f"No indexable source files found under '{self.root_folder}'."
            )

        vectorstore = FAISS.from_documents(chunks, OpenAIEmbeddings())

        # The result count must go through search_kwargs; a bare k=... keyword
        # is not a retriever setting.
        return vectorstore.as_retriever(search_kwargs={"k": self.RETRIEVER_K})

    def create_agent(self):
        """Assemble the tools, memory and conversational agent.

        Returns:
            The agent executor produced by ``initialize_agent``.
        """
        retriever = self.create_retriever()
        rag_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            retriever=retriever,
            chain_type="stuff"
        )

        rag_tool = Tool(
            name="repo_context_search",
            func=lambda q: rag_chain.run(q),
            description="Search the repository for specific code, functions, classes, or implementation details. Use this when the user asks about something specific in the codebase like 'what does this function do', 'explain this code', or 'how is X implemented'."
        )

        @tool("get_file", return_direct=False)
        def get_file(file_name: str) -> str:
            """
            Fetch the content of a specific file by name to analyze its code implementation.
            Use when user asks to 'explain this file', 'show me the code for X', or 'what's in this file'.
            If multiple files share the same name, list their paths instead.
            """
            return self.get_file(file_name)

        tools = [rag_tool, get_file]

        # The conversational ReAct agent renders chat_history into a plain
        # string prompt, so the memory must return text, not message objects.
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            input_key="input",
        )

        # The system prompt is injected as the agent's prompt prefix (the
        # memory object has no notion of a system message). Braces are
        # escaped because the prefix becomes part of a format-style template,
        # and the prefix must end by introducing the tool list that follows.
        prefix = (
            self.system_prompt.strip().replace("{", "{{").replace("}", "}}")
            + "\n\nTOOLS:\n------\n\nYou have access to the following tools:"
        )

        agent = initialize_agent(
            tools,
            self.llm,
            # The keyword is `agent`; any other name leaves the default
            # (non-conversational) agent type in place.
            agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
            agent_kwargs={"prefix": prefix},
            handle_parsing_errors=True,
            memory=memory,
            verbose=True
        )

        return agent

    def run(self, question, history=None):
        """Answer ``question``, yielding the agent's output as it becomes available.

        Args:
            question: The user's message.
            history: Accepted for compatibility with chat UIs that pass the
                transcript; unused because the agent keeps its own memory.

        Yields:
            Output strings from the agent, or a single ``"Error: ..."``
            string if building or running the agent fails.
        """
        try:
            # Create agent only once
            if self.agent is None:
                self.agent = self.create_agent()

            for chunk in self.agent.stream(question):
                if chunk.get("output"):
                    yield chunk["output"]
        except Exception as e:
            yield f"Error: {str(e)}"

            

In [None]:
# Target repository: the "tourist-guide" folder that sits next to the
# notebook's working directory. Constructing the assistant is cheap; the
# agent is only built on the first call to `run`.
root = os.path.abspath(os.path.join(os.pardir, "tourist-guide"))
G = RepoIntelligentAssistant(root_path=root)

In [None]:
def chatInterface(fn, chat_type="messages"):
    """Build the Gradio chat UI around ``fn``, launch it and return the app.

    Args:
        fn: Chat handler passed to ``gr.ChatInterface``; it is called with the
            user message and the chat history.
        chat_type: Value for the ``type`` argument of ``gr.ChatInterface``.

    Returns:
        The ``gr.Blocks`` app, so the caller can keep a handle on the running
        UI (previously the function returned ``None``).
    """
    with gr.Blocks(
        title="🤖 Intelligent Codebase Assistant",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
            margin: auto !important;
        }
        .chat-message {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        }
        """
    ) as ui:

        # Header Section
        with gr.Row():
            gr.Markdown("""
            # 🤖 Intelligent Codebase Assistant
            
            **Your AI-powered coding companion for understanding and improving codebases**
            
            ---
            """)

        # Main Chat Interface
        with gr.Row(height="77dvh", equal_height=True):
            with gr.Column(scale=4):
                gr.ChatInterface(
                    fn,
                    type=chat_type,
                    title="Chat with your codebase",
                    description="Ask questions about your code, request explanations, or get improvement suggestions",
                    examples=[
                        "What is the codebase about?",
                        "Explain the key file in the codebase",
                        "Explain the architecture of the codebase",
                        "Suggest improvements for the codebase",
                        "What design patterns are used?"
                    ],
                    cache_examples=False,
                    fill_height=True
                )

            # Sidebar with info
            with gr.Column(scale=1):
                gr.Markdown("""
                ## 💡 Tips
                
                **Ask about:**
                - Specific functions or classes
                - Code explanations
                - Architecture questions
                - Improvement suggestions
                - Design patterns
                
                **Examples:**
                - "What does the login function do?"
                - "Explain this code line by line"
                - "How can I improve this function?"
                - "What's the overall architecture?"
                """)

                gr.Markdown("""
                ## 🛠️ Capabilities
                
                ✅ **Code Analysis**  
                ✅ **Line-by-line Explanations**  
                ✅ **Improvement Suggestions**  
                ✅ **Multi-language Support**  
                ✅ **Memory Persistence**  
                ✅ **Repository Context**  
                """)

    ui.launch(inbrowser=True)
    # Hand the app back so callers can close or relaunch it later.
    return ui

In [None]:
# Build and launch the chat UI, using the assistant's `run` generator as the
# chat handler.
interface = chatInterface(fn=G.run, chat_type="messages")