In [None]:
!pip install -q fastapi uvicorn pyngrok gitpython transformers torch langchain langchain-community chromadb bitsandbytes nest_asyncio

# ============================
# 2️⃣ Imports
# ============================
import nest_asyncio
from pyngrok import ngrok
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
from pydantic import BaseModel
from pathlib import Path
import tempfile
import torch
from git import Repo
from typing import List, Optional

# LangChain imports
try:
    from langchain_community.document_loaders import DirectoryLoader, TextLoader
    from langchain_community.vectorstores import Chroma
    from langchain_community.llms import HuggingFacePipeline
except Exception:
    from langchain.document_loaders import DirectoryLoader, TextLoader
    from langchain.vectorstores import Chroma
    from langchain.llms import HuggingFacePipeline

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.embeddings.base import Embeddings
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, pipeline

# ============================
# 3️⃣ Embeddings
# ============================
class HFEncoder(Embeddings):
    """LangChain ``Embeddings`` adapter around a Hugging Face encoder model.

    Texts are tokenized in batches, passed through the model without gradient
    tracking, mean-pooled over the attention mask and, when ``normalize`` is
    set, L2-normalized.

    Args:
        model_name: Checkpoint passed to ``AutoTokenizer`` / ``AutoModel``.
        device: Torch device string; defaults to ``"cuda"`` when available,
            otherwise ``"cpu"``.
        batch_size: Number of texts encoded per forward pass.
        normalize: Whether to L2-normalize the pooled vectors.
        max_length: Truncation length handed to the tokenizer.
        doc_prefix: String prepended to every document before encoding.
        query_prefix: String prepended to every query before encoding.

    NOTE(review): the default prefixes look like the E5-style
    ``"passage: "`` / ``"query: "`` convention rather than the instruction
    documented for BGE checkpoints -- confirm against the model card. Vectors
    stored with one prefix pair should be re-embedded if the prefixes change.
    """

    def __init__(
        self,
        model_name: str = "BAAI/bge-small-en-v1.5",
        device: Optional[str] = None,
        batch_size: int = 16,
        normalize: bool = True,
        max_length: int = 512,
        doc_prefix: str = "passage: ",
        query_prefix: str = "query: ",
    ):
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
        self.model = AutoModel.from_pretrained(model_name).to(self.device)
        self.batch_size = batch_size
        self.normalize = normalize
        self.max_length = max_length
        # Prefixes are configurable so the encoder can be paired with models
        # that expect a different (or no) instruction string.
        self.doc_prefix = doc_prefix
        self.query_prefix = query_prefix

    def _mean_pooling(self, model_output, attention_mask):
        """Average the last hidden states over the positions kept by the mask."""
        token_embeddings = model_output.last_hidden_state
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        summed = torch.sum(token_embeddings * input_mask_expanded, dim=1)
        # Clamp so a row whose mask is all zeros cannot divide by zero.
        counts = torch.clamp(input_mask_expanded.sum(dim=1), min=1e-9)
        return summed / counts

    def _encode_texts(self, texts: List[str]) -> List[List[float]]:
        """Encode *texts* batch by batch and return one vector per text."""
        vecs = []
        for i in range(0, len(texts), self.batch_size):
            batch = texts[i : i + self.batch_size]
            enc = self.tokenizer(
                batch,
                padding=True,
                truncation=True,
                max_length=self.max_length,
                return_tensors="pt",
            ).to(self.device)
            with torch.no_grad():
                out = self.model(**enc)
            pooled = self._mean_pooling(out, enc["attention_mask"])
            if self.normalize:
                pooled = torch.nn.functional.normalize(pooled, p=2, dim=1)
            # Move to CPU before converting so the result is plain Python floats.
            vecs.extend(pooled.cpu().tolist())
        return vecs

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed documents, each prefixed with ``doc_prefix``."""
        texts = [self.doc_prefix + t for t in texts]
        return self._encode_texts(texts)

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query, prefixed with ``query_prefix``."""
        text = self.query_prefix + text
        return self._encode_texts([text])[0]

# ============================
# 4️⃣ LLM loader
# ============================
def get_llm():
    """Load the chat model and wrap it as a LangChain LLM.

    With CUDA available the model is loaded in float16 with automatic device
    placement and, when ``bitsandbytes`` can be imported, 4-bit quantization.
    Without CUDA it is loaded in float32.

    Returns:
        tuple: ``(HuggingFacePipeline, tokenizer)`` -- the LangChain wrapper
        around a ``text-generation`` pipeline and the tokenizer it uses.
    """
    model_name = "Qwen/Qwen2.5-1.5B-Instruct"
    tok = AutoTokenizer.from_pretrained(model_name, use_fast=True)

    if torch.cuda.is_available():
        load_kwargs = dict(device_map="auto", torch_dtype=torch.float16, low_cpu_mem_usage=True)
        try:
            # Imported only to probe that the quantization backend is usable.
            import bitsandbytes as _bnb  # noqa: F401
            from transformers import BitsAndBytesConfig
        except Exception as exc:
            # Best-effort: keep the float16 load, but say why 4-bit was skipped
            # instead of failing silently.
            print(f"⚠️ 4-bit quantization unavailable, loading in float16: {exc}")
        else:
            # Pass an explicit config object rather than the bare
            # ``load_in_4bit`` keyword.
            load_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
    else:
        load_kwargs = dict(device_map=None, torch_dtype=torch.float32)

    model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
    # NOTE(review): ``temperature`` / ``top_p`` are passed together with
    # ``do_sample=False``; confirm whether they have any effect in that mode.
    gen_pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tok,
        max_new_tokens=700,
        do_sample=False,
        temperature=0.1,
        top_p=0.9,
        repetition_penalty=1.05,
        pad_token_id=tok.eos_token_id,
    )
    return HuggingFacePipeline(pipeline=gen_pipe), tok

# ============================
# 5️⃣ Prompts
# ============================
# Prompt for the retrieval chain: the user's question, the retrieved code
# snippets, and a fixed list of answering instructions.
EXPLAIN_PROMPT = PromptTemplate(
    input_variables=["question", "context"],
    template="\n".join(
        [
            "You are a helpful assistant that explains code clearly.",
            "",
            "User Question:",
            "{question}",
            "",
            "Relevant Code Snippets:",
            "{context}",
            "",
            "Instructions:",
            "- Explain what the code does in simple terms.",
            "- If asking about a whole file, cover purpose, key functions/components, and how it fits the app.",
            "- If asking about a specific function or component, explain its logic step-by-step.",
            "- Avoid just repeating the code; provide reasoning.",
            "",
            "Answer:",
        ]
    ),
)

# ============================
# 6️⃣ Repo utils
# ============================
def get_repo(user_input: str) -> str:
    """Resolve *user_input* to a local directory holding the repository.

    Args:
        user_input: Either an ``http(s)`` URL on ``github.com`` (cloned into a
            fresh temporary directory) or a path that already exists locally.

    Returns:
        The temporary clone directory, or the resolved absolute local path.

    Raises:
        ValueError: If the input is neither a GitHub URL nor an existing path.
        Exception: Whatever ``Repo.clone_from`` raises when cloning fails.
    """
    import shutil
    from urllib.parse import urlparse

    parsed = urlparse(user_input)
    host = (parsed.hostname or "").lower()
    # Check the parsed host rather than a substring so that a URL which only
    # mentions "github.com" in its path or query is not cloned.
    is_github = host == "github.com" or host.endswith(".github.com")
    if parsed.scheme in ("http", "https") and is_github:
        tmp = tempfile.mkdtemp()
        try:
            Repo.clone_from(user_input, tmp)
        except Exception:
            # Do not leave a half-populated temp directory behind.
            shutil.rmtree(tmp, ignore_errors=True)
            raise
        return tmp
    p = Path(user_input)
    if p.exists():
        return str(p.resolve())
    raise ValueError("❌ Invalid input! Provide a local path or a GitHub URL.")

def load_documents(repo_path: str):
    """Load the JS/JSX/TS/TSX/Python/Markdown files found under *repo_path*.

    One ``DirectoryLoader`` is run per glob pattern. A pattern whose load
    raises is reported on stdout and skipped, so the remaining patterns are
    still processed.

    Raises:
        ValueError: If no pattern produced any document.
    """
    extensions = ("js", "jsx", "ts", "tsx", "py", "md")
    collected = []
    for glob_pattern in (f"**/*.{ext}" for ext in extensions):
        directory_loader = DirectoryLoader(repo_path, glob=glob_pattern, loader_cls=TextLoader, show_progress=True)
        try:
            loaded = directory_loader.load()
        except Exception as err:
            print(f"⚠️ Skipped pattern {glob_pattern}: {err}")
        else:
            collected.extend(loaded)
    if not collected:
        raise ValueError("No documents found in repository.")
    return collected

def split_docs(documents):
    """Split *documents* into overlapping chunks for embedding.

    Uses a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=1200`` and ``chunk_overlap=150``, trying paragraph, line,
    space and finally empty-string separators in that order.
    """
    splitter_options = {
        "chunk_size": 1200,
        "chunk_overlap": 150,
        "separators": ["\n\n", "\n", " ", ""],
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

def build_db(docs):
    """Embed *docs* and store them in the Chroma collection at ``./chroma_db``.

    The collection left in the persist directory by an earlier call is dropped
    first, so that loading a repository again (or loading a different one)
    does not mix stale or duplicate chunks into the retrieval results.

    Returns:
        The populated ``Chroma`` vector store.
    """
    persist_directory = "./chroma_db"
    embeddings = HFEncoder(model_name="BAAI/bge-small-en-v1.5")
    # ``from_documents`` adds to whatever already lives in the persist
    # directory, so clear the previous collection before indexing.
    Chroma(persist_directory=persist_directory, embedding_function=embeddings).delete_collection()
    db = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
    db.persist()
    return db

def build_explainer_qa(db, llm):
    """Create the retrieval QA chain used to answer free-form questions.

    The chain retrieves the 5 most similar chunks from *db* and places them
    into ``EXPLAIN_PROMPT`` using the "stuff" chain type. Source documents are
    not returned.
    """
    top_k_retriever = db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5},
    )
    chain_options = {
        "llm": llm,
        "retriever": top_k_retriever,
        "chain_type": "stuff",
        "chain_type_kwargs": {"prompt": EXPLAIN_PROMPT},
        "return_source_documents": False,
    }
    return RetrievalQA.from_chain_type(**chain_options)

def find_file_anywhere(repo_root: str, query_path: str) -> Optional[str]:
    """Find the file under *repo_root* that best matches *query_path*.

    Matching is case-insensitive and treats ``\\`` and ``/`` alike. Candidates
    are ranked by match quality, best first:

    0. the relative path equals the query;
    1. the relative path ends with the query on a directory boundary;
    2. the relative path ends with the query anywhere;
    3. only the file name equals the query's file name.

    Within a rank the shortest absolute path wins.

    Args:
        repo_root: Directory to search recursively.
        query_path: Full, relative or partial path given by the user.

    Returns:
        Absolute path of the best match, or ``None`` when the query is empty
        or nothing matches.
    """
    q = query_path.strip().replace("\\", "/").lower()
    # A leading "./" or "/" never appears in a repo-relative path; drop it so
    # such queries can still match on the path instead of the name only.
    while q.startswith("./"):
        q = q[2:]
    q = q.lstrip("/")
    if not q:
        # An empty query would otherwise match every file.
        return None

    q_name = Path(q).name
    ranked = []
    for p in Path(repo_root).rglob("*"):
        if not p.is_file():
            continue
        rel = str(p.relative_to(repo_root)).replace("\\", "/").lower()
        if rel == q:
            tier = 0
        elif rel.endswith("/" + q):
            tier = 1
        elif rel.endswith(q):
            tier = 2
        elif Path(rel).name == q_name:
            tier = 3
        else:
            continue
        abs_p = str(p.resolve())
        ranked.append((tier, len(abs_p), abs_p))
    return min(ranked)[2] if ranked else None

def chunk_text_by_tokens(text: str, tok: AutoTokenizer, max_context_tokens: int, reserve_for_prompt: int = 2000) -> List[str]:
    """Split *text* into pieces of a bounded number of tokens.

    The per-piece budget is ``max_context_tokens - reserve_for_prompt``, but
    never less than 512. The text is encoded once without special tokens and
    each consecutive window of token ids is decoded back to a string.

    Returns:
        The decoded pieces in order; an empty list when *text* encodes to no
        tokens.
    """
    window = max(512, max_context_tokens - reserve_for_prompt)
    token_ids = tok.encode(text, add_special_tokens=False)
    return [
        tok.decode(token_ids[start : start + window])
        for start in range(0, len(token_ids), window)
    ]

def explain_file(file_query: str, repo_root: str, llm: HuggingFacePipeline, tok: AutoTokenizer) -> str:
    """Explain one file of the repository with the LLM.

    The file is located with ``find_file_anywhere`` and read as UTF-8
    (undecodable bytes are ignored). If it fits into one token chunk it is
    explained in a single prompt; otherwise each chunk is explained on its own
    and a final prompt merges the partial explanations.

    Args:
        file_query: Full, relative or partial path of the file to explain.
        repo_root: Root directory of the loaded repository.
        llm: LangChain pipeline wrapper; its model config supplies the
            context length.
        tok: Tokenizer used to measure and split the file.

    Returns:
        The explanation text, or a message when the file is missing or empty.
    """
    target = find_file_anywhere(repo_root, file_query)
    if not target:
        return f"❌ File not found: {file_query}"

    with open(target, "r", encoding="utf-8", errors="ignore") as f:
        code = f.read()

    max_ctx = getattr(llm.pipeline.model.config, "max_position_embeddings", 32768)
    chunks = chunk_text_by_tokens(code, tok, max_context_tokens=max_ctx, reserve_for_prompt=1500)

    if not chunks:
        # Nothing to explain; without this guard the merge prompt below would
        # be sent with an empty list of parts.
        return f"⚠️ File is empty: {target}"

    if len(chunks) == 1:
        prompt = (
            f"Explain the following file in detail.\n\n"
            f"File: {target}\n\n"
            f"{code}\n\n"
            f"Instructions:\n"
            f"- Purpose of the file and how it fits the project\n"
            f"- Key functions/components and their logic\n"
            f"- Any notable patterns, libraries, and data flow"
        )
        return llm(prompt)

    total = len(chunks)
    parts = []
    for idx, ch in enumerate(chunks, 1):
        prompt = (
            f"Explain PART {idx} of this file clearly (out of {total} parts). "
            f"Focus on what this part does and how it connects to the rest.\n\n"
            f"File: {target}\n\n"
            f"{ch}\n\n"
            f"Return a concise explanation for this part."
        )
        parts.append(llm(prompt))

    # Second pass: merge the per-part explanations into one document.
    synthesizer = (
        "You are given multiple part-wise explanations of a source file. "
        "Combine them into a single coherent explanation that reads like one document. "
        "Include: file purpose, key components/functions, data flow, and how it fits in the project.\n\n"
        "Parts:\n" + "\n\n".join(f"- Part {i}: {p}" for i, p in enumerate(parts, 1))
    )
    final = llm(synthesizer)
    return f"📄 Explanation of {target}:\n\n{final}"

# ============================
# 7️⃣ FastAPI setup
# ============================
# The ASGI application served by uvicorn at the bottom of this file.
app = FastAPI(
    title="Codebase Explainer API",
    description="Upload repo and ask questions",
    version="1.0.0",
)
# CORS is fully open: any origin, method and header is accepted.
# NOTE(review): a wildcard origin combined with allow_credentials=True is
# very permissive -- confirm whether the frontend actually needs credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/", include_in_schema=False)
def root():
    """Redirect requests for the bare root URL to ``/docs``."""
    docs_redirect = RedirectResponse(url="/docs")
    return docs_redirect

class AskRequest(BaseModel):
    """Request body of ``POST /ask``."""

    # The user's question, or an "explain file <path>" command.
    question: str


# The model is loaded once at import time and shared by all requests.
llm, tok = get_llm()

# Per-repository state; stays None until /load_repo has succeeded.
explainer_chain = db = repo_root = None

# ============================
# 8️⃣ Endpoints
# ============================
@app.post("/load_repo")
def load_repo_endpoint(repo_url: str):
    """Clone or locate a repository, index it and make it the active one.

    Args:
        repo_url: GitHub URL or local path, supplied as a query parameter
            (``POST /load_repo?repo_url=...``).

    Returns:
        ``{"status": "success", "repo_root": <path>}`` on success.

    Raises:
        HTTPException: 400 with the underlying error message when any step of
            loading or indexing fails.
    """
    global db, explainer_chain, repo_root
    try:
        new_root = get_repo(repo_url)
        chunks = split_docs(load_documents(new_root))
        new_db = build_db(chunks)
        new_chain = build_explainer_qa(new_db, llm)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    # Publish the new state only after every step succeeded, so a failed load
    # cannot leave repo_root pointing at one repository while the chain still
    # answers from another.
    repo_root, db, explainer_chain = new_root, new_db, new_chain
    return {"status": "success", "repo_root": repo_root}

@app.post("/ask")
def ask_question(req: AskRequest):
    """Answer a question about the loaded repository.

    A question of the form ``explain file <path>`` is routed to
    ``explain_file``; anything else goes through the retrieval QA chain.

    Returns:
        ``{"answer": <text>}``.

    Raises:
        HTTPException: 400 when no repository is loaded yet, or when the
            ``explain file`` command is given without a path.
    """
    if not explainer_chain or not repo_root:
        raise HTTPException(status_code=400, detail="Repo not loaded yet. Call /load_repo first.")
    q = req.question.strip()
    # Split on whitespace and compare whole words, so that questions such as
    # "explain files in src" or "explain filename.py" are not mistaken for the
    # "explain file <path>" command.
    words = q.split(None, 2)
    is_file_command = (
        len(words) >= 2
        and words[0].lower() == "explain"
        and words[1].lower() == "file"
    )
    if is_file_command:
        if len(words) < 3:
            raise HTTPException(status_code=400, detail="Usage: explain file <relative/or/partial/path>")
        ans = explain_file(words[2], repo_root, llm, tok)
    else:
        # NOTE(review): recent LangChain releases appear to flag Chain.run as
        # deprecated; consider switching to invoke() after checking the
        # installed version.
        ans = explainer_chain.run(q)
    return {"answer": ans}

@app.get("/health")
def health():
    """Liveness probe; always reports ``ok``."""
    return dict(status="ok")

# ============================
# 9️⃣ Run FastAPI via ngrok
# ============================
# Allow uvicorn to start its own event loop inside the notebook's running loop.
nest_asyncio.apply()

# 🔑 The ngrok auth token is read from the NGROK_AUTHTOKEN environment
# variable so that it is never stored in the notebook. A token that was
# previously committed here should be treated as leaked and revoked.
import os

ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
if ngrok_token:
    ngrok.set_auth_token(ngrok_token)
else:
    print("⚠️ NGROK_AUTHTOKEN is not set; relying on any existing ngrok configuration.")

# Start a tunnel on port 8000
port = 8000
public_url = ngrok.connect(port)

# Print the tunnel's URL string (not the tunnel object's repr) together with
# the path the docs are actually served on.
print(f"🔗 Public URL: {public_url.public_url}/docs (Swagger UI)")

uvicorn.run(app, host="0.0.0.0", port=port)


ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-1' coro=<Server.serve() done, defined at /usr/local/lib/python3.12/dist-packages/uvicorn/server.py:69> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/main.py", line 580, in run
    server.run()
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/server.py", line 67, in run
    return asyncio.run(self.serve(sockets=sockets))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.12/asyncio/events.py", line 88, in _run
    se

🔗 Public URL: NgrokTunnel: "https://5dbddf6dfd40.ngrok-free.app" -> "http://localhost:8000"/api (Swagger UI)


INFO:     Started server process [304]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     39.49.113.139:0 - "GET / HTTP/1.1" 307 Temporary Redirect
INFO:     39.49.113.139:0 - "GET /docs HTTP/1.1" 200 OK
INFO:     39.49.113.139:0 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     39.49.113.139:0 - "POST /docs/load_repo HTTP/1.1" 404 Not Found


100%|██████████| 4/4 [00:00<00:00, 3116.70it/s]
100%|██████████| 12/12 [00:00<00:00, 6429.69it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 2603.54it/s]


INFO:     39.49.113.139:0 - "POST /load_repo?repo_url=https%3A%2F%2Fgithub.com%2Fhassan-jamshaid10%2FPortfolio-Fullstack.git HTTP/1.1" 200 OK
INFO:     39.49.113.139:0 - "POST /load_repo HTTP/1.1" 422 Unprocessable Entity
INFO:     39.49.113.139:0 - "POST /load_repo HTTP/1.1" 422 Unprocessable Entity
INFO:     39.49.113.139:0 - "POST /load_repo HTTP/1.1" 422 Unprocessable Entity
INFO:     39.49.113.139:0 - "POST /load_repo HTTP/1.1" 422 Unprocessable Entity


100%|██████████| 4/4 [00:00<00:00, 1690.06it/s]
100%|██████████| 12/12 [00:00<00:00, 6367.87it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 1216.09it/s]


INFO:     39.49.113.139:0 - "POST /load_repo?repo_url=https://github.com/hassan-jamshaid10/Portfolio-Fullstack.git HTTP/1.1" 200 OK


100%|██████████| 4/4 [00:00<00:00, 1656.19it/s]
100%|██████████| 12/12 [00:00<00:00, 6096.37it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 2774.01it/s]


INFO:     39.49.113.139:0 - "POST /load_repo?repo_url=https%3A%2F%2Fgithub.com%2Fhassan-jamshaid10%2FPortfolio-Fullstack.git HTTP/1.1" 200 OK
INFO:     39.49.113.139:0 - "OPTIONS /ask HTTP/1.1" 200 OK


  ans = explainer_chain.run(q)


INFO:     39.49.113.139:0 - "POST /ask HTTP/1.1" 200 OK


100%|██████████| 4/4 [00:00<00:00, 2037.31it/s]
100%|██████████| 12/12 [00:00<00:00, 5209.78it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 1515.83it/s]


INFO:     39.49.113.139:0 - "POST /load_repo?repo_url=https%3A%2F%2Fgithub.com%2Fhassan-jamshaid10%2FPortfolio-Fullstack.git HTTP/1.1" 200 OK
INFO:     39.49.113.139:0 - "POST /ask HTTP/1.1" 200 OK
INFO:     39.49.113.139:0 - "OPTIONS /ask HTTP/1.1" 200 OK
INFO:     39.49.113.139:0 - "POST /ask HTTP/1.1" 200 OK
INFO:     39.49.113.139:0 - "POST /ask HTTP/1.1" 200 OK
INFO:     39.49.113.139:0 - "POST /ask HTTP/1.1" 200 OK
