In [None]:
# =================================================
# AI RESUME ANALYZER — PRODUCTION-READY RAG APP
# Author: Rami Afif
# Stack: LangChain + Streamlit + Ollama/OpenAI + FAISS/Pinecone
# Security: .env keys, input validation, error handling
# =================================================

import hashlib
import os
import re

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
from langchain.llms import OpenAI, Ollama
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS, Pinecone
from PyPDF2 import PdfReader


In [None]:
# -------------------------------------------------
# 1. LOAD SECRETS FROM .env (NEVER IN CODE!)
# -------------------------------------------------
# Read settings from the .env file before the feature flags below are looked up.
load_dotenv()

# A flag is on only when its environment variable equals "true"
# (case-insensitive); an unset variable or any other value leaves it off.
USE_OPENAI, USE_PINECONE = (
    os.environ.get(flag_name, "false").lower() == "true"
    for flag_name in ("USE_OPENAI", "USE_PINECONE")
)

In [None]:
# -------------------------------------------------
# 2. STREAMLIT PAGE CONFIG
# -------------------------------------------------
# The same title is used for the page configuration and the on-page heading;
# a one-line usage hint follows.
_APP_TITLE = "AI Resume Analyzer"
st.set_page_config(layout="centered", page_title=_APP_TITLE)
st.title(_APP_TITLE)
st.markdown("**Upload your resume (PDF) → Ask anything about it.**")


In [None]:
# -------------------------------------------------
# 3. INPUT SAFETY FILTER (BLOCK HARMFUL INPUT)
# -------------------------------------------------
MAX_QUESTION_LEN = 200
MIN_QUESTION_LEN = 3
BLOCKED_WORDS = ["hack", "jailbreak", "ignore", "system", "prompt", "bypass", "fuck", "shit"]

# Blocked terms are matched as whole words (case-insensitive). Plain substring
# matching rejected harmless resume questions that merely contain a blocked term
# ("hackathon", "promptly", "operating systems").
_BLOCKED_WORD_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(word) for word in BLOCKED_WORDS) + r")\b",
    re.IGNORECASE,
)
# Three or more bracket-like characters in a row are treated as code/markup.
_CODE_LIKE_RE = re.compile(r"[<>(){}\[\]]{3,}")


def is_safe_input(text: str) -> bool:
    """Decide whether a user question may be forwarded to the QA chain.

    Args:
        text: Raw question as typed by the user. ``None`` or an empty string
            is rejected.

    Returns:
        ``True`` when, after trimming surrounding whitespace, the question is
        between ``MIN_QUESTION_LEN`` and ``MAX_QUESTION_LEN`` characters long,
        contains none of ``BLOCKED_WORDS`` as a whole word, and has no run of
        three or more bracket characters. ``False`` otherwise.
    """
    if not text:
        return False

    # Both length limits apply to the trimmed question, so padding whitespace
    # neither helps a too-short question pass nor makes a valid one fail.
    question = text.strip()
    if not MIN_QUESTION_LEN <= len(question) <= MAX_QUESTION_LEN:
        return False
    if _BLOCKED_WORD_RE.search(question):
        return False
    if _CODE_LIKE_RE.search(question):  # Block code-like patterns
        return False
    return True



In [None]:
# -------------------------------------------------
# 4. LOAD EMBEDDINGS (OpenAI or Local)
# -------------------------------------------------
@st.cache_resource
def get_embeddings():
    """Build the embedding model selected by the USE_OPENAI flag.

    Returns an ``OpenAIEmbeddings`` instance when USE_OPENAI is set and a
    ``HuggingFaceEmbeddings`` instance otherwise. The result is cached through
    ``st.cache_resource``. If construction raises, an error is shown on the
    page and ``st.stop()`` is called.
    """
    try:
        if not USE_OPENAI:
            # Default path: the HuggingFace model (noted as the free option).
            embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        else:
            embedder = OpenAIEmbeddings(model="text-embedding-3-small")
    except Exception:
        st.error("Embeddings failed. Check internet or API key.")
        st.stop()
    else:
        return embedder


In [None]:
# -------------------------------------------------
# 5. LOAD LLM (OpenAI or Ollama)
# -------------------------------------------------
@st.cache_resource
def get_llm():
    """Build the language model selected by the USE_OPENAI flag.

    Returns a chat-model client for ``gpt-4o-mini`` when USE_OPENAI is set and
    an ``Ollama`` client for ``llama3.2:3b`` otherwise, both at temperature 0.2.
    The result is cached through ``st.cache_resource``. If construction (or the
    chat-model import) raises, an error is shown on the page and ``st.stop()``
    is called.
    """
    try:
        if USE_OPENAI:
            # gpt-4o-mini is a chat model. The completion-style `OpenAI` wrapper
            # sends requests to the legacy completions endpoint, which rejects
            # chat models, so the chat wrapper is required here.
            from langchain.chat_models import ChatOpenAI

            return ChatOpenAI(model="gpt-4o-mini", temperature=0.2)
        return Ollama(model="llama3.2:3b", temperature=0.2)  # Run `ollama pull llama3.2:3b`
    except Exception:
        st.error("LLM failed. Run `ollama pull llama3.2:3b` or check OpenAI key.")
        st.stop()

# Create the two shared model objects used by the rest of the script.
# The right-hand side is evaluated left to right: embeddings first, then the LLM.
embeddings, llm = get_embeddings(), get_llm()



In [None]:
# -------------------------------------------------
# 6. PDF UPLOAD & TEXT EXTRACTION
# -------------------------------------------------
# Tunables for chunking and retrieval.
CHUNK_SIZE = 500       # chunk_size passed to the text splitter
CHUNK_OVERLAP = 50     # chunk_overlap passed to the text splitter
RETRIEVER_K = 3        # number of chunks retrieved per question

# Constants for the rough cost estimate shown in OpenAI mode.
GPT4O_MINI_INPUT_USD_PER_M = 0.15    # USD per one million input tokens
GPT4O_MINI_OUTPUT_USD_PER_M = 0.60   # USD per one million output tokens
TOKENS_PER_WORD = 1.3                # crude words -> tokens heuristic

# st.session_state slot holding the chain built for the current upload.
_QA_SESSION_KEY = "resume_qa"

# Prompt that restricts answers to the retrieved resume text.
# The indentation of the continuation lines is part of the string literal and is
# kept exactly as before so the text sent to the model does not change.
PROMPT_TEMPLATE = """You are a professional HR analyst.
            Answer ONLY using the resume text below.
            If information is missing, say: "Not mentioned in resume."
            Be concise, factual, and professional.
            Never reveal personal data like phone/email.

            Context:
            {context}

            Question: {question}
            Answer:"""


def _extract_pdf_text(pdf_file) -> str:
    """Return the text of every page that yields any, each followed by a newline."""
    reader = PdfReader(pdf_file)
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(page_text + "\n" for page_text in page_texts if page_text)


def _build_vectorstore(docs, doc_digest: str):
    """Index `docs` and return ``(vectorstore, backend_name)``.

    Pinecone is tried when USE_PINECONE is set; any failure there shows a
    warning and falls back to an in-memory FAISS index.
    """
    if USE_PINECONE:
        try:
            from pinecone import Pinecone as PineconeClient

            # The client object itself is not used afterwards; the call is kept
            # as in the original flow. NOTE(review): presumably it fails fast on
            # a missing package or key - confirm.
            PineconeClient(api_key=os.getenv("PINECONE_API_KEY"))
            index_name = os.getenv("PINECONE_INDEX", "resume-index")
            # Each resume gets its own namespace, so retrieval never returns
            # chunks from a different upload stored in the same index, and
            # deterministic ids, so re-indexing the same file overwrites its
            # vectors instead of duplicating them.
            # NOTE(review): relies on LangChain's Pinecone.from_texts accepting
            # `namespace` and `ids` - confirm against the installed version.
            store = Pinecone.from_documents(
                docs,
                embeddings,
                index_name=index_name,
                namespace=doc_digest,
                ids=[f"{doc_digest}-{position}" for position in range(len(docs))],
            )
            return store, "Pinecone"
        except Exception:
            st.warning("Pinecone failed. Falling back to FAISS.")
    return FAISS.from_documents(docs, embeddings), "FAISS"


def _build_qa_chain(vectorstore):
    """Combine a top-k retriever over `vectorstore` with the shared LLM into a RetrievalQA chain."""
    retriever = vectorstore.as_retriever(search_kwargs={"k": RETRIEVER_K})
    prompt = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=["context", "question"])
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt},
        return_source_documents=False,
    )


def _describe_mode(store_name: str) -> str:
    """Label the LLM / vector-store combination that is actually in use."""
    if USE_OPENAI:
        return f"OpenAI + {store_name}"
    if store_name == "FAISS":
        return "Local (Ollama + FAISS)"
    return f"Ollama + {store_name}"


uploaded_file = st.file_uploader("Upload Resume (PDF)", type="pdf")

if uploaded_file:
    try:
        # Streamlit re-executes the script on every interaction, including each
        # submitted question. The chain is therefore kept in the session, keyed
        # by a digest of the uploaded bytes, so the PDF is parsed, embedded and
        # (with Pinecone) upserted once per upload rather than once per rerun.
        doc_digest = hashlib.sha256(uploaded_file.getvalue()).hexdigest()
        resume_state = st.session_state.get(_QA_SESSION_KEY)

        if resume_state is None or resume_state["digest"] != doc_digest:
            # -------------------------------------------------
            # 6. TEXT EXTRACTION
            # -------------------------------------------------
            text = _extract_pdf_text(uploaded_file)
            if not text.strip():
                st.error("No text found in PDF. Try another file.")
                st.stop()

            # -------------------------------------------------
            # 7. CHUNK TEXT (LangChain)
            # -------------------------------------------------
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
            )
            docs = splitter.create_documents([text])

            # -------------------------------------------------
            # 8-11. VECTOR STORE, RETRIEVER, PROMPT, RAG CHAIN
            # -------------------------------------------------
            with st.spinner("Indexing resume..."):
                vectorstore, store_name = _build_vectorstore(docs, doc_digest)
                qa_chain = _build_qa_chain(vectorstore)

            resume_state = {"digest": doc_digest, "qa": qa_chain, "store": store_name}
            st.session_state[_QA_SESSION_KEY] = resume_state

        qa = resume_state["qa"]
        st.success("Resume loaded securely!")

        # -------------------------------------------------
        # 12. Q&A INTERFACE
        # -------------------------------------------------
        question = st.text_input(
            "Ask about your resume:",
            placeholder="What are my programming skills?",
            max_chars=MAX_QUESTION_LEN
        )

        if question:
            if not is_safe_input(question):
                st.error("Invalid input. Keep it short, clean, and relevant.")
                st.stop()

            with st.spinner("Analyzing..."):
                try:
                    answer = qa.run(question)
                except Exception:
                    st.error("AI failed to respond. Try again.")
                    st.stop()

            st.markdown("**Answer:**")
            st.write(answer)

            # -------------------------------------------------
            # 13. COST TRACKING (OpenAI only)
            # -------------------------------------------------
            if USE_OPENAI:
                # Word-count heuristic over the question and the answer only.
                # NOTE(review): the retrieved context and the prompt text are
                # not counted, so this understates input tokens.
                input_tokens = len(question.split()) * TOKENS_PER_WORD
                output_tokens = len(answer.split()) * TOKENS_PER_WORD
                cost = (
                    input_tokens * GPT4O_MINI_INPUT_USD_PER_M
                    + output_tokens * GPT4O_MINI_OUTPUT_USD_PER_M
                ) / 1_000_000
                st.caption(f"**Estimated cost:** `${cost:.6f}` (gpt-4o-mini)")

        # -------------------------------------------------
        # 14. SHOW CURRENT MODE
        # -------------------------------------------------
        # Built from the backend that was actually used, so a Pinecone -> FAISS
        # fallback and the Ollama + Pinecone combination are reported correctly.
        mode = _describe_mode(resume_state["store"])
        st.caption(f"**Mode:** {mode} | **Security:** Active")

    except Exception:
        st.error("Failed to process PDF. Try another file.")
        st.stop()