<a href="https://colab.research.google.com/github/ashivashankars/Team_4Musketeers_capstone_project/blob/Archana_resume_AI_Agents_chatbot/Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import gradio as gr
import json
import os
from openai import OpenAI
from google.colab import userdata

# Install pypdf if not already installed
try:
    from pypdf import PdfReader
except ImportError:
    !pip install pypdf
    from pypdf import PdfReader


# 1. SETUP & CONFIGURATION
# ---------------------------------------------------------
# `client` stays None unless both the secret lookup and the client
# construction succeed; any failure is reported as a warning instead of raised.
client = None
try:
    OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")
    client = OpenAI(api_key=OPENAI_API_KEY)
except Exception as exc:
    print(f"Warning: API Key not found. {exc}")

# Profile fields to extract from a resume. Every field starts as the string
# "NULL" (the "not found" marker); key order is preserved as written here.
TARGET_SCHEMA = dict.fromkeys(
    (
        "graduation_date",
        "current_degree_major",
        "current_degree_gpa",
        "us_citizenship",
        "visa_type",
        "programming_languages",  # replaces the older technical_skills field
        "experience_software",
        "tools_frameworks",
        "leadership",
        "job_preference",
    ),
    "NULL",
)

# 2. AGENT 1: THE EXTRACTOR
# ---------------------------------------------------------
def agent_extractor(file_path):
    """
    Read a PDF resume and ask the LLM to extract the TARGET_SCHEMA fields.

    Args:
        file_path: Path of the PDF to parse (handed to ``PdfReader``).

    Returns:
        dict: One of
          * the JSON object returned by the model,
          * ``{"error": "PDF Read Error: ..."}`` when the PDF cannot be read,
          * a fresh copy of ``TARGET_SCHEMA`` (every field "NULL") when the
            LLM call or JSON parsing fails.
    """
    # Step A: Text extraction from the PDF's text layer.
    # Pages for which extract_text() returns nothing contribute "".
    try:
        reader = PdfReader(file_path)
        text = "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        return {"error": f"PDF Read Error: {e}"}

    # Step B: LLM Extraction (only the first 4000 characters are sent)
    prompt = f"""
    You are a Resume Parser Agent. Extract the following fields from the resume text.
    Return ONLY valid JSON matching this structure exactly.
    Use the string "NULL" if the information is not explicitly found.
    For 'leadership', if found, output a list of dictionaries with 'role', 'organization', and 'description'. Otherwise, output 'NULL'.

    Target Structure:
    {json.dumps(TARGET_SCHEMA, indent=2)}

    RESUME TEXT:
    {text[:4000]}
    """

    try:
        response = client.chat.completions.create(
            model="gpt-4o", # or gpt-3.5-turbo
            messages=[{"role": "system", "content": "You are a JSON extractor."},
                      {"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        # Also reached when `client` is None (no API key was loaded).
        print(f"Extraction Error: {e}")
        # Hand back a copy, never TARGET_SCHEMA itself: the interviewer updates
        # the returned dict in place, which would otherwise overwrite the
        # module-level schema and leak answers into later uploads.
        return dict(TARGET_SCHEMA)

# 3. AGENT 2: THE INTERVIEWER
# ---------------------------------------------------------
def agent_interviewer(current_data, user_response=None, current_field=None):
    """
    Record the user's answer (if any) and pick the next missing field to ask.

    Args:
        current_data: dict of profile fields; it is updated in place.
        user_response: The user's answer to the previous question, or None.
            Surrounding whitespace is stripped; a blank answer is ignored so
            the same question is asked again.
        current_field: Key in ``current_data`` that ``user_response`` answers,
            or None when there is no pending question.

    Returns:
        tuple: ``(current_data, next_field, question)``. ``next_field`` and
        ``question`` are both None when no TARGET_SCHEMA field is missing.
    """
    # Values that count as "not provided". Kept as a tuple (not a set) because
    # field values may be unhashable, e.g. a list of leadership entries.
    missing_values = ("NULL", "", None, "null")

    # Normalise once so "Yes " / " no" are recognised and whitespace-only
    # input is not stored as if it were a real answer.
    answer = user_response.strip() if isinstance(user_response, str) else user_response

    # 1. Update data if user provided an answer
    if answer and current_field:
        current_data[current_field] = answer

        if current_field == "us_citizenship" and isinstance(answer, str):
            normalized = answer.lower()
            if normalized in ("no", "n", "false"):
                # Non-citizen: make sure a missing visa_type carries the
                # canonical "NULL" marker so it is asked about later.
                if current_data.get("visa_type") in missing_values:
                    current_data["visa_type"] = "NULL"
            elif normalized in ("yes", "y", "true"):
                # Citizen: visa type is not applicable, so never ask for it.
                current_data["visa_type"] = "N/A"

    # 2. Find the next missing field (TARGET_SCHEMA order is the ask order)
    special_questions = {
        "us_citizenship": "Are you a **US Citizen**? (Yes/No)",
        "graduation_date": "When is your expected **Graduation Date**?",
        "current_degree_gpa": "What is your **GPA** for your Current Degree Only?",
        "job_preference": "What is your specific **Job Preference** (e.g., Full-time / Internship / Both)?",
        "visa_type": "What is your **Visa Type** (e.g., H1B, F1-OPT)? Or if you don't need sponsorship, please state that.",
    }

    for field in TARGET_SCHEMA:
        if current_data.get(field) in missing_values:
            # Generic wording unless the field has a tailored question.
            human_field = field.replace("_", " ").title()
            generic = f"I noticed your resume is missing **{human_field}**. could you please provide that?"
            # Stop at the first missing field.
            return current_data, field, special_questions.get(field, generic)

    return current_data, None, None

# 4. AGENT 3: THE FINALIZER
# ---------------------------------------------------------
def agent_finalizer(final_data):
    """
    Write the finished profile to a JSON file and build the closing message.

    Args:
        final_data: JSON-serialisable profile data.

    Returns:
        tuple: ``(filename, message)`` where ``filename`` is the file that was
        written and ``message`` is markdown embedding the profile as JSON.
    """
    # Extension point: an LLM pass to normalise dates or text could run here.
    output_name = "candidate_profile.json"

    with open(output_name, "w") as out_file:
        json.dump(final_data, out_file, indent=4)

    pretty_profile = json.dumps(final_data, indent=2)
    closing_message = (
        "**Interview Complete!**\n\n"
        "I have generated your profile.\n\n"
        f"```json\n{pretty_profile}\n```"
    )
    return output_name, closing_message

# 5. GRADIO ORCHESTRATOR (UI LOGIC)
# ---------------------------------------------------------

def process_upload(file, history, state):
    """
    Handle a resume upload: extract fields, then start the interview.

    Args:
        file: Path of the uploaded file, or a falsy value when there is none.
        history: Chat history as a list of (user, bot) tuples; appended to
            in place.
        state: Session dict holding "data" and "current_field"; updated in place.

    Returns:
        tuple: ``(history, state, download)`` where ``download`` is the
        generated profile filename once the profile is complete, else None.
    """
    if not file:
        return history, state, None

    # Run Agent 1
    extracted_data = agent_extractor(file)

    # The extractor reports an unreadable PDF as {"error": ...}. Tell the user
    # instead of claiming success and carrying the "error" key into the
    # exported profile; the session state is left untouched.
    if isinstance(extracted_data, dict) and "error" in extracted_data:
        history.append((None, f"Sorry, I could not read that file. {extracted_data['error']}"))
        return history, state, None

    # Run Agent 2 (Initial Check)
    updated_data, next_field, question = agent_interviewer(extracted_data)

    # Update State
    state["data"] = updated_data
    state["current_field"] = next_field

    # Update Chat UI
    history.append((None, "Resume parsed! Checking for missing info..."))
    if question:
        history.append((None, question))
        return history, state, None  # Nothing to download until the interview ends

    # Nothing is missing: finalize right away and offer the file for download
    fname, msg = agent_finalizer(updated_data)
    history.append((None, msg))
    return history, state, fname


def process_chat(user_msg, history, state):
    """
    Handle one chat message: store it as the answer to the pending question,
    then either ask the next question or finalize the profile.

    Returns ``(history, state, download)``; ``download`` is the profile
    filename once no field is missing, otherwise None.
    """
    # Ignore empty submissions
    if not user_msg:
        return history, state, None

    # Pull the interview context out of the session state
    profile = state.get("data", TARGET_SCHEMA.copy())
    pending_field = state.get("current_field")

    # Echo the user's message into the chat
    history.append((user_msg, None))

    # Agent 2: record the answer and look for the next gap
    profile, upcoming_field, prompt_text = agent_interviewer(profile, user_msg, pending_field)

    state["data"] = profile
    state["current_field"] = upcoming_field

    download = None
    if upcoming_field:
        reply = prompt_text
    else:
        # Agent 3: no gaps left, produce the downloadable profile
        download, reply = agent_finalizer(profile)

    history.append((None, reply))
    return history, state, download

# 6. UI CONSTRUCTION
# ---------------------------------------------------------
# Build the two-column UI: upload/download on the left, interview chat on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:

    # State stores the JSON data and the current field being asked
    state = gr.State({"data": {}, "current_field": None})

    # The robot emoji is written as an escape so the title cannot be corrupted
    # by a mis-decoded save of this file (it previously rendered as mojibake).
    gr.Markdown("# \U0001F916 Agentic Resume Screener")
    gr.Markdown("Upload a resume. The agents will extract data and interview you for missing details.")

    with gr.Row():
        with gr.Column(scale=1):
            file_upload = gr.File(label="1. Upload Resume (PDF)", type="filepath")
            download_btn = gr.File(label="3. Download Profile", interactive=False)

        with gr.Column(scale=2):
            # NOTE(review): the captured cell output shows warnings pointing at
            # this line; check these arguments against the installed Gradio version.
            chatbot = gr.Chatbot(height=500, label="2. Interview Agent", bubble_full_width=False)
            msg_input = gr.Textbox(label="Your Answer", placeholder="Type here and press enter...")

    # Event: File Upload -> extract fields and ask the first question
    file_upload.change(
        fn=process_upload,
        inputs=[file_upload, chatbot, state],
        outputs=[chatbot, state, download_btn]
    )

    # Event: User Chat -> record the answer, then clear the textbox
    msg_input.submit(
        fn=process_chat,
        inputs=[msg_input, chatbot, state],
        outputs=[chatbot, state, download_btn]
    ).then(
        lambda: "", outputs=msg_input # Clear box
    )

demo.launch(debug=True)

  with gr.Blocks(theme=gr.themes.Soft()) as demo:
  chatbot = gr.Chatbot(height=500, label="2. Interview Agent", bubble_full_width=False)
  chatbot = gr.Chatbot(height=500, label="2. Interview Agent", bubble_full_width=False)
  chatbot = gr.Chatbot(height=500, label="2. Interview Agent", bubble_full_width=False)


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://04104d6b194e209a34.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/fastapi/applications.py", line 1133, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py",

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7863 <> https://04104d6b194e209a34.gradio.live


