In [None]:
!pip install requests beautifulsoup4 gradio PyMuPDF pypdf groq


In [None]:
!pip install pypdf
!pip install groq
!pip install gradio
!pip install fitz
!pip install pypdf2

In [None]:
import unicodedata
import fitz  # PyMuPDF
from pypdf import PdfReader
from groq import Groq


In [None]:
# === PART 1: URL Scraper + JD Extractor ===
import requests
from bs4 import BeautifulSoup
import json

# 🔐 Config
# Never commit API keys: the key is read from the GROQ_API_KEY environment
# variable. The key that used to be hardcoded here has been exposed and must
# be revoked in the Groq console.
import os

GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    print("Warning: GROQ_API_KEY is not set; Groq API calls will fail until it is.")
GROQ_MODEL = "llama3-8b-8192"

# 🌐 Universal Scraper
def scrape_job_page_generic(url):
    """Fetch ``url`` and return the visible text of its ``<body>``.

    Returns ``{"content": text}`` on success, or ``{"error": message}`` when
    the request raises or the server answers with an error status.
    """
    try:
        page = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=15)
        page.raise_for_status()
    except Exception as exc:
        return {"error": f"Failed to fetch page: {exc}"}

    body = BeautifulSoup(page.content, "html.parser").body
    if body:
        text = body.get_text(separator="\n", strip=True)
    else:
        text = "No body text found"
    return {"content": text}

# 🤖 Clean with LLaMA3
def clean_with_llama3(raw_data):
    """Ask the Groq chat-completions API to extract labeled job fields.

    Args:
        raw_data: JSON-serializable scraped content (the dict returned by
            ``scrape_job_page_generic``); it is embedded in the prompt as JSON.

    Returns:
        The model's reply with surrounding whitespace stripped, or a string
        starting with ``"Error during LLM call:"`` if the request fails.
    """
    prompt = f"""
You are a smart job information extractor.

From the below raw text scraped from a job detail page, extract the following fields clearly:
- "Role"
- "Job Description"
- "Qualification"
- "Locations"
- "Additional Information"
- "About"
- "Important Notice"

Output the result in this format (line-by-line):

"Role": ...
"Job Description": ...
"Qualification": ...
"Locations": ...
"Additional Information": ...
"About": ...
"Important Notice": ...

If some data is not available, just write "Not found".

Raw Scraped Content:
{json.dumps(raw_data, indent=2)}
    """

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that extracts job content into labeled fields."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.3
    }

    try:
        # requests waits forever when no timeout is given; bound the call so a
        # stalled connection cannot hang the notebook cell.
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"].strip()
    except Exception as e:
        return f"Error during LLM call: {str(e)}"

# 🚀 Run on URL
def extract_job_details_from_url(url):
    """Scrape ``url`` and return the LLM-extracted job fields.

    When scraping fails, the scraper's error message is returned instead.
    """
    page = scrape_job_page_generic(url)
    return page["error"] if "error" in page else clean_with_llama3(page)

# ✅ Example
print(extract_job_details_from_url("https://www.accenture.com/in-en/careers/jobdetails?id=ATCI-4995350-S1864578_en&title=Software+Development+Lead"))


In [None]:
!pip install pypdf
!pip install groq
!pip install gradio
!pip install fitz
!pip install pypdf2

In [None]:
# === PART 2: PDF/TXT Extractor + Cleaner + Rewriter ===
import os
import re
import unicodedata
#import fitz  # PyMuPDF
from pypdf import PdfReader
from groq import Groq
import zipfile

# 🔐 API Setup
# The key is taken from the GROQ_API_KEY environment variable instead of being
# hardcoded in the notebook. The previously committed key has been exposed and
# must be revoked in the Groq console.
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError("Set the GROQ_API_KEY environment variable before running this cell.")
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# 📄 Extract text from file
def extract_text_from_path(file_path):
    """Return the text content of a ``.pdf`` or ``.txt`` file.

    PDFs are read with PyMuPDF first; if PyMuPDF is missing, fails, or finds
    no text, pypdf is used as a fallback. The extension check is
    case-insensitive, so ``JD.PDF`` and ``notes.TXT`` are accepted.

    Args:
        file_path: Path of the file to read.

    Returns:
        The stripped text, ``"Error: <details>"`` if reading fails, or
        ``"Unsupported file"`` for any other extension.
    """
    lowered = file_path.lower()
    try:
        if lowered.endswith('.pdf'):
            text = ""
            try:
                try:
                    import pymupdf  # current module name of PyMuPDF
                except ImportError:
                    import fitz as pymupdf  # legacy module name of PyMuPDF
                with pymupdf.open(file_path) as doc:
                    text = "".join(page.get_text() for page in doc)
            except Exception:
                # Best effort: any PyMuPDF problem falls through to pypdf.
                text = ""
            if text.strip():
                return text.strip()

            reader = PdfReader(file_path)
            fallback = "\n".join(page.extract_text() or "" for page in reader.pages)
            return fallback.strip()
        elif lowered.endswith('.txt'):
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read().strip()
    except Exception as e:
        return f"Error: {e}"
    return "Unsupported file"

# 🧼 Clean text
def sanitize_text(text):
    """Reduce ``text`` to ASCII and collapse each whitespace run to one space.

    The text is NFKD-decomposed first so accented letters keep their base
    letter; anything still outside ASCII is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    ascii_only = decomposed.encode('ascii', 'ignore').decode('ascii')
    return re.sub(r'\s+', ' ', ascii_only).strip()

# 🤖 Rewrite JD using LLM
def rewrite_jd_with_llm(jd_text):
    """Have the Groq chat model rewrite a job description as markdown.

    Returns the stripped model reply, or ``"LLM Error: <details>"`` if the
    API call raises.
    """
    prompt = f"""
You are a skilled HR content writer.

Your job is to rewrite the JD below:
- Professional & clear
- ATS-friendly
- Structured using markdown (**bold**, *italic*, - bullet points, etc.)

--- JD START ---
{jd_text}
--- JD END ---

Rewrite now:
"""
    chat = [{"role": "user", "content": prompt}]
    try:
        completion = client.chat.completions.create(model=GROQ_MODEL, messages=chat)
        return completion.choices[0].message.content.strip()
    except Exception as exc:
        return f"LLM Error: {exc}"

# 🧾 Handle single file
def handle_single(file):
    """Extract, clean and rewrite one uploaded JD file.

    Args:
        file: Upload from ``gr.File`` (an object exposing its path as
            ``.name``, or a plain path string), or ``None`` when nothing was
            uploaded.

    Returns:
        ``(raw_preview, cleaned_preview, rewritten_preview, output_path)``.
        ``output_path`` is the ``*_rewritten.md`` file written next to the
        upload, or ``None`` when there is nothing to download.
    """
    if file is None:
        return "No file", "No cleaned", "No rewrite", None

    path = getattr(file, "name", file)
    extracted = extract_text_from_path(path)
    if extracted == "Unsupported file" or extracted.startswith("Error: "):
        # Do not send an extraction failure to the LLM as if it were a JD.
        return extracted, "No cleaned", "No rewrite", None

    cleaned = sanitize_text(extracted)
    rewritten = rewrite_jd_with_llm(cleaned)

    # Build the output name from the stem only. Chained str.replace() rewrote
    # ".pdf"/".txt" anywhere in the path and, for names it did not match
    # (e.g. "JD.PDF"), yielded the input path itself, overwriting the upload.
    out_path = os.path.splitext(path)[0] + "_rewritten.md"
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(rewritten)

    return extracted[:1000], cleaned[:1000], rewritten[:1500], out_path

# 📦 Handle multiple files
def handle_multiple(files):
    """Rewrite several uploaded JDs and bundle the results in a ZIP archive.

    Args:
        files: Iterable of uploads (objects exposing their path as ``.name``,
            or plain path strings). ``None`` is treated as "no files".

    Returns:
        ``(results, zip_path)``. ``results`` holds one ``(file_name,
        raw_preview, cleaned_preview, rewritten_preview, rewritten_path)``
        tuple per input; ``zip_path`` is ``"All_Rewritten_JDs.zip"``.
    """
    results = []
    zip_path = "All_Rewritten_JDs.zip"

    # The context manager closes the archive even when a file fails midway,
    # so the handle is never leaked.
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for file in files or []:
            path = getattr(file, "name", file)
            fname = os.path.basename(path)
            raw = extract_text_from_path(path)
            cleaned = sanitize_text(raw)
            rewritten = rewrite_jd_with_llm(cleaned)

            rewritten_path = f"rewritten_{fname}.md"
            with open(rewritten_path, "w", encoding="utf-8") as f:
                f.write(rewritten)

            zipf.write(rewritten_path)
            results.append((fname, raw[:600], cleaned[:600], rewritten[:1000], rewritten_path))

    return results, zip_path


## FINAL CODE

In [None]:
import gradio as gr

# 🎨 Final Custom CSS
custom_css = """

@keyframes dash-light {
  0% {
    border-color: #b6d0e2;
    box-shadow: 0 0 3px #b6d0e2;
  }
  50% {
    border-color: #ffb6c1;
    box-shadow: 0 0 6px #ffb6c1;
  }
  100% {
    border-color: #b6d0e2;
    box-shadow: 0 0 3px #b6d0e2;
  }
}

#title-box {
    border: 2px dashed #b6d0e2;
    border-radius: 4px;
    padding: 10px 15px;
    background-color: #fff7fb;
    text-align: center;
    font-weight: 600;
    font-size: 18px;
    color: #ff1493;  /* 🌸 Pink text */
    margin: 20px auto;
    width: 70%;
    animation: dash-light 6s infinite ease-in-out;
}





/* Optional entire background */
body {
    background-color: #b6d0e2 !important;
}



@keyframes animatedGradient {
    0% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
    100% { background-position: 0% 50%; }
}
body {
    background: linear-gradient(-45deg, #f8e1ff, #fdf4ff, #ffe6f0, #f3f0ff);
    background-size: 400% 400%;
    animation: animatedGradient 15s ease infinite;
}
h1, h2, h3, .prose h2 {
    color: #ff69b4;
    font-weight: bold;
    text-align: center;
    animation: fadeIn 1s ease;
}
/* 🟣 Purple Tab Borders Only */
button[role="tab"] {
    background-color: transparent; /* No background fill */
    color: #22c55e;               /* Optional: Text matches border */
    border: 2px solid #22c55e;    /* Border in purple */
    font-weight: bold;
    border-radius: 10px;
    margin: 5px;
    padding: 6px 12px;            /* Optional: Adds some space inside button */
}
button[role="tab"]:hover {
    transform: scale(1.05);
}
button.process-btn {
    background-color: #3b82f6;
    color: white;
    font-weight: bold;
    border-radius: 10px;
    padding: 10px 20px;
    transition: all 0.3s ease-in-out;
}
button.process-btn:hover {
    background-color: #4ade80;
    box-shadow: 0 0 10px #4ade80;
    transform: scale(1.05);
}
.upload-box {
    border: 2px solid #8a2be2;
    border-radius: 12px;
    padding: 15px;
    background-color: #f9f7ff;
    margin-bottom: 15px;
    transition: box-shadow 0.3s ease;
}
.upload-box:hover {
    box-shadow: 0 0 15px #a78bfa;
}

/* ✅ GREEN LABELS ONLY for Single JD tab */
.single-jd label, .single-jd span {
    color: #22c55e !important;
    font-weight: bold;
}

/* ⛔ Don't touch Multiple JD tab */
textarea, .gr-textbox {
    border: 2px solid #8a2be2 !important;
    border-radius: 12px;
    background-color: #fdfdff;
}
footer, #footer {
    background-color: #ffe6f0;
    color: #ff1493;
    font-weight: bold;
    text-align: center;
    border-radius: 12px;
}

/* 🌈 Animated Gradient for Header Row in Dataframe */
th {
  background: linear-gradient(-45deg, #93c5fd, #bfdbfe, #dbeafe);
  background-size: 600% 600%;
  animation: gradientFlow 8s ease infinite;
  color: #1d4ed8 !important;
  font-weight: bold;
  text-align: center;
}

/* 💙 Blue Border Around Each Row in Dataframe */
tr td {
  border: 2px solid #3b82f6;
  background-color: #f0f9ff !important;
  padding: 8px !important;
  vertical-align: top;
}

/* 🔄 Animation */
@keyframes gradientFlow {
  0% { background-position: 0% 50%; }
  50% { background-position: 100% 50%; }
  100% { background-position: 0% 50%; }
}

"""

# ✅ Final UI Layout
with gr.Blocks(css=custom_css) as app:
    with gr.Column(elem_classes=["app-border"]):
        gr.HTML('<div id="title-box">JD Rewriter + File Upload + LLaMA3 </div>')

        with gr.Tabs():
            # ✅ SINGLE JD SECTION - GREEN
            with gr.Tab("📄 Single JD"):
                with gr.Column(elem_classes=["upload-box", "single-jd"]):
                    file_in = gr.File(label="Upload JD", file_types=[".pdf", ".txt"])
                    go_btn = gr.Button("✨ Process JD", elem_classes=["process-btn"])

                with gr.Column(elem_classes=["single-jd"]):
                    raw = gr.Textbox(label="📝 Raw Extracted", lines=6, elem_id="raw")
                clean = gr.Textbox(label="🧼 Cleaned", lines=6, elem_id="clean")
                final = gr.Textbox(label="📝 Rewritten JD", lines=10, elem_id="final")

                # handle_single returns four values; the fourth is the path of
                # the rewritten file, so a File output must exist to receive it.
                # It was missing, which made the click wiring raise NameError.
                download = gr.File(label="⬇ Download Final JD")

                go_btn.click(handle_single, inputs=file_in, outputs=[raw, clean, final, download])

            # ✅ MULTIPLE JD SECTION - DEFAULT BLACK
            with gr.Tab("📁 Multiple JDs"):
                with gr.Column(elem_classes=["upload-box"]):
                    multi_in = gr.File(label="Upload JDs", file_types=[".pdf", ".txt"], file_count="multiple")
                    multi_btn = gr.Button("🚀 Process All JDs", elem_classes=["process-btn"])

                output_df = gr.Dataframe(headers=["File", "Raw", "Cleaned", "Rewritten", "Download"], wrap=True)
                zip_download = gr.File(label="⬇ Download All in ZIP")

                def run_multi(files):
                    """Adapt handle_multiple's tuples to Dataframe rows plus the ZIP path."""
                    data, zipfile_path = handle_multiple(files)
                    return [[d[0], d[1], d[2], d[3], d[4]] for d in data], zipfile_path

                multi_btn.click(run_multi, inputs=multi_in, outputs=[output_df, zip_download])


# ✅ Launch app
app.launch()


In [None]:
#new

In [None]:
# === PART 2: PDF/TXT Extractor + Cleaner + Rewriter ===
import os
import re
import unicodedata
#import fitz  # PyMuPDF
from pypdf import PdfReader
from groq import Groq
import zipfile

# 🔐 API Setup
# The key is taken from the GROQ_API_KEY environment variable instead of being
# hardcoded in the notebook. The previously committed key has been exposed and
# must be revoked in the Groq console.
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError("Set the GROQ_API_KEY environment variable before running this cell.")
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# 📄 Extract text from file
def extract_text_from_path(file_path):
    """Return the text content of a ``.pdf`` or ``.txt`` file.

    PDFs are read with PyMuPDF first; if PyMuPDF is missing, fails, or finds
    no text, pypdf is used as a fallback. The extension check is
    case-insensitive, so ``JD.PDF`` and ``notes.TXT`` are accepted.

    Args:
        file_path: Path of the file to read.

    Returns:
        The stripped text, ``"Error: <details>"`` if reading fails, or
        ``"Unsupported file"`` for any other extension.
    """
    lowered = file_path.lower()
    try:
        if lowered.endswith('.pdf'):
            text = ""
            try:
                try:
                    import pymupdf  # current module name of PyMuPDF
                except ImportError:
                    import fitz as pymupdf  # legacy module name of PyMuPDF
                with pymupdf.open(file_path) as doc:
                    text = "".join(page.get_text() for page in doc)
            except Exception:
                # Best effort: any PyMuPDF problem falls through to pypdf.
                text = ""
            if text.strip():
                return text.strip()

            reader = PdfReader(file_path)
            fallback = "\n".join(page.extract_text() or "" for page in reader.pages)
            return fallback.strip()
        elif lowered.endswith('.txt'):
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read().strip()
    except Exception as e:
        return f"Error: {e}"
    return "Unsupported file"

# 🧼 Clean text
def sanitize_text(text):
    """Reduce ``text`` to ASCII and collapse each whitespace run to one space.

    The text is NFKD-decomposed first so accented letters keep their base
    letter; anything still outside ASCII is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    ascii_only = decomposed.encode('ascii', 'ignore').decode('ascii')
    return re.sub(r'\s+', ' ', ascii_only).strip()

# 🤖 Rewrite JD using LLM
def rewrite_jd_with_llm(jd_text):
    """Have the Groq chat model rewrite a job description as markdown.

    Returns the stripped model reply, or ``"LLM Error: <details>"`` if the
    API call raises.
    """
    prompt = f"""
You are a skilled HR content writer.

Your job is to rewrite the JD below:
- Professional & clear
- ATS-friendly
- Structured using markdown (**bold**, *italic*, - bullet points, etc.)

--- JD START ---
{jd_text}
--- JD END ---

Rewrite now:
"""
    chat = [{"role": "user", "content": prompt}]
    try:
        completion = client.chat.completions.create(model=GROQ_MODEL, messages=chat)
        return completion.choices[0].message.content.strip()
    except Exception as exc:
        return f"LLM Error: {exc}"

# 🧾 Handle single file
def handle_single(file):
    """Extract, clean and rewrite one uploaded JD file.

    Args:
        file: Upload from ``gr.File`` (an object exposing its path as
            ``.name``, or a plain path string), or ``None`` when nothing was
            uploaded.

    Returns:
        ``(raw_preview, cleaned_preview, rewritten_preview, output_path)``.
        ``output_path`` is the ``*_rewritten.md`` file written next to the
        upload, or ``None`` when there is nothing to download.
    """
    if file is None:
        return "No file", "No cleaned", "No rewrite", None

    path = getattr(file, "name", file)
    extracted = extract_text_from_path(path)
    if extracted == "Unsupported file" or extracted.startswith("Error: "):
        # Do not send an extraction failure to the LLM as if it were a JD.
        return extracted, "No cleaned", "No rewrite", None

    cleaned = sanitize_text(extracted)
    rewritten = rewrite_jd_with_llm(cleaned)

    # Build the output name from the stem only. Chained str.replace() rewrote
    # ".pdf"/".txt" anywhere in the path and, for names it did not match
    # (e.g. "JD.PDF"), yielded the input path itself, overwriting the upload.
    out_path = os.path.splitext(path)[0] + "_rewritten.md"
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(rewritten)

    return extracted[:1000], cleaned[:1000], rewritten[:1500], out_path

# 📦 Handle multiple files
def handle_multiple(files):
    """Rewrite several uploaded JDs and bundle the results in a ZIP archive.

    Args:
        files: Iterable of uploads (objects exposing their path as ``.name``,
            or plain path strings). ``None`` is treated as "no files".

    Returns:
        ``(results, zip_path)``. ``results`` holds one ``(file_name,
        raw_preview, cleaned_preview, rewritten_preview, rewritten_path)``
        tuple per input; ``zip_path`` is ``"All_Rewritten_JDs.zip"``.
    """
    results = []
    zip_path = "All_Rewritten_JDs.zip"

    # The context manager closes the archive even when a file fails midway,
    # so the handle is never leaked.
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for file in files or []:
            path = getattr(file, "name", file)
            fname = os.path.basename(path)
            raw = extract_text_from_path(path)
            cleaned = sanitize_text(raw)
            rewritten = rewrite_jd_with_llm(cleaned)

            rewritten_path = f"rewritten_{fname}.md"
            with open(rewritten_path, "w", encoding="utf-8") as f:
                f.write(rewritten)

            zipf.write(rewritten_path)
            results.append((fname, raw[:600], cleaned[:600], rewritten[:1000], rewritten_path))

    return results, zip_path


In [None]:
import gradio as gr

# 🎨 Final Custom CSS
custom_css = """

@keyframes dash-light {
  0% {
    border-color: #b6d0e2;
    box-shadow: 0 0 3px #b6d0e2;
  }
  50% {
    border-color: #ffb6c1;
    box-shadow: 0 0 6px #ffb6c1;
  }
  100% {
    border-color: #b6d0e2;
    box-shadow: 0 0 3px #b6d0e2;
  }
}

#title-box {
    border: 2px dashed #b6d0e2;
    border-radius: 4px;
    padding: 10px 15px;
    background-color: #fff7fb;
    text-align: center;
    font-weight: 600;
    font-size: 18px;
    color: #ff1493;  /* 🌸 Pink text */
    margin: 20px auto;
    width: 70%;
    animation: dash-light 6s infinite ease-in-out;
}





/* Optional entire background */
body {
    background-color: #b6d0e2 !important;
}



@keyframes animatedGradient {
    0% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
    100% { background-position: 0% 50%; }
}
body {
    background: linear-gradient(-45deg, #f8e1ff, #fdf4ff, #ffe6f0, #f3f0ff);
    background-size: 400% 400%;
    animation: animatedGradient 15s ease infinite;
}
h1, h2, h3, .prose h2 {
    color: #ff69b4;
    font-weight: bold;
    text-align: center;
    animation: fadeIn 1s ease;
}
/* 🟣 Purple Tab Borders Only */
button[role="tab"] {
    background-color: transparent; /* No background fill */
    color: #22c55e;               /* Optional: Text matches border */
    border: 2px solid #22c55e;    /* Border in purple */
    font-weight: bold;
    border-radius: 10px;
    margin: 5px;
    padding: 6px 12px;            /* Optional: Adds some space inside button */
}
button[role="tab"]:hover {
    transform: scale(1.05);
}
button.process-btn {
    background-color: #3b82f6;
    color: white;
    font-weight: bold;
    border-radius: 10px;
    padding: 10px 20px;
    transition: all 0.3s ease-in-out;
}
button.process-btn:hover {
    background-color: #4ade80;
    box-shadow: 0 0 10px #4ade80;
    transform: scale(1.05);
}
.upload-box {
    border: 2px solid #8a2be2;
    border-radius: 12px;
    padding: 15px;
    background-color: #f9f7ff;
    margin-bottom: 15px;
    transition: box-shadow 0.3s ease;
}
.upload-box:hover {
    box-shadow: 0 0 15px #a78bfa;
}

/* ✅ GREEN LABELS ONLY for Single JD tab */
.single-jd label, .single-jd span {
    color: #22c55e !important;
    font-weight: bold;
}

/* ⛔ Don't touch Multiple JD tab */
textarea, .gr-textbox {
    border: 2px solid #8a2be2 !important;
    border-radius: 12px;
    background-color: #fdfdff;
}
footer, #footer {
    background-color: #ffe6f0;
    color: #ff1493;
    font-weight: bold;
    text-align: center;
    border-radius: 12px;
}

/* 🌈 Animated Gradient for Header Row in Dataframe */
th {
  background: linear-gradient(-45deg, #93c5fd, #bfdbfe, #dbeafe);
  background-size: 600% 600%;
  animation: gradientFlow 8s ease infinite;
  color: #1d4ed8 !important;
  font-weight: bold;
  text-align: center;
}

/* 💙 Blue Border Around Each Row in Dataframe */
tr td {
  border: 2px solid #3b82f6;
  background-color: #f0f9ff !important;
  padding: 8px !important;
  vertical-align: top;
}

/* 🔄 Animation */
@keyframes gradientFlow {
  0% { background-position: 0% 50%; }
  50% { background-position: 100% 50%; }
  100% { background-position: 0% 50%; }
}
/* 🔄 Spinner animation on processing */
.loading::after {
  content: "⏳ Processing...";
  color: #8a2be2;
  font-weight: bold;
  margin-left: 10px;
  animation: blink 1s infinite;
}

@keyframes blink {
  0%, 100% { opacity: 1; }
  50% { opacity: 0.4; }
}


"""

# ✅ Final UI Layout
with gr.Blocks(css=custom_css) as app:
    with gr.Column(elem_classes=["app-border"]):
        gr.HTML('<div id="title-box">JD Rewriter + File Upload + LLaMA3 </div>')

        with gr.Tabs():
            # ✅ SINGLE JD SECTION - GREEN
            with gr.Tab("📄 Single JD"):
                with gr.Column(elem_classes=["upload-box", "single-jd"]):
                    file_in = gr.File(label="Upload JD", file_types=[".pdf", ".txt"])
                    go_btn = gr.Button("✨ Process JD", elem_classes=["process-btn"])

                with gr.Column(elem_classes=["single-jd"]):
                    raw = gr.Textbox(label="📝 Raw Extracted", lines=6, elem_id="raw")
                clean = gr.Textbox(label="🧼 Cleaned", lines=6, elem_id="clean")
                final = gr.Textbox(label="📝 Rewritten JD", lines=10, elem_id="final")

                # handle_single returns four values; the fourth is the path of
                # the rewritten file, so a File output must exist to receive it.
                # It was missing, which made the click wiring raise NameError.
                download = gr.File(label="⬇ Download Final JD")

                go_btn.click(handle_single, inputs=file_in, outputs=[raw, clean, final, download])

            # ✅ MULTIPLE JD SECTION - DEFAULT BLACK
            with gr.Tab("📁 Multiple JDs"):
                with gr.Column(elem_classes=["upload-box"]):
                    # The upload input was missing, so `multi_in` was undefined.
                    multi_in = gr.File(label="Upload JDs", file_types=[".pdf", ".txt"], file_count="multiple")
                    multi_btn = gr.Button("Process All JDs", elem_classes=["process-btn"])

                output_df = gr.Dataframe(headers=["File", "Raw", "Cleaned", "Rewritten", "Download"], wrap=True)
                zip_download = gr.File(label="⬇ Download All in ZIP")

                def run_multi(files):
                    """Adapt handle_multiple's tuples to Dataframe rows plus the ZIP path."""
                    data, zipfile_path = handle_multiple(files)
                    return [[d[0], d[1], d[2], d[3], d[4]] for d in data], zipfile_path

                # Must be wired outside run_multi: it previously sat after the
                # function's `return` and was never executed.
                multi_btn.click(run_multi, inputs=multi_in, outputs=[output_df, zip_download])


# ✅ Launch app
app.launch()


Corrected final code for the JD parser

In [None]:
!pip install requests beautifulsoup4 gradio PyMuPDF pypdf groq pypdf2

In [None]:
# === PART 1: URL Scraper + JD Extractor ===

# Load the Groq API key from Colab's secret store when running on Colab;
# elsewhere rely on a GROQ_API_KEY variable already present in the environment.
import os

try:
    from google.colab import userdata  # only importable inside Google Colab
except ImportError:
    userdata = None

if userdata is not None:
    os.environ["GROQ_API_KEY"] = userdata.get("GROQ_API_KEY")   # Secure API key access
GROQ_API_KEY = os.getenv("GROQ_API_KEY")                    #  Load key from environment
GROQ_MODEL = "llama3-8b-8192"                               #  Use LLaMA3 8B model for extraction

# 🌐 Function to scrape any job page URL and extract full body text
def scrape_job_page_generic(url):
    """Fetch ``url`` and return the visible text of its ``<body>``.

    Returns ``{"content": text}`` on success, or ``{"error": message}`` when
    the request raises or the server answers with an error status.
    """
    try:
        # A browser-like User-Agent is sent with the request.
        page = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=15)
        page.raise_for_status()
    except Exception as exc:
        return {"error": f"Failed to fetch page: {exc}"}

    body = BeautifulSoup(page.content, "html.parser").body
    if body:
        text = body.get_text(separator="\n", strip=True)
    else:
        text = "No body text found"
    return {"content": text}

#  Use LLaMA3 to clean and extract structured job fields from raw scraped text
def clean_with_llama3(raw_data):
    """Ask the Groq chat-completions API to extract labeled job fields.

    Args:
        raw_data: JSON-serializable scraped content (the dict returned by
            ``scrape_job_page_generic``); it is embedded in the prompt as JSON.

    Returns:
        The model's reply with surrounding whitespace stripped, or a string
        starting with ``"Error during LLM call:"`` if the request fails.
    """
    prompt = f"""
You are a smart job information extractor.

From the below raw text scraped from a job detail page, extract the following fields clearly:
- "Role"
- "Job Description"
- "Qualification"
- "Locations"
- "Additional Information"
- "About"
- "Important Notice"

Output the result in this format (line-by-line):

"Role": ...
"Job Description": ...
"Qualification": ...
"Locations": ...
"Additional Information": ...
"About": ...
"Important Notice": ...

If some data is not available, just write "Not found".

Raw Scraped Content:
{json.dumps(raw_data, indent=2)}
    """  # 📋 Prompt tells the LLM how to extract clean fields

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",  #  Auth header
        "Content-Type": "application/json"          #  Set content type for JSON API
    }

    payload = {
        "model": GROQ_MODEL,  #  Use selected LLaMA3 model
        "messages": [         # Chat-style messages (system + user prompt)
            {"role": "system", "content": "You are a helpful assistant that extracts job content into labeled fields."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.3    # Low temperature for accurate, stable output
    }

    try:
        # requests waits forever when no timeout is given; bound the call so a
        # stalled connection cannot hang the notebook cell.
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,
        )
        response.raise_for_status()  #  Raise error if API fails
        return response.json()["choices"][0]["message"]["content"].strip()  # Return cleaned output
    except Exception as e:
        return f"Error during LLM call: {str(e)}"  #  Return error string if call fails

# 🔄 Wrapper function to combine scraping and cleaning steps
def extract_job_details_from_url(url):
    """Scrape ``url`` and return the LLM-extracted job fields.

    When scraping fails, the scraper's error message is returned instead.
    """
    page = scrape_job_page_generic(url)
    return page["error"] if "error" in page else clean_with_llama3(page)

# ✅ Example Run
print(extract_job_details_from_url("https://www.accenture.com/in-en/careers/jobdetails?id=ATCI-4995350-S1864578_en&title=Software+Development+Lead"))  # 🧪 Test with a real job URL


In [None]:
# === PART 2: PDF/TXT Extractor + Cleaner + Rewriter ===
# API Setup
# The Groq client is built from the GROQ_API_KEY environment variable, which
# must already be set when this cell runs. Never paste the key itself into the
# notebook, not even in a comment.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

#  Extract text from file
def extract_text_from_path(file_path):
    """Return the text content of a ``.pdf`` or ``.txt`` file.

    PDFs are read with PyMuPDF first; if PyMuPDF is missing, fails, or finds
    no text, pypdf is used as a fallback. The extension check is
    case-insensitive, so ``JD.PDF`` and ``notes.TXT`` are accepted.

    Args:
        file_path: Path of the file to read.

    Returns:
        The stripped text, ``"Error: <details>"`` if reading fails, or
        ``"Unsupported file"`` for any other extension.
    """
    lowered = file_path.lower()
    try:
        if lowered.endswith('.pdf'):
            text = ""
            try:
                # `pymupdf` was referenced without ever being imported, so the
                # NameError was swallowed and returned as if it were file text.
                try:
                    import pymupdf  # current module name of PyMuPDF
                except ImportError:
                    import fitz as pymupdf  # legacy module name of PyMuPDF
                with pymupdf.open(file_path) as doc:
                    text = "".join(page.get_text() for page in doc)
            except Exception:
                # Best effort: any PyMuPDF problem falls through to pypdf.
                text = ""
            if text.strip():
                return text.strip()

            reader = PdfReader(file_path)
            fallback = "\n".join(page.extract_text() or "" for page in reader.pages)
            return fallback.strip()
        elif lowered.endswith('.txt'):
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read().strip()
    except Exception as e:
        return f"Error: {e}"
    return "Unsupported file"

#  Clean text
def sanitize_text(text):
    """Reduce ``text`` to ASCII and collapse each whitespace run to one space.

    The text is NFKD-decomposed first so accented letters keep their base
    letter; anything still outside ASCII is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    ascii_only = decomposed.encode('ascii', 'ignore').decode('ascii')
    return re.sub(r'\s+', ' ', ascii_only).strip()

#  Rewrite JD using LLM
def rewrite_jd_with_llm(jd_text):
    """Have the Groq chat model rewrite a job description as markdown.

    Returns the stripped model reply, or ``"LLM Error: <details>"`` if the
    API call raises.
    """
    prompt = f"""
You are a skilled HR content writer.

Your job is to rewrite the JD below:
- Professional & clear
- ATS-friendly
- Structured using markdown (**bold**, *italic*, - bullet points, etc.)

--- JD START ---
{jd_text}
--- JD END ---

Rewrite now:
"""
    chat = [{"role": "user", "content": prompt}]
    try:
        completion = client.chat.completions.create(model=GROQ_MODEL, messages=chat)
        return completion.choices[0].message.content.strip()
    except Exception as exc:
        return f"LLM Error: {exc}"

# 🧾 Handle single file
def handle_single(file):
    """Extract, clean and rewrite one uploaded JD file.

    Args:
        file: Upload from ``gr.File`` (an object exposing its path as
            ``.name``, or a plain path string), or ``None`` when nothing was
            uploaded.

    Returns:
        ``(raw_preview, cleaned_preview, rewritten_preview, output_path)``.
        ``output_path`` is the ``*_rewritten.md`` file written next to the
        upload, or ``None`` when there is nothing to download.
    """
    if file is None:
        return "No file", "No cleaned", "No rewrite", None

    path = getattr(file, "name", file)
    extracted = extract_text_from_path(path)
    if extracted == "Unsupported file" or extracted.startswith("Error: "):
        # Do not send an extraction failure to the LLM as if it were a JD.
        return extracted, "No cleaned", "No rewrite", None

    cleaned = sanitize_text(extracted)
    rewritten = rewrite_jd_with_llm(cleaned)

    # Build the output name from the stem only. Chained str.replace() rewrote
    # ".pdf"/".txt" anywhere in the path and, for names it did not match
    # (e.g. "JD.PDF"), yielded the input path itself, overwriting the upload.
    out_path = os.path.splitext(path)[0] + "_rewritten.md"
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(rewritten)

    return extracted[:1000], cleaned[:1000], rewritten[:1500], out_path

#  Handle multiple files
def handle_multiple(files):
    """Rewrite several uploaded JDs and bundle the results in a ZIP archive.

    Args:
        files: Iterable of uploads (objects exposing their path as ``.name``,
            or plain path strings). ``None`` is treated as "no files".

    Returns:
        ``(results, zip_path)``. ``results`` holds one ``(file_name,
        raw_preview, cleaned_preview, rewritten_preview, rewritten_path)``
        tuple per input; ``zip_path`` is ``"All_Rewritten_JDs.zip"``.
    """
    results = []
    zip_path = "All_Rewritten_JDs.zip"

    # The context manager closes the archive even when a file fails midway,
    # so the handle is never leaked.
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for file in files or []:
            path = getattr(file, "name", file)
            fname = os.path.basename(path)
            raw = extract_text_from_path(path)
            cleaned = sanitize_text(raw)
            rewritten = rewrite_jd_with_llm(cleaned)

            rewritten_path = f"rewritten_{fname}.md"
            with open(rewritten_path, "w", encoding="utf-8") as f:
                f.write(rewritten)

            zipf.write(rewritten_path)
            results.append((fname, raw[:600], cleaned[:600], rewritten[:1000], rewritten_path))

    return results, zip_path


In [None]:
import gradio as gr

# 🎨 Final Custom CSS
custom_css = """

@keyframes dash-light {
  0% {
    border-color: #b6d0e2;
    box-shadow: 0 0 3px #b6d0e2;
  }
  50% {
    border-color: #ffb6c1;
    box-shadow: 0 0 6px #ffb6c1;
  }
  100% {
    border-color: #b6d0e2;
    box-shadow: 0 0 3px #b6d0e2;
  }
}

#title-box {
    border: 2px dashed #b6d0e2;
    border-radius: 4px;
    padding: 10px 15px;
    background-color: #fff7fb;
    text-align: center;
    font-weight: 600;
    font-size: 18px;
    color: #ff1493;  /* Pink text */
    margin: 20px auto;
    width: 70%;
    animation: dash-light 6s infinite ease-in-out;
}





/* Optional entire background */
body {
    background-color: #b6d0e2 !important;
}



@keyframes animatedGradient {
    0% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
    100% { background-position: 0% 50%; }
}
body {
    background: linear-gradient(-45deg, #f8e1ff, #fdf4ff, #ffe6f0, #f3f0ff);
    background-size: 400% 400%;
    animation: animatedGradient 15s ease infinite;
}
h1, h2, h3, .prose h2 {
    color: #ff69b4;
    font-weight: bold;
    text-align: center;
    animation: fadeIn 1s ease;
}
/* Purple Tab Borders Only */
button[role="tab"] {
    background-color: transparent; /* No background fill */
    color: #22c55e;               /* Optional: Text matches border */
    border: 2px solid #22c55e;    /* Border in purple */
    font-weight: bold;
    border-radius: 10px;
    margin: 5px;
    padding: 6px 12px;            /* Optional: Adds some space inside button */
}
button[role="tab"]:hover {
    transform: scale(1.05);
}
button.process-btn {
    background-color: #3b82f6;
    color: white;
    font-weight: bold;
    border-radius: 10px;
    padding: 10px 20px;
    transition: all 0.3s ease-in-out;
}
button.process-btn:hover {
    background-color: #4ade80;
    box-shadow: 0 0 10px #4ade80;
    transform: scale(1.05);
}
.upload-box {
    border: 2px solid #8a2be2;
    border-radius: 12px;
    padding: 15px;
    background-color: #f9f7ff;
    margin-bottom: 15px;
    transition: box-shadow 0.3s ease;
}
.upload-box:hover {
    box-shadow: 0 0 15px #a78bfa;
}

/* ✅ GREEN LABELS ONLY for Single JD tab */
.single-jd label, .single-jd span {
    color: #22c55e !important;
    font-weight: bold;
}

/* Don't touch Multiple JD tab */
textarea, .gr-textbox {
    border: 2px solid #8a2be2 !important;
    border-radius: 12px;
    background-color: #fdfdff;
}
footer, #footer {
    background-color: #ffe6f0;
    color: #ff1493;
    font-weight: bold;
    text-align: center;
    border-radius: 12px;
}
"""

# ✅ Final UI Layout
# Build the two-tab Gradio UI; components are laid out in creation order.
with gr.Blocks(css=custom_css) as app:
    with gr.Column(elem_classes=["app-border"]):
        gr.HTML('<div id="title-box">JD PARSER FROM PDFS </div>')

        with gr.Tabs():
            ...
            # ✅ SINGLE JD SECTION - GREEN
            with gr.Tab("📄 Single JD"):
                with gr.Column(elem_classes=["upload-box", "single-jd"]):
                    file_in = gr.File(label="Upload JD", file_types=[".pdf", ".txt"])
                    go_btn = gr.Button("✨ Process JD", elem_classes=["process-btn"])

                # Only `raw` is created inside the "single-jd" column; `clean`
                # and `final` are created directly under the tab.
                with gr.Column(elem_classes=["single-jd"]):
                    raw = gr.Textbox(label="📝 Raw Extracted", lines=6, elem_id="raw")
                clean = gr.Textbox(label="🧼 Cleaned", lines=6, elem_id="clean")
                final = gr.Textbox(label="📝 Rewritten JD", lines=10, elem_id="final")


                download = gr.File(label="⬇ Download Final JD")

                # handle_single's four return values map onto these four outputs in order.
                go_btn.click(handle_single, inputs=file_in, outputs=[raw, clean, final, download])

            # ✅ MULTIPLE JD SECTION - DEFAULT BLACK
            with gr.Tab("📁 Multiple JDs"):
                with gr.Column(elem_classes=["upload-box"]):
                    multi_in = gr.File(label="Upload JDs", file_types=[".pdf", ".txt"], file_count="multiple")
                    multi_btn = gr.Button("Process All JDs", elem_classes=["process-btn"])

                output_df = gr.Dataframe(headers=["File", "Raw", "Cleaned", "Rewritten", "Download"], wrap=True)
                zip_download = gr.File(label="⬇ Download All in ZIP")

                def run_multi(files):
                    """Turn handle_multiple's result tuples into Dataframe rows and pass the ZIP path through."""
                    data, zipfile_path = handle_multiple(files)
                    return [[d[0], d[1], d[2], d[3], d[4]] for d in data], zipfile_path

                multi_btn.click(run_multi, inputs=multi_in, outputs=[output_df, zip_download])


# ✅ Launch app
app.launch()


The code above is the corrected, working version.

In [None]:
#new

In [None]:
# ✅ Gradio UI Setup with animation, color tweaks for header, upload, and output DataFrame
import gradio as gr

custom_css = """
#title-box {
  animation: pulse-title 3s infinite;
  font-size: 26px;
  color: #ff1493;
  padding: 10px;
  border: 3px dashed #ff69b4;
  background: #fff0f5;
  border-radius: 12px;
  text-align: center;
  font-weight: bold;
}

@keyframes pulse-title {
  0% { box-shadow: 0 0 5px #ff69b4; }
  50% { box-shadow: 0 0 15px #ff69b4; }
  100% { box-shadow: 0 0 5px #ff69b4; }
}

.upload-box {
  background-color: #ffe6f2 !important;
}

.single-jd label, .single-jd span {
  color: #22c55e !important;
  font-weight: bold;
}

/* 🎨 Dataframe purple border only for multiple JD output */
div[data-testid="dataframe"] table {
  border: 2px solid #8a2be2;
}
div[data-testid="dataframe"] th,
div[data-testid="dataframe"] td {
  border: 1px solid #8a2be2;
  padding: 8px;
  background-color: #f8f0ff;
  color: black;
}
"""

# Build the two-tab Gradio UI; components are laid out in creation order.
with gr.Blocks(css=custom_css) as app:
    with gr.Column():
        gr.HTML('<div id="title-box">✨ JD PARSER FROM PDF ✨</div>')

        with gr.Tabs():
            with gr.Tab("📄 Single JD"):
                with gr.Column(elem_classes=["upload-box", "single-jd"]):
                    file_in = gr.File(label="Upload JD", file_types=[".pdf", ".txt"])
                    go_btn = gr.Button("✨ Process JD")

                # The output widgets are created directly under the tab, outside
                # the "single-jd" column.
                raw = gr.Textbox(label="📝 Raw Extracted", lines=6)
                clean = gr.Textbox(label="🧼 Cleaned", lines=6)
                final = gr.Textbox(label="📝 Rewritten JD", lines=10)
                download = gr.File(label="⬇ Download Final JD")

                # handle_single's four return values map onto these four outputs in order.
                go_btn.click(handle_single, inputs=file_in, outputs=[raw, clean, final, download])

            with gr.Tab("📁 Multiple JDs"):
                with gr.Column(elem_classes=["upload-box"]):
                    multi_in = gr.File(label="Upload JDs", file_types=[".pdf", ".txt"], file_count="multiple")
                    multi_btn = gr.Button("Process All JDs")

                output_df = gr.Dataframe(headers=["File", "Raw", "Cleaned", "Rewritten", "Download"], wrap=True)
                zip_download = gr.File(label="⬇ Download All in ZIP")

                def run_multi(files):
                    """Turn handle_multiple's result tuples into Dataframe rows and pass the ZIP path through."""
                    data, zipfile_path = handle_multiple(files)
                    return [[d[0], d[1], d[2], d[3], d[4]] for d in data], zipfile_path

                multi_btn.click(run_multi, inputs=multi_in, outputs=[output_df, zip_download])

# ✅ Launch the Gradio app
app.launch()

In [None]:
# ✅ SETUP: API Key (from Google Colab secrets when on Colab; otherwise from a
# GROQ_API_KEY variable already present in the environment)
import os

try:
    from google.colab import userdata  # only importable inside Google Colab
except ImportError:
    userdata = None

if userdata is not None:
    os.environ["GROQ_API_KEY"] = userdata.get("GROQ_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_MODEL = "llama3-8b-8192"

# ✅ PART 1: SCRAPE FROM JOB URL
def scrape_job_page_generic(url):
    """Download a job page and return the text found inside its <body>.

    Args:
        url: Address of the page to fetch.

    Returns:
        ``{"content": <text>}`` on success, where the text is the body's text
        nodes joined by newlines (or ``"No body text found"`` when the parsed
        document has no body), or ``{"error": "Failed to fetch page: ..."}``
        when the request raises or returns an error status.
    """
    request_headers = {"User-Agent": "Mozilla/5.0"}
    try:
        page = requests.get(url, headers=request_headers, timeout=15)
        page.raise_for_status()
    except Exception as exc:
        return {"error": f"Failed to fetch page: {str(exc)}"}

    document = BeautifulSoup(page.content, "html.parser")
    if document.body:
        return {"content": document.body.get_text(separator="\n", strip=True)}
    return {"content": "No body text found"}

# ✅ CLEAN SCRAPED TEXT USING GROQ LLaMA3
def clean_with_llama3(raw_data, timeout=60):
    """Ask the Groq chat-completions endpoint to label the fields of a scraped job page.

    Args:
        raw_data: JSON-serialisable scrape result; it is embedded in the prompt
            via ``json.dumps``. ``extract_job_details_from_url`` passes the dict
            returned by ``scrape_job_page_generic``.
        timeout: Seconds to wait on the HTTP request before giving up. Without
            a timeout a stalled connection would block the caller forever.

    Returns:
        The model's reply with surrounding whitespace removed, or a string
        starting with ``"Error during LLM call: "`` if the request, the status
        check, or the response parsing raised.
    """
    prompt = f"""
You are a smart job information extractor.

From the below raw text scraped from a job detail page, extract the following fields clearly:
- "Role"
- "Job Description"
- "Qualification"
- "Locations"
- "Additional Information"
- "About"
- "Important Notice"

Output the result in this format (line-by-line):
"Role": ...
"Job Description": ...
... (and so on)

If some data is not available, just write "Not found".

Raw Scraped Content:
{json.dumps(raw_data, indent=2)}
"""

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that extracts job content into labeled fields."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.3
    }

    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,  # a timeout surfaces as the error string below
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"].strip()
    except Exception as e:
        return f"Error during LLM call: {str(e)}"

# ✅ WRAPPER FUNCTION FOR URL-BASED JD EXTRACTION
def extract_job_details_from_url(url):
    """Scrape ``url`` and return the LLM-labelled job fields.

    Returns the scraper's error message unchanged when the scrape result
    carries an ``"error"`` key; otherwise returns whatever
    ``clean_with_llama3`` produces for the scrape result.
    """
    page = scrape_job_page_generic(url)
    return page["error"] if "error" in page else clean_with_llama3(page)

# ✅ PART 2: PDF/TXT PROCESSING
# Shared Groq SDK client, used by rewrite_jd_with_llm below. It reads the key
# that the setup lines at the top of this cell stored in the environment.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# ✅ Extract text from uploaded file
def extract_text_from_path(file_path):
    """Return the stripped text content of a ``.pdf`` or ``.txt`` file.

    PDFs are read with PyMuPDF first; if that yields only whitespace, the pages
    are re-read with pypdf's ``PdfReader``. Text files are decoded as UTF-8 and
    undecodable bytes are replaced instead of aborting the read, so a file
    saved in a legacy encoding still yields its readable text.

    Args:
        file_path: Path to the file. The extension is matched
            case-insensitively, so ``JD.PDF`` and ``notes.TXT`` are accepted.

    Returns:
        The extracted text, ``"Unsupported file"`` for any other extension, or
        ``"Error: <exception>"`` if anything raised while reading.
    """
    try:
        # Compare on a lower-cased copy; open the file with the original path.
        lowered = file_path.lower()
        if lowered.endswith('.pdf'):
            # NOTE(review): the import cell visible at the top of the notebook
            # binds PyMuPDF as `fitz`; confirm `pymupdf` is imported before
            # this cell runs, otherwise every PDF lands in the except branch.
            with pymupdf.open(file_path) as doc:
                text = "".join(page.get_text() for page in doc)
            if text.strip():
                return text.strip()
            # PyMuPDF found no text: give pypdf a chance before giving up.
            reader = PdfReader(file_path)
            fallback = "\n".join(page.extract_text() or "" for page in reader.pages)
            return fallback.strip()
        elif lowered.endswith('.txt'):
            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                return f.read().strip()
    except Exception as e:
        return f"Error: {e}"
    return "Unsupported file"

# ✅ Basic text cleaning
def sanitize_text(text):
    """Reduce ``text`` to single-spaced ASCII.

    The text is NFKD-normalised, every character that cannot be encoded as
    ASCII is dropped, each run of whitespace becomes one space, and leading
    and trailing whitespace is removed.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    ascii_only = decomposed.encode('ascii', 'ignore').decode('ascii')
    return re.sub(r'\s+', ' ', ascii_only).strip()

# ✅ Rewrite JD using GROQ LLaMA3
def rewrite_jd_with_llm(jd_text):
    """Send ``jd_text`` to the Groq chat model with a rewrite instruction.

    Returns:
        The first choice's message content, stripped, or a string of the form
        ``"LLM Error: <exception>"`` if the call or the response access raised.
    """
    instruction = f"""
You are a skilled HR content writer.

Your job is to rewrite the JD below:
- Professional & clear
- ATS-friendly
- Structured using markdown (**bold**, *italic*, - bullet points, etc.)

--- JD START ---
{jd_text}
--- JD END ---

Rewrite now:
"""
    chat_messages = [{"role": "user", "content": instruction}]
    try:
        completion = client.chat.completions.create(
            model=GROQ_MODEL,
            messages=chat_messages
        )
        first_choice = completion.choices[0]
        return first_choice.message.content.strip()
    except Exception as exc:
        return f"LLM Error: {exc}"

# ✅ For single file upload
def handle_single(file):
    """Extract, clean and rewrite one uploaded JD, saving the rewrite as Markdown.

    Args:
        file: The uploaded file. Either an object whose ``.name`` is the path
            on disk (what the original code read) or a plain path string;
            ``None`` means nothing was provided.

    Returns:
        A 4-tuple ``(raw_preview, cleaned_preview, rewritten_preview, out_path)``.
        The previews are capped at 1000, 1000 and 1500 characters; ``out_path``
        is the written ``<stem>_rewritten.md`` file next to the input. For
        ``None`` input the placeholders ``("No file", "No cleaned",
        "No rewrite", None)`` are returned and nothing is written.
    """
    if file is None:
        return "No file", "No cleaned", "No rewrite", None

    src_path = getattr(file, "name", file)
    extracted = extract_text_from_path(src_path)
    cleaned = sanitize_text(extracted)
    rewritten = rewrite_jd_with_llm(cleaned)

    # Swap only the real extension. Substring replacement would also rewrite
    # ".pdf"/".txt" inside directory names and, when neither matched (e.g.
    # "JD.PDF"), leave the path unchanged so the upload itself got overwritten.
    out_path = os.path.splitext(src_path)[0] + "_rewritten.md"
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(rewritten)

    return extracted[:1000], cleaned[:1000], rewritten[:1500], out_path

# ✅ For multiple file upload
def handle_multiple(files):
    """Extract, clean and rewrite a batch of JDs and bundle the rewrites in a ZIP.

    For every input a ``rewritten_<basename>.md`` file is written to the
    current working directory and added to ``All_Rewritten_JDs.zip``.

    Args:
        files: Iterable of uploads, each either an object whose ``.name`` is
            the path on disk or a plain path string. ``None`` is treated as an
            empty batch (``handle_single`` guards the same no-upload case).

    Returns:
        ``(results, zip_path)`` where ``results`` is a list of tuples
        ``(basename, raw[:600], cleaned[:600], rewritten[:1000], rewritten_path)``
        and ``zip_path`` is ``"All_Rewritten_JDs.zip"``.
    """
    results = []
    archive_name = "All_Rewritten_JDs.zip"

    # The context manager finalises the archive even if one file raises midway,
    # instead of leaving an open handle and a truncated ZIP behind.
    with zipfile.ZipFile(archive_name, "w", zipfile.ZIP_DEFLATED) as zipf:
        for file in files or []:
            src_path = getattr(file, "name", file)
            fname = os.path.basename(src_path)
            raw = extract_text_from_path(src_path)
            cleaned = sanitize_text(raw)
            rewritten = rewrite_jd_with_llm(cleaned)

            rewritten_path = f"rewritten_{fname}.md"
            with open(rewritten_path, "w", encoding="utf-8") as f:
                f.write(rewritten)

            zipf.write(rewritten_path)
            results.append((fname, raw[:600], cleaned[:600], rewritten[:1000], rewritten_path))

    return results, archive_name

# ✅ CUSTOM CSS
# Stylesheet handed to gr.Blocks(css=...). Selectors map onto the UI as follows:
#   #title-box    -> the header <div> emitted via gr.HTML
#   .upload-box   -> the columns wrapping the upload widgets
#   .single-jd    -> label colouring on the Single JD tab
#   .process-btn  -> the two "process" buttons
#   .output-table -> the Multiple JDs dataframe
# Every animation named in this sheet has its @keyframes defined in this sheet.
# There is one #title-box rule; an earlier duplicate, whose every property was
# overridden by this one, was removed.
custom_css = """
/* BACKGROUND */
body {
  background: linear-gradient(-45deg, #f8e1ff, #fdf4ff, #ffe6f0, #f3f0ff);
  background-size: 400% 400%;
  animation: animatedGradient 15s ease infinite;
}

@keyframes animatedGradient {
  0% { background-position: 0% 50%; }
  50% { background-position: 100% 50%; }
  100% { background-position: 0% 50%; }
}

/* UPLOAD BOX - PINK CENTER AREA */
.upload-box {
  border: 2px solid #8a2be2;
  border-radius: 12px;
  padding: 15px;
  background-color: #ffeef8;
  margin-bottom: 15px;
  transition: box-shadow 0.3s ease;
}

/* GREEN LABELS ONLY FOR SINGLE JD */
.single-jd label, .single-jd span {
  color: #22c55e !important;
  font-weight: bold;
}

/* PURPLE COLOR FOR MULTIPLE JD TABLE */
tr th, tr td {
  border: 2px solid #8a2be2 !important;
}

/* Button Styling */
button[role="tab"] {
  background-color: transparent;
  color: #22c55e;
  border: 2px solid #22c55e;
  font-weight: bold;
  border-radius: 10px;
  margin: 5px;
  padding: 6px 12px;
}

button.process-btn {
  background-color: #3b82f6;
  color: white;
  font-weight: bold;
  border-radius: 10px;
  padding: 10px 20px;
  transition: all 0.3s ease-in-out;
}
button.process-btn:hover {
  background-color: #4ade80;
  box-shadow: 0 0 10px #4ade80;
  transform: scale(1.05);
}
/* 🌟 Beautify Header with Animated Glow */
#title-box {
    border: 2px dashed #b6d0e2;
    border-radius: 8px;
    padding: 12px 18px;
    background-color: #fff0f5;
    text-align: center;
    font-weight: 800;
    font-size: 24px;
    color: #9400d3;
    margin: 20px auto;
    width: 75%;
    animation: glow-header 4s ease-in-out infinite;
    box-shadow: 0 0 10px #dda0dd, 0 0 20px #ba55d3;
}

@keyframes glow-header {
    0% { box-shadow: 0 0 5px #dda0dd; }
    50% { box-shadow: 0 0 20px #ba55d3; }
    100% { box-shadow: 0 0 5px #dda0dd; }
}

/* 🎨 Purple border for Multi JD Output Dataframe */
.output-table table,
.output-table th,
.output-table td {
    border: 2px solid #8a2be2 !important;
    border-collapse: collapse;
    padding: 10px;
}

/* Optional: Add alternating background for rows */
.output-table tr:nth-child(even) {
    background-color: #f3e8ff;
}
.output-table tr:nth-child(odd) {
    background-color: #ffffff;
}

"""

# ✅ FINAL UI WITH GRADIO
# Components are laid out in the order they are created inside the nested
# `with` contexts, so statement order matters here. The elem_classes values
# match the class selectors defined in custom_css above.
with gr.Blocks(css=custom_css) as app:
    with gr.Column():
        # The id matches the `#title-box` selector in custom_css.
        gr.HTML('<div id="title-box">💼 JD PARSER FROM PDFS</div>')

        with gr.Tabs():
            # ✅ SINGLE JD: one uploaded file in, three text previews + one file out
            with gr.Tab("📄 Single JD"):
                with gr.Column(elem_classes=["upload-box", "single-jd"]):
                    file_in = gr.File(label="Upload JD", file_types=[".pdf", ".txt"])
                    go_btn = gr.Button("✨ Process JD", elem_classes=["process-btn"])
                with gr.Column(elem_classes=["single-jd"]):
                    raw = gr.Textbox(label="📝 Raw Extracted", lines=6)
                    clean = gr.Textbox(label="🧼 Cleaned", lines=6)
                    final = gr.Textbox(label="📝 Rewritten JD", lines=10)
                    download = gr.File(label="⬇ Download Final JD")
                # handle_single's 4-tuple maps positionally onto these four outputs.
                go_btn.click(handle_single, inputs=file_in, outputs=[raw, clean, final, download])

            # ✅ MULTIPLE JD: many uploaded files in, one table + one ZIP out
            with gr.Tab("📁 Multiple JDs"):
                with gr.Column(elem_classes=["upload-box"]):
                    multi_in = gr.File(label="Upload JDs", file_types=[".pdf", ".txt"], file_count="multiple")
                    multi_btn = gr.Button("Process All JDs", elem_classes=["process-btn"])

                output_df = gr.Dataframe(headers=["File", "Raw", "Cleaned", "Rewritten", "Download"], wrap=True, elem_classes=["output-table"])
                zip_download = gr.File(label="⬇ Download All in ZIP")

                def run_multi(files):
                    """Adapt handle_multiple's result to the (table rows, zip path) outputs.

                    Each result tuple is copied into a five-element list, one
                    entry per Dataframe header; the last entry is the path of
                    the rewritten Markdown file that handle_multiple wrote.
                    """
                    data, zipfile_path = handle_multiple(files)
                    return [[d[0], d[1], d[2], d[3], d[4]] for d in data], zipfile_path

                multi_btn.click(run_multi, inputs=multi_in, outputs=[output_df, zip_download])

# ✅ LAUNCH APP
app.launch()
