# In [None]:
# Import necessary libraries
import os
from datetime import datetime

import gradio as gr
import pandas as pd
import PyPDF2  # For reading .pdf files
from docx import Document  # For reading .docx files
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint shared by the tokenizer and the causal language model
MODEL_NAME = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Fall back to the end-of-sequence token when the tokenizer defines no padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Longest token sequence the tokenizer reports for this model (1024 for GPT-2)
MAX_TOKENS = tokenizer.model_max_length

# Function to safely tokenize and truncate long input
def safe_tokenize(text, max_length=MAX_TOKENS):
    """Tokenize ``text`` for the language model, truncating to ``max_length`` tokens.

    Args:
        text: The string (or list of strings) to encode.
        max_length: Upper bound on the number of tokens kept per sequence;
            longer input is truncated. Defaults to ``MAX_TOKENS``.

    Returns:
        The tokenizer output with ``input_ids`` and ``attention_mask`` as
        PyTorch tensors.
    """
    # A single string needs no padding. Padding it to ``max_length`` would fill
    # the whole context window with pad tokens, leaving no room for generated
    # tokens and making the model continue from padding instead of the prompt.
    # A batch is padded only up to its longest member so it still forms a tensor.
    padding = False if isinstance(text, str) else "longest"
    return tokenizer(
        text,
        truncation=True,
        max_length=max_length,
        padding=padding,
        return_tensors="pt",
    )

# CSV file (path relative to the working directory) that stores the job
# application history; save_application_data() writes to it and view_history()
# reads it back.
history_file = "job_applications.csv"

# Function to extract text from uploaded resume (supports .docx and .pdf)
def extract_resume_text(file):
    """Return the plain text of an uploaded resume.

    Args:
        file: The uploaded file, given either as an object exposing its
            on-disk path as ``.name`` or as the path string itself.
            ``None`` means nothing was uploaded.

    Returns:
        The extracted text, or a human-readable message when no file was
        provided or the extension is neither ``.docx`` nor ``.pdf``.
    """
    if file is None:
        return "No file uploaded. Please upload a .docx or .pdf file."

    # Hand both readers the path so the two branches behave the same no matter
    # which kind of object the caller passes in.
    path = str(getattr(file, "name", file))
    # Compare case-insensitively so names like "Resume.PDF" are accepted.
    lowered = path.lower()

    if lowered.endswith(".docx"):
        doc = Document(path)
        return "\n".join(para.text for para in doc.paragraphs)

    if lowered.endswith(".pdf"):
        pdf_reader = PyPDF2.PdfReader(path)
        # A page with no extractable text contributes just its line break.
        return "".join(
            (page.extract_text() or "") + "\n" for page in pdf_reader.pages
        )

    return "Unsupported file format. Please upload a .docx or .pdf file."

# Function to analyze resume against job description
def analyze_resume(resume, job_desc, max_new_tokens=100):
    """Generate a free-text analysis of ``resume`` against ``job_desc``.

    Args:
        resume: Resume text.
        job_desc: Job description text.
        max_new_tokens: Number of tokens the model may generate (default 100).

    Returns:
        The text generated by the model (without the prompt), or a string
        starting with ``"Error:"`` when the prompt is empty or generation fails.
    """
    prompt = f"Analyze the following resume against the given job description.\n\nResume:\n{resume}\n\nJob Description:\n{job_desc}"

    # Prompt and generated tokens share one context window, so reserve room for
    # the continuation; otherwise a long prompt makes generation run past the
    # model's maximum sequence length.
    prompt_budget = max(1, MAX_TOKENS - max_new_tokens)
    tokens = safe_tokenize(prompt, max_length=prompt_budget)

    # Keep only real tokens. If the tokenizer padded the prompt, generating from
    # it would continue from pad tokens rather than from the prompt text.
    real_positions = tokens.attention_mask[0].bool()
    input_ids = tokens.input_ids[:, real_positions]
    attention_mask = tokens.attention_mask[:, real_positions]

    # Check if the tokenized input has any content
    prompt_length = input_ids.shape[1]
    if prompt_length == 0:
        return "Error: The tokenized input is empty. Please check the input content."

    # Generate analysis using the model
    try:
        response = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
        )

        # Ensure that the response has valid content
        if response is None or len(response) == 0:
            return "Error: The model failed to generate a response."

        # The generated sequence starts with the prompt itself; decode only the
        # continuation so the result is the analysis, not an echo of the resume.
        generated_ids = response[0][prompt_length:]
        return tokenizer.decode(generated_ids, skip_special_tokens=True)
    except IndexError:
        return "Error: The model's response is out of range."
    except Exception as e:
        return f"Error: {str(e)}"

# Function to save job application data to CSV
def save_application_data(job_title, job_desc, rating, suggestions):
    """Append one job application record to the history CSV.

    Args:
        job_title: Title of the job applied for.
        job_desc: Job description text.
        rating: Rating assigned to the resume.
        suggestions: Improvement suggestions text.

    The record is timestamped with the current local time. The file is created
    if it does not exist.
    """
    record = {
        "Job Title": job_title,
        "Job Description": job_desc,
        "Rating": rating,
        "Suggestions": suggestions,
        "Date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    df = pd.DataFrame([record])

    # Write the header row only when starting a fresh file (missing or empty);
    # append mode creates the file when it is absent.
    needs_header = (
        not os.path.isfile(history_file) or os.path.getsize(history_file) == 0
    )
    df.to_csv(history_file, mode="a", header=needs_header, index=False)

# Function to view job application history
def view_history():
    """Return the saved job application history as a DataFrame.

    Returns:
        The contents of the history CSV, or an empty DataFrame with the
        expected columns when the file does not exist or contains no data.
    """
    try:
        return pd.read_csv(history_file)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Nothing saved yet: show an empty table with the usual columns.
        return pd.DataFrame(
            columns=["Job Title", "Job Description", "Rating", "Suggestions", "Date"]
        )

# Define Gradio interface for the application
def main(resume_file, job_desc, job_title):
    """Handle a click on the analyze button.

    Extracts the resume text, runs the analysis, records the application in
    the history file and returns the texts for the two output boxes.

    Args:
        resume_file: The uploaded resume, or ``None`` when nothing was uploaded.
        job_desc: Job description text.
        job_title: Job title text.

    Returns:
        A ``(analysis, suggestions)`` tuple of strings. When the resume cannot
        be read, the first element is the explanatory message, the second is
        empty, and nothing is saved.
    """
    # Clicking the button without an upload passes None; report it instead of
    # failing on the missing file.
    if resume_file is None:
        return "Please upload a resume (.docx or .pdf) before running the analysis.", ""

    resume = extract_resume_text(resume_file)

    # extract_resume_text signals an unsupported file by returning this message;
    # surface it rather than analysing and saving it as though it were a resume.
    if resume.startswith("Unsupported file format"):
        return resume, ""

    analysis = analyze_resume(resume, job_desc)

    # Save data (placeholder for actual rating and suggestions in the analysis response)
    rating = 80  # Placeholder for rating
    suggestions = "Improve formatting and highlight relevant experience."  # Placeholder for suggestions

    save_application_data(job_title, job_desc, rating, suggestions)

    return f"Resume Analysis:\n{analysis}", suggestions

# Gradio UI: component layout followed by event wiring.
# Components are created in the order they should appear, one row per group.
with gr.Blocks() as app:
    # Page title
    gr.Markdown("AI Resume Analysis Assistant")
    
    # Inputs: resume upload, job description and job title
    with gr.Row():
        resume_file = gr.File(label="Upload your Resume (.docx or .pdf)")
        job_desc = gr.Textbox(label="Paste the Job Description", lines=10)
        job_title = gr.Textbox(label="Job Title")
        
    # Button that triggers the analysis
    with gr.Row():
        submit = gr.Button("Analyze Resume")
    
    # Read-only boxes that receive the values returned by main()
    with gr.Row():
        analysis_output = gr.Textbox(label="Analysis Result", interactive=False)
        suggestions_output = gr.Textbox(label="Suggestions", interactive=False)
    
    # Saved application history, loaded on demand
    with gr.Row():
        history_button = gr.Button("View Application History")
        history_output = gr.Dataframe(label="Application History", interactive=False)
    
    # main() returns two values (analysis text, suggestions), listed here in
    # the same order as the two output boxes.
    submit.click(
        fn=main,
        inputs=[resume_file, job_desc, job_title],
        outputs=[analysis_output, suggestions_output]
    )
    # view_history() takes no inputs; its DataFrame fills the history table.
    history_button.click(fn=view_history, outputs=[history_output])

# Launch the app. share=False means no public link is created (local URL only).
# NOTE(review): pwa=True presumably requests progressive-web-app mode — confirm
# the installed Gradio version accepts this argument.
app.launch(pwa=True, share=False)


# Output:
# * Running on local URL:  http://127.0.0.1:7872
#
# To create a public link, set `share=True` in `launch()`.


