# In [None]:
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Page-wide configuration: browser tab title and icon, wide layout, sidebar open
st.set_page_config(
    page_title="AI Resume Screening",
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom stylesheet: lavender app background, black headings, bold labels on
# the text area and file uploader, and a white rounded "card" rule for the
# main block container
_CUSTOM_CSS = """
    <style>
    .stApp {background-color: #e6e6fa;}
    h1 {color: #000000;}
    h2 {color: #000000;}
    .stTextArea > label {color: #000000; font-weight: bold; font-size: 16px;}
    .stFileUploader > label {color: #000000; font-weight: bold; font-size: 16px;}
    .reportview-container .main .block-container {background-color: #ffffff; border-radius: 10px; padding: 20px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);}
    </style>
    """
# unsafe_allow_html is needed so the <style> tag is passed through as HTML
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Function to pull text out of PDFs
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF as a single string.

    Args:
        file: The PDF source handed to ``PdfReader`` (a path or a binary
            file-like object such as an uploaded file).

    Returns:
        The extracted text of all pages, in page order, separated by
        newlines. A page that yields no text contributes an empty string.
    """
    pdf = PdfReader(file)
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next (which would create bogus tokens downstream).
    # ``or ""`` keeps the join safe if a page's extract_text() yields None.
    return "\n".join(page.extract_text() or "" for page in pdf.pages)

# This ranks resumes based on how close they match the job description
def rank_resumes(job_description, resumes):
    """Score each resume by TF-IDF cosine similarity to the job description.

    Args:
        job_description: Text of the job posting.
        resumes: Iterable of resume texts, one string per resume.

    Returns:
        A 1-D NumPy array holding one similarity score per resume, in the
        same order as ``resumes``. The array is empty when ``resumes`` is
        empty.

    Raises:
        ValueError: May propagate from ``TfidfVectorizer`` when it cannot
            build a vocabulary from the given texts.
    """
    # Imported locally; numpy is not among this file's top-level imports.
    import numpy as np

    resumes = list(resumes)
    if not resumes:
        # With nothing to compare against, the similarity call would be
        # handed a zero-row matrix; answer directly instead.
        return np.zeros(0)

    # Fit on the job description and the resumes together so that all
    # documents share a single vocabulary.
    tfidf = TfidfVectorizer().fit_transform([job_description, *resumes])

    # Row 0 is the job description, the remaining rows are the resumes.
    # Slicing keeps the matrix sparse rather than densifying the corpus.
    return cosine_similarity(tfidf[:1], tfidf[1:]).flatten()

# Page body: collect the job description and the resumes to compare against it
st.title("AI Resume Screening & Candidate Ranking System")

st.header("Job Description")
job_description = st.text_area("Enter the job description")  # free-text job posting

st.header("Upload Resumes")
uploaded_files = st.file_uploader(
    "Upload PDF files",
    type=["pdf"],
    accept_multiple_files=True,
)  # one or more PDF resumes

# Rank only once both the resumes and the job description have been provided
if uploaded_files and job_description:
    st.header("Ranking Resumes")

    # Extract each upload's text, then score it against the job description
    resumes = list(map(extract_text_from_pdf, uploaded_files))
    scores = rank_resumes(job_description, resumes)

    # One row per uploaded file, ordered from best match to worst
    results = pd.DataFrame(
        {
            "Resume": [upload.name for upload in uploaded_files],
            "Score": scores,
        }
    ).sort_values(by="Score", ascending=False)

    st.write("### Results")
    st.dataframe(results)  # render the ranked table