# In [None]:
import torch
from transformers import DistilBertTokenizer, DistilBertModel
from sklearn.metrics.pairwise import cosine_similarity
import os
import PyPDF2

# Tokenizer and encoder are loaded from the same pre-trained DistilBERT
# checkpoint, so the checkpoint name is spelled out once and shared.
checkpoint_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(checkpoint_name)
model = DistilBertModel.from_pretrained(checkpoint_name)

# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at *pdf_path*.

    Reading is best-effort: any error (missing file, corrupt PDF, ...) is
    reported on stdout and whatever text was collected before the failure
    is returned, which is the empty string if nothing could be read.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined in page order, without separators.
    """
    page_texts = []
    try:
        with open(pdf_path, "rb") as pdf_file:
            if hasattr(PyPDF2, "PdfReader"):
                # Current API. The legacy camelCase names used below were
                # removed in PyPDF2 3.0 and raise an error there, which the
                # broad except would otherwise turn into an empty result.
                for page in PyPDF2.PdfReader(pdf_file).pages:
                    page_texts.append(page.extract_text() or "")
            else:
                # Fallback for old PyPDF2 releases that only ship the
                # legacy names.
                pdf_reader = PyPDF2.PdfFileReader(pdf_file)
                for page_num in range(pdf_reader.numPages):
                    page = pdf_reader.getPage(page_num)
                    page_texts.append(page.extractText() or "")
    except Exception as e:
        # Deliberately broad: one unreadable CV must not abort the whole run.
        print(f"Error reading PDF: {e}")
    # Pages are appended one by one (not via a comprehension) so that text
    # gathered before a mid-document failure is still returned.
    return "".join(page_texts)

# Function to tokenize and obtain embeddings for text
def get_embeddings(text):
    """Embed *text* by mean-pooling DistilBERT's final hidden states.

    The input is tokenized with truncation enabled, so text beyond the
    model's maximum input length is dropped before encoding.

    Args:
        text: A string, or a list of strings to embed as one padded batch.

    Returns:
        A tensor with one pooled embedding row per input text.
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    token_embeddings = outputs.last_hidden_state

    # Weight each token by the attention mask so padding positions (present
    # when a batch of unequal-length texts is passed) do not dilute the mean.
    # For a single text the mask is all ones and this is a plain mean.
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    token_counts = mask.sum(dim=1).clamp(min=1)  # guard against division by zero
    return summed / token_counts

# Folder containing CVs in PDF format
cv_folder = "data"

# List of job descriptions (pre-cleaned: lower-case, punctuation stripped).
# Every entry must end with a comma: adjacent string literals without one are
# silently concatenated by Python into a single description.
job_descriptions = [
    # Split across two adjacent literals only to keep the physical lines
    # manageable; Python joins them into one string.
    (
        "minimum qualifications bachelors degree or equivalent practical experience years of experience in saas or productivity tools businessexperience managing enterprise accounts with sales cycles preferred qualifications years of experience building strategic business partnerships with enterprise customersability to work through and with a reseller ecosystem to scale the businessability to plan pitch and execute a territory business strategyability to build relationships and to deliver results in a crossfunctionalmatrixed environmentability to identify crosspromoting and uppromoting opportunities within the existing account baseexcellent account management writtenverbal communication strategic and analyticalthinking skills about the job as a member of the google cloud team you inspire leading companies schools and government agencies to work smarter with google tools like google workspace search and chrome you advocate the innovative power of our products to make organizations more productive collaborative and mobile your guiding light is doing whats right for the customer you will meet customers exactly where they are at and provide them the best solutions for innovation using your passion for google products you help spread the magic of google to organizations around the world the google workspace team helps customers transform and evolve their business through the use of googles productivity collaboration and content management suite of applications as part of an entrepreneurial team in this growing business you will help shape the future of businesses use technology to connect with customers employees and partners as a google workspace sales specialist you will be responsible for maintenance and expansion of google workspace business growth across the region with customers in this role youll create and execute the strategy and provide unique insights on applying google workspace solutions to enterprisesyou will build an excellent pipeline and work with the "
        "account teams to build out the customer solution and establish partnerships you will strategize with partners to increase account and territory business growth you will work directly with customers coordinate internal resources and construct successful strategies at account and territory level google cloud accelerates organizations ability to digitally transform their business with the best infrastructure platform industry solutions and expertise we deliver enterprisegrade solutions that leverage googles cuttingedge technology all on the cleanest cloud in the industry customers in more than countries and territories turn to google cloud as their trusted partner to enable growth and solve their most critical business problems"
    ),
    "about the company large public research university in ohio industry higher education type educational institution founded employees categories education big buckeyes college healthcare hospitals clinics osu ohio ohio state ohio state university the scarlet and gray the ohio state university university higher education universities specialties teaching research service international student experience and faculty excellence about the role travel percent less than functions ceopresident medical carehospital administration",
    "the cocacola company fulltime as a warehouse lead with cocacola you will lead and direct the daily activities of warehouse teammates to ensure the safe and successful completion of all related daily warehouse activities and operations verify and ensure all applicable warehouse processes are followed train and lead warehouse personnel evaluate processes striving for continuous improvements lead and model the safety culture and strive for compliance support lead and motivate a team to deliver resultshiring fast",
    "monday through friday no weekends full time hour week example am to pm or am to pm alternating shifts part time monday through friday hours vary based on applicant need locally owned financial institution family atmosphere no sale quotas no pressure excellent working conditions and benefits for full time employees experience not necessary will train job types fulltime parttime pay per hour benefits k k matching dental insurance health insurance life insurance vision insurance physical setting office schedule hour shift work location one location",
    "design develop and test high quality software features by understanding user needs and implementing well designed code follow and implement the code with standards and best practices good communication sense of urgency a good team player skills experience a bachelors degree in computer science engineering information technology or equivalent experience years professional software development experience with php on wordpress in an enterprise environment development experience in wordpress wp plugins php htmlcss java script mysql linux git strong analytical and debugging skills testing and familiarity with common debugging tools",
    # Add more job descriptions here

]

# Maps each CV file name to the embedding of its extracted text
cv_embeddings_dict = {}

# CV embedding processing.
# sorted() gives a deterministic processing order; os.listdir() returns
# entries in arbitrary order.
for filename in sorted(os.listdir(cv_folder)):
    # Case-insensitive match so files named e.g. "resume.PDF" are not ignored
    if not filename.lower().endswith(".pdf"):
        continue
    cv_path = os.path.join(cv_folder, filename)
    cv_text = extract_text_from_pdf(cv_path)
    if not cv_text.strip():
        # Extraction failed or the PDF has no text layer: an embedding of
        # empty text carries no information about the candidate, so the CV
        # is left out of the ranking rather than scored on noise.
        print(f"Skipping {filename}: no text could be extracted")
        continue
    cv_embeddings_dict[filename] = get_embeddings(cv_text)

# Score every cached CV embedding against each job description and report
# the best matches.
for job_description in job_descriptions:
    job_description_embeddings = get_embeddings(job_description)

    # Cosine similarity between this posting and each CV, keyed by file name
    similarity_scores = {
        name: cosine_similarity(job_description_embeddings, vector)[0][0]
        for name, vector in cv_embeddings_dict.items()
    }

    # Highest similarity first
    ranked_cvs = sorted(
        similarity_scores.items(),
        key=lambda entry: entry[1],
        reverse=True,
    )

    # Show the posting followed by its five best-matching CVs
    print("Job Description:")
    print(job_description)
    print("\nTop 5 Matching CVs:")
    for name, score in ranked_cvs[:5]:
        print(f"- CV: {name}, Similarity Score: {score}")
    print("\n")

