In [7]:
from sentence_transformers import SentenceTransformer, util

# Loaded SentenceTransformer instances keyed by model name, so repeated scoring
# calls reuse the already-loaded model instead of re-loading it on every call.
_SENTENCE_MODEL_CACHE = {}


def calculateATSscore_with_bert(resume_data, job_description, model_name='paraphrase-MiniLM-L6-v2'):
    """
    Score how closely a resume matches a job description using sentence embeddings.

    Both texts are encoded with a pre-trained SentenceTransformer model and the
    cosine similarity of the two embeddings is returned.

    Args:
        resume_data: Resume text to encode.
            NOTE(review): presumably a single string; ``.item()`` below only
            works when the similarity result holds exactly one value.
        job_description: Job description text to encode (same assumption).
        model_name: Name of the SentenceTransformer model to load. Defaults to
            the model the notebook originally hard-coded.

    Returns:
        The cosine similarity as a plain Python number.
    """
    model = _SENTENCE_MODEL_CACHE.get(model_name)
    if model is None:
        # Constructing the model is the expensive step; do it once per name.
        model = SentenceTransformer(model_name)  # Pre-trained BERT model
        _SENTENCE_MODEL_CACHE[model_name] = model

    resume_embedding = model.encode(resume_data)
    job_desc_embedding = model.encode(job_description)
    similarity_value = util.cos_sim(resume_embedding, job_desc_embedding)
    return similarity_value.item()




In [31]:
import csv
import html  # For escaping HTML characters
import io
import json
import os

import requests
from bs4 import BeautifulSoup
from openai import OpenAI, OpenAIError

# Initialize the OpenAI-compatible client pointed at NVIDIA's hosted endpoint
# (used below with the Llama 3.1 Nemotron 70B model).
# The API key is read from the NVIDIA_API_KEY environment variable so the secret
# is never stored in the notebook; a missing variable fails fast with a KeyError.
# NOTE(review): a key was previously committed in this cell and should be revoked.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ["NVIDIA_API_KEY"],
)

def clean_text_output(text):
    """
    Return ``text`` as plain text with HTML entities decoded and tags removed.

    Entities are unescaped first, the result is parsed with BeautifulSoup's
    ``html.parser``, the text pieces are joined with newlines, and surrounding
    whitespace is trimmed.
    """
    decoded = html.unescape(text)
    plain = BeautifulSoup(decoded, 'html.parser').get_text(separator="\n")
    return plain.strip()

def modelFeedback(ats_score, resume_data, job_description):
    """
    Ask the hosted LLM for written resume feedback based on an ATS score.

    Args:
        ats_score: Match score as a fraction; it is multiplied by 100 and
            rounded to the nearest whole percent before being placed in the prompt.
        resume_data: Resume text interpolated into the prompt.
        job_description: Job description text interpolated into the prompt.

    Returns:
        The model's reply after ``clean_text_output``, or the string
        "Error: Unable to generate feedback." when the API call fails.
    """
    # Round instead of truncating: 0.29 * 100 evaluates to 28.999999999999996,
    # which a bare int() would report as 28%.
    score_percent = int(round(ats_score * 100))

    input_prompt = f"""
    You are now an ATS Score analyzer and given ATS Score is {score_percent}%. 
    Your task is to provide feedback to the user based on the ATS score.
    Print ATS score first. Mention where the resume is good and where the resume lacks. 
    Show list of missing skills and suggest improvements. 
    Show list of weak action verbs and suggest improvements.
    Show weaker sentences and suggest improvements.
    Talk about each section of the user's resume and discuss good and bad points of it only if it has any. 
    Resume Data: {resume_data}
    Job Description: {job_description}
    """

    try:
        # Generate response using the OpenAI API
        response = client.chat.completions.create(
            model="nvidia/llama-3.1-nemotron-70b-instruct",  # Using Llama 3.1 70b
            messages=[
                {"role": "user", "content": input_prompt}
            ],
            temperature=0.03,  # Lowering temperature for precise output
            top_p=0.7,  # Prioritize high-probability tokens
            max_tokens=700,  # Allow longer content
        )
    except (OpenAIError, requests.exceptions.RequestException) as e:
        # The OpenAI SDK raises OpenAIError subclasses rather than requests
        # exceptions; the requests clause is kept for backward compatibility.
        print(f"API request failed: {str(e)}")
        return "Error: Unable to generate feedback."

    # message.content may be None, so fall back to an empty string before stripping.
    feedback_text = (response.choices[0].message.content or "").strip()
    return clean_text_output(feedback_text)

In [32]:
# Request feedback for the sample inputs and show the returned text as the cell output.
# NOTE(review): ats_score, resume_data and job_description are assigned in cells that
# appear further down this notebook; on a fresh kernel those cells must run first.
a =  modelFeedback(ats_score, resume_data, job_description)
a

'**ATS Score: 50%**\n\n**Overall Feedback:**\nYour resume has shown strengths in showcasing technical skills and experience in data science, particularly in machine learning and data visualization. However, to improve your ATS score and increase the chances of passing through the applicant tracking system, focus on enhancing your resume\'s alignment with the job description, highlighting soft skills, and refining the structure for better readability.\n\n**Where the Resume is Good:**\n\n1. **Technical Skills:** Your resume effectively lists a wide range of relevant technical skills, including programming languages, database management, NLP, machine learning, data visualization, and project management tools.\n2. **Specific Achievements:** You\'ve quantified achievements in your professional experience, such as "reduced load by 10%," "85% accuracy," and "saving 10 hrs on manual extractions," which demonstrates impact.\n\n**Where the Resume Lacks:**\n\n1. **Alignment with Job Description:*

In [23]:
# Hard-coded sample score; modelFeedback multiplies it by 100 and reports it as a percentage.
ats_score = 0.5

In [None]:
# Sample resume text passed to modelFeedback as `resume_data`.
# NOTE(review): this reads like a real person's resume; confirm it is safe to share
# before committing or publishing the notebook.
resume_data = """ PROFESSIONAL EXPERIENCE
International Air Transport Association (IATA) Canada
Data Scientist (Global Aviation Data Management Intern) May 2024 - Present
- Orchestrated high-performance data pipelines with Snowflake, Talend, and Python, unifying data from 300+ airlines and GSPs.
- Crafted data transformation and quality assurance scripts that optimized Talend workflows and reduced load by 10%.
- Pioneered an NLP-based de-identification script with 85% accuracy, integrating AI model for data submission to Snowflake.
- Streamlined client analysis, integrating 5 data sources into Power BI dashboards via Python scripting, reducing manual workload.
- Executed data analysis for clients by extracting SQL data (up to 500k) and delivering insights through Power BI dashboards.
- Collaborated with the R&D team to build a Streamlit-based data app, saving 10 hrs on manual extractions and real-time analysis delivery.
- Architected an LSTM model for incident classification by aligning descriptors with company's standards, achieving 90% accuracy.
- Extended contract and became focal point for 14 airlines and GSPs, managing client relationships.
AENIX Remote
Data Analyst (Industrial Automation Analyst) January 2021 - November 2022
- Refined an intelligent BI system to categorize customers into 4 groups based on product usage, enhancing marketing efforts.
- Optimized machine learning models for component compatibility testing, achieving 90% accuracy across 1,000+ components.
- Produced weekly reports for non-technical clients, collaborating with engineers and managers to improve customer satisfaction by 12%.
- Automated the weekly KPIs and KPVs with Power BI dashboards, eliminating the manual reporting process and saving 3 days per week.
- Recognized with Esto Excellence Award for contributions, resulting in full-time promotion within 6 months.
EDUCATION
Lambton College, Canada January 2023 - September 2024
Certification, Artificial Intelligence and Machine Learning
Chandigarh Engineering College, Chandigarh June 2017 - September 2021
Bachelor's, Computer Engineering
SKILLS
Programming Languages: Python (Pandas, NumPy), SQL (MySQL, PostgresSQL)
Database Management: Data Pipelines, Talend, MS SQL Server, Snowflake
Natural Language Processing: Gensim, NLTK, Spacy, Text Mining, Sentiment Analysis, Topic Modelling
Machine Learning and Deep Learning: Keras, TensorFlow, Sklearn (Supervised, Unsupervised)
Data Visualization: Power BI, Tableau, Matplotlib, Plotly, Seaborn
Project Management: JIRA, Salesforce
Cloud and Version Control: Git, Google Cloud, Bitbucket, S3 Bucket
CERTIFICATIONS
IBM Data Analyst Professional Certificate: Data Science, Excel, IBM Cloud, SQL, Python, Statistics, DAX
IBM AI Engineer Professional Certificate: Machine Learning, Deep Learning, Natural Language Processing, Computer Vision
PUBLICATIONS
Bains, K. S., et al. (2023). Alzheimer's diagnostic with OASIS, STEM Fellowship Journal, p. 5, para. 2.
PROJECTS
Toronto Transit Performance Analysis - Link to project
LSTM, Prophet, Data Visualization and Tableau
- Developed forecasting models and 15 visualizations to communicate bus delay patterns, trends, and insights in a comprehensive analysis
report with predictions for next year's delays.
Credit Score Estimator - Link to project
Machine Learning, Deep Learning, App Deployment and Streamlit
- Deployed an interactive Streamlit app for real-time credit score analysis, achieving approximately 85% accuracy with embedded insights
on the top 10 weighted features."""

In [24]:
# Sample job posting text passed to modelFeedback as `job_description`.
job_description = """Job Title: Data Scientist
Company: XYZ Corporation
Location: Mountain View, CA (for illustrative purposes only)

Job Overview:

XYZ Corporation is seeking a highly skilled and motivated Data Scientist to join our dynamic team. As a Data Scientist, you will play a key role in extracting valuable insights from our vast datasets to drive informed decision-making and contribute to the company's success. This role involves leveraging advanced analytics and machine learning techniques to solve complex business problems.

Responsibilities:

Develop and implement machine learning models for predictive and prescriptive analytics.
Analyze large datasets to identify trends, patterns, and correlations that can inform business strategies.
Collaborate with cross-functional teams to understand business requirements and provide data-driven solutions.
Design and implement experiments to validate hypotheses and improve model performance.
Stay abreast of the latest developments in data science, machine learning, and industry best practices.
Present findings and insights to both technical and non-technical stakeholders in a clear and concise manner.
Ensure the quality, reliability, and integrity of data used for analysis.
Qualifications:

Master's or Ph.D. in a quantitative field such as Computer Science, Statistics, or related discipline.
Proven experience as a Data Scientist with a strong track record of successfully applying machine learning techniques to real-world problems.
Proficiency in programming languages such as Python or R.
Solid understanding of statistical concepts and techniques.
Strong analytical and problem-solving skills.
Excellent communication and collaboration skills.
Preferred Skills:

Experience with big data technologies such as Hadoop, Spark, or similar.
Knowledge of cloud computing platforms (e.g., Google Cloud, AWS, Azure).
Familiarity with deep learning frameworks (e.g., TensorFlow, PyTorch).
Previous experience in the tech industry.
Benefits:

Competitive salary and performance-based bonuses.
Comprehensive health, dental, and vision insurance.
401(k) retirement plan.
Professional development opportunities.
Flexible work hours and remote work options.
This is just a generic example, and actual job descriptions may vary. It's important to carefully read and understand the specific requirements and expectations outlined in the job postings of the companies you are interested in."""