In [23]:
import PyPDF2
import google.generativeai as genai
import pandas as pd
import os

In [16]:
genai.configure(api_key=os.environ['API_KEY'])

model = genai.GenerativeModel('gemini-pro')
response = model.generate_content('Hi How are you?')

print(response.text)

I am an artificial intelligence chatbot. I am designed to be helpful and informative. I do not have feelings or personal experiences like humans do. I am always available to assist you with your questions and provide the best possible responses based on the information I have been trained on. How can I help you today?


In [19]:
def input_pdf_setup(file_path):
    text = ''
    if file_path is not None:
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            for page in reader.pages:
                text += page.extract_text()
    return text

In [None]:
def get_gemini_response(input,pdf_content,prompt):
    model=genai.GenerativeModel('gemini-pro')
    response=model.generate_content([input,pdf_content,prompt])
    return response.text

In [24]:
def read_resumes(folder_path):
    # List all PDF files in the specified folder
    files = [f for f in os.listdir(folder_path) if f.endswith('.pdf')]
    
    # Prepare a list to hold data
    data = []
    
    # Process each file
    for file_name in files:
        file_path = os.path.join(folder_path, file_name)
        resume_text = input_pdf_setup(file_path)
        data.append({'file_name': file_name, 'resume_text': resume_text})
    
    # Create a DataFrame
    df = pd.DataFrame(data)
    return df

In [25]:
folder_path = '/Users/parthvinm/Desktop/NLP SSM/Final Project/Resume_Job-Align/Resumes'
df_resumes = read_resumes(folder_path)
df_resumes

Unnamed: 0,file_name,resume_text
0,Resume_Jash_09_SDE.pdf,"Jash Shah Baltimore, MD|+1 (667) 600-9472..."
1,Parthvi_Mehta_Resume.pdf,PARTHVI MEHTA \npmehta14@jh.edu | +1 (667) -...


In [26]:
prompt_structure = """Objective:  Parse a text-formatted resume efficiently and extract diverse applicant's data into a structured JSON format that adheres to the provided TypeScript interface schema.

Input: Text-formatted applicant's resume.

Steps:
1. Analyze Structure: Examine the text-formatted resume to understand its organization and identify key sections (e.g., education, experience, skills).
2. Convert to JSON: Map the extracted information to the corresponding fields in the schema, creating a structured JSON representation.
3. Optimize Output: Ensure the JSON is well-formatted, error-free, and handles missing values appropriately.
4. Handle Variations: Account for different resume styles and formatting to accurately extract relevant data.

Consider following TypeScript Interface for JSON schema:
```
interface Media {
  linkedin: string;
  github: string;
  devpost: string;
  medium: string;
  leetcode: string;
  dagshub: string;
  kaggle: string;
  instagram: string;
}

interface Education {
  degree: string;
  university: string;
  from: string;
  to: string;
  grade?: string;
  coursework?: string[];
}

interface SkillSection {
  name: string;
  skills: string[];
}

interface Work {
  role: string;
  company: string;
  from: string;
  to: string;
  description: string[];
}

interface Project {
  name: string;
  type: string;
  link?: string;
  from: string;
  to: string;
  description: string[];
}

interface Certification {
  name: string;
  by: string;
  link: string;
}

interface Achievements {
  [index: number]: string;
}

interface RESUME_DATA_SCHEMA {
  name: string;
  summary: string;
  phone: string;
  email: string;
  media: Media;
  education: Education[];
  skill_section: SkillSection[];
  work_experience: Work[];
  projects: Project[];
  certifications: Certification[];
  achievements: Achievements;
}
```

Desired Output: Write the Well-formatted JSON adhering to the RESUME_DATA_SCHEMA schema, handling missing values with empty strings or "None".
<JSON_OUTPUT_ACCORDING_TO_RESUME_DATA_SCHEMA>

The results should contain valid JSON only, without any delimiter or characters making invalid JSON format."""

In [36]:
prompt_job_description = """I will provide a job and company description for your analysis. In your analysis you have to identify the key words, expertise & requirements the job demands, but also from the company description.

Your task is consider it to meticulously assess the information and furnish the details like Job title, Keywords(key words are expertise & requirements the job demands), Job purpose, Job duties and responsibilities, Required qualifications, Preferred qualifications, Company name and Company details(points which include overview, mission, values or way of working) in bulleted points. 

Provide them only in JSON format - which is easily parse by any json parser - with following keys:
title, keywords, purpose, duties_responsibilities, required_qualifications, preferred_qualifications, company_name, company_info.

Output:
{
    "title": "RESULT", 
    "keywords": "RESULT", 
    purpose: "RESULT", 
    duties_responsibilities: "RESULT", 
    required_qualifications: "RESULT", 
    preferred_qualifications: "RESULT", 
    company_name: "RESULT",
    company_info: "RESULT",
}"""

In [37]:
model = genai.GenerativeModel('gemini-pro')

def generate_response(row, resume = True):
    if resume:
        response = model.generate_content([prompt_structure, row['resume_text']])
    else:
        response = model.generate_content([prompt_job_description, row['job_description']])
    return response

In [38]:
df_resumes['resume_json'] = df_resumes.apply(generate_response, axis=1)
df_resumes

Unnamed: 0,file_name,resume_text,resume_json
0,Resume_Jash_09_SDE.pdf,"Jash Shah Baltimore, MD|+1 (667) 600-9472...",response:\nGenerateContentResponse(\n done=...
1,Parthvi_Mehta_Resume.pdf,PARTHVI MEHTA \npmehta14@jh.edu | +1 (667) -...,response:\nGenerateContentResponse(\n done=...


In [30]:
input_text1 = """
About the job
Data Analyst Internship -Data Science & Gen-AI Team

At Peerlogic, we're pioneering the integration of artificial intelligence to revolutionize communication within the healthcare community. Guided by our core values of Curiosity, Accountability, Grit, and Enthusiasm (CAGE), we strive for excellence in innovating AI-driven solutions that improve, streamline, and transform professional interactions in medical settings. We're on the hunt for a Data Analysis II Intern to join our Data Science and Gen-AI team, a role critical to our mission of shaping the future of healthcare communication.

Location: Scottsdale, Arizona

Duration: 6 months, with potential for extension or transition to a full-time position

Role Overview

As a Data Analysis Intern, you will dive deep into the intricacies of AI and data science, contributing directly to projects at the cutting edge of technology and healthcare. Reporting to Dr. Amir Yazdavar, PhD, Principal Data Scientist, your work will support the development of AI solutions that embody our CAGE values, pushing the boundaries of what's possible in healthcare communication.

Key Responsibilitiaes

    Conduct advanced data analysis, utilizing statistical software and machine learning algorithms to extract insights from complex healthcare datasets.
    Collaborate with our AI development team to refine and enhance AI models, ensuring they meet the high standards required for healthcare applications.
    Engage in the entire lifecycle of AI solution development, from initial data collection and analysis to implementation and feedback collection.
    Develop and present clear, actionable insights to team members and stakeholders, aiding in decision-making and strategy development.
    Contribute to the creation of a robust knowledge base by documenting findings, methodologies, and best practices.

Qualifications

    Pursuing or recently completed a degree in Data Science, Computer Science, Statistics, Mathematics, or related field, with a strong academic record.
    Demonstrated experience with data analysis and statistical tools (e.g., Python, R), and a keen interest in AI and machine learning.
    A proactive learner with a knack for problem-solving and a passion for healthcare innovation.
    Exceptional communication skills, with the ability to convey complex ideas effectively to a diverse audience.
    Alignment with Peerlogic’s core values of Curiosity, Accountability, Grit, and Enthusiasm, and a strong desire to make a positive impact in healthcare.

What You'll Gain:

    A competitive stipend and flexible scheduling to accommodate your educational commitments.
    Hands-on experience in a fast-paced startup environment, working on groundbreaking projects at the intersection of AI and healthcare.
    Direct mentorship from Dr. Amir Yazdavar and the opportunity to learn from a team of experienced professionals.
    Access to a network of industry professionals and potential for future employment opportunities within Peerlogic.

Interested candidates should apply with a resume, cover letter detailing their interest and fit for the role, and any relevant project samples or portfolios.

Peerlogic is an equal-opportunity employer dedicated to building a diverse and inclusive team. We encourage applications from all qualified individuals who share our vision for improving healthcare communication. """

In [29]:
input_text2 = """ Machine Learning Engineering Intern- Visual Gen AI
Pyxer Inc. · Santa Clara, CA ·

About the job
We are pyxer, a visual gen AI startup in the apparel B2C space, on a mission to revolutionize clothes shopping. We are building an AI personal shopper that helps people find their perfect looks in both the digital and real world, by styling and shopping through their personal photos.


We are seeking an exceptional machine learning engineering intern who is creative, driven, and can get things done. You will have the unique opportunity to have your fingerprints on the next transformational consumer tech company, creating products that have the potential to be used by every person on the planet. 



Our product is already being actively used in 55 countries. You will get to watch your work come to life quickly as it will be frequently deployed into production. You will gain a deep sense of achievement, while acquiring critical insights and skills in generative AI for photos and videos.


We currently have two open positions available for this role.


In your application, please write a short cover letter on why you believe in our idea and mission. We consider each intern as a pyxer brand ambassador, so they must strongly believe in the idea and mission.


This role is ideal for you if:

You are passionate about our mission and are looking for a long-term employment opportunity beyond the internship (e.g. part-time role, full-time founding engineer).
You prefer collaborating in-person with a team, rather than remotely. This internship will take place entirely in-person at our office in the Santa Clara, CA area.


Key Responsibilities:

Train and fine-tune visual gen AI models to be deployed in real world, production settings.
Work with large-scale datasets and apply data preprocessing and augmentation methods.
Evaluate model performance through quantitative metrics and qualitative assessment.
Refine and optimize our pipelines to increase efficiency.
Collaborate with cross-functional teams to integrate AI solutions into broader systems.
Write clean, well-documented, and maintainable code adhering to best practices.


Qualifications:

Strong understanding of the fundamentals of deep learning.
Excellent Python programming and debugging skills, with a strong grasp of its essential data structures and algorithms.
Understanding of shell scripting and working with Ubuntu or other Linux distributions.
Proficiency in PyTorch.


Preferred Skills:

Prior experience or projects related to image processing, video processing, or computer vision.


Duration:

3 months, starting from June.


Compensation:

$5000 per month"""

In [43]:
df_jobs = [('Job Description 1',input_text1), ('Job Description 2',input_text2)]
df_jobs = pd.DataFrame(df_jobs, columns=['title','job_description'])
df_jobs

Unnamed: 0,title,job_description
0,Job Description 1,\nAbout the job\nData Analyst Internship -Data...
1,Job Description 2,Machine Learning Engineering Intern- Visual G...


In [44]:
df_jobs['job_json'] = df_jobs.apply(generate_response, resume = False, axis=1)
df_jobs

Unnamed: 0,title,job_description,job_json
0,Job Description 1,\nAbout the job\nData Analyst Internship -Data...,response:\nGenerateContentResponse(\n done=...
1,Job Description 2,Machine Learning Engineering Intern- Visual G...,response:\nGenerateContentResponse(\n done=...


In [54]:
job_resume_align_prompt = """I will provide a job description and my details in JSON format.
Your task is to analyze and match my details with the job's requirements.
Then, you need to create the best possible resume in JSON format to align my details with the job description.

Instructions:
- Include only 3 work and project experiences. each experience with 3 bulleted points closely aligned with job details. It very important you follow this.
- Use quantifiable impacts for each bullet point.
- Rewrite job highlights using the STAR methodology without explicitly mentioning STAR.
- Employ STRONG action verbs showcasing soft skills.
- Maintain truthfulness and objectivity in listing experience.
- Format experience points as 'Did X by doing Y accomplish Z'.
- Prioritize specificity - with respect to job - over generality.
- Proofread and Correct spelling and grammar errors.
- Aim for clear expression over impressiveness.
- Prefer active voice over passive voice.
- Omit a summary about the candidate.

Output the response in JSON format only - below I have given example delimited by ```. Output must be easily parse by python's json parser. Extract all mentioned properties in given example without changing their names.
```
{
  "personal": { "name": "", "phone": "+1 111-222-3333", "email": "", "github": "", "linkedin": ""
  },
  "education": [
    { "degree": "", "university": "", "from": "", "to": "", "grade": "", "coursework": [] },
    ...
  ],
  "skill_section": [
    { "name": "", "skills": [] }
    ...
  ],
  "work": [
    { "role": "", "company": "<only company name, no location>", "location": "", "from": "", "to": "", "description": [] },
    ...
  ],
  "projects": [
    { "name": "", "link": "", "from": "", "to": "", "description": [] },
    ...
  ],
  "certifications": [
    {"name": "", "issuer": "", "link": ""},
    ...
  ],
  "achievements": [
    "achievements_1",
    ...
  ]
}
```"""

In [55]:
def generate_response_final(resume_text, job_description):
    response = model.generate_content([job_resume_align_prompt, job_description, resume_text])
    return response

In [61]:
for job_index, job in df_jobs.iterrows():
    job_title = job['title']
    job_description = job['job_json'].text

    # Initialize an empty list to store the responses for all resumes for this job title
    responses = []

    # Iterate through each resume and apply the generate_response_final function
    for resume_index, resume in df_resumes.iterrows():
        response = generate_response_final(resume['resume_json'].text, job_description)
        responses.append(response)

    # Assign the list of responses to the corresponding new column in df_resumes
    df_resumes[job_title] = responses


In [62]:
df_resumes

Unnamed: 0,file_name,resume_text,resume_json,Job Description 1,Job Description 2
0,Resume_Jash_09_SDE.pdf,"Jash Shah Baltimore, MD|+1 (667) 600-9472...",response:\nGenerateContentResponse(\n done=...,response:\nGenerateContentResponse(\n done=...,response:\nGenerateContentResponse(\n done=...
1,Parthvi_Mehta_Resume.pdf,PARTHVI MEHTA \npmehta14@jh.edu | +1 (667) -...,response:\nGenerateContentResponse(\n done=...,response:\nGenerateContentResponse(\n done=...,response:\nGenerateContentResponse(\n done=...
