In [116]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from dotenv import load_dotenv
import os

In [34]:
load_dotenv()

api_key = os.getenv("GOOGLE_API_KEY")

In [104]:
loader = PyPDFLoader("../assets/ats-friendly-technical-resume.pdf")
documents = loader.load()


In [105]:

# 4. Split the document into smaller chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

In [106]:

# 5. Generate embeddings using Vertex AI
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
docsearch = FAISS.from_documents(texts, embeddings)

In [131]:
# 6. Create a question-answering chain using Vertex AI
llm = GoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=api_key)
chain = load_qa_chain(llm, chain_type="stuff")

# messages = [
#     SystemMessage(content="application deadline should be a date. today is 25/08/2024. mony should be in RS. give a perfect job description in 6 lines for the following in JSON form with these key values only Job Title,Job Description, Department, Job Location, Employment Type, Salary Range, Must be numeric values Application Deadline, Required Qualifications, Preferred Qualifications, Responsibilities. Remember to only respond with a valid JSON object."),
#     HumanMessage(content="senior software devloper role for backend devloper in django 50 plus years experience salary of 50k in next week")
# ]

# result = llm.invoke(messages)

In [114]:

skils = ["python", "ruby", "machine learning", "django", "react js"]
skill_query = f"compare the skills in the resume with {skils}. based on the skills matched calculate overall score for skills and give output in decimal value and fomrat it as json"
# docs = docsearch.similarity_search(skill_query)
# docsearch.
answer = chain.run(input_documents=documents, question=skill_query)
print(answer)
# print(docs[0].page_content)
# print(documents[0].page_content)

```json
{
  "skills": {
    "python": true,
    "ruby": false,
    "machine learning": true,
    "django": false,
    "react js": false
  },
  "score": 0.6
}
```


In [140]:
prompt_template = PromptTemplate(
    input_variables=["resume_text", "job_description"],
    template=(
        "You are an HR assistant. Evaluate this resume based on the following job description: {job_description}. "
        "Score the resume on relevance, experience, skills match, and formatting.the output should only be in json format with maximum score as 10. don't give any other explanation.and don't"
        "Resume: {resume_text}"
    )
)

chain = LLMChain(llm=llm, prompt=prompt_template)
jd = '''We are looking for a Software Engineer with experience in Python, Django, and React.js.
The ideal candidate should have at least 3 years of experience in software development, 
familiarity with CI/CD pipelines, and a strong understanding of web technologies.'''
score = chain.run(resume_text=documents[0].page_content, job_description=jd)
print(score)

```json
{
"relevance": 4,
"experience": 6,
"skills_match": 3,
"formatting": 8
}
```
