## Environment Setup

In [103]:
#imports
from dotenv import load_dotenv
from langchain_openai import OpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.document_loaders import BSHTMLLoader


import os


# Load environment variables (the notebook later reads OPEN_AI_KEY) from a local
# .env file into the process environment, so the key stays out of the notebook.
load_dotenv()

True

In [104]:
# Read the OpenAI API key from the OPEN_AI_KEY environment variable.
# The value is None when the variable is not set.
open_ai_api_key = os.getenv('OPEN_AI_KEY')

## Retrieval Augmented Generation (RAG)

### Read Resume.pdf File

In [105]:
# PDF loading helper
def read_doc(file):
    """Load a PDF file and return its contents as LangChain documents.

    Args:
        file: Path of the PDF file to read.

    Returns:
        Whatever ``PyPDFLoader(file).load()`` produces for the given file.
    """
    return PyPDFLoader(file).load()

In [106]:
# Split loaded documents into overlapping chunks

def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    """Split documents into smaller chunks with a recursive character splitter.

    Args:
        docs: Documents to split (e.g. the output of ``read_doc``).
        chunk_size: Value passed to the splitter as ``chunk_size``.
        chunk_overlap: Value passed to the splitter as ``chunk_overlap``.

    Returns:
        The chunked documents returned by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

In [107]:
# Create the OpenAI embeddings client that is passed to the vector stores below.
# NOTE(review): open_ai_api_key is None when OPEN_AI_KEY is not set in the environment.
embeddings = OpenAIEmbeddings(api_key=open_ai_api_key)
# Bare expression so the notebook displays the client's configuration.
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x123185400>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x12312dbb0>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None)

### Import Resume

In [108]:
# Load the resume PDF with read_doc (PyPDFLoader); the path is relative to the
# notebook's working directory.
resume_doc = read_doc('Zaaheda_Resume_19_02_24.pdf')

In [109]:
# Split the resume into chunks using chunk_data's defaults
# (chunk_size=800, chunk_overlap=50).
resume_doc_chunked = chunk_data(docs=resume_doc)

In [110]:
# Embed the resume chunks and store them in a dedicated Chroma collection.
# Without an explicit collection_name, Chroma writes to its shared default
# collection, so any other store created in this kernel would be mixed in with
# the resume and the resume retriever could return unrelated chunks.
db_resume = Chroma.from_documents(
    resume_doc_chunked,
    embeddings,
    collection_name="resume",
)

### Import the Job Description
For now, the job description is read from a PDF. The aim is to support other formats later, such as a web loader (scraping the job description from a website).

In [112]:
# Load the job description PDF with read_doc (PyPDFLoader).
job_description_doc=read_doc('sample-job-description.pdf')
# Bare expression so the notebook displays the loaded documents.
job_description_doc

[Document(page_content=' \n \n  Sample Job Description \n \n \n \n Job Title:  \n    Human Resources Assistant  \n \nJob Description:  This position reports to the Human Resources (HR) director and \ninterfaces with company managers and HR staff. Company XYZ is \ncommitted to an employee -orientated, hig h performance culture that \nemphasizes empowerment, quality, continuous improvement, and the recruitment and ongoing development of a superior workforce.  \n \nThe intern will gain exposure  \nto these functional areas:   HR Information Systems ; Employee relations ; Training and development;  \n  Benefits ; Compensation; Organization development ; Employment  \n \n \nSpecific responsibilities:   -      Employee orientation and training logistics and recordkeeping  \n- Company -wide committee facilitation and participation  \n- Employee safety, welfare, wellness and health reporting  \n- Provide direct support to employees during implementation of HR  \nservices, policies and programs

In [113]:
# Split the job description into chunks using chunk_data's defaults
# (chunk_size=800, chunk_overlap=50).
job_description_doc_chunked = chunk_data(docs=job_description_doc)
# Display how many chunks were produced.
len(job_description_doc_chunked)

3

In [114]:
# Embed the job description chunks and store them in a dedicated Chroma collection.
# Without an explicit collection_name, Chroma writes to its shared default
# collection, so any other store created in this kernel would be mixed in with
# the job description and its retriever could return unrelated chunks.
db_job_description = Chroma.from_documents(
    job_description_doc_chunked,
    embeddings,
    collection_name="job_description",
)

## Feed all this data to LLM using LCEL to create resume based on Job Description

In [115]:

# One retriever per vector store: resume chunks and job description chunks.
retriever1 = db_resume.as_retriever()
retriever2 = db_job_description.as_retriever()


def format_docs(docs):
    """Join the text of retrieved documents into a single prompt-ready string.

    Retrievers return a list of Document objects; interpolating that list into
    the prompt directly would insert Python reprs (including metadata) instead
    of the plain document text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


template = """Use the resume content from {context1} and the Job Description from {context2} to fullfill the request.

Request: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
model = ChatOpenAI(api_key=open_ai_api_key)
output_parser = StrOutputParser()

# Run both retrievals in parallel on the incoming question, convert each result
# to plain text, and pass the question through unchanged.
setup_and_retrieval = RunnableParallel(
    {
        "context1": retriever1 | format_docs,
        "context2": retriever2 | format_docs,
        "question": RunnablePassthrough(),
    }
)
# LCEL pipeline: retrieve context -> fill prompt -> call chat model -> plain string.
chain = setup_and_retrieval | prompt | model | output_parser

chain.invoke("Rewrite Zaaheda's resume based on the Job Description")

"Zaaheda's resume showcases expertise in various technical skills such as Python, Java, JavaScript, SQL, Databricks, Azure, Apache Spark, LangChain, Flask, AWS, git/GitHub, Azure DevOps, and more. Additionally, Zaaheda has experience with AI technologies like Genrative AI, Retrieval Augmented Generation, and Vector DB, along with implementing tools like Textract, Comprehend, and Translate.\n\nBased on the job description for a Human Resources Assistant at Company XYZ, Zaaheda can highlight her experience in HR Information Systems, Employee relations, Training and development, Benefits, Compensation, Organization development, and Employment. Zaaheda can emphasize her skills in recordkeeping, committee facilitation, and employee orientation and training logistics to align with the specific responsibilities outlined in the job description.\n\nBy showcasing her technical skills and experience in AI technologies, Zaaheda can demonstrate her ability to adapt and learn quickly in a dynamic wo