In [1]:
import os
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter  import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain_community.llms.ollama import Ollama
from langchain.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [2]:
path = "./cv"
persist_directory= "./vectorstore/db"
n_doc = 3 ##number of documents return

loader = DirectoryLoader(path = path, show_progress=True)
documents = loader.load()

embeddings = OllamaEmbeddings(base_url=os.getenv("OLLAMA_ADDR"), model=os.getenv("OLLAMA_MODEL"))

llm = Ollama(base_url=os.getenv("OLLAMA_ADDR"), model=os.getenv("OLLAMA_MODEL"))

  0%|          | 0/10 [00:00<?, ?it/s]

100%|██████████| 10/10 [00:08<00:00,  1.20it/s]


In [7]:
text_splitter  = RecursiveCharacterTextSplitter(chunk_size=10000)
text_splitter.split_documents(documents)
vectorstore = Chroma.from_documents(persist_directory=persist_directory, embedding=embeddings, documents=documents)
retriver = vectorstore.as_retriever(search_type="mmr",search_kwargs= {"k": n_doc} )

In [9]:
def file_reader(path):
    with open(path, mode="r+", encoding= "utf-8") as f:
        return f.read()

In [14]:
jd_path = "jd\job_description.txt"
jd=file_reader(jd_path)
print(jd)

responsibilities:
• Design, develop, and maintain multiple databases within the data warehouse. 
• Create, implement, and test data models and database management systems.
• Conduct research and support internal/external groups with the selection and implementation of applications and database management tools.
• Identify and utilize database management systems to aggregate and analyze data. 
• In collaboration with the VP, IBT and the Senior Data Warehouse Engineer, develop and implement data administration policies, standards, and models. 
• Work with internal departments and colleagues to understand and document specific data and user requirements, data collection and administration policies, and data access rules. 
• Develop and implement procedures for determining network access and usage and for the backup and recovery of data. 
• Write scripts to support automation, data extraction, and reporting.
• Design and automate routine and self-service reporting solutions (i.e., Power BI

In [15]:
screen_result = retriver.invoke(F"""find the CV with the best match with the JD as below:
   {jd}
    """)

In [16]:
screen_result

[Document(page_content='Name: Emily Smith\n\nEmail: emily.smith@example.com\n\nPhone: +1-555-901-2345\n\nLocation: Vancouver, BC\n\nProfessional Summary\n\nAnalytical and detail-oriented data analyst with 2 years of experience in data cleaning, analysis, and visualization. Strong background in economics and statistics.\n\nSkills\n\nPython, R\n\nData Visualization (Tableau, Power BI)\n\nSQL\n\nExcel\n\nStatistical Analysis\n\nWork Experience\n\nData Analyst\n\nMarket Research Inc., Vancouver, BC\n\nMarch 2022 - Present\n\nConducted data analysis to support market research projects.\n\nCreated visualizations to communicate findings to clients.\n\nResearch Assistant\n\nUniversity of British Columbia, Vancouver, BC\n\nJanuary 2020 - February 2022\n\nAssisted in economic research and data analysis.\n\nEducation\n\nBachelor of Arts in Economics\n\nUniversity of British Columbia, Vancouver, BC\n\n2015 - 2019\n\nCertifications\n\nCertified Business Intelligence Professional (CBIP)', metadata={

In [17]:
retriever = Chroma.from_documents(documents=screen_result, embedding=embeddings).as_retriever()

In [18]:

template = """Summarize and highlight the experience, certificate or education in the CV which match with the JD as below:

CV:
{context}

JD:
{job_description}

"""
prompt = ChatPromptTemplate.from_template(template)
model = llm


def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])


chain = (
    {"context": retriever , "job_description": lambda x: jd}
    | prompt
    | model
    | StrOutputParser()
)

res =chain.invoke("")

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


In [96]:
print(res)

Based on the CVs provided, here are some highlights that match with the Job Description (JD):

**CV 1: Emily Smith**

* Education:
	+ Bachelor of Arts in Economics from University of British Columbia (2015-2019)
		- This degree is relevant to the JD's requirement for "strong background in economics and statistics".
* Certifications:
	+ Certified Business Intelligence Professional (CBIP) - This certification is related to data visualization, analysis, and reporting, which are mentioned in the JD.
* Skills:
	+ Data Visualization (Tableau, Power BI)
	+ Statistical Analysis
	+ Excel

These skills align with the JD's requirements for designing, developing, and maintaining databases, creating data models, and implementing data administration policies.

**CV 2: Michael Brown**

* Education:
	+ Bachelor of Science in Software Engineering from Dalhousie University (2013-2017)
		- This degree is relevant to the JD's requirement for "background in software engineering".
* Certifications:
	+ AWS C