In [15]:
from langchain_community.llms import Ollama
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [16]:
model = Ollama(model='llama3.1')

In [17]:
# loader = PyPDFLoader('resume')
loader = PyPDFLoader('hashim')
document = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap=200)
final_doc = text_splitter.split_documents(document)

In [18]:
ollama_embeddings = OllamaEmbeddings(
    model='llama3.1',
    model_kwargs={'device':'gpu'}
)

In [19]:
vectors = FAISS.from_documents(final_doc,ollama_embeddings)

In [20]:
retriever  = vectors.as_retriever()

In [21]:
prompt = ChatPromptTemplate.from_template(
"""
Provide me the accurate answers regarding the context.
<context>
{context}
<context>
Question:{input}
"""
)


In [22]:
document_chain = create_stuff_documents_chain(model,prompt)

In [23]:
retrieval_chain = create_retrieval_chain(retriever,document_chain)

In [24]:
question = """Please extract the following details from the provided resume:

1. **Candidate’s Name**: The full name of the candidate.
2. **Years of Experience**: Total years of relevant work experience.
3. **Industry**: The industry or field in which the candidate has worked.
4. **Key Skills**: A list of primary skills mentioned in the resume.
5. **Achievements**: Notable accomplishments or achievements listed in the resume.

Provide the extracted information in the following format:

**Candidate’s Name**: [Name]
**Years of Experience**: [Years]
**Industry**: [Industry]
**Key Skills**: [Skill 1], [Skill 2], [Skill 3]
**Achievements**: [Achievement 1], [Achievement 2]
**Previous Roles**: [Role 1] at [Current Company]

Make sure to carefully review the resume and provide accurate and complete details.
    """

In [25]:
response = retrieval_chain.invoke({'input':question})

In [26]:
response['answer']

"**Candidate’s Name**: Mohd Hashim\n**Years of Experience**: 4+ years (Note: The exact number is not mentioned, but based on the provided information, it can be estimated that he has around 4-5 years of experience)\n**Industry**: Data Engineering/Technology\n**Key Skills**: Python, PySpark, Pandas, NumPy, SQL, Power BI, Azure Data Factory, Azure Databricks, Azure Functions, Git, PowerShell, Azure Logic App, Azure blob storage, Azure data lake, Rest API's, Postman, VS Code, Pycharm, Knime, Excel\n**Achievements**: \n- Developed an interactive Power BI dashboard to track raw material consumption, production trends, critical inventory, purchase tracking, non-performing inventory, and machinery downtime, resulting in improved operational efficiency, 20% reduction in machinery downtime, and $40,000 annual cost saving.\n- Achieved a 90% inventory forecast accuracy by utilizing Databricks platform to create robust ETL pipelines with Python, PySpark, and SQL.\n**Previous Roles**: \n- Data Engi