In [5]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_ollama import OllamaLLM
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

In [5]:
loader = PyPDFLoader('data\cv.pdf')
docs = loader.load()
docs

[Document(metadata={'producer': 'pdfmake', 'creator': 'pdfmake', 'creationdate': '2025-01-24T06:27:35+00:00', 'title': 'Resume', 'author': 'Kinobi', 'subject': 'Resume', 'keywords': 'Resume', 'source': 'data\\cv.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content="JONATHAN LEXI FEBRIAN SITOHANG\n089637074190 | jonathanlexi39@gmail.com | https://www.linkedin.com/in/jonathanlexi/ | https://github.com/jonathanlex1\nA graduate of Universitas Sumatera Utara with a GPA of 3.71 in computer science who is interested in AI algorithms and capable in \nPython, SQL, PyTorch, Flask, and Streamlit. Good in problem-solving, collaboration and effective communication, with a love of learning \nand a dedication to creativity in tackling real-world problems\nProject Experience\nAgrisense: Plant Health Monitoring Application based on \nMulticlass Image Classification to Detect and Diagnose \nDiseases in Food Plants -  Startup Campus\nAug 2023 - Dec 2023\nCapstone Project Kampus Merdeka Bat

In [6]:
#split document
splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100, separators=['\n'])
docs = splitter.split_documents(docs)
docs

[Document(metadata={'producer': 'pdfmake', 'creator': 'pdfmake', 'creationdate': '2025-01-24T06:27:35+00:00', 'title': 'Resume', 'author': 'Kinobi', 'subject': 'Resume', 'keywords': 'Resume', 'source': 'data\\cv.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='JONATHAN LEXI FEBRIAN SITOHANG\n089637074190 | jonathanlexi39@gmail.com | https://www.linkedin.com/in/jonathanlexi/ | https://github.com/jonathanlex1\nA graduate of Universitas Sumatera Utara with a GPA of 3.71 in computer science who is interested in AI algorithms and capable in \nPython, SQL, PyTorch, Flask, and Streamlit. Good in problem-solving, collaboration and effective communication, with a love of learning \nand a dedication to creativity in tackling real-world problems\nProject Experience\nAgrisense: Plant Health Monitoring Application based on'),
 Document(metadata={'producer': 'pdfmake', 'creator': 'pdfmake', 'creationdate': '2025-01-24T06:27:35+00:00', 'title': 'Resume', 'author': 'Kinobi', 'subje

## Ollama Models

In [None]:
#embedding 
embedding_model = OllamaEmbeddings(model='llama3.2')
#vectorstoredb 
faiss_db = FAISS.from_documents(docs, embedding_model)
faiss_db

<langchain_community.vectorstores.faiss.FAISS at 0x1a4b48a2ed0>

In [29]:
result = faiss_db.similarity_search_with_score('the job needs Python, Sql, Machine Learning and work experience')
result[-1]

(Document(id='794d022b-9454-4b0d-ac8e-341ae95a5d3b', metadata={'producer': 'pdfmake', 'creator': 'pdfmake', 'creationdate': '2025-01-24T06:27:35+00:00', 'title': 'Resume', 'author': 'Kinobi', 'subject': 'Resume', 'keywords': 'Resume', 'source': 'data\\cv.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Project Experience\nAgrisense: Plant Health Monitoring Application based on \nMulticlass Image Classification to Detect and Diagnose \nDiseases in Food Plants -  Startup Campus\nAug 2023 - Dec 2023\nCapstone Project Kampus Merdeka Batch 5 - Artificial Intelligence\nDeveloped and refined ideas using the Business Model Canvas to align with goals and create business value.\nImplemented an EfficientNet-based model for multiclass image classification to diagnose plant diseases (cucumber, potato, tomato). \nTrained on 2,400 images and tested on 480, achieving 97.73% accuracy.'),
 np.float32(1.3529248))

### Retriever

In [39]:
#create prompt template
prompt = ChatPromptTemplate([
    ("system", "You are a professional HR assistant helping evaluate how well a CV fits a job description."),
    ("human", 
     "Job Requirement:\n{input}\n\n"
     "Relevant Content from the CV:\n{context}\n\n"
     "Analyze the match and list strengths, gaps, and a recommendation.")
])

In [32]:
llm = OllamaLLM(model='llama3.2', temperature=0)

In [44]:
document_chain = create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a professional HR assistant helping evaluate how well a CV fits a job description.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='Job Requirement:\n{input}\n\nRelevant Content from the CV:\n{context}\n\nAnalyze the match and list strengths, gaps, and a recommendation.'), additional_kwargs={})])
| OllamaLLM(model='llama3.2', temperature=0.0)
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [45]:
retriever = faiss_db.as_retriever()
retriever_chain = create_retrieval_chain(retriever, document_chain)
retriever_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001A4B48A2ED0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a professional HR assistant helping evaluate how well a CV fits a job description.'), additional_kwargs={}), HumanMessagePromptTemplate(prom

In [54]:
result = retriever_chain.invoke({'input' : """

Bachelor’s/Master’s/Ph.D. in a quantitative field (Computer Science, Data Science, Statistics, Economics, Finance, or related, 3+ years of experience in data science, preferably with a focus on credit risk modeling, loan portfolio analytics, or financial risk assessment,
Strong knowledge of credit risk/anti-fraud in the consumer lending domain,
Expertise in Python and machine learning/statistics libraries (scikit-learn, LGBM, XGBoost, etc),
Proficiency in SQL and data processing libraries for analyzing large-scale credit and transaction datasets,
Experience with cloud-based technologies (GCP preferred) and Docker for scalable model deployment,
Ability to translate business objectives into data-driven risk strategies and communicate findings effectively,

"""
})
result['answer']


"Based on the job requirement and the CV provided, here's an analysis of the match:\n\n**Strengths:**\n\n1. Relevant technical skills: The candidate has expertise in Python, SQL, machine learning libraries (scikit-learn, LGBM, XGBoost), and data processing libraries (Pandas, Numpy).\n2. Experience with cloud-based technologies: Although not explicitly stated, the candidate's experience with GCP is implied through their work on the Agrisense project.\n3. Familiarity with machine learning models: The candidate has worked on various projects involving machine learning models, including Convolutional Neural Networks (CNN) and EfficientNet-based models.\n4. Analytical skills: The candidate has demonstrated analytical skills through their work on data analysis and visualization tools like Power BI and Streamlit.\n\n**Gaps:**\n\n1. Lack of direct experience in credit risk modeling or loan portfolio analytics: Although the candidate has worked with large-scale datasets, there is no explicit me

## HuggingFace Models

In [8]:
import os 

In [9]:
load_dotenv()
os.environ['huggingfacehub_api_token'] = os.getenv('huggingfacehub_api_token')

In [10]:
huggingfacehub_api_token = os.getenv('huggingfacehub_api_token')

In [10]:
#load embedding hf model 
hf_embedding_model = HuggingFaceEmbeddings(model='all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
hf_faiss_db = FAISS.from_documents(docs, hf_embedding_model)

In [12]:
result = hf_faiss_db.similarity_search('the job needs Python, Sql, Machine Learning and work experience')
result

[Document(id='15b2e821-34b4-4121-95b2-9b14bcc62410', metadata={'producer': 'pdfmake', 'creator': 'pdfmake', 'creationdate': '2025-01-24T06:27:35+00:00', 'title': 'Resume', 'author': 'Kinobi', 'subject': 'Resume', 'keywords': 'Resume', 'source': 'data\\cv.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='(88%), and effectively reducing model overfitting.\nDeployed machine learning models into web applications using Flask, ensuring seamless integration and user-friendly accessibility.\nCoffeeshop Sales Analysis and Forecasting Dashboard \nSQL, Power BI, Python\nPersonal Project\nConducted end-to-end analysis of coffee sales data using SQL, encompassing data cleaning, creation of interactive dashboards in \nPower BI, and time series forecasting to uncover trends and insights.\nEducation\nUniversitas Sumatera Utara - Medan, Sumatera Utara Aug 2020 - Dec 2024\nBachelor of Computer Science, 3.71/4.00'),
 Document(id='639b0412-0983-47f1-83ba-5f6a744b5cbe', metadata={'produc

### Retriever

In [6]:
from huggingface_hub import InferenceClient
from langchain_community.llms import HuggingFaceHub
from transformers import pipeline 

In [11]:
from huggingface_hub import InferenceClient

# Ganti 'your_api_token' dengan token API Anda
client = InferenceClient(token=huggingfacehub_api_token)

# Jalankan inferensi
response = client.text_generation(
    model="microsoft/Phi-4-reasoning-plus",
    inputs="Who are you?",
    parameters={"max_new_tokens": 50}
)

print(response)

TypeError: InferenceClient.text_generation() got an unexpected keyword argument 'inputs'

In [9]:
input = 2

def inputData(input:int) : 
    #if not isinstance(input, str) :
     #   raise ValueError('data must be string') 
    print(input)

type(inputData(input))   

2


NoneType