In [1]:
import markdown
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer

from langchain.document_loaders import TextLoader
from langchain.schema import Document

from pathlib import Path

from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


ImportError: dlopen(/opt/anaconda3/envs/czb_llm/lib/python3.10/site-packages/torch/_C.cpython-310-darwin.so, 0x0002): Symbol not found: __ZN4absl12lts_2025012712log_internal10LogMessagelsIiTnNSt3__19enable_ifIXntsr4absl16HasAbslStringifyIT_EE5valueEiE4typeELi0EEERS2_RKS6_
  Referenced from: <F0CE594F-5059-3403-BEDE-CC2EF3170AD7> /opt/anaconda3/envs/czb_llm/lib/libprotobuf.29.3.0.dylib
  Expected in:     <621B4947-F73F-3962-8DDB-2484D6B77411> /opt/anaconda3/envs/czb_llm/lib/libabsl_log_internal_message.2501.0.0.dylib

# Loading documents

In [12]:
documents = {}

for path in Path("./documents").glob("*.md"):
    loader = TextLoader(str(path), encoding="utf-8")
    documents[path.name] = loader.load()

In [13]:
documents['languages.md'][0].page_content

'## Languages\n- **English** – Level C1  \n- **German** – Level B2'

In [14]:
docs = []

for doc in documents.values():
    docs.extend(doc[0].page_content.split("\n\n"))

In [15]:
text_docs = [Document(page_content=doc) for doc in docs]

# Embedding

In [16]:
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')

In [17]:
embedding_model = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')

vectorstore = FAISS.from_documents(text_docs, embedding=embedding_model)

In [18]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# Loading Llama model

In [19]:


model_id = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

In [20]:
llm = pipeline("text-generation", model=model, tokenizer=tokenizer)

Device set to use mps:0


# Query router

In [21]:
def is_query_relevant(query: str) -> bool:
    prompt = f"""
Decide if the following question is related to a personal CV or biography.

Question: "{query}"

Answer with only "relevant" or "irrelevant".
"""
    response = llm(prompt, max_new_tokens=10, do_sample=False)[0]['generated_text']
    return "relevant" in response.lower()

# RAG answer generator

In [22]:
def build_rag_prompt(context_docs, query):
    context = "\n\n".join([doc.page_content for doc in context_docs])
    return f"""
You are a helpful assistant answering questions about a person's CV and biography.

Context:
{context}

Question:
{query}

Answer:
"""

def generate_rag_answer(query):
    docs = retriever.get_relevant_documents(query)
    prompt = build_rag_prompt(docs, query)
    response = llm(prompt, max_new_tokens=300, do_sample=True, temperature=0.7)[0]["generated_text"]
    return response.strip()

# Chatbot

In [23]:
def chatbot_response(query):
    if is_query_relevant(query):
        return generate_rag_answer(query)
    else:
        return "This question doesn’t seem to relate to my CV or personal profile. Please ask something else."


In [24]:
chatbot_response("What is your name?")

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Unexpected exception formatting exception. Falling back to standard exception


  docs = retriever.get_relevant_documents(query)
Traceback (most recent call last):
  File "/opt/anaconda3/envs/czb_llm/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3579, in run_code
  File "/var/folders/nb/0ttbl47x5vgfpnkhns6r1f580000gn/T/ipykernel_3343/1021707190.py", line 1, in <module>
    chatbot_response("What is your name?")
  File "/var/folders/nb/0ttbl47x5vgfpnkhns6r1f580000gn/T/ipykernel_3343/3694801726.py", line 3, in chatbot_response
    return generate_rag_answer(query)
  File "/var/folders/nb/0ttbl47x5vgfpnkhns6r1f580000gn/T/ipykernel_3343/3849805201.py", line 16, in generate_rag_answer
    docs = retriever.get_relevant_documents(query)
  File "/opt/anaconda3/envs/czb_llm/lib/python3.10/site-packages/langchain_core/retrievers.py", line 411, in get_relevant_documents
  File "/opt/anaconda3/envs/czb_llm/lib/python3.10/site-packages/langchain_core/retrievers.py", line 234, in invoke
ModuleNotFoundError: No module named 'langchain_core.callbacks.manage

In [None]:
# CLI loop
if __name__ == "__main__":
    print("🤖 CV Chatbot (LLaMA + RAG)\nType 'exit' to quit.\n")
    while True:
        query = input("You: ")
        if query.lower() in {"exit", "quit"}:
            break
        print("Bot:", chatbot_response(query), "\n")