# basic import

In [1]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv

# Load the .env file
load_dotenv()

# Retrieve HF_TOKEN from the environment variables
hf_token = os.getenv("HF_TOKEN")

llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    temperature=0.7,
    max_tokens=100,
    token=hf_token,
    provider="auto"
)

response = llm.complete("Hello, how are you?")
print(response)
# I am good, how can I help you today?

  from .autonotebook import tqdm as notebook_tqdm


Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?


In [2]:
from llama_index.core import SimpleDirectoryReader

pdf_path = '/Users/kimduhyeon/Downloads/Duhyeon_CV._oct09pdf.pdf'
reader = SimpleDirectoryReader(input_files=[pdf_path])
documents = reader.load_data()

print(documents[:500])



[Document(id_='200db532-a575-4b85-8f9b-5aa513d50c0b', embedding=None, metadata={'page_label': '1', 'file_name': 'Duhyeon_CV._oct09pdf.pdf', 'file_path': '/Users/kimduhyeon/Downloads/Duhyeon_CV._oct09pdf.pdf', 'file_type': 'application/pdf', 'file_size': 404671, 'creation_date': '2025-10-09', 'last_modified_date': '2025-10-09'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='DUHYEON  KIM  (അ) \n \nProﬁle \nEngineer with expertise in HDL-based hardware design and AI/system architecture. \nExperienced in bridging low-level hardware optimization with high-level AI \napplications to deliver impactful, efﬁcient solutions. \nEducation \nKorea Un

# pymupdf

In [3]:
import fitz  # PyMuPDF
from llama_index.core import Document

# PDF open
doc = fitz.open('/Users/kimduhyeon/Downloads/Duhyeon_CV._oct09pdf.pdf')

# text extract
full_text = ""
for page in doc:
    full_text += page.get_text()

# LlamaIndex Document
documents = [Document(text=full_text)]

print(f"extracted text: {full_text[:500]}")
print(documents[:500])


extracted text: DUHYEON KIM (김두현) 
 
Profile 
Engineer with expertise in HDL-based hardware design and AI/system architecture. 
Experienced in bridging low-level hardware optimization with high-level AI 
applications to deliver impactful, efficient solutions. 
Education 
Korea University, Seoul, South Korea 
🎓 M.S., Semiconductor System Engineering, Mar 2026 - Expected Feb 2028 
🎓 B.S., Electrical Engineering, Mar 2019 - Aug 2025 
• GPA 4.05/4.5 
Relevant Coursework 
• Hardware Design - Digital System Design(A+
[Document(id_='24dfa4b4-3bae-4a4f-887f-3f8089611d8d', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='DUHYEON KIM (김두현) \n \nProfile \nEngineer with expertise in HDL-based hardware design and AI/system architecture. \nExperienced in bridging low-level hardware optimization with high-level AI \nap

# chunking

In [4]:
from llama_index.core import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline

# create the pipeline with transformations
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_overlap=0, chunk_size=256),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ]
)

# nodes = await pipeline.arun(documents=[Document.example()])
nodes = await pipeline.arun(documents=documents)

2025-10-16 13:58:24,262 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
2025-10-16 13:58:27,666 - INFO - 1 prompt is loaded, with the key: query


In [None]:
node = nodes[0]

print(f"Text: {node.text[:100]}")
print(f"Embedding Dimension: {len(node.embedding)}")
print(f"Embedding Sample: {node.embedding[:10]}")
print(f"ID: {node.id_}")
print(f"Metadata: {node.metadata}")


Text: DUHYEON KIM (김두현) 
 
Profile 
Engineer with expertise in HDL-based hardware design and AI/system arc
Embedding Dimension: 384
Embedding Sample: [-0.07480106502771378, 0.04825197532773018, 0.058346476405858994, -0.07109431177377701, 0.04105697199702263, -0.02796272002160549, -0.0050787730142474174, 0.03678007796406746, -0.030957909300923347, -0.036979757249355316, 0.0470576174557209, -0.0528317391872406, 0.06541488319635391, -0.02232913114130497, 0.030958550050854683, -0.019247766584157944, -0.03825705498456955, 0.004458132199943066, 0.06012910231947899, -0.007345608435571194, 0.015679555013775826, -0.047658734023571014, -0.06091010570526123, -0.049731191247701645, -0.017749156802892685, -0.0427025742828846, 0.06439231336116791, -0.05228588357567787, -0.01951979473233223, -0.14514243602752686, 0.03283509239554405, 0.05806897208094597, 0.027195489034056664, 0.026477361097931862, -0.04303948953747749, 0.001907501369714737, 0.010908858850598335, 0.04465324804186821, -0.01588239893317

In [6]:
from IPython.display import HTML, display

# 색상 팔레트
colors = ['#FFE5E5', '#E5F5FF', '#E5FFE5', '#FFF5E5', '#F5E5FF']

# 각 청크를 색상별로 출력
for i, node in enumerate(nodes):
    color = colors[i % len(colors)]  # 색상 순환
    html = f"""
    <div style="background-color: {color}; padding: 15px; margin: 10px 0; border-radius: 8px; border: 2px solid {color[:-2]}CC;">
        <h3 style="color: #333; margin-top: 0;">📄 Chunk #{i}</h3>
        <pre style="white-space: pre-wrap; font-family: 'Courier New', monospace; color: #333;">{node.text}</pre>
    </div>
    """
    display(HTML(html))

# chromaDB

In [7]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

db = chromadb.PersistentClient(path="./cv_chroma_db")
chroma_collection = db.get_or_create_collection("cv_db")        # easily the folder in DB
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=256, chunk_overlap=0),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ],
    vector_store=vector_store,
)

nodes = await pipeline.arun(documents=documents)

2025-10-16 13:58:29,865 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2025-10-16 13:58:30,226 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
2025-10-16 13:58:33,054 - INFO - 1 prompt is loaded, with the key: query


# indexing

In [8]:
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

2025-10-16 13:58:33,171 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
2025-10-16 13:58:36,015 - INFO - 1 prompt is loaded, with the key: query


In [9]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct",provider="nscale",)
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="tree_summarize",
)
query_engine.query("What is the meaning of life?")
# The meaning of life is 42

Response(response="The meaning of life is a philosophical question that has been pondered by many throughout history. It involves exploring the purpose and significance of existence. While the skills and experiences mentioned suggest a focus on technology and engineering, these do not directly address the existential question of life's meaning. This is a deeply personal and subjective topic, often influenced by one's beliefs, values, and experiences.", source_nodes=[NodeWithScore(node=TextNode(id_='d54ee6c6-40c5-49e4-b37b-06f5c8a03374', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='3a1881fd-4906-4c36-a48b-9d5a4257c360', node_type='4', metadata={}, hash='7d88fb7fae064b6f52eb39c83ddaadddd48878b0e57141c6e014da47759da497'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='24ae30e8-3cec-446d-8e25-67adcd43c5b4', node_type='1', metadata={}, hash='69e19f1401b6affec0b

In [10]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="tree_summarize",
)
query_engine.query("What is the meaning of life?")
# The meaning of life is 42

HfHubHTTPError: 404 Client Error: Not Found for url: https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1/chat/completions (Request ID: Root=1-68f07b80-010713bd7158104e2afcb644;02ff663a-b0ed-4265-ae30-b5742cc202d5)

# llama_parse