## Set the device to GPU

In [1]:
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

## Load multiple documents

In [2]:
import os
from langchain.document_loaders import PyPDFLoader

def load_documents():
    documents = []
    files = ['docs/linkedin_profile.pdf', 'docs/biography.pdf']
    
    for file in files:
        if os.path.exists(file):
            loader = PyPDFLoader(file)
            documents.extend(loader.load())
        else:
            print(f'Warning: {file} not found, skipping...')
    return documents

## Chunking documents for better retrieval

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def chunk_documents(documents):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000, chunk_overlap=200
    )
    return text_splitter.split_documents(documents)

#  Generate Embeddings and Create FAISS Index

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

def create_faiss_index(docs, embedding_model='all-MiniLM-L6-v2'):
    try:
        print('create_faiss_index: downloading embedding model if not available...')
        embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
        vector_db = FAISS.from_documents(docs, embeddings)
        vector_db.save_local('vector-store/faiss_index')
        
        print('FAISS index created successfully.')
        return vector_db
    except Exception as e:
        print(f'unable to create FAISS index: {e}')
        return None

## Load Mistral-7B (Q4_K_M)

In [5]:
from llama_cpp import Llama

def load_llm(model_name='mistral-7b-instruct-v0.1.Q4_K_M.gguf'):
    model_path = os.path.join('models', model_name)
    if not os.path.exists(model_path):
        return None
    
    try:
        return Llama(model_path=model_path, n_gpu_layers=30, n_threads=8, n_ctx=1024)
    except Exception as e:
        print(f'Error loading mistral model: {e}')
        return None

## Implement Retrieval-Augmented Generation (RAG) with improved prompting

In [6]:
def rag_pipeline(query, vector_db, llm):
    if vector_db is None:
        return 'rag_pipeline: No vector database available.'
    
    retrieved_docs = vector_db.similarity_search(query, k=4)
    source_documents = [doc.metadata for doc in retrieved_docs]
    context = '\n'.join([doc.page_content for doc in retrieved_docs])
    
    prompt = f'''
    You are an AI assistant designed to answer questions about Kaung SiThu. 
    Be gentle, informative, and concise. If you don't know an answer, politely say no.
    
    Context:
    {context}
    
    User Question: {query}
    '''
    
    response = llm(prompt)
    return {
        'answer': response['choices'][0]['text'].strip(),
        'sources': source_documents
    }

## Evaluate retrieval and generation models

In [7]:
def evaluate_models():
    print('evaluate_model: evaluating retriever model (FAISS)...')
    try:
        faiss_index = FAISS.load_local(
            'faiss_index', 
            HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'), 
            allow_dangerous_deserialization=True
            )
        print(f'evaluate_model: FAISS index successfully loaded and functional.\n {faiss_index}')
    except Exception as e:
        print(f'evaluate_model: failed to load FAISS index: {e}')
    
    print('evaluate_model: evaluating generator model mistral...')
    try:
        llm = load_llm()    
        response = llm('evaluate_model: test query: What is machine learning?')
        print(f'evaluate_model: {response}')
    except Exception as e:
        print(f'evaluate_model: error with mistral model inference: {e}')

## Executing the pipeline

In [8]:
print('loading documents')
documents = load_documents()
docs = chunk_documents(documents)
docs

loading documents


[Document(metadata={'producer': 'Apache FOP Version 2.2', 'creator': 'PyPDF', 'creationdate': '2025-03-10T15:50:07+00:00', 'title': 'Resume', 'author': 'LinkedIn', 'subject': 'Resume generated from profile', 'source': 'docs/linkedin_profile.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content="Contact\n+959779056197 (Mobile)\nneucleyon@gmail.com\nwww.linkedin.com/in/kaung-\nsithu-634ab2160 (LinkedIn)\nwww.facebook.com/archx64/\n(Personal)\nTop Skills\nData Science\nArtificial Intelligence (AI)\nNeural Networks\nCertifications\nMachine Learning by Stanford\nUniversity & DeepLearning.AI on\nCoursera\nSpring Framework\nSupervised Machine Learning:\nRegression and Classification \nAdvanced Learning Algorithms\nUnsupervised Learning,\nRecommenders, Reinforcement\nLearning\nKaung SiThu\nData-Driven Problem Solver\nYangon, Myanmar\nSummary\nI'm just an ordinary guy who is enthusiastic on technology and\nscience\nExperience\nEngineerforce\nPython Developer\nMay 2023\xa0-\xa0Nove

In [9]:
print(f'size of documents: {len(docs)}')

size of documents: 2


In [10]:
print('Creating FAISS index...')
vector_db = create_faiss_index(docs)
vector_db

Creating FAISS index...
create_faiss_index: downloading embedding model if not available...
FAISS index created successfully.


<langchain_community.vectorstores.faiss.FAISS at 0x28158f7c6e0>

In [11]:
llm = load_llm()
evaluate_models()

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from models\mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 lla

evaluate_model: evaluating retriever model (FAISS)...


llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from models\mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 lla

evaluate_model: failed to load FAISS index: Error in __cdecl faiss::FileIOReader::FileIOReader(const char *) at D:\a\faiss-wheels\faiss-wheels\faiss\faiss\impl\io.cpp:68: Error: 'f' failed: could not open faiss_index\index.faiss for reading: No such file or directory
evaluate_model: evaluating generator model mistral...


load_tensors:   CPU_Mapped model buffer size =  4165.37 MiB
.................................................................................................
llama_init_from_model: n_seq_max     = 1
llama_init_from_model: n_ctx         = 1024
llama_init_from_model: n_ctx_per_seq = 1024
llama_init_from_model: n_batch       = 512
llama_init_from_model: n_ubatch      = 512
llama_init_from_model: flash_attn    = 0
llama_init_from_model: freq_base     = 10000.0
llama_init_from_model: freq_scale    = 1
llama_init_from_model: n_ctx_per_seq (1024) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
llama_kv_cache_init: kv_size = 1024, offload = 1, type_k = 'f16', type_v = 'f16', n_layer = 32, can_shift = 1
llama_kv_cache_init: layer 0: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024
llama_kv_cache_init: layer 1: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024
llama_kv_cache_init: layer 2: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024
llama_kv_cache_init: layer 3: n_embd_k_gqa = 1024, n_e

evaluate_model: {'id': 'cmpl-ea5892aa-f5be-424f-8fe8-698fcc60911a', 'object': 'text_completion', 'created': 1741883998, 'model': 'models\\mistral-7b-instruct-v0.1.Q4_K_M.gguf', 'choices': [{'text': '', 'index': 0, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 13, 'completion_tokens': 0, 'total_tokens': 13}}


In [12]:
questions = [   
    'How old is Kaung Sithu?',
    'What is Kaung Sithu\'s highest level of education?',
    'What major or field of study did Kaung pursue during your education?',
    'How many years of work experience does Kaung have',
    'What type of work or industry has Kaung been involved in?',
    'Can you describe Kaung\'s current role or job responsibilities?',
    'What are Kaung\'s core beliefs regarding the role of technology in shaping society?',
    'How does Kaung think cultural values should influence technological advancements?',
    'As a master’s student, what is the most challenging aspect of his studies so far?',
    'What specific research interests or academic goals does Kaung hope to achieve during your time as a master’s student?'
    ]
answers = []
for q in questions:
    result = rag_pipeline(q, vector_db, llm)
    answers.append({'question': q, 'answer': result['answer'], 'sources': result['sources']})
    print(f'Q: {q}\nResponse: {result['answer']}\nSources: {result['sources']}\n')

llama_perf_context_print:        load time =   54536.49 ms
llama_perf_context_print: prompt eval time =   54535.54 ms /   804 tokens (   67.83 ms per token,    14.74 tokens per second)
llama_perf_context_print:        eval time =    2606.61 ms /    15 runs   (  173.77 ms per token,     5.75 tokens per second)
llama_perf_context_print:       total time =   57151.48 ms /   819 tokens
Llama.generate: 793 prefix-match hit, remaining 16 prompt tokens to eval


Q: How old is Kaung Sithu?
Response: AI Assistant: I'm sorry, but I don't have
Sources: [{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-12T23:10:36+07:00', 'author': 'Kaung Sithu', 'moddate': '2025-03-12T23:10:36+07:00', 'source': 'docs/biography.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, {'producer': 'Apache FOP Version 2.2', 'creator': 'PyPDF', 'creationdate': '2025-03-10T15:50:07+00:00', 'title': 'Resume', 'author': 'LinkedIn', 'subject': 'Resume generated from profile', 'source': 'docs/linkedin_profile.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}]



llama_perf_context_print:        load time =   54536.49 ms
llama_perf_context_print: prompt eval time =    1196.60 ms /    16 tokens (   74.79 ms per token,    13.37 tokens per second)
llama_perf_context_print:        eval time =    2581.08 ms /    15 runs   (  172.07 ms per token,     5.81 tokens per second)
llama_perf_context_print:       total time =    3787.88 ms /    31 tokens
Llama.generate: 794 prefix-match hit, remaining 15 prompt tokens to eval


Q: What is Kaung Sithu's highest level of education?
Response: AI Assistant: Kaung Sithu holds a Bachelor of Engineering (
Sources: [{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-12T23:10:36+07:00', 'author': 'Kaung Sithu', 'moddate': '2025-03-12T23:10:36+07:00', 'source': 'docs/biography.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, {'producer': 'Apache FOP Version 2.2', 'creator': 'PyPDF', 'creationdate': '2025-03-10T15:50:07+00:00', 'title': 'Resume', 'author': 'LinkedIn', 'subject': 'Resume generated from profile', 'source': 'docs/linkedin_profile.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}]



llama_perf_context_print:        load time =   54536.49 ms
llama_perf_context_print: prompt eval time =    1090.86 ms /    15 tokens (   72.72 ms per token,    13.75 tokens per second)
llama_perf_context_print:        eval time =    2593.89 ms /    15 runs   (  172.93 ms per token,     5.78 tokens per second)
llama_perf_context_print:       total time =    3694.54 ms /    30 tokens
Llama.generate: 793 prefix-match hit, remaining 12 prompt tokens to eval


Q: What major or field of study did Kaung pursue during your education?
Response: AI: Kaung pursued a Bachelor of Engineering (BE) in
Sources: [{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-12T23:10:36+07:00', 'author': 'Kaung Sithu', 'moddate': '2025-03-12T23:10:36+07:00', 'source': 'docs/biography.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, {'producer': 'Apache FOP Version 2.2', 'creator': 'PyPDF', 'creationdate': '2025-03-10T15:50:07+00:00', 'title': 'Resume', 'author': 'LinkedIn', 'subject': 'Resume generated from profile', 'source': 'docs/linkedin_profile.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}]



llama_perf_context_print:        load time =   54536.49 ms
llama_perf_context_print: prompt eval time =     887.68 ms /    12 tokens (   73.97 ms per token,    13.52 tokens per second)
llama_perf_context_print:        eval time =    2545.50 ms /    15 runs   (  169.70 ms per token,     5.89 tokens per second)
llama_perf_context_print:       total time =    3443.15 ms /    27 tokens
Llama.generate: 793 prefix-match hit, remaining 15 prompt tokens to eval


Q: How many years of work experience does Kaung have
Response: Answer: Kaung has five years of work experience. He worked as
Sources: [{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-12T23:10:36+07:00', 'author': 'Kaung Sithu', 'moddate': '2025-03-12T23:10:36+07:00', 'source': 'docs/biography.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, {'producer': 'Apache FOP Version 2.2', 'creator': 'PyPDF', 'creationdate': '2025-03-10T15:50:07+00:00', 'title': 'Resume', 'author': 'LinkedIn', 'subject': 'Resume generated from profile', 'source': 'docs/linkedin_profile.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}]



llama_perf_context_print:        load time =   54536.49 ms
llama_perf_context_print: prompt eval time =    1104.62 ms /    15 tokens (   73.64 ms per token,    13.58 tokens per second)
llama_perf_context_print:        eval time =    2545.35 ms /    15 runs   (  169.69 ms per token,     5.89 tokens per second)
llama_perf_context_print:       total time =    3658.48 ms /    30 tokens
Llama.generate: 793 prefix-match hit, remaining 16 prompt tokens to eval


Q: What type of work or industry has Kaung been involved in?
Response: Answer: Kaung has been involved in the software development industry and has
Sources: [{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-12T23:10:36+07:00', 'author': 'Kaung Sithu', 'moddate': '2025-03-12T23:10:36+07:00', 'source': 'docs/biography.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, {'producer': 'Apache FOP Version 2.2', 'creator': 'PyPDF', 'creationdate': '2025-03-10T15:50:07+00:00', 'title': 'Resume', 'author': 'LinkedIn', 'subject': 'Resume generated from profile', 'source': 'docs/linkedin_profile.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}]



llama_perf_context_print:        load time =   54536.49 ms
llama_perf_context_print: prompt eval time =    1165.58 ms /    16 tokens (   72.85 ms per token,    13.73 tokens per second)
llama_perf_context_print:        eval time =    2528.89 ms /    15 runs   (  168.59 ms per token,     5.93 tokens per second)
llama_perf_context_print:       total time =    3704.54 ms /    31 tokens
Llama.generate: 793 prefix-match hit, remaining 20 prompt tokens to eval


Q: Can you describe Kaung's current role or job responsibilities?
Response: Answer: Kaung is currently pursuing a Master of Engineering in Data
Sources: [{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-12T23:10:36+07:00', 'author': 'Kaung Sithu', 'moddate': '2025-03-12T23:10:36+07:00', 'source': 'docs/biography.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, {'producer': 'Apache FOP Version 2.2', 'creator': 'PyPDF', 'creationdate': '2025-03-10T15:50:07+00:00', 'title': 'Resume', 'author': 'LinkedIn', 'subject': 'Resume generated from profile', 'source': 'docs/linkedin_profile.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}]



llama_perf_context_print:        load time =   54536.49 ms
llama_perf_context_print: prompt eval time =    1452.65 ms /    20 tokens (   72.63 ms per token,    13.77 tokens per second)
llama_perf_context_print:        eval time =    2527.81 ms /    15 runs   (  168.52 ms per token,     5.93 tokens per second)
llama_perf_context_print:       total time =    3990.58 ms /    35 tokens
Llama.generate: 793 prefix-match hit, remaining 15 prompt tokens to eval


Q: What are Kaung's core beliefs regarding the role of technology in shaping society?
Response: Answer: Kaung SiThu believes that technology has the potential to
Sources: [{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-12T23:10:36+07:00', 'author': 'Kaung Sithu', 'moddate': '2025-03-12T23:10:36+07:00', 'source': 'docs/biography.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, {'producer': 'Apache FOP Version 2.2', 'creator': 'PyPDF', 'creationdate': '2025-03-10T15:50:07+00:00', 'title': 'Resume', 'author': 'LinkedIn', 'subject': 'Resume generated from profile', 'source': 'docs/linkedin_profile.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}]



llama_perf_context_print:        load time =   54536.49 ms
llama_perf_context_print: prompt eval time =    1107.03 ms /    15 tokens (   73.80 ms per token,    13.55 tokens per second)
llama_perf_context_print:        eval time =    2535.64 ms /    15 runs   (  169.04 ms per token,     5.92 tokens per second)
llama_perf_context_print:       total time =    3651.40 ms /    30 tokens
Llama.generate: 793 prefix-match hit, remaining 21 prompt tokens to eval


Q: How does Kaung think cultural values should influence technological advancements?
Response: Answer: Kaung SiThu believes that cultural values play a crucial
Sources: [{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-12T23:10:36+07:00', 'author': 'Kaung Sithu', 'moddate': '2025-03-12T23:10:36+07:00', 'source': 'docs/biography.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, {'producer': 'Apache FOP Version 2.2', 'creator': 'PyPDF', 'creationdate': '2025-03-10T15:50:07+00:00', 'title': 'Resume', 'author': 'LinkedIn', 'subject': 'Resume generated from profile', 'source': 'docs/linkedin_profile.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}]



llama_perf_context_print:        load time =   54536.49 ms
llama_perf_context_print: prompt eval time =    1495.45 ms /    21 tokens (   71.21 ms per token,    14.04 tokens per second)
llama_perf_context_print:        eval time =    2608.47 ms /    15 runs   (  173.90 ms per token,     5.75 tokens per second)
llama_perf_context_print:       total time =    4113.40 ms /    36 tokens
Llama.generate: 793 prefix-match hit, remaining 25 prompt tokens to eval


Q: As a master’s student, what is the most challenging aspect of his studies so far?
Response: AI Assistant: As a master’s student, Kaung SiTh
Sources: [{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-12T23:10:36+07:00', 'author': 'Kaung Sithu', 'moddate': '2025-03-12T23:10:36+07:00', 'source': 'docs/biography.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, {'producer': 'Apache FOP Version 2.2', 'creator': 'PyPDF', 'creationdate': '2025-03-10T15:50:07+00:00', 'title': 'Resume', 'author': 'LinkedIn', 'subject': 'Resume generated from profile', 'source': 'docs/linkedin_profile.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}]



llama_perf_context_print:        load time =   54536.49 ms
llama_perf_context_print: prompt eval time =    1820.89 ms /    25 tokens (   72.84 ms per token,    13.73 tokens per second)
llama_perf_context_print:        eval time =    2880.03 ms /    15 runs   (  192.00 ms per token,     5.21 tokens per second)
llama_perf_context_print:       total time =    4709.57 ms /    40 tokens


Q: What specific research interests or academic goals does Kaung hope to achieve during your time as a master’s student?
Response: Answer: Kaung Sithu is currently pursuing a Master of
Sources: [{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-12T23:10:36+07:00', 'author': 'Kaung Sithu', 'moddate': '2025-03-12T23:10:36+07:00', 'source': 'docs/biography.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, {'producer': 'Apache FOP Version 2.2', 'creator': 'PyPDF', 'creationdate': '2025-03-10T15:50:07+00:00', 'title': 'Resume', 'author': 'LinkedIn', 'subject': 'Resume generated from profile', 'source': 'docs/linkedin_profile.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}]

