# Install dependencies

In [None]:
%pip install langchain openai faiss-cpu tiktoken sentence-transformers PyMuPDF ipywidgets deep-translator

In [None]:
# Write the output of `pip freeze` (the currently installed packages and
# their versions) to requirements.txt.
%pip freeze > requirements.txt

# PDF faylni yuklab, hujjatni chunklash

In [None]:
# 2.1 Load the PDF file and convert it to text
# Imported from langchain_community (the package this notebook already uses
# for the Ollama LLM); `langchain.document_loaders` is a deprecated alias.
from langchain_community.document_loaders import PyMuPDFLoader

# Path to the PDF file
pdf_path = "data/book_1.pdf"

# Load the document; the message below reports the loaded items as pages
loader = PyMuPDFLoader(pdf_path)
documents = loader.load()

print(f"{len(documents)} ta sahifa yuklandi.")
# Preview the first 300 characters of the first page; skipped when the
# loader returned nothing, so an empty PDF does not raise IndexError.
if documents:
    print(documents[0].page_content[:300])


319 ta sahifa yuklandi.
JO N A V A R R O
SIZ
NIMANI 0‘YLASANGIZ
MEN
0‘SHANI KO'RAMAN



In [83]:
# 2.2 Chunking (split the loaded text into smaller pieces)
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: chunk_size=500 with chunk_overlap=100, so neighbouring
# chunks share some text.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=500)

# Split every loaded document and report how many chunks were produced.
chunks = text_splitter.split_documents(documents)
print(f"{len(chunks)} ta chunk tayyor bo‘ldi.")


1132 ta chunk tayyor bo‘ldi.


# Embedding + FAISS Index yaratish

In [84]:
# 3.1 Load the embedding model
# Imported from langchain_community for consistency with the other
# langchain_community imports in this notebook; `langchain.embeddings`
# is a deprecated alias.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model used for both indexing and querying.
# Alternative model: "intfloat/multilingual-e5-small"
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# --------------- Ollama Embeddings ----------------
# To use Ollama embeddings instead, run the code below
# (only if Ollama is installed):
# from langchain_community.embeddings import OllamaEmbeddings
# embedding_model = OllamaEmbeddings(model="nomic-embed-text")



In [85]:
# 3.2 Store the chunks in a FAISS index
# Imported from langchain_community for consistency with the other
# langchain_community imports in this notebook; `langchain.vectorstores`
# is a deprecated alias.
from langchain_community.vectorstores import FAISS

# Embed every chunk with `embedding_model` and build the vector store
vector_db = FAISS.from_documents(chunks, embedding_model)

# Persist the index to the local "faiss_index" folder (reloaded in the next step)
vector_db.save_local("faiss_index")


# Retrieverni sozlash va LLM bilan birlashtirish

In [86]:
# 4.1 Load the FAISS index
import inspect

from langchain_community.vectorstores import FAISS

# The saved index is deserialized with pickle. Newer langchain_community
# releases refuse to load it unless the caller opts in explicitly; opting in
# is acceptable here only because "faiss_index" was written by this notebook.
# Older releases do not know the flag, so it is passed only when supported.
load_kwargs = {}
if "allow_dangerous_deserialization" in inspect.signature(FAISS.load_local).parameters:
    load_kwargs["allow_dangerous_deserialization"] = True

# Load the previously saved index
vector_db = FAISS.load_local("faiss_index", embedding_model, **load_kwargs)

# Build the retriever: similarity search with k=6 results per query
retriever = vector_db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6}
)


In [None]:
# 4.2 Configure the LLM (OpenAI variant is commented out; Ollama is active)

# from langchain.llms import OpenAI

# import os

# # OpenAI API key
# os.environ["OPENAI_API_KEY"] = "sk-proj-***"  # <-- put your own API key here (paid)

# # Start the LLM (GPT-3.5)
# llm = OpenAI(model_name="gpt-3.5-turbo", temperature=0)


# -------------------- If you want to use Ollama --------------------
# Download the Ollama model first:
# ollama run llama3 or (mistral)

from langchain_community.llms import Ollama

llm = Ollama(model="llama3")



In [96]:
# Custom prompt: restricts the model to the supplied context and asks it to
# answer "I don't know." when the context does not contain the answer.
from langchain.prompts import PromptTemplate

# {context} and {question} are the two placeholders filled in at query time.
template = """
You are an intelligent AI assistant helping a user who speaks English.
The user will ask you a question, and you should provide a clear, concise, and reliable answer based on the following context.
Answer the question based only on the context below.
If the answer is not in the context, say "I don't know."

📚 Context:
{context}

❓ Question: {question}

🤖 Answer:
"""

custom_prompt = PromptTemplate(template=template, input_variables=["context", "question"])

In [97]:
# Wrap the custom prompt in a documents-combining chain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains import LLMChain

# LLM + custom prompt
qa_prompt_chain = LLMChain(prompt=custom_prompt, llm=llm)

# The documents handed to this chain are supplied to the prompt through its
# "context" variable.
combine_docs_chain = StuffDocumentsChain(
    document_variable_name="context",
    llm_chain=qa_prompt_chain,
)

In [98]:
# 4.3 Build the RAG chain
from langchain.chains import RetrievalQA

# Connect the retriever to the custom combine-documents chain and ask for the
# source documents to be returned alongside the answer.
# (The earlier variant, RetrievalQA.from_chain_type(llm=llm, retriever=retriever,
# return_source_documents=True), did not pass the custom prompt.)
qa_chain = RetrievalQA(
    combine_documents_chain=combine_docs_chain,
    retriever=retriever,
    return_source_documents=True,
)



In [99]:
# Translator
from deep_translator import GoogleTranslator

def translate_text(txt_input, source='uz', target='en'):
    """
    Translate text from one language to another using GoogleTranslator.

    Args:
        txt_input: Text to translate.
        source: Language code of the input text (default 'uz').
        target: Language code to translate into (default 'en').

    Returns:
        The translated text, or an empty string when the input is empty or
        contains only whitespace.
    """
    # Nothing to translate: return early instead of handing blank text to the
    # translator, whose handling of empty input may differ between versions.
    if isinstance(txt_input, str) and not txt_input.strip():
        return ""
    return GoogleTranslator(source=source, target=target).translate(txt_input)


In [None]:
# Ask a question and print the answer
input_uzbek = "Qanday yolg‘onni aniqlash mumkin?"
print("❓ Savol:", input_uzbek)

# The prompt is written in English, so the question is translated
# (uz -> en) before it is sent to the chain.
input_english = translate_text(input_uzbek)
print("🔄 Ingliz tiliga tarjima:", input_english)

# Get the answer
result = qa_chain.invoke({"query": input_english})

output_english = result["result"]
# print("🔄 Javob ingliz tilida:", output_english)
print("-"*50)
# Translate the answer back (en -> uz) for display
output_uzbek = translate_text(output_english, source='en', target='uz')
print("🤖 Javob o'zbek tilida:\n", output_uzbek)

# List the source documents the chain returned with the answer
print("\n📖 Manbalar:")
for doc in result["source_documents"]:
    # Both metadata lookups use .get with the same fallback so a missing
    # key prints a placeholder instead of raising KeyError.
    print("Fayl:", doc.metadata.get("source", "Noma’lum"))
    print("Sahifa:", doc.metadata.get("page", "Noma’lum"))
    print("Matn:\n", doc.page_content)
    print("------")


❓ Savol: Qanday yolg‘onni aniqlash mumkin?
🔄 Ingliz tiliga tarjima: What lies can be determined?
--------------------------------------------------
🤖 Javob o'zbek tilida:

 Taqdim etilgan kontekst asosida, matn yolg'on va yolg'onni aniqlash usullarini muhokama qilmoqda. Muallif kimningdir haqiqatni aytayotgan yoki yo'qligini aniqlashga urinayotganda noilojma inshootlari va xatti-harakatlariga e'tibor berish muhimligini ta'kidlaydi.

Shu nuqtai nazardan, sizning savolingizga javob: "Yolgun Fosh Qiluvchi Savollar Berish Kerak" deb tarjima qilinadi, bu "siz yolg'onni ochib beradigan savollarni berishingiz kerak" deb tarjima qilinadi.

📖 Manbalar:
Fayl: data/book_2.pdf
Sahifa: 63
Matn:
 qilish yoki aytilmay yashirishga harakat qilinayotgan 
xabami aniqlashda yordam beradi.
Mening fikrimcha, haqqoniy ma’lumot olishga uri- 
nishdan ko‘ra, tinchlantiruvchi harakatni kuzatish ter-
------
Fayl: data/book_2.pdf
Sahifa: 288
Matn:
 kuzatish kerak.
Yolg‘onni fosh qiluvchi savollar berish kerak. Bun