In [None]:
pip install fastapi uvicorn langchain langchain-ollama langchain-community ollama python-multipart faiss-gpu faiss-cpu

Collecting fastapi
  Using cached fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)
Collecting uvicorn
  Using cached uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)
Collecting langchain
  Using cached langchain-0.3.27-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-ollama
  Using cached langchain_ollama-0.3.6-py3-none-any.whl.metadata (2.1 kB)
Collecting langchain-community
  Using cached langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting ollama
  Downloading ollama-0.5.3-py3-none-any.whl.metadata (4.3 kB)
Collecting starlette<0.48.0,>=0.40.0 (from fastapi)
  Using cached starlette-0.47.2-py3-none-any.whl.metadata (6.2 kB)
Collecting pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 (from fastapi)
  Using cached pydantic-2.11.7-py3-none-any.whl.metadata (67 kB)
Collecting annotated-types>=0.6.0 (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi)
  Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
Co

In [1]:
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi import Request
from pydantic import BaseModel

from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.documents import Document

from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader
from langchain_ollama import OllamaEmbeddings, OllamaLLM

from dotenv import load_dotenv
import os
import shutil

In [2]:
# Load env vars (if any)
load_dotenv()

False

In [3]:
# === Konfigurasi direktori upload
UPLOAD_DIR = "../bahan-chatbot/txt"
os.makedirs(UPLOAD_DIR, exist_ok=True)

In [4]:
app = FastAPI()

# === Aktifkan CORS agar bisa dipanggil dari frontend manapun
# Middleware CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Ubah sesuai kebutuhan
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

In [5]:
# === Variabel global
vectordb = None
qa_chain = None
retriever = None
loaded_files = []

In [6]:
# === Fungsi untuk memuat dan vektorisasi dokumen TXT
def load_documents():
    global vectordb, qa_chain, loaded_files, retriever

    folder_path = UPLOAD_DIR
    documents = []

    for filename in os.listdir(folder_path):
        if filename.endswith(".txt"):
            filepath = os.path.join(folder_path, filename)
            loader = TextLoader(filepath, encoding="utf-8")
            file_docs = loader.load()
            documents.extend(file_docs)
            if filename not in loaded_files:
                loaded_files.append(filename)

    print(f"✅ Jumlah chunks: {len(documents)}")

    if not documents:
        print("⚠️ Tidak ada dokumen yang dimuat.")
        return

    # Embedding & Vectorstore
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    vectordb = FAISS.from_documents(documents, embeddings)

    # Tambah retriever
    retriever = vectordb.as_retriever()

    # Language Model & QA Chain
    llm = OllamaLLM(model="llama3")
    qa_chain = load_qa_chain(llm, chain_type="stuff")


In [7]:
# === Endpoint upload file
@app.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    try:
        filename = file.filename
        file_ext = filename.split(".")[-1].lower()

        if file_ext not in ["txt", "pdf"]:
            return JSONResponse(content={"error": "Hanya file .txt atau .pdf yang diperbolehkan"}, status_code=400)

        save_path = os.path.join(UPLOAD_DIR, filename)
        with open(save_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        if file_ext == "txt":
            load_documents()

        return {"message": f"{filename} berhasil diupload"}
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

In [8]:
# Data model input chat
class ChatRequest(BaseModel):
    question: str

In [9]:
    
# Endpoint POST /chat
@app.post("/chat")
async def chat(req: ChatRequest):
    question = req.question
    print("Pertanyaan masuk:", question)
    
    docs = retriever.get_relevant_documents(question)
    answer = qa_chain.run(input_documents=docs, question=question)

    return JSONResponse(content={"answer": answer})

In [10]:
load_documents()

✅ Jumlah chunks: 14


stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  qa_chain = load_qa_chain(llm, chain_type="stuff")


In [11]:
question = "apa itu dtsen?"
print("Pertanyaan masuk:", question)

Pertanyaan masuk: apa itu dtsen?


In [12]:
docs = retriever.get_relevant_documents(question)

  docs = retriever.get_relevant_documents(question)


In [13]:
answer = qa_chain.run(input_documents=docs, question=question)
answer

  answer = qa_chain.run(input_documents=docs, question=question)


'Based on the provided context, DTSN (Data Terpadu Sistem Nasional) is a data system that contains information about individuals, including their demographic characteristics, education level, employment status, and other relevant details. It appears to be a comprehensive database that aims to provide insights into the socio-economic conditions of Indonesia.\n\nIn more detail, DTSEN seems to cover various aspects such as:\n\n1. Demographics: age, sex, marital status, and other individual characteristics.\n2. Education: level of education completed (e.g., primary school, secondary school, etc.), and the highest level of education attained.\n3. Employment: employment status (employed/unemployed), occupation, and industry sector.\n4. Housing: type of housing (owned/rented), size of living space, and other related details.\n\nThe DTSEN database is likely used for various purposes, such as:\n\n1. Research and analysis to inform policy decisions\n2. Monitoring and evaluation of socio-economic