In [2]:
import pandas as pd
import os
import re
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import Ollama
from langchain.schema import Document

# Load Dataset

In [3]:
file_path = "apple.csv"
df = pd.read_csv(file_path)
df

Unnamed: 0,Question,Answer
0,How do I take a screenshot on an iPhone?,"To take a screenshot on an iPhone, press and h..."
1,How do I change my wallpaper on an iPhone?,"To change your wallpaper on an iPhone, go to S..."
2,How do I make a phone call on an iPhone?,"To make a phone call on an iPhone, open the Ph..."
3,How do I send a text message on an iPhone?,"To send a text message on an iPhone, open the ..."
4,How do I use Siri on an iPhone?,"To use Siri on an iPhone, press and hold the H..."
...,...,...
1000,How to clear cache on Safari on my Mac for imp...,Go to Safari > Preferences > Privacy > Manage ...
1001,What to do if my AirPods Pro have low volume?,"Clean the earbuds, ensure they are correctly s..."
1002,Can I use a wireless trackpad with my Mac for ...,"Yes, you can use a wireless Bluetooth trackpad..."
1003,How to transfer files from my Mac to my iPhone...,"Use apps like Files, iCloud Drive, or third-pa..."


# Cleaning dataset

In [4]:
# Cek jumlah data yang duplikat
df.duplicated().sum()

np.int64(108)

In [5]:
df[df.duplicated].head()

Unnamed: 0,Question,Answer
80,How do I use the built-in Clock app to set mul...,"To set multiple alarms, open the Clock app and..."
82,How do I utilize the Maps app to explore publi...,"To explore public transportation options, open..."
83,How do I take advantage of the Safari web brow...,"To save a web page for offline reading, open t..."
84,How do I personalize my iPhone with custom sou...,"To assign custom sounds to contacts, open the ..."
85,How do I adjust my iPhone's accessibility sett...,"To adjust accessibility settings, go to Settin..."


In [6]:
# hapus data yang duplikat
df = df.drop_duplicates()

In [7]:
# Cek jumlah data yang duplikat
df.duplicated().sum()

np.int64(0)

In [8]:
# cek missing values
df.isna().sum()

Question    0
Answer      1
dtype: int64

In [9]:
# lihat baris yang kosong
df_missing = df[df.isna().any(axis=1)]
df_missing

Unnamed: 0,Question,Answer
270,"Sure, here is a dataset around how to do thing...",


In [10]:
# hapus missing values
df = df.dropna()

In [11]:
# cek missing values
df.isna().sum()

Question    0
Answer      0
dtype: int64

In [12]:
# cleaning data
def clean_text(text):
    if isinstance(text, str): # cek tipe data apakah string
        # hilangkan tab/enter jadi spasi
        text = re.sub(r"[\n\t\r\xa0]", " ", text)

        # hilangkan simbol aneh
        text = re.sub(r"[^a-zA-Z0-9\s.,!?;:'\"()\-]", "", text)

        # hilangkan spasi ganda jadi spasi
        text = re.sub(r"\s+", " ", text)
    
        # hapus spasi depan belakang
        return text.strip()
    
    return text # kalau bukan string kembalikan

df = df.applymap(clean_text)
df.head()

  df = df.applymap(clean_text)


Unnamed: 0,Question,Answer
0,How do I take a screenshot on an iPhone?,"To take a screenshot on an iPhone, press and h..."
1,How do I change my wallpaper on an iPhone?,"To change your wallpaper on an iPhone, go to S..."
2,How do I make a phone call on an iPhone?,"To make a phone call on an iPhone, open the Ph..."
3,How do I send a text message on an iPhone?,"To send a text message on an iPhone, open the ..."
4,How do I use Siri on an iPhone?,"To use Siri on an iPhone, press and hold the H..."


In [13]:
# buat csv menjadi dokumen
documents = []

for i, row in df.iterrows():
    q = row["Question"]
    a = row["Answer"]

    page_content = f"Question: {q}\nA: {a}"
    metadata = {
        "source": os.path.basename(file_path),
        "row" : i
    }

    documents.append(Document(page_content=page_content, metadata=metadata))

In [14]:
documents[0]

Document(metadata={'source': 'apple.csv', 'row': 0}, page_content='Question: How do I take a screenshot on an iPhone?\nA: To take a screenshot on an iPhone, press and hold the Side button and the Volume Up button simultaneously. The screen will flash briefly, and a thumbnail of the screenshot will appear in the bottom-left corner of the screen. Tap the thumbnail to view or edit the screenshot.')

# Chunking

In [15]:
# memecah teks yang besar menjadi beberapa bagian kecil
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=120
)
chunk = text_splitter.split_documents(documents)

In [16]:
print(f"Total Chunk : {len(chunk)}")
chunk[0]

Total Chunk : 898


Document(metadata={'source': 'apple.csv', 'row': 0}, page_content='Question: How do I take a screenshot on an iPhone?\nA: To take a screenshot on an iPhone, press and hold the Side button and the Volume Up button simultaneously. The screen will flash briefly, and a thumbnail of the screenshot will appear in the bottom-left corner of the screen. Tap the thumbnail to view or edit the screenshot.')

# Embedding dan Vector Store

In [17]:
# model embedding
embedding_model = HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")

  embedding_model = HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")
  from .autonotebook import tqdm as notebook_tqdm


In [18]:
# buat vektor store untuk simpan hasil perubahan dari teks ke vector angka
vector_db = Chroma.from_documents(
    documents = chunk,
    persist_directory = "chroma_db",
    embedding=embedding_model
)

# Retriever

In [19]:
# retriever untuk mencari dari db hasil yang paling mirip untuk diambil
retriever = vector_db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5}
)

In [20]:
# model LLM
llm = Ollama(model="llama3")

  llm = Ollama(model="llama3")


In [21]:
# sebagai mesin tanya jawab
qa_chain = RetrievalQA.from_chain_type(
    llm = llm, # model yang digunakan untuk menjawab pertanyaan
    retriever = retriever # mencari jawaban yang paling mirip dengan db
)

In [23]:
# test tanya jawab
question = "How to change wallpaper on iphone ?"
result = qa_chain({"query":question})
answer = result["result"]
print(f"Question : {question}\nAnswer : {answer}")

Question : How to change wallpaper on iphone ?
Answer : To change your wallpaper on an iPhone, go to Settings > Wallpaper. Select whether you want to change the wallpaper for your Lock Screen, Home Screen, or both. Choose from a variety of built-in wallpapers or use your own photos.
