# In [None]:
# Notebook: RAG_for_Tabular_Data.ipynb

# Import libraries
import os

import faiss
import numpy as np
import openai
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# 1. Load the product table and render each row as one descriptive sentence
def _describe_product(row):
    """Return the sentence that represents a single product row."""
    return f"Produk {row['nama_produk']} dari kategori {row['kategori']} memiliki harga {row['harga']} dan stok {row['stok']} unit."


data = pd.read_csv('products.csv')
data['text'] = data.apply(_describe_product, axis=1)

# Persist the id/text pairs so they can be re-indexed later
data.to_csv('transformed_products.csv', columns=['id', 'text'], index=False)

# 2. Encode every sentence with a SentenceTransformers model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
sentences = data['text'].tolist()
embeddings = model.encode(sentences)

# 3. Store the embeddings in a FAISS index that compares by L2 (Euclidean) distance
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.asarray(embeddings, dtype=np.float32))

# 4. Define a function for search query and retrieval
def retrieve(query, k=3):
    """Return the rows of ``data`` whose embedded text is closest to *query*.

    The query is encoded with the module-level ``model`` and looked up in the
    module-level FAISS ``index``; the matching positions are then used to
    select rows from ``data``.

    Args:
        query: Free-text search string.
        k: Maximum number of rows to return.

    Returns:
        A DataFrame slice of ``data`` with at most ``k`` rows, in the order
        FAISS returned them. Fewer than ``k`` rows are returned when the index
        holds fewer than ``k`` vectors.
    """
    query_vector = np.asarray(model.encode([query]), dtype=np.float32)
    _, neighbor_ids = index.search(query_vector, k)

    # FAISS pads the label array with -1 when it cannot find k neighbours.
    # Passing -1 to iloc would silently select the LAST row of the table, so
    # those padding entries have to be dropped before the lookup.
    hits = neighbor_ids[0]
    return data.iloc[hits[hits >= 0]]

# Try the retriever on a sample query and show the key columns of the hits
display_columns = ['id', 'nama_produk', 'kategori', 'harga', 'stok']
query = "Tampilkan produk elektronik dengan stok rendah"
retrieved = retrieve(query)
print("Hasil Pencarian:")
print(retrieved[display_columns])

# 5. Generate answers using the OpenAI API (example)
# Take the key from the OPENAI_API_KEY environment variable so a real secret
# never has to be saved inside the notebook. The placeholder is only a
# fallback and must be replaced with your own key if the variable is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'YOUR_API_KEY')

def generate_answer(query, context, llm_model="gpt-3.5-turbo-instruct", max_tokens=100):
    """Ask an OpenAI completion model to answer *query* using *context*.

    Args:
        query: The user's question; placed after ``Pertanyaan:`` in the prompt.
        context: Text describing the retrieved products; placed after
            ``Konsep Produk:`` in the prompt.
        llm_model: Name of the completion model to call. The previous
            hard-coded ``text-davinci-003`` has been retired by OpenAI, so the
            default is its replacement on the same Completions endpoint.
        max_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The text of the first completion choice, with surrounding whitespace
        removed.
    """
    prompt = f"Pertanyaan: {query}\n\nKonsep Produk: {context}\nJawaban:"
    # NOTE(review): `openai.Completion` is the pre-1.0 SDK interface; confirm
    # the pinned `openai` version before upgrading the package.
    response = openai.Completion.create(
        model=llm_model,
        prompt=prompt,
        max_tokens=max_tokens,
    )
    return response.choices[0].text.strip()

# Example: answer the sample query, using the retrieved sentences as context
context = " ".join(retrieved['text'])
answer = generate_answer(query, context)
print("Jawaban dari LLM:", answer, sep="\n")
