In [None]:
import sqlite3
import pandas as pd

# 1. Load the scraped reviews from the local SQLite database into a DataFrame.
conn = sqlite3.connect("scraped_data.db")
try:
    # Read the complete `reviews` table in one query.
    df = pd.read_sql_query("SELECT * FROM reviews", conn)
finally:
    # Release the database handle as soon as the data is in memory, even if
    # the query fails. NOTE: `conn` is closed from here on; open a new
    # connection if a later cell needs database access.
    conn.close()

# Last expression of the cell: show the first rows as a quick sanity check.
df.head()

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import Ollama
from langchain.chains import RetrievalQA
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DataFrameLoader


In [None]:

# 2. Build documents from the DataFrame, then split them into chunks.
# The "Preprocessed_Long_Text" column is passed as the page-content column.
loader = DataFrameLoader(df, page_content_column="Preprocessed_Long_Text")
documents = loader.load()

# Splitter configured with chunk_size=512 and chunk_overlap=50.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=50,
)
docs = text_splitter.split_documents(documents)


In [None]:
# 3. Create the embeddings and build a FAISS vector store from the chunks
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
vectorstore = FAISS.from_documents(
    docs,
    embedding_model,
)

In [None]:
# 4. Prepare the LLM (Ollama wrapper configured for the "llama3.1:8b" model)
llm = Ollama(
    model="llama3.1:8b",
)


In [None]:
# 5. RetrievalQA chain
# The retriever is derived from the vector store with search_kwargs {"k": 5}.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# return_source_documents=True so the result also carries "source_documents",
# which the question cell prints.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)


In [None]:
# 6. Ask the LLM a question
query = "What is good about elden ring?"
result = qa_chain(query)

# Print the generated answer first.
print("Antwort:")
print(result["result"])

# Then print the first 200 characters of each returned source document.
print("\nQuellen (Ausschnitte):")
excerpts = (source.page_content[:200] for source in result["source_documents"])
for excerpt in excerpts:
    print(f" - {excerpt}...\n")
