# Install the required libraries before running this script, e.g. from a shell
# (inside a Jupyter/Colab cell, prefix the command with "!"):
#   pip install -U langchain langchain-google-genai google-generativeai chromadb sentence-transformers langchain-community beautifulsoup4 html2text

from langsmith import Client
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
from dotenv import load_dotenv
import logging
import google.generativeai as genai
from langchain.memory import ConversationBufferMemory         #memory
from langchain.chains import ConversationalRetrievalChain

# Load and split the documents
# Web pages that make up the knowledge base.
urls = [
    "https://www.euronews.com/news/europe",
    "https://www.dw.com/en/top-stories/s-9097",
    "https://www.reuters.com/news/archive/worldNews",
    "https://apnews.com/hub/europe",
    "https://www.bbc.com/news/world/europe",
    "https://en.wikipedia.org/wiki/History_of_Europe",
    "https://european-union.europa.eu/news-and-events_en"
]
# NOTE: the manual ask_rag() helper is replaced by a LangChain
# ConversationalRetrievalChain that embeds both the memory and the retriever.
loader = WebBaseLoader(urls)
docs = loader.load()

# NOTE: the conversation memory and the ConversationalRetrievalChain are built
# further down, after `llm` and `retriever` have been created. Constructing them
# at this point raised NameError because neither name exists yet.


# Add one hand-written document (tagged with source "manually") to the loaded pages.
docs.append(
    Document(
        page_content="Sturm Eowyn ereignete sich in Österreich im Dezember 2024.",
        metadata={"source": "manually"}))

# Split every document into chunks (chunk_size=500, chunk_overlap=50) for embedding.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
split_docs = splitter.split_documents(docs)

# Configure API keys and logging
load_dotenv(".env")  # read variables from the local .env file
logging.basicConfig(level=logging.INFO)

class Config:
    """Mirror LangSmith settings into the LANGCHAIN_* environment variables."""

    @staticmethod
    def setup():
        """Copy each non-empty source variable onto its LANGCHAIN_* target.

        Source variables that are unset or empty are skipped, which leaves any
        existing value of the target variable untouched.
        """
        # (variable to read, variable to write), processed in this order.
        env_aliases = (
            ("LANGCHAIN_API_KEY", "LANGCHAIN_API_KEY"),
            ("LANGSMITH_ENDPOINT", "LANGCHAIN_ENDPOINT"),
            ("LANGSMITH_PROJECT", "LANGCHAIN_PROJECT"),
        )
        for source_name, target_name in env_aliases:
            value = os.getenv(source_name)
            if value:
                os.environ[target_name] = value

# Apply the LangSmith/LangChain environment settings, then hand the Google API key
# (read from the GOOGLE_API_KEY environment variable) to the generative AI client.
Config.setup()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Embed the chunks and store them
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Build the Chroma store from the split documents, using the "chroma_storage" directory.
vectordb = Chroma.from_documents(documents=split_docs, embedding=embedding, persist_directory="chroma_storage")
vectordb.persist()
# Retriever configured with search_kwargs k=5.
retriever = vectordb.as_retriever(search_kwargs={"k": 5})

# Create the LLM and the retrieval chain with conversation memory
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.4)
# The chain returns two outputs ("answer" and "source_documents"). The memory
# must be told which one to record; without output_key it raises ValueError
# ("One output key expected") when it tries to save the first turn.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer",
)
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,  # can be switched off if the sources are not needed
    verbose=True,  # helps while debugging
)

# Dialogue helper
def ask_rag_chat(question):
    """Ask the conversational RAG chain one question and print the exchange.

    Args:
        question: The user's question as plain text.

    Returns:
        The value stored under the "answer" key of the chain result.
    """
    # Calling the chain object directly (Chain.__call__) is deprecated in
    # LangChain; invoke() is the supported entry point and returns the same dict.
    result = qa_chain.invoke({"question": question})
    answer = result["answer"]
    print(f"Q: {question}\nA: {answer}")
    return answer

# Example dialogue: three questions sent one after another through the same chain
for follow_up in (
    "Was war Sturm Eowyn?",
    "Wo genau in Österreich?",
    "Gab es Verletzte?",
):
    ask_rag_chat(follow_up)

# Save the file
# NOTE(review): neither `Path` nor `code_with_memory` is defined or imported in
# the visible source, so these lines raise NameError as written — confirm where
# they are supposed to come from, or whether this block is leftover tooling code.
file_path = Path("/mnt/data/rag_project_with_memory.py")
file_path.write_text(code_with_memory)

file_path.name  # Name of the generated file, for the user
