In [13]:
import os
from dotenv import load_dotenv
from pathlib import Path

dotenv_path = Path('..') / '.env'
load_dotenv(dotenv_path=dotenv_path, verbose=True, override=True) 

# Теперь вы можете использовать переменные
DATABASE_URL = os.getenv("URL")
API_KEY = os.getenv("DEEPSEEK_API_KEY")

print(f"Database URL loaded: {DATABASE_URL}")
print(f"Secret Key loaded: {API_KEY}")

Database URL loaded: http://91.219.226.145:11666/v1
Secret Key loaded: sk-7c8d537e261348f2a64876ca6350ace5


In [14]:
import time
import docker
from psycopg import connect
from psycopg.rows import dict_row
from sentence_transformers import SentenceTransformer
import numpy as np
from tqdm import tqdm

PG_IMG   = "pgvector/pgvector:pg16"
PG_NAME  = "pgv-demo"
PG_PORT  = 5431            # порт на хосте
PG_USER  = "admin"
PG_PASS  = "secret"
PG_DB    = "testdb"
EMB_DIM = 384

client = docker.from_env()

# если контейнер уже есть - удалим
try:
    client.containers.get(PG_NAME).remove(force=True)
except docker.errors.NotFound:
    pass

container = client.containers.run(
    PG_IMG,
    name=PG_NAME,
    environment={
        "POSTGRES_USER": PG_USER,
        "POSTGRES_PASSWORD": PG_PASS,
        "POSTGRES_DB": PG_DB
    },
    ports={"5432/tcp": PG_PORT},
    detach=True,
    remove=False
)

container.exec_run(
    f'psql -h localhost -U {PG_USER} -d {PG_DB} '
    '-c "CREATE EXTENSION IF NOT EXISTS vector;"',
    user="postgres"
)
print("Расширения установлены")

DDL2 = f"""
DROP TABLE IF EXISTS docs;
CREATE TABLE docs (
  id   BIGSERIAL PRIMARY KEY,
  text TEXT NOT NULL,
  emb  VECTOR({EMB_DIM}) NOT NULL
);
"""
with connect(host="localhost", port=PG_PORT, user=PG_USER,
             password=PG_PASS, dbname=PG_DB) as con:
    with con.cursor() as cur:
        cur.execute(DDL2)
        con.commit()

Расширения установлены


OperationalError: connection failed: connection to server at "127.0.0.1", port 5431 failed: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.

In [9]:
DDL = f"""
DROP TABLE IF EXISTS docs;
CREATE TABLE docs (
  id   BIGSERIAL PRIMARY KEY,
  text TEXT NOT NULL,
  emb  VECTOR({EMB_DIM}) NOT NULL
);
"""
with connect(host="localhost", port=PG_PORT, user=PG_USER,
             password=PG_PASS, dbname=PG_DB) as con:
    with con.cursor() as cur:
        cur.execute(DDL)
        con.commit()

UndefinedObject: type "vector" does not exist
LINE 6:   emb  VECTOR(384) NOT NULL
               ^

In [None]:
import psycopg

conn_info = dict(user=PG_USER, password=PG_PASS, dbname=PG_DB, host="localhost", port=PG_PORT)
for i in range(90):
    try:
        with psycopg.connect(**conn_info) as conn:
            with conn.cursor() as cur:
                cur.execute("SELECT 1;")
                cur.fetchone()
        break
    except Exception:
        time.sleep(1)
else:
    raise RuntimeError("Postgres did not become ready in time.")

print("Postgres is ready ✅")

Postgres is ready ✅


In [None]:
import nltk
import re

nltk.download('movie_reviews')
from nltk.corpus import movie_reviews

fileids = movie_reviews.fileids()

def tidy(s: str) -> str:
    s = " ".join(s.split())                          # схлопнуть все виды пробелов/переводы строк
    s = re.sub(r"\s+([.,;:!?…])", r"\1", s)          # убрать пробелы ПЕРЕД . , ; : ! ? …
    s = re.sub(r"([(\[{«“])\s+", r"\1", s)           # убрать пробел ПОСЛЕ ( [ { « “
    s = re.sub(r"\s+([)\]}»”])", r"\1", s)           # убрать пробел ПЕРЕД ) ] } » ”
    return s.strip()[:500]

texts = [tidy(movie_reviews.raw(fid)) for fid in fileids]

texts = texts[:100]
print("Документов:", len(texts), "\nПример:", texts[2][:50].replace("\n"," ") + "…")

[nltk_data] Downloading package movie_reviews to
[nltk_data]     /home/shcher/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


Документов: 100 
Пример: it is movies like these that make a jaded movie vi…


In [None]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2") 
EMB_DIM = 384

DDL = f"""
DROP TABLE IF EXISTS docs;
CREATE TABLE docs (
  id   BIGSERIAL PRIMARY KEY,
  text TEXT NOT NULL,
  emb  vector({EMB_DIM}) NOT NULL
);
"""
with connect(host="localhost", port=PG_PORT, user=PG_USER,
             password=PG_PASS, dbname=PG_DB) as con:
    with con.cursor() as cur:
        cur.execute(DDL)
        con.commit()

def encode_texts(batch):
    X = model.encode(batch, batch_size=64, show_progress_bar=False,
                     normalize_embeddings=True)
    return X.astype(np.float32)

embs = []
B = 10
for i in tqdm(range(0, len(texts), B)):
    embs.append(encode_texts(texts[i:i+B]))
embs = np.vstack(embs)
embs.shape

rows = [(texts[i], embs[i].tolist()) for i in range(len(texts))]
with connect(host="localhost", port=PG_PORT, user=PG_USER,
             password=PG_PASS, dbname=PG_DB) as con:
    with con.cursor() as cur:
        cur.executemany(
            "INSERT INTO docs (text, emb) VALUES (%s, %s)",
            rows
        )
        con.commit()

UndefinedObject: type "vector" does not exist
LINE 6:   emb  vector(384) NOT NULL
               ^

In [None]:
import os
from dotenv import load_dotenv

current_dir = os.getcwd()
parent_dir = os.path.join(current_dir, '..') 
dotenv_path = os.path.join(parent_dir, '.env')
load_dotenv(dotenv_path)

In [None]:
from langchain_mcp_adapters.client import MultiServerMCPClient
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

load_dotenv()

MODEL_PROVIDER = os.getenv("MODEL_PROVIDER", "open")  # "open" | "deepseek"
COMMON_KW = dict(max_completion_tokens=6000, temperature=0.7, seed=42, top_p=0.9)

if MODEL_PROVIDER == "open":
    llm = ChatOpenAI(
        model=os.getenv("OPEN_MODEL_NAME", "cpatonn/Qwen3-Next-80B-A3B-Instruct-AWQ-4bit"),
        base_url=os.getenv("OPEN_BASE_URL", DATABASE_URL),
        api_key=os.getenv("API_KEY", "EMPTY"),
        **COMMON_KW
    )
    print("success")
elif MODEL_PROVIDER == "deepseek":
    llm = ChatOpenAI(
        model=os.getenv("DEEPSEEK_MODEL", "deepseek-chat"),
        base_url=os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1"),
        api_key=os.getenv("DEEPSEEK_API_KEY", "sk-7c8d537e261348f2a64876ca6350ace5"),
        **COMMON_KW
    )
else:
    raise ValueError("MODEL_PROVIDER должен быть 'open' или 'deepseek'")

client = MultiServerMCPClient({
    "file_reader": {
        "transport": "streamable_http",
        "url": "http://127.0.0.1:8011/mcp"
    },
    "texts_db": {
        "transport": "streamable_http",
        "url": "http://127.0.0.1:8000/mcp"        
    }
})


tools = await client.get_tools()
print("TOOL LIST:", [t.name for t in tools])

success


  + Exception Group Traceback (most recent call last):
  |   File "/home/shcher/venvs/ml/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3665, in run_code
  |     await eval(code_obj, self.user_global_ns, self.user_ns)
  |   File "/tmp/ipykernel_4713/3696479541.py", line 41, in <module>
  |     tools = await client.get_tools()
  |             ^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/home/shcher/venvs/ml/lib/python3.12/site-packages/langchain_mcp_adapters/client.py", line 157, in get_tools
  |     tools_list = await asyncio.gather(*load_mcp_tool_tasks)
  |                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/home/shcher/venvs/ml/lib/python3.12/site-packages/langchain_mcp_adapters/tools.py", line 189, in load_mcp_tools
  |     async with create_session(connection) as tool_session:
  |   File "/usr/lib/python3.12/contextlib.py", line 231, in __aexit__
  |     await self.gen.athrow(value)
  |   File "/home/shcher/venvs/ml/lib/python3.12/site-packages/l

In [None]:
from langgraph.prebuilt import create_react_agent


agent = create_react_agent(
    llm,
    tools,
    debug=True
)

# Проверка read_file_mcp на своем csv

In [None]:
sql_response = await agent.ainvoke(
    {"messages": [{"role": "user", "content": "Найди в папке lecture_2/sql_results файл works.csv и кратко опиши его содержимое"}]}
)
print(sql_response["messages"].pop().content)

[1m[values][0m {'messages': [HumanMessage(content='Найди в папке lecture_2/sql_results файл works.csv и кратко опиши его содержимое', additional_kwargs={}, response_metadata={}, id='a7f546ef-1cdd-4d5d-ab7c-ea0cc3f97042')]}
[1m[updates][0m {'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-a5cd58f1dfb040539bd2a12d0699d62c', 'function': {'arguments': '{"path": "lecture_2/sql_results", "pattern": "works.csv"}', 'name': 'ls'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 619, 'total_tokens': 649, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'cpatonn/Qwen3-Next-80B-A3B-Instruct-AWQ-4bit', 'system_fingerprint': None, 'id': 'chatcmpl-76718c004fa142b090e55ce1c4f260dc', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--1bb1bc20-677a-47ce-901f-cee107c534ca-0', tool_calls=[{'name': 'ls', 'args': {'path': '

In [None]:
from langchain_core.messages import HumanMessage
conversation_messages = sql_response["messages"].copy()
conversation_messages.append(HumanMessage(content="Какая статья является самой цитирумой? Выведи название"))

sql_response_2 = await agent.ainvoke({"messages": conversation_messages})
print(sql_response_2["messages"].pop().content)

[1m[values][0m {'messages': [HumanMessage(content='Найди в папке lecture_2/sql_results файл works.csv и кратко опиши его содержимое', additional_kwargs={}, response_metadata={}, id='a7f546ef-1cdd-4d5d-ab7c-ea0cc3f97042'), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-a5cd58f1dfb040539bd2a12d0699d62c', 'function': {'arguments': '{"path": "lecture_2/sql_results", "pattern": "works.csv"}', 'name': 'ls'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 619, 'total_tokens': 649, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'cpatonn/Qwen3-Next-80B-A3B-Instruct-AWQ-4bit', 'system_fingerprint': None, 'id': 'chatcmpl-76718c004fa142b090e55ce1c4f260dc', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--1bb1bc20-677a-47ce-901f-cee107c534ca-0', tool_calls=[{'name': 'ls', 'args': {'path': 'lecture_2/sql_results', 'pattern': 'works.c

In [None]:
conversation_messages.append(HumanMessage(content="Какое максимальное число соавторов одной статьи имеется в таблице? выведи их имена"))

sql_response_2 = await agent.ainvoke({"messages": conversation_messages})
print(sql_response_2["messages"].pop().content)

[1m[values][0m {'messages': [HumanMessage(content='Найди в папке lecture_2/sql_results файл works.csv и кратко опиши его содержимое', additional_kwargs={}, response_metadata={}, id='a7f546ef-1cdd-4d5d-ab7c-ea0cc3f97042'), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-a5cd58f1dfb040539bd2a12d0699d62c', 'function': {'arguments': '{"path": "lecture_2/sql_results", "pattern": "works.csv"}', 'name': 'ls'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 619, 'total_tokens': 649, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'cpatonn/Qwen3-Next-80B-A3B-Instruct-AWQ-4bit', 'system_fingerprint': None, 'id': 'chatcmpl-76718c004fa142b090e55ce1c4f260dc', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--1bb1bc20-677a-47ce-901f-cee107c534ca-0', tool_calls=[{'name': 'ls', 'args': {'path': 'lecture_2/sql_results', 'pattern': 'works.c

# Проверка vec_mcp на загруженной в postgres базе данных

In [None]:
sql_response = await agent.ainvoke(
    {"messages": [{"role": "user", "content": "Найди обзоры фильмов, касающихся romance, перечисли id и предложение о романтике трех из них"}]}
)
print(sql_response["messages"].pop().content)

[1m[values][0m {'messages': [HumanMessage(content='Найди обзоры фильмов, касающихся romance, перечисли id и предложение о романтике трех из них', additional_kwargs={}, response_metadata={}, id='e62a2a43-5bc6-40ba-a135-2ab0c26a8bfe')]}
[1m[updates][0m {'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-b41e54f7c3f647178bb48b0229095334', 'function': {'arguments': '{"query": "romance", "k": 3}', 'name': 'search_vector'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 877, 'total_tokens': 904, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'cpatonn/Qwen3-Next-80B-A3B-Instruct-AWQ-4bit', 'system_fingerprint': None, 'id': 'chatcmpl-e08c7bcc3360469288d1cfe52f4a2d0b', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--80c46185-5555-48d4-9494-1e8601692d15-0', tool_calls=[{'name': 'search_vector', 'args': {'que

In [None]:
sql_response = await agent.ainvoke(
    {"messages": [{"role": "user", "content": "Найди обзоры фильмов, касающихся романтики, перечисли id и предложение о романтике трех из них"}]}
)
print(sql_response["messages"].pop().content)

#Русский "романтики" сильно хуже "romance"

[1m[values][0m {'messages': [HumanMessage(content='Найди обзоры фильмов, касающихся романтики, перечисли id и предложение о романтике трех из них', additional_kwargs={}, response_metadata={}, id='5a3041ca-f9b3-432f-99ce-782b118b8679')]}
[1m[updates][0m {'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-dea2bfba1f284b6ebd031d1ada615fe7', 'function': {'arguments': '{"query": "романтика в фильмах", "k": 3}', 'name': 'search_vector'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 878, 'total_tokens': 910, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'cpatonn/Qwen3-Next-80B-A3B-Instruct-AWQ-4bit', 'system_fingerprint': None, 'id': 'chatcmpl-b6fe868b098c42ff8a34c8e4bcc940a0', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--2d5f7ef5-8645-487f-85dd-4f1db7fc4df1-0', tool_calls=[{'name': 'search_vector',