In [None]:
%pip install chromadb openai python-dotenv pydantic sentence-transformers

In [1]:
import asyncio
import json
import os
from typing import Any, Dict, List, Literal, Optional

from chromadb import AsyncHttpClient
from dotenv import load_dotenv
from openai import AsyncOpenAI
from pydantic import BaseModel, Field
from sentence_transformers import SentenceTransformer

load_dotenv()

# Model and settings
QWEN_MODEL = "Qwen/Qwen2.5-14B-Instruct"
vllm_client = AsyncOpenAI(
    base_url="http://154.20.254.95:50856/v1", api_key="dummy_key"
)

# Initialize Sentence Transformer for embeddings
embedding_model = SentenceTransformer("sentence-transformers/LaBSE")

# Initialize Chroma
chroma_client = await AsyncHttpClient(host="95.181.175.113", port=8000)
collection = await chroma_client.get_or_create_collection("rzd_documents")

# Models for structured output
class DocumentReference(BaseModel):
    # One knowledge-base document cited in an answer; used as the element
    # type of Answer.relevant_documents.
    title: str  # document title
    section: str  # section within the document
    relevance: Literal['high', 'medium', 'low']  # restricted to these three labels

class ThinkStep(BaseModel):
    # One step of the model's chain of thought; used as the element type of
    # Answer.thinking_steps.
    reasoning: str  # the reasoning text for this step
    conclusion: str  # the conclusion drawn from that reasoning

class Checklist(BaseModel):
    # Self-check flags attached to an answer. The two booleans are required;
    # additional_context is optional and defaults to None.
    direct_answer: bool = Field(..., description='Direct answer to the question provided')
    document_reference: bool = Field(..., description='Reference to the relevant document')
    additional_context: Optional[str] = Field(None, description='Additional context if necessary')

class Answer(BaseModel):
    # Structured representation of a chat-bot reply, built by parse_response
    # and by the error paths of ask_question.
    question_type: Literal['policy', 'procedure', 'safety', 'general', 'greeting']  # closed set of categories
    relevant_documents: List[DocumentReference]  # documents cited by the model (may be empty)
    thinking_steps: List[ThinkStep]  # reasoning steps reported by the model (may be empty)
    final_answer: str  # text shown to the user
    confidence: float = Field(..., ge=0, le=1)  # required, validated to lie in [0, 1]
    checklist: Checklist  # self-check flags for this answer

# Functions for working with the knowledge base
def create_embeddings(texts: List[str]):
    """Encode ``texts`` with the module-level ``embedding_model``.

    Args:
        texts: Strings to embed.

    Returns:
        Whatever ``embedding_model.encode`` returns for ``texts``; callers in
        this file index it per input text and call ``.tolist()`` on an item.
    """
    vectors = embedding_model.encode(texts)
    return vectors

async def get_relevant_documents(query: str):
    """Look up the knowledge-base entries closest to ``query``.

    The query is embedded with ``create_embeddings`` and sent to the
    module-level Chroma ``collection``, asking for 5 results.

    Args:
        query: The user's question.

    Returns:
        The first entry of the ``"documents"`` field of the query result,
        presumably the matched document texts for the single query embedding
        (verify against the Chroma response format).
    """
    embeddings = create_embeddings([query])
    query_vector = embeddings[0].tolist()
    response = await collection.query(
        query_embeddings=[query_vector],
        n_results=5,
    )
    matched = response["documents"]
    return matched[0]

def _split_json_and_text(response_text: str):
    """Split a model reply into its first JSON object and the text after it.

    Returns:
        A ``(structured_output, textual_answer)`` pair. When the reply holds
        no ``{`` or the JSON cannot be decoded, ``structured_output`` is an
        empty dict and ``textual_answer`` is the whole stripped reply.
    """
    json_start = response_text.find('{')
    if json_start == -1:
        # No JSON at all: the whole reply is the textual answer
        return {}, response_text.strip()

    try:
        # raw_decode stops at the end of the first complete JSON value, so a
        # '}' inside the free-text answer that follows cannot corrupt the
        # parse (searching for the last '}' in the reply could).
        structured_output, json_end = json.JSONDecoder().raw_decode(
            response_text, json_start
        )
    except json.JSONDecodeError as e:
        print(f'Ошибка при парсинге ответа: {str(e)}')
        return {}, response_text.strip()

    return structured_output, response_text[json_end:].strip()


def parse_response(response_text: str) -> Answer:
    """Convert the raw model reply into an ``Answer``.

    The reply is expected to contain a JSON object followed by the textual
    answer. Missing keys and explicit JSON ``null`` values fall back to
    defaults, and an unknown ``question_type`` is replaced with ``'general'``.

    Args:
        response_text: Raw content of the model's message.

    Returns:
        The parsed ``Answer``. If building it fails for any reason, a
        zero-confidence ``Answer`` is returned whose ``final_answer`` is the
        extracted text if any, otherwise the raw reply, otherwise a fixed
        message.
    """
    textual_answer = ''
    try:
        structured_output, textual_answer = _split_json_and_text(response_text)

        # Validate and correct question_type
        question_type = structured_output.get('question_type', 'general')
        if question_type not in ('policy', 'procedure', 'safety', 'general', 'greeting'):
            question_type = 'general'

        # The model may emit explicit nulls; `or` treats them like missing keys
        checklist_data = structured_output.get('checklist') or {}
        checklist = Checklist(
            direct_answer=checklist_data.get('direct_answer') or False,
            document_reference=checklist_data.get('document_reference') or False,
            additional_context=checklist_data.get('additional_context'),
        )

        confidence = structured_output.get('confidence')
        documents = structured_output.get('relevant_documents') or []
        steps = structured_output.get('thinking_steps') or []

        return Answer(
            question_type=question_type,
            relevant_documents=[DocumentReference(**doc) for doc in documents],
            thinking_steps=[ThinkStep(**step) for step in steps],
            # Prefer the free text after the JSON, then the JSON's own field
            final_answer=(
                textual_answer
                or structured_output.get('final_answer')
                or 'Ответ не предоставлен.'
            ),
            confidence=0.0 if confidence is None else confidence,
            checklist=checklist,
        )
    except Exception as e:
        # Deliberate best-effort boundary: any parsing or validation problem
        # degrades to a plain-text answer instead of failing the chat turn.
        print(f'Ошибка при парсинге ответа: {str(e)}')
        return Answer(
            question_type='general',
            relevant_documents=[],
            thinking_steps=[],
            # Keep the already-extracted text so the user is not shown raw JSON
            final_answer=(
                textual_answer
                or response_text
                or 'Не удалось получить структурированный ответ.'
            ),
            confidence=0.0,
            checklist=Checklist(direct_answer=False, document_reference=False),
        )

async def print_real_time_answer(answer: str):
    """Print ``answer`` one character at a time for a typing effect.

    Each character is flushed immediately and followed by a 25 ms pause;
    a newline is printed once the whole answer has been written.

    Args:
        answer: Text to display.
    """
    pause_seconds = 0.025  # 25 ms between characters
    for symbol in answer:
        print(symbol, end="", flush=True)
        await asyncio.sleep(pause_seconds)
    # Terminate the line after the last character
    print()

# Main function for asking a question
async def ask_question(question: str):
    """Answer ``question`` using retrieved documents and the chat model.

    Documents are fetched with ``get_relevant_documents``, embedded in a
    prompt together with step-by-step and checklist instructions, and sent
    to ``vllm_client``. The reply is converted with ``parse_response``.

    Args:
        question: The user's question.

    Returns:
        The parsed ``Answer``. If the model call or the parsing raises, a
        zero-confidence ``Answer`` carrying the error message is returned.
        Errors raised while retrieving documents are not caught here.
    """
    # Retrieval happens outside the try block, so its errors reach the caller
    relevant_docs = await get_relevant_documents(question)

    system_message = " ".join(
        [
            "You are an assistant answering questions for new employees of Russian Railways (RZD).",
            "Use the provided knowledge base to answer.",
            "If you cannot answer the question, indicate this with a `null` response in the structured output.",
            "ALWAYS respond in Russian, regardless of the language of the question.",
        ]
    )

    steps_section = "\n".join(
        [
            "Let's approach this step-by-step:",
            "1) Determine the type of employee query",
            "2) Find relevant information in the knowledge base",
            "3) Formulate a structured response",
        ]
    ) + "\n\n"

    checklist_section = "\n" + "\n".join(
        [
            "Ensure your answer includes:",
            "1) A direct answer to the question",
            "2) Reference to the relevant document",
            "3) Additional context if necessary",
        ]
    ) + "\n"

    format_section = "\n".join(
        [
            "Provide your answer in the following format:",
            "1. A JSON object containing the structured information.",
            "2. A newline character.",
            "3. The textual answer in Russian.",
            "Example for a greeting:",
            '{"question_type": "greeting", "relevant_documents": [], "thinking_steps": [], "final_answer": "Здравствуйте!", "confidence": 1.0, "checklist": {"direct_answer": true, "document_reference": false}}',
            "Здравствуйте! Чем могу помочь?",
            "For other types of questions, use 'policy', 'procedure', 'safety', or 'general' as appropriate.",
        ]
    )

    user_message = "".join(
        [
            steps_section,
            f"QUESTION\n\n{question}\n\n",
            f"KNOWLEDGE BASE\n\n{relevant_docs}\n\n",
            checklist_section,
            "\n",
            format_section,
        ]
    )

    request = {
        "model": QWEN_MODEL,
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.7,
        "max_tokens": 1000,
    }

    try:
        completion = await vllm_client.chat.completions.create(**request)
        return parse_response(completion.choices[0].message.content)
    except Exception as e:
        print(f"Произошла ошибка при обработке запроса: {str(e)}")
        failure_flags = Checklist(direct_answer=False, document_reference=False)
        return Answer(
            question_type='general',
            relevant_documents=[],
            thinking_steps=[],
            final_answer=f"Извините, произошла ошибка при обработке вашего запроса: {str(e)}",
            confidence=0.0,
            checklist=failure_flags,
        )

# Interactive chat loop: reads questions from stdin until the user types 'выход'
async def chat_loop():
    """Run an interactive question/answer session on stdin/stdout.

    Each question is read with ``input``, answered by ``ask_question``, and
    shown twice: as the JSON dump of the structured answer, then as text
    printed character by character. Typing ``выход`` (any letter case,
    surrounding whitespace ignored) ends the session; blank input is skipped.
    Errors raised while handling one question are reported and the loop
    continues.
    """
    print(
        "Чат-бот РЖД готов к работе. Введите ваш вопрос или 'выход' для завершения."
    )
    while True:
        # NOTE: input() blocks the event loop while waiting for the user.
        # Strip whitespace so that e.g. "выход " still ends the session.
        question = input("Ваш вопрос: ").strip()
        if not question:
            # Nothing to ask: avoid a pointless retrieval + model round trip
            continue
        if question.lower() == "выход":
            print("Выход из программы.")
            break
        try:
            response = await ask_question(question)

            # Pydantic v2 renamed .dict() to .model_dump() and deprecated the
            # old name; use whichever the installed version provides.
            if hasattr(response, "model_dump"):
                structured = response.model_dump()
            else:
                structured = response.dict()

            # Print structured output (for programmatic use)
            print("\nСтруктурированный вывод:")
            print(json.dumps(structured, ensure_ascii=False, indent=2))

            # Print text answer in real-time
            print("\nОтвет:")
            await print_real_time_answer(response.final_answer)
        except Exception as e:
            # Keep the session alive after a failed turn
            print(f"Произошла ошибка: {str(e)}")
            print("Пожалуйста, попробуйте задать вопрос еще раз.")
# Run the chat loop. Top-level `await` relies on an already running event loop
# (as in a notebook cell); a plain script would need
# `asyncio.run(chat_loop())` instead.
await chat_loop()

  from tqdm.autonotebook import tqdm, trange


<coroutine object main at 0x7dcce07cbf10>