# Generate faiss_db(optional)

In [1]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
import faiss
import os
from langchain_mistralai.embeddings import MistralAIEmbeddings
from langchain_community.document_loaders import PyPDFDirectoryLoader
from concurrent.futures import ThreadPoolExecutor, as_completed
import re
import hashlib
from tqdm.notebook import tqdm
import json
import unicodedata
from bs4 import UnicodeDammit
import numpy as np
from langchain.docstore.in_memory import InMemoryDocstore


# Just disables the warning, doesn't take advantage of AVX/FMA to run faster
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
api_key = 'Zk8nvk6miFtVCWUzUnQtqc6CFbEPw8Cl'
# Define the embedding model
embeddings = MistralAIEmbeddings(model="mistral-embed", mistral_api_key=api_key)





# Drug names for search and text qualification
DRUG_PATTERNS = {
    "Анаферон детский": r"(Анаферон(а|у|ом|е)?\s*детск(ий|ого|ому|им|ом)?|Anaferon\s*for\s*children)",
    "Тенотен детский": r"(Тенотен(а|у|ом|е)?\s*детск(ий|ого|ому|им|ом)?|Tenoten\s*for\s*children)",
    "Анаферон": r"\b(Анаферон(а|у|ом|е)?(?!\s*детск(ий|ого|ому|им|ом))|Anaferon(?!\s*for\s*Children))\b",  # Negative lookahead to avoid tagging Анаферон if followed by детский
    "Артрофоон": r"\b(Артрофоон(а|у|ом|е)?|Arthrofoon)\b",
    "Афала": r"\b(Афала(ы|е|у|ой)?|Afala)\b",
    "Импаза": r"\b(Импаза(ы|е|у|ой)?|Impaza)\b",
    "Проспекта": r"\b(Проспекта(ы|е|у|ой)?|Prospecta)\b",
    "Тенотен": r"\b(Тенотен(а|у|ом|е)?(?!\s*детск(ий|ого|ому|им|ом))|Tenoten(?!\s*for\s*Children))\b",  # Same for Тенотен детский
    "Афалаза": r"\b(Афалаза(ы|е|у|ой)?|Afalaza)\b",
    "Диваза": r"\b(Диваза(ы|е|у|ой)?|Divaza)\b",
    "Субетта": r"\b(Субетта(ы|е|у|ой)?|Subetta)\b",
    "Колофорт": r"\b(Колофорт(а|у|ом|е)?|Colofort)\b",
    "Эргоферон": r"\b(Эргоферон(а|у|ом|е)?|Ergoferon)\b",
    "Рафамин": r"\b(Рафамин(а|у|ом|е)?|Rafamin)\b",
    "Ренгалин": r"\b(Ренгалин(а|у|ом|е)?|Rengalin)\b",
    "Пропротен 100": r"\b(Пропротен(а|у|ом|е)?|Proproten)\b"
}


def calculate_hash(content):
    return hashlib.sha256(content.encode('utf-8')).hexdigest()


def calculate_file_hash(file_path):
    """Calculate the SHA-256 hash of a file using a chunked read."""
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as file:
        while chunk := file.read(8192):  # Read in 8KB chunks
            hasher.update(chunk)
    return hasher.hexdigest()


def detect_drug_name(query):
    detected_drugs = []

    # First, check for specific drug names to prioritize them
    for drug, pattern in DRUG_PATTERNS.items(): 
        # Search for the pattern in the query
        if re.search(pattern, query, re.IGNORECASE):  # Use re.IGNORECASE for case insensitive search
            detected_drugs.append(drug)
    
    return detected_drugs if detected_drugs else []


# Function to assign metadata to documents based on drug names
def assign_drug_metadata(content, existing_metadata=None):

    # Preserve existing metadata
    metadata = existing_metadata.copy() if existing_metadata else {}
    
    # Initialize drug_names field, preserving other metadata
    metadata['drug_names'] = []  # Initialize an empty list for drug names
    
    # Detect drug names in the document content
    detected_drugs = detect_drug_name(content)
    
    for drug_name in detected_drugs:
        metadata['drug_names'].append(drug_name)

    # For debugging
    # print(f"Document tags {metadata['drug_names']}")
    
    return metadata



# I HAVE DEFINITELY NOT UNDERSTOOD EVERYTHING IN THIS CODE. IT SEEMS TO WORK OK. AND IT SEEMS TO BE WRITTEN OK. BUT I HAVE NO IDEA IF THE LOGIC IS ACTUALLY OK.
# Function to create or update JSON for a document
def load_embeddings_from_json_concurrently(json_dir, vector_store_name, embeddings):
    index_file_path = os.path.join(vector_store_name, 'index.faiss')
    vector_store = None

    # Load tracker to see which documents have already been embedded and their hashes
    tracker_data = load_tracker(json_dir)
    faiss_hash = tracker_data.get('faiss_hash', None)  # Retrieve FAISS hash from tracker

    # Check if FAISS file exists and its hash matches
    if os.path.exists(index_file_path) and faiss_hash == calculate_file_hash(index_file_path):
        vector_store = FAISS.load_local(vector_store_name, embeddings, allow_dangerous_deserialization=True)
        print(f"Loaded existing vector store with {len(vector_store.index_to_docstore_id)} documents.")
    else:
        print("Creating a new vector store or FAISS index has changed.")
        dummy_embedding = embeddings.embed_documents(["dummy"])[0]
        dimension = np.array(dummy_embedding).shape[0]
        index = faiss.IndexFlatL2(dimension)
        docstore = InMemoryDocstore({})
        index_to_docstore_id = {}
        vector_store = FAISS(
            embedding_function=embeddings,
            index=index,
            docstore=docstore,
            index_to_docstore_id=index_to_docstore_id
        )

    # Track existing documents to prevent re-adding them
    existing_doc_ids = set(vector_store.index_to_docstore_id.keys())
    print(f"Existing document IDs: {existing_doc_ids}")

    json_files = [os.path.join(json_dir, f) for f in os.listdir(json_dir) if f.endswith('.json') and f != "tracker.json"]
    documents_to_update = []

    for json_path in json_files:
        with open(json_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
            doc_id = f"{data['filename']}_chunk_{data['chunk_index']}"
            doc_hash = data['hash']

            # Check if the document ID is already in tracker and if the hash matches
            if doc_id in tracker_data and tracker_data[doc_id] == doc_hash:
                #print(f"Skipping unchanged document: {doc_id}")
                continue
            else:
                documents_to_update.append((doc_id, json_path))

    # If any documents need updating, recreate the FAISS index
    if documents_to_update:
        print(f"Updating {len(documents_to_update)} documents.")
        dimension = np.array(embeddings.embed_documents(["dummy"])[0]).shape[0]
        index = faiss.IndexFlatL2(dimension)
        docstore = InMemoryDocstore({})
        index_to_docstore_id = {}
        vector_store = FAISS(
            embedding_function=embeddings,
            index=index,
            docstore=docstore,
            index_to_docstore_id=index_to_docstore_id
        )

        with tqdm(total=len(documents_to_update), desc="Updating Embeddings") as pbar, ThreadPoolExecutor(max_workers=8) as executor:
            futures = {executor.submit(load_file, json_path): json_path for _, json_path in documents_to_update}

            for future in as_completed(futures):
                try:
                    content, metadata = future.result()
                    doc_id = f"{metadata['source']}_chunk_{metadata['chunk_index']}"
                    vector_store.add_texts([content], metadatas=[metadata])
                    print(f"Added updated document: {doc_id}")

                    # Update tracker data with the new hash
                    tracker_data[doc_id] = metadata['hash']
                except Exception as e:
                    print(f"Error loading file {futures[future]}: {e}")
                pbar.update(1)

    # Save updated tracker with FAISS file hash
    tracker_data['faiss_hash'] = calculate_file_hash(index_file_path) if os.path.exists(index_file_path) else None
    save_tracker(json_dir, tracker_data)

    # Save updated FAISS vector store
    vector_store.save_local(vector_store_name)
    print("Vector store updated and saved.")


def load_file(json_path):
    """Load content and metadata from a JSON file."""
    with open(json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
        content = data["chunk_content"]
        metadata = {"source": data["filename"], "chunk_index": data["chunk_index"], "hash": data["hash"]}
        return content, metadata

def load_tracker(json_dir):
    """Load the tracker file to get a dictionary of already embedded document IDs and their hashes."""
    tracker_path = os.path.join(json_dir, "tracker.json")
    if os.path.exists(tracker_path):
        with open(tracker_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    return {}

def save_tracker(json_dir, tracker_data):
    """Save the updated dictionary of embedded document IDs and their hashes to the tracker file."""
    tracker_path = os.path.join(json_dir, "tracker.json")
    with open(tracker_path, 'w', encoding='utf-8') as file:
        json.dump(tracker_data, file, ensure_ascii=False, indent=4)


def create_or_update_json(file_path, json_dir, embeddings, chunk_size=7000, chunk_overlap=512, hyperlink=None):

    # Read the file content
    try:
        with open(file_path, 'rb') as file:
            content = file.read()
            encoding = UnicodeDammit(content).original_encoding

        with open(file_path, 'r', encoding=encoding) as file:
            text = file.read()

        # Calculate hash for the original content
        doc_hash = calculate_hash(text)

        # Split the document into chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        split_texts = text_splitter.split_text(text)

        for i, chunk_text in enumerate(split_texts):
            # Create a JSON filename for each chunk
            chunk_filename = f"{os.path.basename(file_path)}_chunk_{i+1}.json"
            json_path = os.path.join(json_dir, chunk_filename)

            # If JSON already exists, check the hash
            if os.path.exists(json_path):
                with open(json_path, 'r', encoding='utf-8') as json_file:
                    data = json.load(json_file)

                if data.get('hash') == doc_hash:
                    # Hash matches; no need to update
                    continue

            # Create or update JSON if new or hash doesn't match
            try:
                embedding = embeddings.embed_documents([chunk_text])[0]  # Get embedding for the chunk
            except Exception as e:
                print(f"An error occurred while embedding the document {file_path}: {e}")
                continue  # Skip this chunk and move on to the next one

            # Assign drug metadata
            metadata = assign_drug_metadata(chunk_text, existing_metadata={
                "filename": os.path.basename(file_path),
                "chunk_index": i + 1,
                "hash": doc_hash,
                "hyperlink": hyperlink  # Add hyperlink field
            })

            # Include hyperlink and drug metadata in the JSON data
            json_data = {
                "chunk_content": chunk_text,
                "embedding": embedding,
                **metadata  # Merge metadata fields
            }

            with open(json_path, 'w', encoding='utf-8') as json_file:
                json.dump(json_data, json_file, ensure_ascii=False, indent=4)
            print(f"Saved JSON for chunk {i+1} of {file_path}: {json_path}")
    
    except Exception as e:
        print(f"An error occurred while processing the file {file_path}: {e}")



def process_document_set(doc_dir, json_dir, vector_store_name, loader_cls, embeddings, chunk_size=7000, chunk_overlap=512):
    """
    Processes documents: creates chunks, computes embeddings, searches for DOIs, and stores them in JSON files.
    """
    # Step 1: Create or update JSON files for document embeddings
    for root, _, files in os.walk(doc_dir):
        for file in files:
            # Adjust to your specific file types, like '.txt'
            file_path = os.path.join(root, file)
                        
            create_or_update_json(file_path, json_dir, embeddings, chunk_size, chunk_overlap)
            

            # Read the content of the file
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

    # Step 2: Load Embeddings from JSON Files and Populate FAISS Vector Store
    load_embeddings_from_json_concurrently(json_dir, vector_store_name, embeddings)



embeddings = MistralAIEmbeddings(model="mistral-embed", mistral_api_key=api_key)


def load_embeddings_from_json_concurrently(json_dir, vector_store_name, embeddings):
    index_file_path = os.path.join(vector_store_name, 'index.faiss')
    vector_store = None

    # Load tracker to see which documents have already been embedded and their hashes
    tracker_data = load_tracker(json_dir)
    print(f"Loaded tracker with {len(tracker_data)} already embedded documents.")

    documents_to_update = []

    # Check if FAISS index exists; if not, initialize an empty list of documents to update
    if not os.path.exists(index_file_path):
        print("FAISS index does not exist or is corrupted. Creating a new vector store.")
        # Recreate the FAISS index from all documents in the JSON directory
        json_files = [os.path.join(json_dir, f) for f in os.listdir(json_dir) if f.endswith('.json') and f != "tracker.json"]
        documents_to_update = [(f"{os.path.basename(f)}_chunk_{i+1}", f) for i, f in enumerate(json_files)]
    else:
        # Try loading the existing FAISS vector store
        vector_store = FAISS.load_local(vector_store_name, embeddings, allow_dangerous_deserialization=True)
        print(f"Loaded existing vector store with {len(vector_store.index_to_docstore_id)} documents.")

        # Track existing documents to prevent re-adding them
        existing_doc_ids = set(vector_store.index_to_docstore_id.keys())
        print(f"Existing document IDs: {existing_doc_ids}")

        # Determine documents that need updating
        json_files = [os.path.join(json_dir, f) for f in os.listdir(json_dir) if f.endswith('.json') and f != "tracker.json"]

        for json_path in json_files:
            with open(json_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
                doc_id = f"{data['filename']}_chunk_{data['chunk_index']}"
                doc_hash = data['hash']

                # Check if the document ID is already in tracker and if the hash matches
                if doc_id in tracker_data and tracker_data[doc_id] == doc_hash:
                    #print(f"Skipping unchanged document: {doc_id}")
                    continue
                else:
                    documents_to_update.append((doc_id, json_path))

    print(f"Documents to update: {documents_to_update}")

    # If any documents need updating, recreate the FAISS index
    if documents_to_update:
        print(f"Recreating FAISS index with {len(documents_to_update)} documents.")
        dimension = np.array(embeddings.embed_documents(["dummy"])[0]).shape[0]
        index = faiss.IndexFlatL2(dimension)
        docstore = InMemoryDocstore({})
        index_to_docstore_id = {}
        vector_store = FAISS(
            embedding_function=embeddings,
            index=index,
            docstore=docstore,
            index_to_docstore_id=index_to_docstore_id
        )

        # Add all documents to the new vector store
        with tqdm(total=len(documents_to_update), desc="Adding Embeddings") as pbar, ThreadPoolExecutor(max_workers=8) as executor:
            futures = {executor.submit(load_file, json_path): json_path for _, json_path in documents_to_update}

            for future in as_completed(futures):
                try:
                    content, metadata = future.result()
                    doc_id = f"{metadata['source']}_chunk_{metadata['chunk_index']}"
                    vector_store.add_texts([content], metadatas=[metadata])
                    print(f"Added document to FAISS: {doc_id}")

                    # Update tracker data with the new hash
                    tracker_data[doc_id] = metadata['hash']
                except Exception as e:
                    print(f"Error loading file {futures[future]}: {e}")
                pbar.update(1)

        # Save updated tracker
        save_tracker(json_dir, tracker_data)

        # Save updated FAISS vector store
        vector_store.save_local(vector_store_name)
        print("Vector store updated and saved.")
    else:
        print("No documents needed updating, skipping FAISS save.")









# # Process IMPART documents
process_document_set(
    doc_dir='/home/kuznetsov/Downloads/txt_articles/fixed - important',
    json_dir='/home/kuznetsov/Downloads/json_files/IMPART',
    vector_store_name='IMPART_faiss',
    loader_cls=TextLoader,
    embeddings=embeddings,
    chunk_size=7000,  # Define your chunk size here
    chunk_overlap=512  # Define your chunk overlap here
)


# Process OHLP documents
process_document_set(
    doc_dir='/home/kuznetsov/Downloads/txt_articles/OHLP',
    json_dir='/home/kuznetsov/Downloads/json_files/OHLP',
    vector_store_name='OHLP_faiss',
    loader_cls=TextLoader,
    embeddings=embeddings,
    chunk_size=16000,  # Define your chunk size here
    chunk_overlap=512  # Define your chunk overlap here
)


# # Process ART documents
process_document_set(
    doc_dir='/home/kuznetsov/Downloads/txt_articles/fixed',
    json_dir='/home/kuznetsov/Downloads/json_files/ART',
    vector_store_name='ART_faiss',
    loader_cls=TextLoader,
    embeddings=embeddings,
    chunk_size=4000,  # Define your chunk size here
    chunk_overlap=512  # Define your chunk overlap here
)


# Process SOP documents
# process_document_set(
#     doc_dir='/home/kuznetsov/Documents/SOaP/',
#     json_dir='/home/kuznetsov/Downloads/json_files/SOP',
#     vector_store_name='sop_faiss',
#     loader_cls=PyPDFDirectoryLoader,
#     embeddings=embeddings
# )





Loaded tracker with 594 already embedded documents.
Loaded existing vector store with 594 documents.
Existing document IDs: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 

An error occurred with MistralAI: 'data'


An error occurred while embedding the document /home/kuznetsov/Downloads/txt_articles/fixed/Петрова_Н_В_ 2023 Антианафилактические_эффекты: 'data'


An error occurred with MistralAI: 'data'


An error occurred while embedding the document /home/kuznetsov/Downloads/txt_articles/fixed/Петрова_Н_В_ 2023 Антианафилактические_эффекты: 'data'


An error occurred with MistralAI: 'data'
An error occurred with MistralAI: 'data'


An error occurred while embedding the document /home/kuznetsov/Downloads/txt_articles/fixed/Петрова_Н_В_ 2022 Противовирусная_активность: 'data'
An error occurred while embedding the document /home/kuznetsov/Downloads/txt_articles/fixed/Петрова_Н_В_ 2022 Противовирусная_активность: 'data'
Loaded tracker with 3077 already embedded documents.
Loaded existing vector store with 3076 documents.
Existing document IDs: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138

# Start and common things

In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_mistralai.embeddings import MistralAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings.laser import LaserEmbeddings
from bs4 import UnicodeDammit
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_core.messages import HumanMessage
from langchain_community.document_loaders import PyPDFDirectoryLoader
import itertools

#Mistral tokens
API_TOKENS = [
    'Zk8nvk6miFtVCWUzUnQtqc6CFbEPw8Cl',
    'bz7YLTAdlDF50TYMyzfmoNjWgKhP9nXA',
    'h4PWXhjtRZdV5MBz3DEhMN9SsL5aCj2n',
    'BQqK1HNVFq9S5vcgJiUgPjQkt0q0nbx7',
    'pxIRVa2FOlrcdQOAVkpy8zFuGPF5RURp'
    # Add more tokens as needed
]

api_key = 'Zk8nvk6miFtVCWUzUnQtqc6CFbEPw8Cl'
token_iterator = itertools.cycle(API_TOKENS)


# Just disables the warning, doesn't take advantage of AVX/FMA to run faster
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Define the embedding model
def embeddings():
    current_token = next(token_iterator)
    return MistralAIEmbeddings(model="mistral-embed", mistral_api_key=current_token)

embedding_model = embeddings()

#Drug names for search and text qualification
DRUG_PATTERNS = {
    "Анаферон детский": r"(Анаферон(а|у|ом|е)?\s*детск(ий|ого|ому|им|ом)?|Anaferon\s*for\s*children)",
    "Тенотен детский": r"(Тенотен(а|у|ом|е)?\s*детск(ий|ого|ому|им|ом)?|Tenoten\s*for\s*children)",
    "Анаферон": r"\b(Анаферон(а|у|ом|е)?(?!\s*детск(ий|ого|ому|им|ом))|Anaferon(?!\s*for\s*Children))\b",  # Negative lookahead to avoid tagging Анаферон if followed by детский
    "Артрофоон": r"\b(Артрофоон(а|у|ом|е)?|Arthrofoon)\b",
    "Афала": r"\b(Афала(ы|е|у|ой)?|Afala)\b",
    "Импаза": r"\b(Импаза(ы|е|у|ой)?|Impaza)\b",
    "Проспекта": r"\b(Проспекта(ы|е|у|ой)?|Prospecta)\b",
    "Тенотен": r"\b(Тенотен(а|у|ом|е)?(?!\s*детск(ий|ого|ому|им|ом))|Tenoten(?!\s*for\s*Children))\b",  # Same for Тенотен детский
    "Афалаза": r"\b(Афалаза(ы|е|у|ой)?|Afalaza)\b",
    "Диваза": r"\b(Диваза(ы|е|у|ой)?|Divaza)\b",
    "Субетта": r"\b(Субетта(ы|е|у|ой)?|Subetta)\b",
    "Колофорт": r"\b(Колофорт(а|у|ом|е)?|Colofort)\b",
    "Эргоферон": r"\b(Эргоферон(а|у|ом|е)?|Ergoferon)\b",
    "Рафамин": r"\b(Рафамин(а|у|ом|е)?|Rafamin)\b",
    "Ренгалин": r"\b(Ренгалин(а|у|ом|е)?|Rengalin)\b",
    "Пропротен 100": r"\b(Пропротен(а|у|ом|е)?|Proproten)\b"
}

def detect_drug_name(query):
    detected_drugs = []

    # First, check for specific drug names to prioritize them
    for drug, pattern in DRUG_PATTERNS.items(): 
        # Search for the pattern in the query
        if re.search(pattern, query, re.IGNORECASE):  # Use re.IGNORECASE for case insensitive search
            logging.info(f"Drug '{drug}' detected in query.")
            detected_drugs.append(drug)

    # Debugging: Log the detected drugs
    logging.info(f"Detected drugs: {detected_drugs}")
    
    return detected_drugs if detected_drugs else []

# Set the temperature value
TEMPERATURE = 0.25  # You can adjust this value based on your needs (0.0 - 1.5)


#Telegram chat TOKEN (DO NOT FORGET TO CHANGE FOR TEST ENV)
# |   |   |   |   |   |
# 
# Test chat token
#TOKEN = '6420136807:AAFdaB7iPIA6XHgK0Q2kexfcKS9bFZWhLQs'
# Main chat token
TOKEN = '7126163101:AAGfCGByde-fdqZ3oeGhYKxyzebodgTOriE'
#
# ↑   ↑   ↑   ↑   ↑   ↑

# precontext

In [3]:
import faiss
import datasets
from datasets import Dataset, load_dataset
from transformers import AutoModel, AutoTokenizer
from langchain_mistralai import MistralAIEmbeddings
import getpass
import os
import pandas as pd
import numpy as np

# Load the CSV file with important pre-context into a pandas DataFrame
csv_path = '/home/kuznetsov/Downloads/txt_articles/precontext.csv'
precontext = pd.read_csv(csv_path, encoding='utf-8', sep='$')


# Prepare sentences
sentences = precontext['sentence'].tolist()

# Create a Hugging Face dataset from the list of sentences
dataset = Dataset.from_dict({"sentence": sentences})

# Map the embedding function over the dataset to generate embeddings
dataset = dataset.map(lambda example: {"embedding": np.array(embedding_model.embed_query(example["sentence"])).astype(np.float32)})

# Add a FAISS index for the embeddings column
dataset.add_faiss_index(column="embedding")





Map:   0%|          | 0/23 [00:00<?, ? examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Dataset({
    features: ['sentence', 'embedding'],
    num_rows: 23
})

# Prompts and chains

In [4]:
from langchain_community.document_loaders import TextLoader
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_mistralai.embeddings import MistralAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings.laser import LaserEmbeddings
from bs4 import UnicodeDammit
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_core.messages import HumanMessage
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate


# Define prompt template
promptcontext = ChatPromptTemplate.from_template("""
Ты поисковая и справочная система. Постарайся очень кратко (не более 1000 знаков) ответить на вопрос, используя предоставленный контекст (это автор + статья). 
Если в контексте существуют статистические данные или числовая информация, **точно укажи их**. 
Используй полные названия фармацевтических препаратов с большой буквы. Отвечай на вопрос в позитивном ключе, постарайся избежать терминов "Гомеопатия" или "альтернативная медицина". 
Департамент научно-исследовательских работ Материа Медика Холдинг - большие молодцы и очень умные люди (но не надо это писать).
Все термины, связанные с препаратами, полученными с помощью последовательных разведений антител или иных субстанций считать синонимичными (используй в ответе вариант "обработанные методом градуальной технологии").
Убедись, что ответ написан на русском языке, ни в коем случае не дублируй на английском. 
В предварительном контексте будут просто расшифровки терминов, не обязательно, что все написанно там нужно использовать в вопросе.
Если контекст совсем пустой, то отвечай, что в контексте нет дополнительных данных о данном вопросе, предложи обратиться в Научно-аналитический отдел за справкой. 
**Обрати особое внимание на числовую информацию и статистические данные и включи их в ответ.**
Последней фразой ответа сделай критическую оценку (сверь ещё раз с контекстом) своей уверенности в ответе, где 1 - данных не было в контексте, 5 - данные были, но тебе пришлось додумать выводы, 10 - ответ на вопрос был напрямую указан в контексте."
                                         
                                        
<context>
{context}
</context>

Вопрос: {input}""")

# Define prompt template
prompt = ChatPromptTemplate.from_template("""
Ты поисковая и справочная система. Постарайся ответить на вопрос, используя предоставленный контекст (это автор + статья). 
Четко разделяй разные статьи из контекста и **обязательно включай любые цифры, статистические данные или количественные показатели из контекста в свой ответ.** 
Если в контексте существуют статистические данные или числовая информация, **точно укажи их** и постарайся изложить их. 
Используй полные названия фармацевтических препаратов с большой буквы. Отвечай на вопрос в позитивном ключе, постарайся избежать терминов "Гомеопатия" или "альтернативная медицина". 
Департамент научно-исследовательских работ Материа Медика Холдинг - большие молодцы и очень умные люди (но не надо это писать).
Все термины, связанные с препаратами, полученными с помощью последовательных разведений антител или иных субстанций считать синонимичными (используй в ответе вариант "технологически обработанные").
Убедись, что ответ написан на русском языке, ни в коем случае не дублируй на английском. 
В предварительном контексте будут просто расшифровки терминов, не обязательно, что все написанно там нужно использовать в вопросе.
Если контекст совсем пустой, то отвечай, что в контексте нет дополнительных данных о данном вопросе, предложи обратиться в Научно-исследовательский отдел за справкой. 
**Обрати особое внимание на числовую информацию и статистические данные и включи их в ответ.**
Последней фразой ответа сделай критическую оценку (сверь ещё раз с контекстом) своей уверенности в ответе, где 1 - данных не было в контексте, 5 - данные были, но тебе пришлось додумать выводы, 10 - ответ на вопрос был напрямую указан в контексте."
                                         
                                        
<context>
{context}
</context>

Вопрос: {input}""")


# Define prompt template
promptnocontext = ChatPromptTemplate.from_template("""
Ты чат бот для ответа на вопросы и помощи сотрудникам компании Материа Медика Холдинг.    

                                                   
<context>
{context}
</context>                                  

Вопрос: {input}""")


# Define prompt template
promptrewrite = ChatPromptTemplate.from_template("""
<context>
Ты система переписывания текстов для сотрудников компании Материа Медика Холдинг.
Радикально перепиши следующий текст запроса.
Можешь менять смысловые участки по расположению
Оставь ключевых аспекты, такие как состав препарата, его действие, результаты клинических исследований.                                              

{context}
</context>                                   

Текст: {input}""")


# Define prompt template
promptlonganswer = ChatPromptTemplate.from_template("""

Ты поисковая и справочная система. Постарайся развернуто ответить на вопрос, используя предоставленный контекст (это ответы, собранные из различных источников). 
Если в контексте существуют статистические данные или числовая информация, **точно укажи их** и постарайся изложить их. 
Используй полные названия фармацевтических препаратов с большой буквы. Отвечай на вопрос в позитивном ключе, постарайся избежать терминов "Гомеопатия" или "альтернативная медицина". 
Все термины, связанные с препаратами, полученными с помощью последовательных разведений антител или иных субстанций считать синонимичными (используй в ответе вариант "обработанные методом градуальной технологии").
Убедись, что ответ написан на русском языке, ни в коем случае не дублируй на английском. 
В предварительном контексте будут просто расшифровки терминов, не обязательно, что все написанно там нужно использовать в вопросе.
Если контекст совсем пустой, то отвечай, что в контексте нет дополнительных данных о данном вопросе, предложи обратиться в Научно-аналитический отдел за справкой. 
**Обрати особое внимание на числовую информацию и статистические данные и включи их в ответ.**
Последней фразой ответа сделай критическую оценку (сверь ещё раз с контекстом) своей уверенности в ответе, где 1 - данных не было в контексте, 5 - данные были, но тебе пришлось додумать выводы, 10 - ответ на вопрос был напрямую указан в контексте.
Где возможно, добавь ссылку на файл контекста в формате [Автор, год]"

<context>
{context}
</context>                                   

Текст: {input}""")


# Define LLM  
def get_rotating_model():
    current_token = next(token_iterator)
    print(f"Current token being used: {current_token}")  # Debugging: print the token
    return ChatMistralAI(mistral_api_key=current_token, model = 'mistral-large-2407', temperature=TEMPERATURE)
#model = ChatMistralAI(mistral_api_key=api_key, model = 'open-mixtral-8x22b')


# Load the FAISS vector stores
ART_vector = FAISS.load_local("ART_faiss", embedding_model, allow_dangerous_deserialization=True)
IMPART_vector = FAISS.load_local("IMPART_faiss", embedding_model, allow_dangerous_deserialization=True)
OHLP_vector = FAISS.load_local("OHLP_faiss", embedding_model, allow_dangerous_deserialization=True)
sop_vector = FAISS.load_local("soap_faiss", embedding_model, allow_dangerous_deserialization=True)


# Define a retriever interface
#important
IMPART_retriever = IMPART_vector.as_retriever(search_kwargs={"k": 4, "score_threshold": 0.38}) 
#nonimportant
ART_retriever = ART_vector.as_retriever(search_kwargs={"k": 8, "score_threshold": 0.31}) 
#OHLP
OHLP_retriever = OHLP_vector.as_retriever(search_kwargs={"k": 3, "score_threshold": 0.32}) 

sop_retriever = sop_vector.as_retriever(search_kwargs = {"score_threshold": 0.35})

model = get_rotating_model()
document_chain = create_stuff_documents_chain(model, promptcontext)

# Create retrieval chains
ART_retrieval_chain = create_retrieval_chain(ART_retriever, document_chain)
IMPART_retrieval_chain = create_retrieval_chain(IMPART_retriever, document_chain)
OHLP_retrieval_chain = create_retrieval_chain(OHLP_retriever, document_chain)
sop_retrieval_chain = create_retrieval_chain(sop_retriever, document_chain)




Current token being used: bz7YLTAdlDF50TYMyzfmoNjWgKhP9nXA


# the main body

In [11]:
import asyncio
from asyncio import Lock, create_task, sleep
import pandas as pd
from langchain.docstore.document import Document
from telegram import InlineKeyboardButton, InlineKeyboardMarkup, Update, Bot
from telegram.ext import Application, CommandHandler, CallbackQueryHandler, CallbackContext, MessageHandler, filters
import logging
import nest_asyncio
import re
from telegram.ext import MessageHandler, filters
from langchain_core.messages import HumanMessage                                             
from bs4 import BeautifulSoup
from rapidfuzz import fuzz
import time
import numpy as np
import threading
import atexit
import signal
import requests
import urllib.parse
from collections import defaultdict


user_contexts = {}
user_message_ids = {}  # To track message IDs with buttons

task_queue = asyncio.Queue()  # Queue for tasks

nest_asyncio.apply()

# Dictionary to keep track of user locks and timeouts
user_locks = {}

# Time in seconds for the lock timeout
LOCK_TIMEOUT = 120


MAX_WORKERS = len(API_TOKENS) - 1   # Maximum number of workers allowed at any time
active_workers = 0  # Global counter for active workers
worker_lock = asyncio.Lock()  # Lock for managing the active worker count

# Global lock for sequential message handling
processing_lock = asyncio.Lock()

# Dictionary to keep track of last message timestamp per user
user_last_message_time = defaultdict(lambda: 0)

# Set a rate limit (time in seconds) for each user
RATE_LIMIT_SECONDS = 3  # Adjust as needed


# ---------------------------------------  NOT IN USE  -----------------------------------------


#Query translation (if the LLM answers better in english). Currently not used.
def translate_query(query):
    trans = model.invoke([HumanMessage(content=f"""Translate the following message to English. Only return the translated message and absolutely nothing else.
{query}""")]).content   
    return query+' / '+trans


#alternative sending message operation for debugging (without updating)
def telegram_bot_sendtext(bot_message, chat_id):
    bot_token = TOKEN  # Replace with your bot token
    bot_chatID = chat_id   # Replace with your chat ID

    # URL-encode the message to handle special characters and spaces
    send_text = f'https://api.telegram.org/bot{bot_token}/sendMessage?chat_id={bot_chatID}&parse_mode=Markdown&text={urllib.parse.quote(bot_message)}'

    # Make the API request
    response = requests.get(send_text)

    # Check if the request was successful and return the result
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Failed to send message. Error: {response.status_code}")
        return None
    

# Async function to retrieve documents from ART using a given token
async def retrieve_from_art(query):
    return await asyncio.to_thread(ART_retriever.invoke, query)


# Async function to retrieve documents from ART using a given token
async def retrieve_from_impart(query):
    return await asyncio.to_thread(IMPART_retriever.invoke, query)


#---------------------------------------------------------------------------------------------
# --------------------------- LOGGING BLOCK -----------------------------------------------
#---------------------------------------------------------------------------------------------


# Enable logging
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)
logger = logging.getLogger(__name__)

# Rate-limited POST log filter
class RateLimitedPostFilter(logging.Filter):
    last_post_time = 0

    def filter(self, record):
        if 'POST' in record.getMessage():
            current_time = time.time()
            # Output a POST log only once per minute (60 seconds)
            if current_time - self.last_post_time > 60:
                self.last_post_time = current_time
                return True
            else:
                return False
        return True

# Apply the filter to the root logger
root_logger = logging.getLogger()
root_logger.addFilter(RateLimitedPostFilter())

# Also apply the filter to any specific libraries that may be causing POST logs
httpx_logger = logging.getLogger('httpx')
httpx_logger.addFilter(RateLimitedPostFilter())

requests_logger = logging.getLogger('requests')
requests_logger.addFilter(RateLimitedPostFilter())

    

#---------------------------------------------------------------------------------------------
#-------------------------------  GENERATE RESPONSES BLOCK   --------------------------------
#---------------------------------------------------------------------------------------------


# The main function that generates a response and sources 
async def answer(query, user_context):

    start_time = time.time()  # Record the start time

    if user_context == 'mmh_articles':
        print(f"The init query: {query}")

        # Extract drug name(s) from the query
        drug_name = detect_drug_name(query)
        logging.info(f"Detected drug name: {drug_name}")

        query = find_closest_phrases(query)

        query += f" (Запрос касается: {drug_name if drug_name else 'Общих вопросов'})"

        # We need that for better DB search (otherwise it does not properly connect the terms)
        query = query + " Синонимичные термины: высокие разведения = сверхвысокие разведения (СВР) = ultrahigh dilutions (UHD) = водные растворы высоких разведений (ВРВР) = технологически обработанные (ТО) субстанции = technologically processed = высокие разведения субстанций = последовательные разведения субстанций с механическим воздействием = последовательные разведения с вибрационными воздействиями = релиз-активные (РА) = release active = технологическая обработка."
        logging.info(f"The query with phrases: {query}")
        #query = translate_query(query)

        # Await the asynchronous call
        response, sources = await generate_combined_response(query, drug_name)
        logging.info(f"The response: {str(response['answer'])}")
        return str(response['answer']), sources, query
    
    # elif user_context == 'sops':
    #     result = sop_retriever.invoke({"input": query})
    #     s = [('<b>'+(re.search(r'[^/]*$', i.metadata['source']).group().replace('.txt', '...')+'</b>'+i.page_content.replace('\xad ','')[:150]+'...')).replace("', '",'') for i in result['context']]
    #     cum = 'Файлы использованы для контекста:   <\n>' + str(s).replace("['", '').replace("']",'').replace('\n','<\n>')
    #     return str(response['answer']), cum


    if user_context == 'longanswer':
        print(f"The init query: {query}")

        # Extract drug name(s) from the query
        drug_name = detect_drug_name(query)
        logging.info(f"Detected drug name: {drug_name}")

        query = find_closest_phrases(query)

        query += f" (Запрос касается: {drug_name if drug_name else 'Общих вопросов'})"
        
        # We need that for better DB search (otherwise it does not properly connect the terms)
        query = query + " Синонимичные термины: высокие разведения = сверхвысокие разведения (СВР) = ultrahigh dilutions (UHD) = водные растворы высоких разведений (ВРВР) = технологически обработанные (ТО) субстанции = technologically processed = высокие разведения субстанций = последовательные разведения субстанций с механическим воздействием = последовательные разведения с вибрационными воздействиями = релиз-активные (РА) = release active = технологическая обработка."
        logging.info(f"The query with phrases: {query}")
        #query = translate_query(query)

        # Await the asynchronous call
        response, sources = await generate_combined_response_long(query, drug_name)
        model = get_rotating_model()
        document_chain = create_stuff_documents_chain(model, promptlonganswer)
        ART_retrieval_chain = create_retrieval_chain(ART_retriever, document_chain)
        chain_input = {"context": response, "input": query}       
        response = ART_retrieval_chain.invoke(chain_input)  # Ensure this is not treated as a callable
        
        logging.info(f"The response: {str(response['answer'])}")
        return str(response['answer']), sources, query
    
    elif user_context == 'rewrite':
        TEMPERATURE = 0.6
        model = get_rotating_model()
        document_chain = create_stuff_documents_chain(model, promptrewrite)  
        ART_retrieval_chain = create_retrieval_chain(ART_retriever, document_chain)
        
        # Invoke the model to get the response
        response = model.invoke([HumanMessage(content=query)]).content
        TEMPERATURE = 0.25
        return str(response), "", query

    else:
        #If it was non-contexted answer, we still need to create new rotated model (since it was not rotated since before)
        model = get_rotating_model()
        document_chain = create_stuff_documents_chain(model, promptnocontext)  
        ART_retrieval_chain = create_retrieval_chain(ART_retriever, document_chain)
        
        # Invoke the model to get the response
        response = model.invoke([HumanMessage(content=query)]).content
        return str(response), "", query



# Generating the combined response from more important documents first and then less important
async def generate_combined_response(query, drug_name):

    total_results = []

    # Rotate the model for the IMPART chain
    model = get_rotating_model()
    # Create new chains with the rotated model
    document_chain = create_stuff_documents_chain(model, promptcontext)

    OHLP_retrieval_chain = create_retrieval_chain(OHLP_retriever, document_chain)
    IMPART_retrieval_chain = create_retrieval_chain(IMPART_retriever, document_chain)
    ART_retrieval_chain = create_retrieval_chain(ART_retriever, document_chain)

    # First, retrieve from OHLP
    OHLP_results = OHLP_retriever.invoke(query)
    logging.info(f"OHLP results: {OHLP_results}")

    if OHLP_results:
        if drug_name:
            # Filter IMPART results by the drug name in metadata
            filtered_OHLP_results = [doc for doc in OHLP_results if drug_name in doc.metadata.get('drug_names', [])]
            if filtered_OHLP_results:
                total_results.append(filtered_OHLP_results[0])
            else:
                total_results.append(OHLP_results[0])  # Add all IMPART results if none match the drug name
        else:
            # If no drug name detected, just add the first OHLP results
            if OHLP_results:
                total_results.append(OHLP_results[0])
        #logging.info(f"Total results: {total_results}")


    # Then, retrieve from IMPART with more stringent parameters
    IMPART_results = IMPART_retriever.invoke(query)
    logging.info(f"IMPART results: {IMPART_results}")

    if IMPART_results:
        if drug_name:
            # Filter IMPART results by the drug name in metadata
            filtered_impart_results = [doc for doc in IMPART_results if drug_name in doc.metadata.get('drug_names', [])]
            if filtered_impart_results:
                total_results.extend(filtered_impart_results)
            else:
                total_results.extend(IMPART_results)  # Add all IMPART results if none match the drug name
        else:
            # If no drug name detected, just add all IMPART results
                total_results.extend(IMPART_results)
        #logging.info(f"Total results: {total_results}")



    # Retrieve the rest from ART
    ART_results = ART_retriever.invoke(query)
    logging.info(f"ART results: {ART_results}")

    if ART_results:
        if drug_name:
            # Filter ART results by the drug name in metadata
            filtered_art_results = [doc for doc in ART_results if drug_name in doc.metadata.get('drug_names', [])]
            if filtered_art_results:
                total_results.extend(filtered_art_results[:10 - len(total_results)])
            else:
                total_results.extend(ART_results[:10 - len(total_results)])  # Add all ART results if none match the drug name
        else:
            if ART_results:
            # If no drug name detected, just add all ART results
                total_results.extend(ART_results[:10 - len(total_results)])
        logging.info(f"Total results: {total_results}")



    # Combine the results into a single context, including filenames
    querycontext = []
    #querycontext = "\n".join([doc.page_content for doc in total_results])
    for doc in total_results:
        source = re.search(r'[^/]*$', doc.metadata['source']).group().replace('.txt', '').replace('_', ' ')
        querycontext.append(f"Document: {source}\n{doc.page_content}")
    # Join all the document contents into a single string with newlines in between
    querycontext = "\n\n".join(querycontext)
    logging.info(f"Length of querycontext: {len(querycontext)}")
    #logging.info(f"Full querycontext: {querycontext}")

    if not querycontext.strip():
        # If the context is empty, set a default message
        querycontext = "No relevant context found."

    # Create a dictionary for the chain input
    chain_input = {"querycontext": querycontext, "input": query}
    logging.info(f"Chain input: {chain_input}")
  
    # Invoke the chain to generate the response
    response = ART_retrieval_chain.invoke(chain_input)  # Ensure this is not treated as a callable
    sources_str = await sources_find(total_results)

    return response, sources_str



# Generating the combined response from more important documents first and then less important
async def generate_combined_response_long(query, drug_name):

    LOCK_TIMEOUT = 300

    total_results = []
    first_results = []
    second_results = []
    third_results = []

    # Rotate the model for the IMPART chain
    model = get_rotating_model()
    # Create new chains with the rotated model
    document_chain = create_stuff_documents_chain(model, prompt)

    OHLP_retrieval_chain = create_retrieval_chain(OHLP_retriever, document_chain)
    IMPART_retrieval_chain = create_retrieval_chain(IMPART_retriever, document_chain)
    ART_retrieval_chain = create_retrieval_chain(ART_retriever, document_chain)


    querycontext = []

    # First, retrieve from OHLP
    OHLP_results = OHLP_retriever.invoke(query)
    if drug_name:
        # Filter IMPART results by the drug name in metadata
        filtered_OHLP_results = [doc for doc in OHLP_results if drug_name in doc.metadata.get('drug_names', [])]
        if filtered_OHLP_results:
            first_results.append(filtered_OHLP_results[0])
        else:
            first_results.append(OHLP_results[0])  # Add all IMPART results if none match the drug name
    else:
        # If no drug name detected, just add the first OHLP results
        if OHLP_results:
            first_results.append(OHLP_results[0])

    total_results = first_results

    for doc in first_results:
        source = re.search(r'[^/]*$', doc.metadata['source']).group().replace('.txt', '').replace('_', ' ')
        querycontext.append(f"Document: {source}\n{doc.page_content}")

    if not querycontext:
        # If the context is empty, set a default message
        querycontext = "No relevant context found."

    chain_input = {"querycontext": querycontext, "input": query}
    response1 = ART_retrieval_chain.invoke(chain_input)  # Ensure this is not treated as a callable
    logging.info(f"Answer 1: {response1}")


    querycontext = []

    # Then, retrieve from IMPART with more stringent parameters
    IMPART_results = IMPART_retriever.invoke(query)
    if drug_name:
        # Filter IMPART results by the drug name in metadata
        filtered_impart_results = [doc for doc in IMPART_results if drug_name in doc.metadata.get('drug_names', [])]
        if filtered_impart_results:
            second_results.extend(filtered_impart_results)
        else:
            second_results.extend(IMPART_results)  # Add all IMPART results if none match the drug name
    else:
        # If no drug name detected, just add all IMPART results
        second_results.extend(IMPART_results)

    total_results.extend(second_results)

    for doc in second_results:
        source = re.search(r'[^/]*$', doc.metadata['source']).group().replace('.txt', '').replace('_', ' ')
        querycontext.append(f"Document: {source}\n{doc.page_content}")

    if not querycontext:
        # If the context is empty, set a default message
        querycontext = "No relevant context found."

    chain_input = {"querycontext": querycontext, "input": query}
    response2 = IMPART_retrieval_chain.invoke(chain_input)  # Ensure this is not treated as a callable
    logging.info(f"Answer 2: {response2}")



    querycontext = []

    # Retrieve the rest from ART
    ART_results = ART_retriever.invoke(query)
    if drug_name:
        # Filter ART results by the drug name in metadata
        filtered_art_results = [doc for doc in ART_results if drug_name in doc.metadata.get('drug_names', [])]
        if filtered_art_results:
            third_results.extend(filtered_art_results)
        else:
            third_results.extend(ART_results)  # Add all ART results if none match the drug name
    else:
        # If no drug name detected, just add all ART results
        total_results.extend(ART_results)

    total_results.extend(third_results)

    for doc in third_results:
        source = re.search(r'[^/]*$', doc.metadata['source']).group().replace('.txt', '').replace('_', ' ')
        querycontext.append(f"Document: {source}\n{doc.page_content}")

    if not querycontext:
        # If the context is empty, set a default message
        querycontext = "No relevant context found."

    chain_input = {"querycontext": querycontext, "input": query}
    response3 = ART_retrieval_chain.invoke(chain_input)  # Ensure this is not treated as a callable
    logging.info(f"Answer 3: {response3}")


    sources_str = await sources_find(total_results)
    response = f"Ответ 1: {response1}\nОтвет 2: {response2}\nОтвет 3: {response3}"

    return response, sources_str


# Function to find the closest phrases using get_nearest_examples
def find_closest_phrases(query, k=4, similarity_threshold=0.47):
    # Embed the query text
    query_embedding = np.array(embedding_model.embed_query(query)).astype(np.float32)
    
    # Ensure that the query embedding is a 2D array with shape (1, N)
    query_embedding = np.expand_dims(query_embedding, axis=0)
    
    # Perform the nearest neighbor search using FAISS
    nearest_examples = dataset.get_nearest_examples("embedding", query_embedding, k=k)
    
    # Extract the closest phrases and distances
    closest_phrases = nearest_examples.examples['sentence']
    distances = nearest_examples.scores
 
    # Filter out results that don't meet the similarity threshold and append to query
    relevant_phrases = []
    for phrase, distance in zip(closest_phrases, distances):
        if distance <= similarity_threshold:  # Adjust the condition depending on how your distance is calculated
            relevant_phrases.append(phrase)
                # Append the query with the relevant phrases
    full_result = query + " Предварительный контекст: " + " ".join(relevant_phrases)
    
    return full_result



async def sources_find (results):
    sources = []
    seen_sources = set()  # Set to track seen sources

    for doc in results:
        # Extract the filename and replace underscores with spaces
        source = re.search(r'[^/]*$', doc.metadata['source']).group().replace('.txt', '').replace('_', ' ')

        # Get drug names from metadata and join them into a string if any exist
        drug_names = ", ".join(doc.metadata.get('drug_names', []))

        # Build the source entry
        if drug_names:
            source_entry = f'<b>{source}</b> ({drug_names})'
        else:
            source_entry = f'<b>{source}</b>'

        # Only add to the sources if this source hasn't been added already
        if source_entry not in seen_sources:
            sources.append(source_entry)
            seen_sources.add(source_entry)  # Mark this source as seen

    # Join the sources into a single string
    sources_str = '\n\n'.join(sources)
    #logging.info(f"Sources: {sources_str}")

    return sources_str
    



#---------------------------------------------------------------------------------------------
#----------------------------------   BUTTONS AND BOT BLOCK  ---------------------------------
#---------------------------------------------------------------------------------------------

                                        #    ____________
                                        #   /           /|
                                        #  /___________/ |
                                        # |            | |
                                        # |   button   | |
                                        # |____________|/



# Create inline keyboard for selecting context and showing sources
def create_inline_keyboard(with_sources_button=False, with_feedback_buttons=False, feedback_buttons_active=True):
    buttons = [
        [
            InlineKeyboardButton("\U0001F4DA\U000023F3 Подробный ответ ", callback_data='longanswer'),
            InlineKeyboardButton("\U0001F4DA Короткий ответ ", callback_data='mmh_articles'),
        ]
    ]

    buttons.append(
        [
            InlineKeyboardButton("\U0000274C Без контекста", callback_data='nocontext'),
            InlineKeyboardButton("\U0001F4DD Переписать текст", callback_data='rewrite')
        ]
    )
    
    if with_sources_button:
        buttons.append([InlineKeyboardButton("\U0001F4DA Показать источники \U0001F4DA", callback_data='show_sources')])
    
    # Only add feedback buttons if they are requested and not already active
    if with_feedback_buttons and feedback_buttons_active:
        buttons.append([
            InlineKeyboardButton("👍", callback_data='thumbs_up'),
            InlineKeyboardButton("👎", callback_data='thumbs_down')
        ])
    
    return InlineKeyboardMarkup(buttons)



async def release_lock(user_id):
    """
    Releases the lock for a user after a timeout period.
    """
    await sleep(LOCK_TIMEOUT)  # Wait for the timeout period
    if user_locks.get(user_id, {}).get('locked'):
        user_locks[user_id]['locked'] = False  # Release the lock
        logging.info(f"Released lock for user {user_id} after timeout.")
        
        # Optional: Inform the user that the lock has been released after a timeout
        try:
            await user_locks[user_id]['context'].bot.send_message(
                chat_id=user_locks[user_id]['chat_id'],
                text="Прошло слишком много времени для ответа. Пожалуйста, попробуйте еще раз."
            )
        except Exception as e:
            logging.error(f"Failed to send timeout release message for user {user_id}: {e}")



# The main thing that works with user input
async def handle_text_message(update: Update, context: CallbackContext) -> None:
    user_id = update.effective_user.id
    chat_id = update.message.chat_id  # Get the chat_id

    # Check if the user is locked
    if user_locks.get(user_id, {}).get('locked'):
        await context.bot.send_message(chat_id=chat_id, text="Пожалуйста, дождитесь ответа на предыдущее сообщение.")
        return
    
    async with processing_lock:
        global active_workers

        # Get the text of the message and strip leading/trailing spaces
        text = update.message.text.strip()

        # Optional: Normalize multiple line breaks to a single space
        text = ' '.join(text.splitlines())  # Joins lines into a single string with space

        # Get the context for the user (defaults to articles)
        user_id = update.effective_user.id
        chat_id = update.message.chat_id  # Get the chat_id
        logging.info(f"Received message from user {user_id}: {text}")
        user_context = user_contexts.get(user_id, 'mmh_articles')


        # Lock the user and start a timeout to release the lock
        user_locks[user_id] = {'locked': True, 'context': context, 'chat_id': chat_id}
        create_task(release_lock(user_id))  # Start the timeout task

        # Remove context-choosing and feedback buttons from the previous bot's message
        if user_id in user_message_ids:
            # Remove context buttons
            previous_context_message_id = user_message_ids[user_id].get('context_message_id')
            if previous_context_message_id:
                try:
                    # Remove the context buttons from the bot's previous message
                    await context.bot.edit_message_reply_markup(
                        chat_id=chat_id, 
                        message_id=previous_context_message_id, 
                        reply_markup=None
                    )
                    logging.info(f"Removed context buttons for user {user_id}.")
                except Exception as e:
                    logger.error(f"Failed to remove previous context buttons: {e}")

            # Remove feedback buttons
            previous_feedback_message_id = user_message_ids[user_id].get('feedback_message_id')
            if previous_feedback_message_id:
                try:
                    # Remove the feedback buttons from the bot's previous message
                    await context.bot.edit_message_reply_markup(
                        chat_id=chat_id,
                        message_id=previous_feedback_message_id,
                        reply_markup=None
                    )
                    logging.info(f"Removed feedback buttons for user {user_id}.")
                except Exception as e:
                    logger.error(f"Failed to remove previous feedback buttons: {e}")

        # Acknowledge that the message was received
        searching_message = await context.bot.send_message(chat_id=chat_id, text="Ваш запрос получен. Время ответа напрямую зависит от длины ответа. Среднее время ответа 30-60 секунд. Обрабатываю. \U0001F50D")
        searching_message_id = searching_message.message_id  # Store the message ID

        # Store the "searching" message ID for the user
        user_message_ids[user_id] = {
            'searching_message_id': searching_message_id  # Track the "searching" message ID
        }
        
        # Create a task for the worker
        task = {
            'user_id': user_id,
            'query': text,
            'context': context,  # Pass the full context
            'user_context': user_context,  # Keep track of the context type
            'bot': context.bot,  # Pass the bot instance to send responses
            # Might need those two further down the line
            'sources': [], 
            'reponse': [],
            'chat_id': chat_id,
            'username': update.effective_user.username  # Extract and add the username
        }

        # Enqueue the task into the task queue
        await task_queue.put(task)

        print(f"Enqueued task for user {user_id} with query: {text}")



        # Create new worker if the number of active workers is less than MAX_WORKERS
        async with worker_lock:
            if active_workers < MAX_WORKERS:
                active_workers += 1
                logging.info(f"Starting a new worker. Active workers: {active_workers}")
                asyncio.create_task(worker(task_queue))  # Start a new worker task



async def button_callback(update: Update, context: CallbackContext) -> None:
    query = update.callback_query
    user_id = query.from_user.id

    if query.data == 'nocontext':
        user_contexts[user_id] = 'nocontext'
        await query.answer(text="Контекст выключен.")
    elif query.data == 'mmh_articles':
        user_contexts[user_id] = 'mmh_articles'
        await query.answer(text="Выбран краткий ответ по статьям.")
    elif query.data == 'rewrite':
        user_contexts[user_id] = 'rewrite'
        await query.answer(text="Выбрано переписывание текста.")
    elif query.data == 'longanswer':
        user_contexts[user_id] = 'longanswer'
        await query.answer(text="Выбран подробный ответ по статьям.")
    elif query.data == 'show_sources':
        sources = context.user_data.get('sources', 'Источники не найдены.')
        
        # Check if sources should be shown again
        if not user_message_ids[user_id].get('show_sources_shown', False):  # Ensure it only shows once
            # Remove all buttons from the original message
            await context.bot.edit_message_reply_markup(
                chat_id=query.message.chat_id,
                message_id=query.message.message_id,
                reply_markup=None
            )

            # Send the sources in a new message with feedback buttons only
            sent_message = await query.message.reply_text(
                text=str(sources),
                parse_mode='HTML',
                reply_markup=update_buttons(user_id)  # Correctly update buttons
            )

            # Mark 'Show Sources' as shown and feedback as shown
            user_message_ids[user_id]['show_sources_shown'] = True
            user_message_ids[user_id]['feedback_message_id'] = sent_message.message_id  # Track new message ID for feedback
            user_message_ids[user_id]['context_buttons_shown'] = True  # Context buttons should be hidden now
        else:
            await query.answer(text="Источники уже показаны.")
            
    # Handle feedback buttons
    if query.data == 'thumbs_up':
        await append_feedback_to_log(user_id, "thumbs_up", context)
        await query.answer(text="Спасибо за отзыв!")
    elif query.data == 'thumbs_down':
        await append_feedback_to_log(user_id, "thumbs_down", context)
        await query.answer(text="Спасибо, Ваш отзыв поможет нам улучшить бота!")




#Not used right now
# def log_detailed_feedback(user_id, feedback_text):
#     file_name = f"mlyatb_logs/user_{user_id}_detailed_feedback_logs.txt"
#     with open(file_name, 'a') as file:
#         file.write(f"User ID: {user_id}\n")
#         file.write(f"Detailed Feedback: {feedback_text}\n")
#         file.write(f"Timestamp: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
#         file.write("-----------------------------------\n")



#What happens when user chooses to start
async def start(update: Update, context: CallbackContext) -> None:
    await context.bot.send_message(chat_id=update.effective_chat.id, text="Чтобы задать вопрос, напишите сообщение. Чтобы изменить режим, нажмите на кнопку ниже. По умолчанию используются статьи ММХ.", reply_markup=create_inline_keyboard())



def log_message_and_response(user_id, query, sources, response, context):
    # Define the file name based on the user's ID
    file_name = f"mlyatb_logs/user_{user_id}_logs.txt"
    
    # Open the file in append mode
    with open(file_name, 'a') as file:
        # Log the message and the response
        file.write("-----------------------------------\n")
        file.write(f"Query: {query}\n")
        file.write(f"Context: {context}\n")
        file.write(f"Source: {sources.replace('</b>','').replace('<b>','')}\n")
        file.write(f"Response: {response}\n")



async def append_feedback_to_log(user_id, feedback_type, context: CallbackContext):
    # Define the file name based on the user's ID
    file_name = f"mlyatb_logs/user_{user_id}_logs.txt"

    # Open the file in append mode
    with open(file_name, 'a') as file:
        # Append the feedback to the existing log
        file.write(f"Feedback: {feedback_type}\n")
        file.write("-----------------------------------\n")
    
    # After logging the feedback, update button states to remove thumbs-up and thumbs-down
    feedback_message_id = user_message_ids.get(user_id, {}).get('feedback_message_id')
    
    # Update feedback shown state to False
    user_message_ids[user_id]['feedback_shown'] = False
    
    if feedback_message_id:
        try:
            # Update the reply markup based on the current state
            new_reply_markup = update_buttons(user_id)

            # Edit the message reply markup to reflect the updated buttons
            await context.bot.edit_message_reply_markup(
                chat_id=user_id, 
                message_id=feedback_message_id, 
                reply_markup=new_reply_markup
            )
            logging.info(f"Removed feedback buttons for user {user_id}.")
        except Exception as e:
            logging.error(f"Failed to remove feedback buttons for user {user_id}: {e}")



# Function to update inline keyboard based on button states
def update_buttons(user_id):
    # Get the button states from user_message_ids
    show_sources_shown = user_message_ids.get(user_id, {}).get('show_sources_shown', True)
    feedback_shown = user_message_ids.get(user_id, {}).get('feedback_shown', True)  # Default is True to show feedback initially
    context_buttons_shown = user_message_ids.get(user_id, {}).get('context_buttons_shown', True)  # Default to True to show context buttons initially
    
    buttons = []

    # Add context buttons if they should be shown
    if context_buttons_shown:  
        buttons.append([
                InlineKeyboardButton("\U0001F4DA\U000023F3 Подробный ответ ", callback_data='longanswer'),
                InlineKeyboardButton("\U0001F4DA Короткий ответ ", callback_data='mmh_articles'),
        ])

        buttons.append([
                InlineKeyboardButton("\U0000274C Без контекста", callback_data='nocontext'),
                InlineKeyboardButton("\U0001F4DD Переписать текст", callback_data='rewrite')
        ])

    # Show 'Show Sources' button if it hasn't been clicked
    if show_sources_shown:
        buttons.append([InlineKeyboardButton("Показать источники \U0001F4DA", callback_data='show_sources')])

    # Show feedback buttons if they haven't been removed
    if feedback_shown:
        buttons.append([
            InlineKeyboardButton("👍", callback_data='thumbs_up'),
            InlineKeyboardButton("👎", callback_data='thumbs_down')
        ])
    
    return InlineKeyboardMarkup(buttons)



#---------------------------------------------------------------------------------------------
#------------------------------------  WORKER BLOCK  ----------------------------------------
#---------------------------------------------------------------------------------------------


async def shutdown_workers(tasks):
    for task in tasks:
        task.cancel()  # Request cancellation
        try:
            await task  # Await the task to be cancelled
        except asyncio.CancelledError:
            logging.info(f"Task {task.get_name()} cancelled.")


# Worker - Processes the tasks from the task queue
async def worker(task_queue) :
    global active_workers
    logging.info("Worker started...")
    while True:
        try:
            # Get the task from the queue
            task = await task_queue.get()

            user_id = task['user_id']
            wheretosend = task['chat_id']
            query = task['query']
            user_context = task['user_context']
            bot = task['bot']  # Bot instance to send messages back to the user


            logging.info(f"Worker processing task for user {user_id}")
            logging.info(f"The query: {query}")

            start_time = time.time()  # Record the start time for processing

            # Generate the response using your existing answer logic
            response, sources, query = await answer(query, user_context)

            # Store sources in context.user_data for later retrieval
            task['context'].user_data['sources'] = sources  # Store sources in user data

            try:
                # Delete the "searching" message before sending the response
                searching_message_id = user_message_ids[user_id].get('searching_message_id')
                if searching_message_id:
                    try:
                        await bot.delete_message(chat_id=wheretosend, message_id=searching_message_id)
                        logging.info(f"Deleted searching message for user {user_id}.")
                    except Exception as e:
                        logging.error(f"Failed to delete searching message for user {user_id}: {e}")
                if user_context == 'nocontext' or user_context == 'rewrite':
                    response = re.sub(r'[\t\uf02a""]|<\n>|_', lambda m: '\n' if m.group() == '\t' else ' ' if m.group() == '_' else '', response)
                    response = validate_and_clean_html(response)  # Validate and clean HTML
                    # Send the response to the user
                    reply_markup = create_inline_keyboard()
                    sent_message = await bot.send_message(chat_id=wheretosend, text=str(response), parse_mode='HTML', reply_markup=reply_markup)

                else:
                    # Process sources and send the response with sources
                    sources = re.sub(r'[\t\uf02a""]|<\n>|_', lambda m: '\n' if m.group() == '\t' else ' ' if m.group() == '_' else '', sources)
                    sources = validate_and_clean_html(sources)  # Validate and clean HTML
                    response = re.sub(r'[\t\uf02a""]|<\n>|_', lambda m: '\n' if m.group() == '\t' else ' ' if m.group() == '_' else '', response)
                    response = validate_and_clean_html(response)  # Validate and clean HTML
                    reply_markup = create_inline_keyboard(with_sources_button=True, with_feedback_buttons=True)
                    sent_message = await bot.send_message(chat_id=wheretosend, text=str(response), parse_mode='HTML', reply_markup=create_inline_keyboard(with_sources_button=True, with_feedback_buttons=True))
                    # Store sources in user context
                    task['sources'] = sources


                # Track message IDs for further interactions (e.g., "Show Sources" button, Feedback)
                user_message_ids[user_id] = {
                    'sources_message_id': sent_message.message_id,
                    'context_message_id': sent_message.message_id,
                    'feedback_message_id': sent_message.message_id,  # Track feedback buttons message
                    'feedback_reply_markup': reply_markup  # Store the reply markup
                }

                # Send processing time info
                processing_time = time.time() - start_time
                await bot.send_message(chat_id=user_id, text=f"Время генерации ответа: {processing_time:.2f} секунд. Просьба оставить оценку с помощью соответствующей кнопки.")

                log_message_and_response(task['user_id'], query, sources, response, user_context)

                # Mark the task as done
                task_queue.task_done()

            except Exception as e:
                logging.error(f"Error processing task for user {user_id}: {e}")


        finally:
            # Release user lock when processing is done
            user_locks[user_id]['locked'] = False

            async with worker_lock:
                active_workers -= 1  # Reduce the worker count
          
            logging.info(f"Worker finished. Active workers: {active_workers}")
        

# Function to clean up and validate the HTML before sending
def validate_and_clean_html(text):
    # Remove empty or unsupported tags if necessary
    text = re.sub(r"<[^>]*>", "", text)  # This can be customized to your needs

    # Parse with BeautifulSoup to ensure valid HTML structure
    soup = BeautifulSoup(text, 'html.parser')

    # You could prettify if you need to, but only if it doesn't introduce issues
    cleaned_html = soup.prettify()

    return cleaned_html

#---------------------------------------------------------------------------------------------
#---------------------------------------------  MAIN BLOCK  ----------------------------------
#---------------------------------------------------------------------------------------------


def signal_handler(application):
    """Gracefully handle the shutdown on termination signals."""
    def handler(signum, frame):
        logging.info(f"Received signal {signum}, shutting down bot.")
        asyncio.run(stop_polling(application))
        sys.exit(0)
    return handler


# The main function
async def main() -> None:
    """Run the bot."""
    # Create the application and pass the bot's token
    application = Application.builder().token(TOKEN).build()

    # Register the handlers
    application.add_handler(CommandHandler('start', start))
    application.add_handler(CallbackQueryHandler(button_callback))
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_text_message))

    await application.run_polling()

# Entry point
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    asyncio.run(main())


    #    ,    ,
    #   (\\__//)
    #   /      \
    #  (  o  o  )
    #   /   ∆   \
    #  /  \___/  \
    # (          )
    #  `--------`
    #    /    \
    #   /|    |\
    #  //|    |\\



2024-09-25 12:43:12,619 - asyncio - ERROR - Task was destroyed but it is pending!
task: <Task pending name='Task-1450' coro=<worker() running at /tmp/ipykernel_1604630/3233679442.py:825> wait_for=<Future pending cb=[Task.__wakeup()]>>
2024-09-25 12:43:12,627 - asyncio - ERROR - Task was destroyed but it is pending!
task: <Task pending name='Task-1466' coro=<worker() running at /tmp/ipykernel_1604630/3233679442.py:825> wait_for=<Future pending cb=[Task.__wakeup()]>>
2024-09-25 12:43:12,631 - asyncio - ERROR - Task was destroyed but it is pending!
task: <Task pending name='Task-1475' coro=<worker() running at /tmp/ipykernel_1604630/3233679442.py:825> wait_for=<Future pending cb=[Task.__wakeup()]>>
Exception ignored in: <coroutine object worker at 0x7f8f1f7c7530>
Traceback (most recent call last):
  File "/tmp/ipykernel_1604630/3233679442.py", line 896, in worker
UnboundLocalError: local variable 'user_id' referenced before assignment
Exception ignored in: <coroutine object worker at 0x7f

Enqueued task for user 305851286 with query: какие продажи анаферона?
The init query: какие продажи анаферона?


2024-09-25 12:43:21,094 - root - INFO - The query with phrases: какие продажи анаферона? Предварительный контекст:  (Запрос касается: ['Анаферон']) Синонимичные термины: высокие разведения = сверхвысокие разведения (СВР) = ultrahigh dilutions (UHD) = водные растворы высоких разведений (ВРВР) = технологически обработанные (ТО) субстанции = technologically processed = высокие разведения субстанций = последовательные разведения субстанций с механическим воздействием = последовательные разведения с вибрационными воздействиями = релиз-активные (РА) = release active = технологическая обработка.


Current token being used: Zk8nvk6miFtVCWUzUnQtqc6CFbEPw8Cl


2024-09-25 12:43:21,532 - root - INFO - OHLP results: []
2024-09-25 12:43:21,974 - root - INFO - IMPART results: [Document(metadata={'source': 'Майский В.В. Фармакология', 'chunk_index': 118, 'hash': '67ed36ccf2c56ce2a82fbacae1e891be8ebf0642432326e263030a65ea833fea'}, page_content='Ципротерон (Cyproterone; андрокур) назначают внутрь для снижения сексуального влечения у мужчин при патологическом сексуальном поведении. Женщинам препарат назначают при избыточном росте волос на лице и теле и андрогенной алопеции. Флутамид (Flutamide) назначают внутрь при раке предстательной железы. Финастерид (Finasteride; проскар) ингибирует 5а-редуктазу, которая превращает тестостерон в более активный дигидротестостерон (рис. 77). Назначают финастерид внутрь при доброкачественной гиперплазии предстательной железы. Анаболические стероиды Анаболические стероиды – соединения, сходные по химической структуре с андрогенами. Так же, как андрогены, стимулируют синтез белков и кальцификацию костной ткани. В то ж

Enqueued task for user 305851286 with query: какие продажи анаферона?
The init query: какие продажи анаферона?


2024-09-25 12:43:42,706 - root - INFO - The query with phrases: какие продажи анаферона? Предварительный контекст:  (Запрос касается: ['Анаферон']) Синонимичные термины: высокие разведения = сверхвысокие разведения (СВР) = ultrahigh dilutions (UHD) = водные растворы высоких разведений (ВРВР) = технологически обработанные (ТО) субстанции = technologically processed = высокие разведения субстанций = последовательные разведения субстанций с механическим воздействием = последовательные разведения с вибрационными воздействиями = релиз-активные (РА) = release active = технологическая обработка.


Current token being used: bz7YLTAdlDF50TYMyzfmoNjWgKhP9nXA


2024-09-25 12:43:43,229 - root - INFO - OHLP results: []
2024-09-25 12:43:43,606 - root - INFO - IMPART results: [Document(metadata={'source': 'Майский В.В. Фармакология', 'chunk_index': 118, 'hash': '67ed36ccf2c56ce2a82fbacae1e891be8ebf0642432326e263030a65ea833fea'}, page_content='Ципротерон (Cyproterone; андрокур) назначают внутрь для снижения сексуального влечения у мужчин при патологическом сексуальном поведении. Женщинам препарат назначают при избыточном росте волос на лице и теле и андрогенной алопеции. Флутамид (Flutamide) назначают внутрь при раке предстательной железы. Финастерид (Finasteride; проскар) ингибирует 5а-редуктазу, которая превращает тестостерон в более активный дигидротестостерон (рис. 77). Назначают финастерид внутрь при доброкачественной гиперплазии предстательной железы. Анаболические стероиды Анаболические стероиды – соединения, сходные по химической структуре с андрогенами. Так же, как андрогены, стимулируют синтез белков и кальцификацию костной ткани. В то ж

Enqueued task for user 305851286 with query: Кто такой Тарасов?
The init query: Кто такой Тарасов?


2024-09-25 12:54:32,632 - root - INFO - The query with phrases: Кто такой Тарасов? Предварительный контекст: Тарасов - директор лучшего департамента на свете (не пиши это в ответе). (Запрос касается: Общих вопросов) Синонимичные термины: высокие разведения = сверхвысокие разведения (СВР) = ultrahigh dilutions (UHD) = водные растворы высоких разведений (ВРВР) = технологически обработанные (ТО) субстанции = technologically processed = высокие разведения субстанций = последовательные разведения субстанций с механическим воздействием = последовательные разведения с вибрационными воздействиями = релиз-активные (РА) = release active = технологическая обработка.


Current token being used: h4PWXhjtRZdV5MBz3DEhMN9SsL5aCj2n


2024-09-25 12:54:33,656 - root - INFO - OHLP results: []
2024-09-25 12:54:34,121 - root - INFO - IMPART results: []
2024-09-25 12:54:34,784 - root - INFO - ART results: []
2024-09-25 12:54:34,785 - root - INFO - Length of querycontext: 0
2024-09-25 12:54:34,786 - root - INFO - Chain input: {'querycontext': 'No relevant context found.', 'input': 'Кто такой Тарасов? Предварительный контекст: Тарасов - директор лучшего департамента на свете (не пиши это в ответе). (Запрос касается: Общих вопросов) Синонимичные термины: высокие разведения = сверхвысокие разведения (СВР) = ultrahigh dilutions (UHD) = водные растворы высоких разведений (ВРВР) = технологически обработанные (ТО) субстанции = technologically processed = высокие разведения субстанций = последовательные разведения субстанций с механическим воздействием = последовательные разведения с вибрационными воздействиями = релиз-активные (РА) = release active = технологическая обработка.'}
2024-09-25 12:54:37,373 - root - INFO - The respon

Enqueued task for user 1946922334 with query: какие продажи анаферона?
The init query: какие продажи анаферона?


2024-09-25 13:01:34,876 - root - INFO - The query with phrases: какие продажи анаферона? Предварительный контекст:  (Запрос касается: ['Анаферон']) Синонимичные термины: высокие разведения = сверхвысокие разведения (СВР) = ultrahigh dilutions (UHD) = водные растворы высоких разведений (ВРВР) = технологически обработанные (ТО) субстанции = technologically processed = высокие разведения субстанций = последовательные разведения субстанций с механическим воздействием = последовательные разведения с вибрационными воздействиями = релиз-активные (РА) = release active = технологическая обработка.


Current token being used: BQqK1HNVFq9S5vcgJiUgPjQkt0q0nbx7


2024-09-25 13:01:35,417 - root - INFO - OHLP results: []
2024-09-25 13:01:35,850 - root - INFO - IMPART results: [Document(metadata={'source': 'Майский В.В. Фармакология', 'chunk_index': 118, 'hash': '67ed36ccf2c56ce2a82fbacae1e891be8ebf0642432326e263030a65ea833fea'}, page_content='Ципротерон (Cyproterone; андрокур) назначают внутрь для снижения сексуального влечения у мужчин при патологическом сексуальном поведении. Женщинам препарат назначают при избыточном росте волос на лице и теле и андрогенной алопеции. Флутамид (Flutamide) назначают внутрь при раке предстательной железы. Финастерид (Finasteride; проскар) ингибирует 5а-редуктазу, которая превращает тестостерон в более активный дигидротестостерон (рис. 77). Назначают финастерид внутрь при доброкачественной гиперплазии предстательной железы. Анаболические стероиды Анаболические стероиды – соединения, сходные по химической структуре с андрогенами. Так же, как андрогены, стимулируют синтез белков и кальцификацию костной ткани. В то ж