## ChromaDB connection and data loading

In [1]:
from IPython.display import display, clear_output

In [2]:
import sys
import os

# Treat the directory two levels above the current working directory as the
# project root and make it importable from this notebook.
project_root = os.path.abspath(
    os.path.join(os.getcwd(), '../../')
)
sys.path.append(project_root)

In [3]:
import chromadb
import pandas as pd
import json
from copy import deepcopy
from rag.core.utils.config_loader import ConfigLoader

In [4]:
class ChromaSearcher:
    """
    Create ChromaDB collections from specification spreadsheets and search them.

    Every spreadsheet column is stored as one document; the column's header
    levels (below the first one) are joined into its ``column_title`` metadata.
    """

    def __init__(self):
        """
        Initialise the ChromaDB HTTP client.

        Host, port and embedding model name are read through ``ConfigLoader``.
        """
        config_loader = ConfigLoader()
        # The returned config is not used directly; the call itself is kept,
        # ahead of the getters below, exactly as before.
        config_loader.load_config()
        self.host = config_loader.get_host()
        self.port = config_loader.get_port()
        self.embedding_model = config_loader.get_embedding_model()
        self.embedding_function = chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name=self.embedding_model)
        self.chroma_client = chromadb.HttpClient(host=self.host, port=self.port)

    def list_collections(self):
        """
        List all collections in ChromaDB.

        Returns:
            Whatever the client's ``list_collections()`` returns.
        """
        return self.chroma_client.list_collections()

    def add_specific(self, file_name, sheet_name, collection_name):
        """
        Load one spreadsheet sheet into a ChromaDB collection.

        Arguments:
        - file_name: str - A spreadsheet file with specific data.
        - sheet_name: str - Name of a sheet from the spreadsheet file.
        - collection_name: str - Name of the ChromaDB collection to create or
          extend. Must be Latin letters and numbers only.

        Returns:
            The collection name that was passed in.
        """
        # The first four rows form a multi-level header, so every column label
        # is a 4-tuple.
        spec_data = pd.read_excel(file_name, sheet_name=sheet_name, header=[0, 1, 2, 3])
        spec_data = spec_data.fillna("")

        ids = []
        documents = []
        metadatas = []
        for position, column in enumerate(spec_data.columns):
            # Level 0 is skipped (it is stored as "conf_title" below); pandas
            # names empty header cells "Unnamed: ...", which are dropped too.
            title_parts = [
                str(level) for level in column[1:]
                if not str(level).startswith("Unnamed")
            ]
            # Cells may be numbers or dates, not only strings: convert before
            # joining so str.join does not raise TypeError.
            column_text = " ".join(str(cell) for cell in spec_data[column].tolist()).strip()

            ids.append(f"id{position}")
            documents.append(column_text)
            metadatas.append({"column_title": "__".join(title_parts)})

        # Collection-level metadata describing where the data came from.
        collection_metadata = {
            "source_file": file_name,
            "sheet_name": sheet_name,
            "conf_title": spec_data.columns[0][0],
            "conf_title_short": sheet_name
        }

        # Reuse the collection when it exists, otherwise create it. Any failure
        # of get_collection is treated as "does not exist".
        try:
            collection = self.chroma_client.get_collection(
                name=collection_name,
                embedding_function=self.embedding_function
            )
            print(f"Collection '{collection_name}' already exists. Adding new documents.")
        except Exception:
            collection = self.chroma_client.create_collection(
                name=collection_name,
                metadata=collection_metadata,
                embedding_function=self.embedding_function
            )
            print(f"Created new collection '{collection_name}'.")

        # NOTE(review): ids are always "id0".."idN", so re-running this on an
        # existing collection reuses the same ids - confirm whether an upsert
        # is what is actually wanted here.
        collection.add(
            metadatas=metadatas,
            documents=documents,
            ids=ids
        )
        print(f"Added {len(documents)} documents to collection '{collection_name}'.")

        return collection_name

    def get_collection_info(self, collection_name):
        """
        Get summary information about a collection.

        Arguments:
        - collection_name: str - A name of ChromaDB collection

        Returns:
            Dictionary with "name", "document_count" and "metadata", or None
            when anything goes wrong (the error is printed).
        """
        try:
            collection = self.chroma_client.get_collection(
                name=collection_name,
                embedding_function=self.embedding_function
            )
            return {
                "name": collection_name,
                "document_count": collection.count(),
                "metadata": collection.metadata
            }
        except Exception as e:
            print(f"Error getting collection info: {e}")
            return None

    def get_specifics(self, collection_name):
        """
        Get the full contents of a collection by name.

        Arguments:
        - collection_name: str - A name of ChromaDB collection (must be an existing collection)

        Returns:
            Result of ``collection.get`` including documents, embeddings and metadatas.
        """
        collection = self.chroma_client.get_collection(
            name=collection_name,
            embedding_function=self.embedding_function
        )
        return collection.get(include=["documents", "embeddings", "metadatas"])

    def find_similar_records(self, collection_name, texts: list, n_results=5):
        """
        Search a collection for the records most similar to the given texts.

        Arguments:
        - collection_name: str - A name of a collection where parts should be searched
        - texts: list - A list of texts (str) to be found (compared)
        - n_results: int - Number of search results to return (by default 5)

        Returns:
            Result of ``collection.query`` including documents and metadatas.
        """
        collection = self.chroma_client.get_collection(
            name=collection_name,
            embedding_function=self.embedding_function
        )
        return collection.query(
            query_texts=texts,
            n_results=n_results,
            include=["documents", "metadatas"]
        )

    def delete_collection(self, collection_name):
        """
        Delete a collection.

        Arguments:
        - collection_name: str - A name of ChromaDB collection to delete

        Returns:
            True when the collection was deleted, False when deletion failed
            (the error is printed).
        """
        try:
            self.chroma_client.delete_collection(name=collection_name)
            print(f"Collection '{collection_name}' deleted successfully.")
            return True
        except Exception as e:
            print(f"Error deleting collection: {e}")
            return False

Load data

In [5]:
# Spreadsheet with the specifications to load; a machine-specific absolute path.
specifics_path = "D:/GitHub/Edulytica/src/rag/data/Specification.xlsx"


In [6]:
# Build the searcher (its constructor connects to ChromaDB) and load the
# "ППС" sheet of the spreadsheet into the "PPS" collection.
cs = ChromaSearcher()
cs.add_specific(specifics_path, "ППС", "PPS")


[32m2025-04-18 21:55:53.482[0m | [1mINFO    [0m | [36mrag.core.utils.config_loader[0m:[36mload_config[0m:[36m47[0m - [1mConfiguration loaded successfully: {'count_keywords': 10, 'host': 'localhost', 'port': 8080, 'collections_file': 'all_collections.json', 'embedding_model': 'BAAI/bge-m3'}[0m


Created new collection 'PPS'.
Added 8 documents to collection 'PPS'.


'PPS'

In [7]:
# Fetch everything stored in "PPS" (documents, embeddings, metadatas) and print it.
cl = cs.get_specifics("PPS")
print(cl)

{'ids': ['id0', 'id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7'], 'embeddings': array([[-0.05616511, -0.01541862, -0.02970634, ...,  0.016752  ,
         0.00894882, -0.00475067],
       [-0.02731149,  0.00175687, -0.03545567, ...,  0.01454608,
        -0.05446029, -0.02225705],
       [-0.0324773 ,  0.01229401, -0.01113146, ..., -0.01004309,
        -0.05448178, -0.01861655],
       ...,
       [-0.03238031,  0.01179582, -0.0283231 , ..., -0.01092045,
        -0.01374846,  0.01287064],
       [-0.05800085,  0.01502216, -0.04128072, ..., -0.00755922,
        -0.03570972, -0.00576454],
       [-0.01509039,  0.00757372,  0.01312466, ..., -0.02653653,
        -0.02884256, -0.03378146]]), 'metadatas': [{'column_title': 'О конференции'}, {'column_title': 'Программа конференции'}, {'column_title': 'Секции__Секция 1'}, {'column_title': 'Секции__Секция 2'}, {'column_title': 'Секции__Секция 3'}, {'column_title': 'Секции__Секция 4'}, {'column_title': 'Требования__Требования к статье'}, {'column_

In [8]:
# Query "PPS" for the records closest to the given phrase (n_results defaults to 5).
cqq = cs.find_similar_records("PPS", ["Статья должна быть оформлена"])

### Refactoring

In [2]:
import sys
import os

# The project root is taken to be two directory levels above the current
# working directory; appending it to sys.path makes its packages importable.
project_root = os.path.abspath(
    os.path.join(os.getcwd(), '../../')
)
sys.path.append(project_root)

In [3]:
import chromadb
import pandas as pd
from copy import deepcopy
import os
from typing import List, Dict, Any, Optional, Union, Tuple
from loguru import logger
from rag.core.utils.config_loader import ConfigLoader


In [4]:
class EmbeddingProcessor:
    """
    Build the embedding function and turn spreadsheet data into ChromaDB records.
    """

    def __init__(self, embedding_model: Optional[str] = None):
        """
        Create the sentence-transformer embedding function.

        Args:
            embedding_model: Name of the embedding model (optional). When it is
                not given (or is empty), the model name from the configuration
                is used.

        Raises:
            Exception: whatever the embedding function constructor raises; the
                error is logged and re-raised.
        """
        config_loader = ConfigLoader()
        self.embedding_model = embedding_model or config_loader.get_embedding_model()
        logger.info(f"Initializing EmbeddingProcessor with model: {self.embedding_model}")

        try:
            self.embedding_function = chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=self.embedding_model)
            logger.info(f"Successfully initialized embedding function with model {self.embedding_model}")
        except Exception as e:
            logger.error(f"Failed to initialize embedding function: {e}")
            raise

    def get_embedding_function(self):
        """
        Return the embedding function created in ``__init__``.

        Returns:
            The embedding function object.
        """
        return self.embedding_function

    def process_excel_data(self, file_name: str, sheet_name: str) -> Dict[str, Any]:
        """
        Read one Excel sheet and prepare its columns for storage in ChromaDB.

        Every column becomes one document: its cells are joined with spaces,
        and its header levels below the first one are joined with ``__`` to
        form the ``column_title`` metadata.

        Args:
            file_name: Path to the Excel file.
            sheet_name: Name of the sheet inside the Excel file.

        Returns:
            Dictionary with the keys ``ids``, ``documents``, ``metadatas`` and
            ``collection_metadata``.

        Raises:
            Exception: any error raised while reading or processing the sheet;
                it is logged and re-raised.
        """
        logger.info(f"Processing Excel data from file: {file_name}, sheet: {sheet_name}")

        try:
            # The first four rows form a multi-level header, so every column
            # label is a 4-tuple.
            spec_data = pd.read_excel(file_name, sheet_name=sheet_name, header=[0, 1, 2, 3])
            spec_data = spec_data.fillna("")
            logger.info(f"Successfully read Excel data with shape: {spec_data.shape}")

            ids = []
            documents = []
            metadatas = []

            for position, column in enumerate(spec_data.columns):
                # Level 0 is skipped (it is stored as "conf_title" below);
                # pandas names empty header cells "Unnamed: ...", which are
                # dropped as well.
                title_parts = [
                    str(level) for level in column[1:]
                    if not str(level).startswith("Unnamed")
                ]
                # Cells may be numbers or dates, not only strings: convert
                # before joining so str.join does not raise TypeError.
                column_text = " ".join(str(cell) for cell in spec_data[column].tolist()).strip()

                ids.append(f"id{position}")
                documents.append(column_text)
                metadatas.append({'column_title': "__".join(title_parts)})

            # Collection-level metadata describing where the data came from.
            collection_metadata = {
                "source_file": file_name,
                "sheet_name": sheet_name,
                "conf_title": spec_data.columns[0][0],
                "conf_title_short": sheet_name
            }

            logger.info(f"Processed {len(documents)} documents from Excel data")

            return {
                "ids": ids,
                "documents": documents,
                "metadatas": metadatas,
                "collection_metadata": collection_metadata
            }

        except Exception as e:
            logger.error(f"Error processing Excel data: {e}")
            raise


In [5]:

class ChromaDBManager:
    """
    Manage ChromaDB collections: connect, create, fill, read and delete.
    """

    def __init__(self, embedding_function=None):
        """
        Connect to ChromaDB and choose the embedding function.

        Args:
            embedding_function: Embedding function to use (optional). When it
                is not given, one is created from the model named in the
                configuration.

        Raises:
            Exception: connection or embedding-function errors are logged and
                re-raised.
        """
        config_loader = ConfigLoader()
        # The returned config is not used directly; the call itself is kept,
        # ahead of the getters below, exactly as before.
        config_loader.load_config()
        self.host = config_loader.get_host()
        self.port = config_loader.get_port()

        logger.info(f"Initializing ChromaDBManager with host: {self.host}, port: {self.port}")

        try:
            self.chroma_client = chromadb.HttpClient(host=self.host, port=self.port)
            logger.info("Successfully connected to ChromaDB")
        except Exception as e:
            logger.error(f"Failed to connect to ChromaDB: {e}")
            raise

        if embedding_function:
            self.embedding_function = embedding_function
            logger.info("Using provided embedding function")
        else:
            try:
                embedding_model = config_loader.get_embedding_model()
                self.embedding_function = chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction(
                    model_name=embedding_model)
                logger.info(f"Created embedding function with model: {embedding_model}")
            except Exception as e:
                logger.error(f"Failed to create embedding function: {e}")
                raise

    def _fetch_collections(self) -> List[Any]:
        """List collections via the client, letting any error propagate."""
        collections = self.chroma_client.list_collections()
        logger.info(f"Listed {len(collections)} collections in ChromaDB")
        return collections

    def list_collections(self) -> List[Any]:
        """
        List all collections in ChromaDB.

        Returns:
            The list returned by the client, or an empty list when listing
            fails (the error is logged).
        """
        try:
            return self._fetch_collections()
        except Exception as e:
            logger.error(f"Error listing collections: {e}")
            return []

    def create_collection(self, name: str, metadata: Optional[Dict[str, Any]] = None) -> Any:
        """
        Return the collection called ``name``, creating it when needed.

        Args:
            name: Collection name.
            metadata: Collection metadata (optional); only used when the
                collection has to be created.

        Returns:
            The collection object.
        """
        try:
            # Any failure of get_collection is treated as "does not exist".
            collection = self.chroma_client.get_collection(
                name=name,
                embedding_function=self.embedding_function
            )
            logger.info(f"Collection '{name}' already exists.")
        except Exception:
            collection = self.chroma_client.create_collection(
                name=name,
                metadata=metadata,
                embedding_function=self.embedding_function
            )
            logger.info(f"Created new collection '{name}' with metadata: {metadata}")

        return collection

    def get_collection(self, name: str) -> Any:
        """
        Get a collection by name.

        Args:
            name: Collection name.

        Returns:
            The collection object, or None when it cannot be retrieved (the
            error is logged).
        """
        try:
            collection = self.chroma_client.get_collection(
                name=name,
                embedding_function=self.embedding_function
            )
            logger.info(f"Retrieved collection '{name}'")
            return collection
        except Exception as e:
            logger.error(f"Error getting collection '{name}': {e}")
            return None

    def add_documents(self,
                      collection_name: str,
                      documents: List[str],
                      metadatas: Optional[List[Dict[str, Any]]] = None,
                      ids: Optional[List[str]] = None) -> bool:
        """
        Add documents to an existing collection.

        Args:
            collection_name: Collection name.
            documents: Document texts.
            metadatas: One metadata dict per document (optional).
            ids: One identifier per document (optional); when missing or
                empty, "id0", "id1", ... are generated.

        Returns:
            True when the documents were added, otherwise False.
        """
        try:
            collection = self.get_collection(collection_name)
            # get_collection signals failure with None; test for exactly that
            # instead of relying on the collection object's truthiness.
            if collection is None:
                logger.error(f"Collection '{collection_name}' not found")
                return False

            if not ids:
                ids = [f"id{i}" for i in range(len(documents))]

            collection.add(
                documents=documents,
                metadatas=metadatas,
                ids=ids
            )

            logger.info(f"Added {len(documents)} documents to collection '{collection_name}'")
            return True
        except Exception as e:
            logger.error(f"Error adding documents to collection '{collection_name}': {e}")
            return False

    def get_collection_contents(self,
                                collection_name: str,
                                include: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Get the full contents of a collection.

        Args:
            collection_name: Collection name.
            include: What to include in the result ("documents", "metadatas",
                "embeddings"); defaults to documents and metadatas.

        Returns:
            The collection contents, or an empty dict on error.
        """
        try:
            collection = self.get_collection(collection_name)
            if collection is None:
                logger.error(f"Collection '{collection_name}' not found")
                return {}

            if include is None:
                include = ["documents", "metadatas"]

            contents = collection.get(include=include)

            logger.info(f"Got contents for collection '{collection_name}': {len(contents.get('ids', []))} documents")
            return contents
        except Exception as e:
            logger.error(f"Error getting collection contents for '{collection_name}': {e}")
            return {}

    def add_from_excel(self, file_name: str, sheet_name: str, collection_name: str) -> bool:
        """
        Load one Excel sheet into a ChromaDB collection.

        Args:
            file_name: Path to the Excel file.
            sheet_name: Name of the sheet inside the Excel file.
            collection_name: Name of the collection to create or extend.

        Returns:
            True when the data was added, otherwise False.
        """
        try:
            # The processor turns the sheet into ids/documents/metadatas.
            processor = EmbeddingProcessor()
            data = processor.process_excel_data(file_name, sheet_name)

            # Make sure the collection exists before adding to it.
            self.create_collection(
                name=collection_name,
                metadata=data["collection_metadata"]
            )

            success = self.add_documents(
                collection_name=collection_name,
                documents=data["documents"],
                metadatas=data["metadatas"],
                ids=data["ids"]
            )

            if success:
                logger.info(f"Successfully added data from {file_name} ({sheet_name}) to collection '{collection_name}'")
            else:
                logger.error(f"Failed to add data from {file_name} to collection '{collection_name}'")

            return success
        except Exception as e:
            logger.error(f"Error adding data from Excel to collection: {e}")
            return False

    def delete_all_collections(self) -> Dict[str, Any]:
        """
        Delete every collection in ChromaDB.

        Returns:
            Dictionary describing the outcome:
            {
                "success": bool - True when nothing failed,
                "total": int - number of collections found,
                "deleted": int - number of collections deleted,
                "failed": int - number of collections that could not be deleted,
                "failed_collections": List[str] - names of those collections
            }
            When the operation itself fails (for example the collections
            cannot be listed), "success" is False and an "error" key holds the
            message.
        """
        try:
            # Use the raising variant: list_collections() turns errors into an
            # empty list, which would make a failed listing look like
            # "nothing to delete, success".
            collections = self._fetch_collections()

            if not collections:
                logger.info("No collections found to delete")
                return {
                    "success": True,
                    "total": 0,
                    "deleted": 0,
                    "failed": 0,
                    "failed_collections": []
                }

            deleted_count = 0
            failed_collections = []

            for collection in collections:
                # Some chromadb releases (0.6.x) list plain names instead of
                # collection objects; accept both.
                collection_name = getattr(collection, "name", collection)
                try:
                    self.chroma_client.delete_collection(name=collection_name)
                    deleted_count += 1
                    logger.info(f"Deleted collection '{collection_name}'")
                except Exception as e:
                    failed_collections.append(collection_name)
                    logger.error(f"Failed to delete collection '{collection_name}': {e}")

            failed_count = len(failed_collections)

            result = {
                "success": failed_count == 0,
                "total": len(collections),
                "deleted": deleted_count,
                "failed": failed_count,
                "failed_collections": failed_collections
            }

            logger.info(f"Delete all collections operation completed: {deleted_count} deleted, {failed_count} failed")
            return result

        except Exception as e:
            logger.error(f"Error in delete_all_collections: {e}")
            return {
                "success": False,
                "total": 0,
                "deleted": 0,
                "failed": 0,
                "failed_collections": [],
                "error": str(e)
            }

In [12]:
# Connect to ChromaDB through the manager class.
chroma_manager = ChromaDBManager()

# Ask the server for every collection it currently holds.
collections = chroma_manager.list_collections()

# Keep just the names and print a short summary.
collection_names = [entry.name for entry in collections]
print(
    f"Найдено {len(collection_names)} коллекций:",
    ", ".join(collection_names),
    sep="\n",
)

[32m2025-04-18 22:42:28.004[0m | [1mINFO    [0m | [36mrag.core.utils.config_loader[0m:[36mload_config[0m:[36m47[0m - [1mConfiguration loaded successfully: {'count_keywords': 10, 'host': 'localhost', 'port': 8080, 'collections_file': 'all_collections.json', 'embedding_model': 'BAAI/bge-m3'}[0m
[32m2025-04-18 22:42:28.006[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m18[0m - [1mInitializing ChromaDBManager with host: localhost, port: 8080[0m
[32m2025-04-18 22:42:28.239[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m23[0m - [1mSuccessfully connected to ChromaDB[0m
[32m2025-04-18 22:42:28.239[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m37[0m - [1mCreated embedding function with model: BAAI/bge-m3[0m
[32m2025-04-18 22:42:28.241[0m | [1mINFO    [0m | [36m__main__[0m:[36mlist_collections[0m:[36m51[0m - [1mListed 5 collections in ChromaDB[0m


Найдено 5 коллекций:
KMU, FRUCT, EPI, YSC, PPS


In [10]:
# Delete every collection on the ChromaDB server; `result` is the summary dict
# returned by delete_all_collections().
result = chroma_manager.delete_all_collections()

[32m2025-04-18 22:19:53.492[0m | [1mINFO    [0m | [36m__main__[0m:[36mlist_collections[0m:[36m51[0m - [1mListed 1 collections in ChromaDB[0m
[32m2025-04-18 22:19:53.507[0m | [1mINFO    [0m | [36m__main__[0m:[36mdelete_all_collections[0m:[36m268[0m - [1mDeleted collection 'PPS'[0m
[32m2025-04-18 22:19:53.508[0m | [1mINFO    [0m | [36m__main__[0m:[36mdelete_all_collections[0m:[36m286[0m - [1mDelete all collections operation completed: 1 deleted, 0 failed[0m


In [11]:
# Load data from Excel into collections: one sheet of the spreadsheet per
# collection (sheet name first, collection name second).
# The spreadsheet location is a machine-specific absolute path.
specifics_path = "D:/GitHub/Edulytica/src/rag/data/Specification.xlsx"
chroma_manager.add_from_excel(specifics_path, "ППС", "PPS")
chroma_manager.add_from_excel(specifics_path, "КМУ", "KMU")
chroma_manager.add_from_excel(specifics_path, "ЭПИ", "EPI")
chroma_manager.add_from_excel(specifics_path, "YSC", "YSC")
chroma_manager.add_from_excel(specifics_path, "FRUCT", "FRUCT")









[32m2025-04-18 22:22:17.948[0m | [1mINFO    [0m | [36mrag.core.utils.config_loader[0m:[36mload_config[0m:[36m47[0m - [1mConfiguration loaded successfully: {'count_keywords': 10, 'host': 'localhost', 'port': 8080, 'collections_file': 'all_collections.json', 'embedding_model': 'BAAI/bge-m3'}[0m
[32m2025-04-18 22:22:17.948[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m15[0m - [1mInitializing EmbeddingProcessor with model: BAAI/bge-m3[0m
[32m2025-04-18 22:22:17.948[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m20[0m - [1mSuccessfully initialized embedding function with model BAAI/bge-m3[0m
[32m2025-04-18 22:22:17.949[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_excel_data[0m:[36m45[0m - [1mProcessing Excel data from file: D:/GitHub/Edulytica/src/rag/data/Specification.xlsx, sheet: ППС[0m
[32m2025-04-18 22:22:17.963[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_excel_data[0m:[36m51[0m - [1mSuccessful

True

### Get data

In [15]:
# Fetch the full contents of the "PPS" collection, embeddings included
# (embeddings are not part of the method's default `include`).
pps_data = chroma_manager.get_collection_contents(
    collection_name="PPS",
    include=["documents", "embeddings", "metadatas"]
)
pps_data


[32m2025-04-18 23:02:36.004[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_collection[0m:[36m101[0m - [1mRetrieved collection 'PPS'[0m
[32m2025-04-18 23:02:36.057[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_collection_contents[0m:[36m177[0m - [1mGot contents for collection 'PPS': 8 documents[0m


{'ids': ['id0', 'id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7'],
 'embeddings': array([[-0.05616511, -0.01541862, -0.02970634, ...,  0.016752  ,
          0.00894882, -0.00475067],
        [-0.02731149,  0.00175687, -0.03545567, ...,  0.01454608,
         -0.05446029, -0.02225705],
        [-0.0324773 ,  0.01229401, -0.01113146, ..., -0.01004309,
         -0.05448178, -0.01861655],
        ...,
        [-0.03238031,  0.01179582, -0.0283231 , ..., -0.01092045,
         -0.01374846,  0.01287064],
        [-0.05800085,  0.01502216, -0.04128072, ..., -0.00755922,
         -0.03570972, -0.00576454],
        [-0.01509039,  0.00757372,  0.01312466, ..., -0.02653653,
         -0.02884256, -0.03378146]]),
 'metadatas': [{'column_title': 'О конференции'},
  {'column_title': 'Программа конференции'},
  {'column_title': 'Секции__Секция 1'},
  {'column_title': 'Секции__Секция 2'},
  {'column_title': 'Секции__Секция 3'},
  {'column_title': 'Секции__Секция 4'},
  {'column_title': 'Требования__Требов