In [1]:
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and runtime warnings
# raised by any library used below will be hidden as well.
warnings.filterwarnings("ignore")

In [27]:
import json
import pandas as pd
from tqdm import tqdm
from sentence_transformers import SentenceTransformer

In [13]:
from dotenv import load_dotenv
import psycopg2
from psycopg2.extras import execute_values

#### Подготовка данных

In [14]:
# Load the sentence-embedding model used to encode the descriptions.
EMBEDDING_MODEL_NAME = 'ai-forever/ru-en-RoSBERTa'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

Some weights of RobertaModel were not initialized from the model checkpoint at ai-forever/ru-en-RoSBERTa and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
# Read the time-series characteristics file (UTF-8) and parse it as JSON.
ts_data_path = './characteristics/time_series.json'
with open(ts_data_path, encoding='utf-8') as ts_file:
    ts_data = json.loads(ts_file.read())

In [None]:
# Pull out the collection of time-series characteristics stored under the
# top-level 'sets' key of the loaded JSON document.
ts_characteristics = ts_data['sets']

In [31]:
# Build one record per time series: its name, both descriptions, and an
# embedding of the subject description.
ts_characteristics_extended = []
for series in tqdm(ts_characteristics):
    series_name = series['name']
    descriptions = series['description']
    methodological_text = descriptions['methodological']
    subject_text = descriptions['subject']

    # Encode only the subject description; normalization is requested from the
    # encoder itself via normalize_embeddings=True.
    subject_embedding = model.encode(
        subject_text,
        convert_to_numpy=True,
        normalize_embeddings=True,
    )

    record = {
        'name': series_name,
        'desc_subject': subject_text,
        'desc_methodological': methodological_text,
        # Stored as a plain Python list rather than a numpy array.
        'emb_subject_description': subject_embedding.tolist(),
    }
    ts_characteristics_extended.append(record)

100%|██████████| 7/7 [00:01<00:00,  3.50it/s]


#### Запрос и выполнение

In [20]:
# Connect to the database.
# load_dotenv() is called first and connect() receives no arguments, so the
# connection settings are presumably supplied through environment variables
# (e.g. libpq's PGHOST / PGUSER / PGDATABASE) defined in a .env file —
# TODO confirm which variables are expected.
load_dotenv()
conn = psycopg2.connect()

In [30]:
# INSERT template passed to psycopg2's execute_values: the single %s after
# VALUES stands for the whole list of row tuples. The column order listed here
# must match the order of the fields in each tuple that is inserted.
query = """
        INSERT INTO time_series_cases (
            name, desc_subject, desc_methodological, emb
        ) VALUES %s
        """

In [32]:
# Turn each record into a row tuple for insertion. The field order below is
# the order in which the values appear in every tuple.
insert_fields = (
    "name",
    "desc_subject",
    "desc_methodological",
    "emb_subject_description",
)
values_to_insert = [
    tuple(case[field] for field in insert_fields)
    for case in ts_characteristics_extended
]

In [34]:
# Run the bulk insert. The outer block wraps the statement in a transaction on
# `conn`; the inner block scopes the cursor used for the insert.
with conn:
    with conn.cursor() as cur:
        execute_values(cur, query, values_to_insert)

In [35]:
# Close the database connection explicitly: in psycopg2, leaving a
# `with conn:` block only ends the transaction and keeps the connection open.
conn.close()