# Connect

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

# Open a connection to the Qdrant server listening on localhost:6333.
client = QdrantClient("localhost", port=6333)

# Create the "articles" collection only when it is not there yet, so that
# re-running this cell does not try to create it a second time.
articles_missing = not client.collection_exists("articles")
if articles_missing:
    # 384-dimensional vectors compared by cosine distance; the size has to
    # equal the length of the embeddings stored in this collection.
    vector_settings = VectorParams(size=384, distance=Distance.COSINE)
    client.create_collection(
        collection_name="articles",
        vectors_config=vector_settings,
    )

# Add data

In [None]:
import requests


def get_response(messages, *, model="qwen2.5:3b", temperature=1.0, timeout=300):
    """Send a non-streaming chat request to the local Ollama server.

    Args:
        messages: Chat history forwarded unchanged as the "messages" field
            of the request (a list of dicts with "role" and "content").
        model: Name of the model the server should run.
        temperature: Sampling temperature placed in the request "options".
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the caller forever.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        RuntimeError: The server answered with a status other than 200; the
            message carries the status code and the response body.
        requests.exceptions.RequestException: The request could not be
            completed (connection refused, timeout, ...).
    """
    request = {
        "model": model,
        "messages": messages,
        # Ask for the whole answer in a single JSON document.
        "stream": False,
        "options": {
            "temperature": temperature,
        },
    }

    response = requests.post(
        "http://localhost:11434/api/chat",
        json=request,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise RuntimeError(
            f"Ollama request failed with HTTP {response.status_code}: {response.text}"
        )
    return response.json()


In [None]:
import pandas as pd

# Load the article dump and display only the columns used by the cells below.
preview_columns = ["link", "title", "text"]
df = pd.read_csv("articles.csv")
df[preview_columns]

In [None]:
from sentence_transformers import SentenceTransformer
import pandas as pd
import tqdm

# Sentence-embedding model; it is reused by the search cell to encode queries.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

# NOTE(review): this is an absolute, machine-specific path, while the preview
# cell reads a relative "articles.csv" - confirm which location is intended.
df = pd.read_csv("/Users/senya/projects/history-project/test/articles.csv")
articles = df[["link", "title", "text"]].to_dict(orient="records")

# Only the first 20 articles are ingested; each one triggers a chat request
# for its summary. To smoke-test the pipeline on a single hand-written
# article, iterate over a one-element list of {"title", "text", "link"}
# dicts instead of articles[:20].
#
# tqdm wraps the list itself (not the enumerate object) so that it can read
# the length and display a total and an ETA.
for i, article in enumerate(tqdm.tqdm(articles[:20])):
    # Build the embedding from the article title followed by its text.
    embedding = model.encode(f"{article['title']} {article['text']}").tolist()

    # Ask the chat model for a short summary of the article text.
    summary = get_response([
        {
            "role": "system",
            "content": "summarize text. maximum 250 words. write only summary.",
        },
        {
            "role": "user",
            "content": article["text"],
        },
    ])["message"]["content"]

    # Store the vector and its payload in Qdrant. The position in the list
    # is used as the point id and is repeated inside the payload.
    client.upsert(
        collection_name="articles",
        points=[
            {
                "id": i,
                "vector": embedding,
                "payload": {
                    "id": i,
                    "title": article["title"],
                    "text": article["text"],
                    "summarized_text": summary,
                    "url": article["link"],
                },
            }
        ],
    )


# Search

In [None]:
# Free-text question (Russian: "I want to learn about Haiti. Tell me, please"),
# encoded with the same model that produced the stored article vectors.
query = "я хочу узнать про гаити. расскажи, пожалуйста"
query_vector = model.encode(query).tolist()

# Vector similarity search over the "articles" collection.
search_results = client.search(collection_name="articles", query_vector=query_vector)

# Print every hit: score, title, source URL and the first 200 characters.
for hit in search_results:
    payload = hit.payload
    preview = payload["text"][:200]
    print(hit.score)
    print(f"🔹 {payload['title']}")
    print(f"🔗 {payload['url']}")
    print(f"📄 {preview}...\n")