In [16]:
#Importing the necessary libraries
from sentence_transformers import SentenceTransformer
import numpy as np
import json
import pandas as pd
from tqdm.notebook import tqdm



In [17]:
# Load the sentence-embedding model used to encode the product texts.
# device=None lets sentence-transformers choose the device itself (it checks
# whether a GPU can be used and falls back otherwise), so this cell also runs
# on machines without CUDA instead of failing on a hard-coded "cuda".
model = SentenceTransformer("all-MiniLM-L6-v2", device=None)

# The input file holds one JSON object per line; the code below reads the
# "name", "domain" and "description" fields of every record.
df = pd.read_json("G2_Cleaned.json", lines=True)

# Build one text per product: "<name>. <domain>. <description>".
# NOTE(review): assumes all three fields are non-null strings in the cleaned
# file; a missing value would raise a TypeError here — confirm.
product_texts = [
    row.name + ". " + row.domain + ". " + row.description for row in df.itertuples()
]

vectors = model.encode(product_texts, show_progress_bar=True)

# Displayed as the cell output: (number of encoded texts, embedding size).
vectors.shape

Batches: 100%|██████████| 498/498 [00:07<00:00, 66.95it/s] 


(15920, 384)

In [18]:
# Persist the embeddings to disk as a plain .npy file (pickling disabled).
np.save(
    file="product_vectors.npy",
    arr=vectors,
    allow_pickle=False,
)

In [19]:
# Import client library
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance


# Create the Qdrant client pointed at the local server URL.
QDRANT_URL = "http://localhost:6333"
client = QdrantClient(QDRANT_URL)

In [20]:
# Set up the "G2products" collection: 384-dimensional vectors compared by
# cosine distance.
# NOTE(review): recreate_collection presumably drops an existing collection
# with the same name first — confirm before running against data to keep.
g2_vector_params = VectorParams(size=384, distance=Distance.COSINE)
client.recreate_collection(
    collection_name="G2products",
    vectors_config=g2_vector_params,
)

True

In [21]:
# Read the product records used as payload, one JSON object per line.
# The file is opened in a `with` block so the handle is closed right away, and
# the records are materialised into a list so the upload cell can be re-run
# without hitting an already-exhausted iterator. UTF-8 is requested explicitly
# so decoding does not depend on the platform's default locale encoding.
with open("G2_Cleaned.json", encoding="utf-8") as fd:
    # Blank lines are skipped; json.loads would otherwise fail on them.
    payload = [json.loads(line) for line in fd if line.strip()]

# Load all vectors into memory; a numpy array can be iterated row by row.
# Alternative: np.load(..., mmap_mode="r") memory-maps the file instead of
# reading everything into RAM.
vectors = np.load("product_vectors.npy")

# The upload passes payload and vectors as two parallel sequences, so a count
# mismatch means the saved vectors no longer belong to this file's records.
if len(payload) != len(vectors):
    raise ValueError(
        f"{len(payload)} payload records but {len(vectors)} vectors; "
        "regenerate product_vectors.npy from the current G2_Cleaned.json"
    )

In [22]:
# Send every vector together with its payload record to the "G2products"
# collection so it can be queried by neural search.
UPLOAD_BATCH_SIZE = 256  # number of vectors sent in a single request

client.upload_collection(
    collection_name="G2products",
    vectors=vectors,
    payload=payload,
    ids=None,  # no explicit ids: they are assigned automatically
    batch_size=UPLOAD_BATCH_SIZE,
)