In [4]:
import os

import numpy as np
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams


# Pull variables from a local .env file into the process environment.
load_dotenv()

# Credentials are read from the environment rather than written into the notebook.
# os.getenv returns None when a variable is unset.
openAI_key = os.getenv("OPENAI_KEY")
qdrant_key = os.getenv("QDRANT_KEY")

# Qdrant endpoint. Set QDRANT_URL to point at a different cluster without
# editing this cell; when it is unset the original cluster URL is used.
qdrant_url = os.getenv(
    "QDRANT_URL",
    "https://25c776da-e46f-4017-a9ae-57be7489d68e.us-east4-0.gcp.cloud.qdrant.io",
)
qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_key)

In [20]:
# Kaczynski manifesto: source text for the "kaczynski" collection.
manifesto_path = "../documents/kaczynski/manifesto.txt"

# Read the whole file into a single string; it is split into chunks later.
with open(manifesto_path, "r", encoding="utf-8") as manifesto_file:
    manifesto = manifesto_file.read()

# Name of the Qdrant collection that receives the embedded chunks.
collection_name = "kaczynski"

In [21]:
# Chunking parameters handed straight to the splitter.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Splitter that cuts the manifesto into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# create_documents takes a list of texts; the manifesto is the only entry.
chunks = text_splitter.create_documents([manifesto])

# `documents` is its own list holding the same chunk objects.
documents = [*chunks]

# Embedding client, authenticated with the key held in `openAI_key`.
embeddings = OpenAIEmbeddings(openai_api_key=openAI_key)

In [None]:
# Generate and upload embeddings
UPSERT_BATCH_SIZE = 64  # points sent per upsert request, to keep each request small

texts = [document.page_content for document in documents]

# embed_documents is the document-side embedding call and accepts the whole
# list at once, instead of one embed_query round trip per chunk.
vectors = embeddings.embed_documents(texts) if texts else []

if vectors:
    # Create the collection when it does not exist yet, so the cell also works
    # on a first run. The vector size is taken from the embeddings themselves.
    # NOTE(review): cosine distance is assumed here; confirm it is the metric
    # this collection should use.
    existing_collections = {
        collection.name for collection in qdrant_client.get_collections().collections
    }
    if collection_name not in existing_collections:
        qdrant_client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=len(vectors[0]), distance=Distance.COSINE),
        )

    # Point ids are the chunk positions, so re-running the cell overwrites the
    # same ids rather than adding duplicates.
    points = [
        PointStruct(id=i, vector=vector, payload={"text": text})
        for i, (text, vector) in enumerate(zip(texts, vectors))
    ]

    # Upload in fixed-size batches instead of one request per point.
    for start in range(0, len(points), UPSERT_BATCH_SIZE):
        qdrant_client.upsert(
            collection_name=collection_name,
            points=points[start:start + UPSERT_BATCH_SIZE],
        )