In [1]:
# Bootstrap Django first: the next cell imports project models (blog.models).
# NOTE(review): `setup` is a project-local helper not shown in this notebook;
# presumably init_django() configures settings and calls django.setup() — confirm.
import setup

setup.init_django()

In [2]:
from decouple import config
from blog.models import BlogPost
from blog import services

In [3]:
# qs = BlogPost.objects.filter(can_delete=True)
# qs

In [4]:
# One-time dependency install (use %pip so it targets this kernel's environment):
# %pip install llama-index sqlalchemy llama-index-vector-stores-postgres

In [5]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

In [6]:
# Every tunable is read through decouple's `config`.
# NOTE(review): the "EMEDDING_*" names are misspelled ("EMBEDDING"), but the
# spelling is kept: the same strings are the environment keys, and
# EMEDDING_LENGTH is referenced again when the vector store is built.
LLM_MODEL = config("LLM_MODEL", default="gpt-4o")  # chat model handed to `llm` below
EMEDDING_LENGTH = config("EMEDDING_LENGTH", cast=int, default=1536)  # used as embed_dim for the vector store
EMEDDING_MODEL = config("EMEDDING_MODEL", default="text-embedding-3-small")
OPENAI_API_KEY = config("OPENAI_API_KEY")  # required: no default is supplied

# Both OpenAI clients share the same API key.
llm = OpenAI(api_key=OPENAI_API_KEY, model=LLM_MODEL)
embed_model = OpenAIEmbedding(api_key=OPENAI_API_KEY, model=EMEDDING_MODEL)

In [7]:
from llama_index.core import Settings

# Register the clients built above on llama-index's global Settings object so
# components that are not given an explicit llm / embed_model pick these up.
Settings.llm = llm
Settings.embed_model = embed_model

In [8]:
# Name of the Postgres database that holds the embeddings; a later cell
# creates it if it does not exist yet.
vector_db_name = "vector_db"
# Table name passed to the vector store. Per the original note, the table that
# actually appears in Postgres is named data_blogpost.
vector_db_table_name = "blogpost" # -> data_blogpost

In [9]:
# Connection string for the Postgres server, read from DATABASE_URL_POOL.
DATABASE_URL = config("DATABASE_URL_POOL")
# Rewrite a leading "postgres://" scheme to "postgresql://"; only the prefix
# is touched, the rest of the URL is carried over unchanged.
if DATABASE_URL.startswith("postgres://"):
    DATABASE_URL = "postgresql://" + DATABASE_URL[len("postgres://"):]

In [10]:
# Create the vector database if it is missing, then enable pgvector inside it.
from sqlalchemy import create_engine, make_url, text

# AUTOCOMMIT is required: Postgres refuses CREATE DATABASE inside a
# transaction block.
engine = create_engine(DATABASE_URL, isolation_level="AUTOCOMMIT")
with engine.connect() as connection:
    result = connection.execute(
        text("SELECT 1 FROM pg_database WHERE datname = :db_name"),
        {"db_name": vector_db_name},
    )
    db_exists = result.scalar() == 1
    if not db_exists:
        # Identifiers cannot be sent as bound parameters, so the name is
        # interpolated; vector_db_name is a constant defined in this notebook,
        # not user input.
        connection.execute(text(f"CREATE DATABASE {vector_db_name}"))

if not db_exists:
    # Extensions are installed per database, so pgvector must be enabled from
    # a connection to the new database, not from the admin connection above.
    # (The original called an undefined `session` here and did so before the
    # database existed.)
    vector_db_url = make_url(DATABASE_URL).set(database=vector_db_name)
    vector_db_engine = create_engine(vector_db_url, isolation_level="AUTOCOMMIT")
    try:
        with vector_db_engine.connect() as vector_db_connection:
            vector_db_connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
    finally:
        # One-off engine: release its pooled connections right away.
        vector_db_engine.dispose()

In [11]:
from sqlalchemy import make_url
from llama_index.vector_stores.postgres import PGVectorStore

# Reuse host and credentials from DATABASE_URL, but point the store at the
# dedicated vector database rather than the database named in the URL.
url = make_url(DATABASE_URL)
vector_store = PGVectorStore.from_params(
    # connection
    host=url.host,
    port=url.port or 5432,  # fall back to 5432 when the URL carries no port
    user=url.username,
    password=url.password,
    # storage location
    database=vector_db_name,
    table_name=vector_db_table_name,
    # vector size configured alongside the embedding model
    embed_dim=EMEDDING_LENGTH,
)

In [12]:
from llama_index.core import VectorStoreIndex, StorageContext

# Build an index on top of the Postgres vector store; documents are inserted
# into it in a later cell.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(vector_store, storage_context=storage_context)
# Query engine with default options.
# NOTE(review): presumably uses the llm / embed_model registered on Settings — confirm.
query_engine = index.as_query_engine()

In [13]:
from llama_index.core import Document

# One llama-index Document per deletable blog post. The post's primary key is
# used (as a string) for the document id and kept in the metadata with the
# title, so query results can be traced back to the BlogPost row.
qs = BlogPost.objects.filter(can_delete=True)
docs = [
    Document(
        text=f"{post.get_embedding_text_raw()}",
        doc_id=str(post.id),
        metadata={"pk": post.pk, "title": post.title},
    )
    for post in qs
]

docs

[Document(id_='100', embedding=None, metadata={'pk': 100, 'title': '“I’ll go with my principles tomorrow”'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='In the short run, it’s easy to abandon what we believe. Deep down, we assume that once things go back to normal, so will we.Organizations end up with bullies, predators and bad actors for only one reason: In this moment, it’s easier to keep them. There’s some sort of urgency that makes asking them to leave too difficult right now, so we put it off for a little while. When we make a “just this once” exception, we’ve already made a decision about what’s truly important.And the same goes for those moments when we’re inclined to be, just for a moment, a bully, a predator or a bad actor as well. Few people decide to be selfish for the long haul.What makes it a principle is that we do it now, even though (especially though) it’s hard.', mimetype='text/plain', start_char_idx=None, end_char_idx=None,

In [14]:
# Delete-then-insert for every document: anything stored under the same
# ref-doc id is removed before the fresh copy is inserted. The order matters;
# the intent appears to be that re-running this cell replaces entries rather
# than duplicating them.
# NOTE(review): each insert presumably triggers an embedding call — check the
# size of `docs` before running.
for doc in docs:
    index.delete_ref_doc(f"{doc.id_}", delete_from_docstore=True)
    index.insert(doc)

In [15]:
# Run a free-text query against the index; the metadata attached to the
# response is printed in the next cell.
response = query_engine.query("The dog jumped")

In [16]:
# response.metadata is a dict of dicts; print each inner key/value pair
# (the pk and title stored when the documents were built).
for source_metadata in response.metadata.values():
    for field, value in source_metadata.items():
        print(field, value)

pk 35
title Blog Post 2
pk 34
title Blog Post 1
