In [13]:
import os

# Set this to True to use Cloud SQL; set it to False to use the local
# pgvector Docker image instead.
# USE_CLOUDSQL = False
USE_CLOUDSQL = True

project_id = "imrenagi-gemini-experiment"  # change this to your project id
region = "us-central1"
gemini_embedding_model = "text-embedding-004"

# Database credentials can be overridden with the PYCONAPAC_DB_* environment
# variables so that real secrets never have to be written into the notebook.
# The fallback values are the demo credentials.
if not USE_CLOUDSQL:
    # use the pgvector docker image for local development
    database_password = os.environ.get("PYCONAPAC_DB_PASSWORD", "pyconapac")
    database_name = os.environ.get("PYCONAPAC_DB_NAME", "pyconapac")
    database_user = os.environ.get("PYCONAPAC_DB_USER", "pyconapac")
    database_host = "localhost"
else:
    # use the Cloud SQL credentials; database_host is not set here, it is
    # looked up from the instance in a later cell
    instance_name = "pyconapac-demo"
    database_password = os.environ.get("PYCONAPAC_DB_PASSWORD", "testing")
    database_name = os.environ.get("PYCONAPAC_DB_NAME", "testing")
    database_user = os.environ.get("PYCONAPAC_DB_USER", "testing")

# Fail early on empty values (e.g. an environment variable set to "").
assert database_name, "⚠️ Please provide a database name"
assert database_user, "⚠️ Please provide a database user"
assert database_password, "⚠️ Please provide a database password"

# Table that holds the course content embeddings.
embeddings_table_name = "course_content_embeddings"


In [14]:
if USE_CLOUDSQL:
    # get the ip address of the cloudsql instance
    ip_addresses = !gcloud sql instances describe {instance_name} --format="value(ipAddresses[0].ipAddress)"
    database_host = ip_addresses[0]

In [15]:
from urllib.parse import quote

# Percent-encode the credentials so that characters such as "@", ":" or "/"
# in the user name or password cannot break the structure of the URI.
db_conn_string = (
    f"postgres://{quote(database_user, safe='')}:{quote(database_password, safe='')}"
    f"@{database_host}:5432/{database_name}"
)
# NOTE: the displayed value contains the database password; clear this cell's
# output before sharing or committing the notebook.
db_conn_string

'postgres://testing:testing@35.232.5.157:5432/testing'

In [16]:
import vertexai
from langchain_google_vertexai import VertexAIEmbeddings

# Initialise the Vertex AI SDK with the project and region from the config cell.
vertexai.init(
    project=project_id,
    location=region,
)

# Embedding client built from the model name chosen in the config cell.
embeddings_service = VertexAIEmbeddings(
    model_name=gemini_embedding_model,
)

In [17]:
from langchain_google_cloud_sql_pg import PostgresLoader, PostgresEngine, Column

# Create the engine for the Cloud SQL instance named in the config cell.
# `instance_name` is only defined when USE_CLOUDSQL is True, so this cell
# requires the Cloud SQL configuration.
pg_engine = PostgresEngine.from_instance(
    project_id=project_id,
    instance=instance_name,
    region=region,
    database=database_name,
    # Pass the database user here; the password must not be used as the user
    # name (it only happened to work while both values were identical).
    user=database_user,
    password=database_password,
)

In [18]:
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_google_cloud_sql_pg import PostgresVectorStore

# Vector store over the embeddings table, reusing the engine and embedding
# client created in the earlier cells.
vector_store = PostgresVectorStore.create_sync(
    pg_engine,
    embedding_service=embeddings_service,
    table_name=embeddings_table_name,
)

# Retriever view of the store; k=10 is passed through as a search argument
# (presumably the number of documents returned per query — confirm).
retriever = vector_store.as_retriever(search_kwargs=dict(k=10))

In [19]:
# Try the retriever with a sample question; the list of matching documents it
# returns is displayed as this cell's output.
retriever.invoke("how to design forgot password?")

[Document(metadata={'course_content_id': 4, 'title': 'Password Storage Cheat Sheet'}, page_content='Like [Argon2id](#argon2id), scrypt has three different parameters that can be configured: the minimum CPU/memory cost parameter (N), the blocksize (r) and the degree of parallelism (p). Use one of the following settings:\n\n- N=2^17 (128 MiB), r=8 (1024 bytes), p=1\n- N=2^16 (64 MiB), r=8 (1024 bytes), p=2\n- N=2^15 (32 MiB), r=8 (1024 bytes), p=3\n- N=2^14 (16 MiB), r=8 (1024 bytes), p=5\n- N=2^13 (8 MiB), r=8 (1024 bytes), p=10\n\nThese configuration settings provide an equal level of defense. The only difference is a trade off between CPU and RAM usage.\n\n### bcrypt\n\nThe [bcrypt](https://en.wikipedia.org/wiki/bcrypt) password hashing function should be the best choice for password storage in legacy systems or if PBKDF2 is required to achieve FIPS-140 compliance.\n\nThe work factor should be as large as verification server performance will allow, with a minimum of 10.\n\n#### Input 