<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/PGvectorEmbedding_CPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies

In [None]:
# Install the Python packages this notebook needs into the kernel environment.
# colab-env is imported (as colab_env) in the next cell, before the API key is read.
%pip install colab-env --upgrade --quiet --root-user-action=ignore
# openai is pinned to 0.28: the cells below call openai.Embedding.create and
# import openai.embeddings_utils.
%pip install openai==0.28  --root-user-action=ignore
# NOTE(review): the packages below are unpinned and are not imported in the
# cells visible here — confirm they are still needed and consider pinning them.
%pip install langchain
%pip install "unstructured[all-docs]"
%pip install tiktoken
%pip install -q -U sentence-transformers

# Environment variables

In [36]:
import colab_env
import os
import openai
from openai.embeddings_utils import cosine_similarity

# Read the database URL and the OpenAI API key from environment variables
# (presumably made available by the colab_env import above — confirm).
# Either value is None when its variable is not set.
connection_string = os.environ.get("DATABASE_URL")
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Embedding settings - OpenAI

In [37]:
def get_embedding(text: str, model: str = "text-embedding-ada-002") -> list:
    """Return the OpenAI embedding vector for ``text``.

    Args:
        text: The text to embed.
        model: Name of the OpenAI embedding model to request. Defaults to
            ``"text-embedding-ada-002"``, the model this notebook uses.
            The ``reviews`` table in this notebook is declared as
            ``vector(1536)``, so a model with a different output size needs
            a matching column definition.

    Returns:
        The ``embedding`` entry of the first item in the response's ``data``
        list.
    """
    response = openai.Embedding.create(input=text, model=model)
    return response["data"][0]["embedding"]

# Embed the short reference phrase that longer texts are compared against.
good_ride = "good ride"
good_ride_embedding = get_embedding(good_ride)

# Number of dimensions in the returned vector (1536 in the recorded run).
len(good_ride_embedding)

# Example: embed a full review and compare it with the reference phrase.
propmpt = (
    "I really enjoyed the trip! The ride was incredibly smooth, the pick-up location was convenient, and the drop-off point was right in front of the coffee shop."
)
propmpt_embedding = get_embedding(propmpt)

# Despite its name, `similary` holds the cosine *distance* (1 - similarity):
# the recorded run had a similarity of about 0.83, i.e. a distance of about 0.17.
similary = 1 - cosine_similarity(propmpt_embedding, good_ride_embedding)
similary

0.1699545486202666

# PostgreSQL Settings - PGVECTOR Extension

In [38]:
# https://python.langchain.com/docs/integrations/vectorstores/pgembedding

# install PSQL WITH DEV Libraries AND PGVECTOR
!apt install postgresql postgresql-contrib &>log
!service postgresql restart
!sudo apt install postgresql-server-dev-all

%pip install colab-env
import colab_env

#%cd /content/gdrive/MyDrive/tools/pgvector
!cp -pr /content/gdrive/MyDrive/tools/pgvector /content/
%cd /content/pgvector/
print()
print('START: PG VECTOR COMPILATION')
!make
!make install
print('END: PG VECTOR COMPILATION')
print()

#!ls /usr/share/postgresql/14/extension/*control*

 * Restarting PostgreSQL 14 database server
   ...done.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
postgresql-server-dev-all is already the newest version (238).
0 upgraded, 0 newly installed, 0 to remove and 24 not upgraded.
/content/pgvector

START: PG VECTOR COMPILATION
make: Nothing to be done for 'all'.
/bin/mkdir -p '/usr/lib/postgresql/14/lib'
/bin/mkdir -p '/usr/share/postgresql/14/extension'
/bin/mkdir -p '/usr/share/postgresql/14/extension'
/usr/bin/install -c -m 755  vector.so '/usr/lib/postgresql/14/lib/vector.so'
/usr/bin/install -c -m 644 .//vector.control '/usr/share/postgresql/14/extension/'
/usr/bin/install -c -m 644 .//sql/vector--0.1.0--0.1.1.sql .//sql/vector--0.1.1--0.1.3.sql .//sql/vector--0.1.3--0.1.4.sql .//sql/vector--0.1.4--0.1.5.sql .//sql/vector--0.1.5--0.1.6.sql .//sql/vector--0.1.6--0.1.7.sql .//sql/vector--0.1.7--0.1.8.sql .//sql/vector--0.1.8--0.2.0.sql .//sql/vector--0.2.0--0.2.1.sql .//sql/vector--0

In [39]:
import psycopg2 as ps
import os
#%pip install openai==0.28  --root-user-action=ignore
import openai
from openai.embeddings_utils import cosine_similarity

%cd /content/

!sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'postgres'"

############ VECTOR #############################
!sudo -u postgres psql -c "CREATE EXTENSION vector"

import psycopg2 as ps

DB_NAME = "postgres"
DB_USER = "postgres"
DB_PASS = "postgres"
DB_HOST = "localhost"
DB_PORT = "5432"

conn = ps.connect(database=DB_NAME,
							user=DB_USER,
							password=DB_PASS,
							host=DB_HOST,
							port=DB_PORT)

!sudo -u postgres psql -c "DROP TABLE reviews"

cur = conn.cursor() # creating a cursor

cur.execute("""
                            CREATE TABLE reviews
                            (text TEXT, embedding vector(1536))
                         """)

conn.commit()
print("TABLE REVIEWS Created successfully")
conn.close()
cur.close()

##### TEST THE VECTOR EXTENSION
# Reference phrase that every review is measured against.
prompt_0 = "good ride"
prompt_0_embedding = get_embedding(prompt_0)
len(prompt_0_embedding)

# Three sample reviews.
prompt_1 = "I really enjoyed the trip! The ride was incredibly smooth, the pick-up location was convenient, and the drop-off point was right in front of the coffee shop."
prompt_2 = "The drive was exceptionally comfortable. I felt secure throughout the journey and greatly appreciated the on-board entertainment, which allowed me to have some fun while the car was in motion."
prompt_3 = "A sudden hard brake at the intersection really caught me off guard and stressed me out. I was not prepared for it. Additionally, I noticed some trash left in the cabin from a previous rider."

# Embed each review.
prompt_1_embedding = get_embedding(prompt_1)
prompt_2_embedding = get_embedding(prompt_2)
prompt_3_embedding = get_embedding(prompt_3)

# Cosine distance (1 - cosine similarity) of each review from the reference.
distance_prompt_1 = 1 - cosine_similarity(prompt_1_embedding, prompt_0_embedding)
distance_prompt_2 = 1 - cosine_similarity(prompt_2_embedding, prompt_0_embedding)
distance_prompt_3 = 1 - cosine_similarity(prompt_3_embedding, prompt_0_embedding)

# Echo the prompts, framed by blank lines.
print()
for _template, _prompt in (
    ('Prompt0: %s', prompt_0),
    ('Prompt1: %s', prompt_1),
    ('Prompt2: %s', prompt_2),
    ('Prompt3: %s', prompt_3),
):
    print(_template % _prompt)
print()

# Report the three distances, framed by blank lines.
print()
print('distances-report: prompt_1:%s, prompt_2:%s, prompt_3:%s'%(distance_prompt_1,distance_prompt_2,distance_prompt_3))
print()

# Length of each embedding vector (the names say "characters", but these are
# vector lengths).
num_characters1 = len(prompt_1_embedding)
num_characters2 = len(prompt_2_embedding)
num_characters3 = len(prompt_3_embedding)

print()
#print(num_characters1,num_characters2,num_characters3)
print()

def text_and_embedding(text, textid):
    """Embed ``text`` and insert it, with its embedding, into ``reviews``.

    The values are passed to ``cursor.execute`` as bound parameters instead of
    being formatted into the SQL string, so text containing quotes (e.g.
    "it's") is stored correctly and cannot alter the statement.

    Args:
        text: The review text to embed and store.
        textid: Label used only in the success message that is printed.
    """
    review_embedding = get_embedding(text)

    ### INSERT INTO DB
    DB_NAME = "postgres"
    DB_USER = "postgres"
    DB_PASS = "postgres"
    DB_HOST = "localhost"
    DB_PORT = "5432"
    conn = ps.connect(database=DB_NAME,
                      user=DB_USER,
                      password=DB_PASS,
                      host=DB_HOST,
                      port=DB_PORT)
    try:
        cur = conn.cursor() # creating a cursor
        try:
            # The embedding is sent in its textual list form ('[v1, v2, ...]'),
            # the same representation the vector(...) call received before.
            cur.execute(
                """
        INSERT INTO reviews
        (text, embedding)
        VALUES (%s,
                vector(%s))""",
                (text, str(review_embedding)),
            )
            conn.commit()
        finally:
            # Close the cursor before its connection, even if the insert failed.
            cur.close()
    finally:
        conn.close()

    print("INSERT TEXTID %s successfully"%textid)


# Store the three sample reviews, labelled 1..3, framed by blank lines.
print()
for review_id, review_text in enumerate((prompt_1, prompt_2, prompt_3), start=1):
    text_and_embedding(review_text, review_id)
print()

/content
ALTER ROLE
ERROR:  extension "vector" already exists
DROP TABLE
TABLE REVIEWS Created successfully

Prompt0: good ride
Prompt1: I really enjoyed the trip! The ride was incredibly smooth, the pick-up location was convenient, and the drop-off point was right in front of the coffee shop.
Prompt2: The drive was exceptionally comfortable. I felt secure throughout the journey and greatly appreciated the on-board entertainment, which allowed me to have some fun while the car was in motion.
Prompt3: A sudden hard brake at the intersection really caught me off guard and stressed me out. I was not prepared for it. Additionally, I noticed some trash left in the cabin from a previous rider.


distances-report: prompt_1:0.16993998750465067, prompt_2:0.17829993875501293, prompt_3:0.2058307681256315




INSERT TEXTID 1 successfully
INSERT TEXTID 2 successfully
INSERT TEXTID 3 successfully



In [40]:
DB_NAME = "postgres"
DB_USER = "postgres"
DB_PASS = "postgres"
DB_HOST = "localhost"
DB_PORT = "5432"

# Embed the query phrase before connecting, so a failed embedding request
# does not leave a database connection open.
good_ride = "good ride"
good_ride_embedding = get_embedding(good_ride)

# NOTE(review): the substring length below reuses the embedding's dimension
# (printed as a float because of the "/1"); this looks unintended — confirm.
num_characters = int(len(good_ride_embedding))/1
print(num_characters)

conn = ps.connect(database=DB_NAME,
          user=DB_USER,
          password=DB_PASS,
          host=DB_HOST,
          port=DB_PORT)
try:
    cur = conn.cursor() # creating a cursor
    try:
        # Values are bound as parameters rather than formatted into the SQL.
        # The embedding is sent in its textual list form ('[v1, v2, ...]').
        # NOTE(review): `<->` is pgvector's Euclidean-distance operator, while
        # the earlier cells report cosine distance (`<=>` in pgvector) —
        # confirm which ordering is wanted.
        cur.execute(
            """
    SELECT substring(text, 0, %s) FROM reviews ORDER BY embedding <-> vector(%s)
""",
            (int(num_characters), str(good_ride_embedding)),
        )

        print()
        print("QUERY SELECTION successfully")
        print()

        records = cur.fetchall()
    finally:
        # Close the cursor before its connection, even if the query failed.
        cur.close()
finally:
    conn.close()

print("Total rows are:  ", len(records))
print("Printing each row")
print()
n = 0  # stays 0 when the query returns no rows
for n, row in enumerate(records, start=1):
    print("TEXT %s: "%n, row[0])

1536.0

QUERY SELECTION successfully

Total rows are:   3
Printing each row

TEXT 1:  I really enjoyed the trip! The ride was incredibly smooth, the pick-up location was convenient, and the drop-off point was right in front of the coffee shop.
TEXT 2:  The drive was exceptionally comfortable. I felt secure throughout the journey and greatly appreciated the on-board entertainment, which allowed me to have some fun while the car was in motion.
TEXT 3:  A sudden hard brake at the intersection really caught me off guard and stressed me out. I was not prepared for it. Additionally, I noticed some trash left in the cabin from a previous rider.
