In [None]:
!pip install -qU langchain langchain-openai

In [None]:
import os
from google.colab import userdata

# Copy the OpenAI API key from Colab's `userdata` store into the process
# environment (presumably where the OpenAI-backed classes below look for it).
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

## Converting Text to Embeddings

In [None]:
from langchain_openai import OpenAIEmbeddings

# Embedding client used by the cells below; the model name is set explicitly.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [None]:
# Sample sentence to embed.
text = "This statement will be converting to embedding"

# Ask the embedding client for the vector of this single piece of text.
vector = embeddings.embed_query(text)
print(vector)  # prints every element of the vector

[0.004193167852690175, -0.006237382582849107, 0.019979849982380112, 0.004669910915386848, -0.025455167675035563, -0.020962229457393407, 0.03525006162496846, -0.015515805114357904, 0.013522154677356391, -0.013919440097275696, 0.060098465232695075, 0.00503830252002495, -0.01183910868009183, -0.018116219618668366, 0.0014329366590484357, 0.008841408755862062, 0.011275685568535313, 0.015501358439547929, 0.004879388631453981, 0.05018799788428239, -0.0009688349024717456, -0.040450890633589386, 0.00678635901959565, 0.009773223937717936, 0.007960156935841097, -0.023649324010563712, -0.02607637655450946, 0.003987301805325536, 0.032360711761813495, -0.0019159996763132164, -0.02721766945243247, -0.04854106578007522, 0.0018744653698192276, -0.037994941014733635, 0.0003004471744839285, 0.0012866633781055663, 0.02396715271902816, 0.032418498461053394, -0.01694603337112541, 0.017755050885773994, 0.04906114979852433, 0.024747276884056795, -0.014909041978371465, 0.00494078699939637, 0.009975478316380083

In [None]:
# Inspect the Python type of the value returned by embed_query.
print(type(vector))

<class 'list'>


In [None]:
# Number of elements in the embedding vector.
print(len(vector))

1536


In [None]:
def embed_text(text):
    """Return the embedding of `text` from the module-level `embeddings` client."""
    embedding = embeddings.embed_query(text)
    return embedding

## Measuring Semantic Similarity

In [None]:
import numpy as np

def cosine_similarity(statement1, statement2):
    """Return the cosine similarity between the embeddings of two statements.

    Both statements are embedded with `embed_text` and compared as
    dot(v1, v2) / (|v1| * |v2|).

    Args:
        statement1: First text to compare.
        statement2: Second text to compare.

    Returns:
        The similarity as a plain Python float. 0.0 is returned when either
        embedding has zero length, because the cosine is undefined there and
        the division would otherwise yield NaN.
    """
    vec1 = np.asarray(embed_text(statement1), dtype=float)
    vec2 = np.asarray(embed_text(statement2), dtype=float)
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # Undefined for a zero vector; report "no similarity" instead of NaN.
        return 0.0
    # float() turns the NumPy scalar into a plain Python float.
    return float(np.dot(vec1, vec2) / norm_product)

In [None]:
# Three short sentences used in the similarity comparisons below.
statement1, statement2, statement3 = (
    "I went for a run",
    "Sprinting is great",
    "Smoking is injurious to health",
)

In [None]:
# Display the embedding of the first statement (the whole vector is shown).
embed_text(statement1)

[0.0036376682016086045,
 0.04096766812893841,
 -0.048744752271740695,
 -0.08143359177792095,
 -0.01625661279271711,
 -0.0060460552561410665,
 0.018978593174020543,
 0.03384285628042738,
 0.006362783182060077,
 0.03123377082855286,
 -0.019254553383054377,
 0.05288416844576504,
 0.0025636152541906377,
 -0.02754592878430838,
 0.003581221435386122,
 -0.006999375262860745,
 -0.0020524602247015697,
 -0.013095605060129661,
 -0.06402296221095671,
 0.05115314142174908,
 -0.0018768486322621209,
 -0.00992832486961692,
 -0.05293434438387684,
 -0.019530515454733475,
 -0.0038132797940480536,
 0.005726191566920726,
 0.03517248697542054,
 -0.0153158372361867,
 0.021499873528167315,
 0.016469855873079094,
 -0.006510171507803614,
 -0.01835140884878518,
 -0.06020968032143273,
 -0.051203313634570355,
 0.02349431770801179,
 -0.037806662258996225,
 -0.005713647582392777,
 -0.01867754499593081,
 0.021612766594950962,
 -0.003847774820177283,
 0.020446203042207987,
 -0.05464028530148216,
 0.05479080939052703,


In [None]:
# Similarity between statement1 and statement2.
cosine_similarity(statement1, statement2)

0.351636617021594

In [None]:
# Similarity between statement1 and statement3.
cosine_similarity(statement1, statement3)

0.05448617373651612

In [None]:
# Similarity between statement2 and statement3.
cosine_similarity(statement2, statement3)

0.19091814656278513

## Storing in a Vector Database

### Chroma

In [None]:
!pip install -qU chromadb

In [None]:
# Plain-text facts about the course, one sentence per entry.
statements = [
    "Build Fast with AI introduces a 4-week crash course on Generative AI, providing a comprehensive understanding of AI technologies and empowering participants to create innovative applications.",
    "The course is designed for aspiring AI developers, professionals, students, and hobbyists looking to delve into the world of AI, with no prior experience required.",
    "Participants will engage in project-based learning, developing two real-world projects that demonstrate their acquired skills and align with industry demands.",
    # Four Saturdays (March 9, 16, 23, 30), matching the "4-week" course above.
    "The course spans four weeks, with 3-hour offline workshops held every Saturday from March 9th to March 30th, 2024, fostering innovation and skill development.",
    "Throughout the course, participants will build real-world applications such as an Email Generation App, ChatGPT Replica, Story Telling App, Chat with PDF, and YouTube Bot.",
    "By the end of the course, participants will have a solid foundation in Generative AI, a portfolio showcasing their projects, and highly sought-after skills in the technology industry.",
    "The course fee is Rs 10,000, with an exclusive 50% discount offered to the first cohort.",
    "Satvik, the founder of Build Fast with AI and an IIT Delhi alumnus with over 4 years of experience in data science and machine learning, serves as the lead mentor for the course.",
    "As a leading consultant in generative AI, Satvik has helped numerous startups successfully implement AI features and has coached over 3,000 people on leveraging generative AI.",
    "The course emphasizes practical application, ensuring that learners can apply their skills in real-world scenarios, reflecting Satvik's 'Learning by Doing' philosophy.",
]

In [None]:
# Import Document from langchain_core rather than through the legacy
# `langchain.schema` path.
from langchain_core.documents import Document

# Metadata shared by every passage. Each Document gets its own copy so that
# editing one document's metadata cannot affect the others.
COURSE_METADATA = {"course_name": "Crash Course - Generative AI", "duration": "4 weeks", "format": "offline"}

# One passage per Document.
course_passages = [
    "Build Fast with AI introduces a 4-week crash course on Generative AI, providing a comprehensive understanding of AI technologies and empowering participants to create innovative applications.",
    "The course is designed for aspiring AI developers, professionals, students, and hobbyists looking to delve into the world of AI, with no prior experience required.",
    "Participants will engage in project-based learning, developing two real-world projects that demonstrate their acquired skills and align with industry demands.",
    # Four Saturdays (March 9, 16, 23, 30), matching the "4 weeks" metadata.
    "The course spans four weeks, with 3-hour offline workshops held every Saturday from March 9th to March 30th, 2024, fostering innovation and skill development.",
    "Throughout the course, participants will build real-world applications such as an Email Generation App, ChatGPT Replica, Story Telling App, Chat with PDF, and YouTube Bot.",
    "By the end of the course, participants will have a solid foundation in Generative AI, a portfolio showcasing their projects, and highly sought-after skills in the technology industry.",
    "The course fee is Rs 20,000. Exclusive discounts are offered for every cohort.",
    "Satvik, the founder of Build Fast with AI and an IIT Delhi alumnus with over 4 years of experience in data science and machine learning, serves as the lead mentor for the course.",
    "As a leading consultant in generative AI, Satvik has helped numerous startups successfully implement AI features and has coached over 3,000 people on leveraging generative AI.",
    "The course emphasizes practical application, ensuring that learners can apply their skills in real-world scenarios, reflecting Satvik's 'Learning by Doing' philosophy.",
]

# Wrap each passage in a Document carrying the shared course metadata.
docs = [
    Document(page_content=passage, metadata=dict(COURSE_METADATA))
    for passage in course_passages
]

In [None]:
from langchain_community.vectorstores import Chroma
# OpenAI embedding client, with the model named explicitly so it matches the
# "text-embedding-3-small" client created at the top of the notebook.
embedding_function = OpenAIEmbeddings(model="text-embedding-3-small")

# Embed the documents and load them into Chroma.
db = Chroma.from_documents(docs, embedding_function)

# Search the store for documents similar to the question.
query = "Who is the founder of Build Fast with AI?"
similar_docs = db.similarity_search(query)

# Show how many documents came back and the text of the first one.
print(len(similar_docs))
print(similar_docs[0].page_content)


4
Satvik, the founder of Build Fast with AI and an IIT Delhi alumnus with over 4 years of experience in data science and machine learning, serves as the lead mentor for the course.


### Supabase

In [None]:
!pip install -qU supabase pypdf

In [None]:
import os
from google.colab import userdata

from langchain_community.vectorstores import SupabaseVectorStore
from langchain_openai import OpenAIEmbeddings
from supabase.client import Client, create_client

# Read the Supabase project URL and key from Colab's `userdata` store.
supabase_url = userdata.get('SUPABASE_URL')
supabase_key = userdata.get('SUPABASE_KEY')
# Client handle passed to the vector store below.
supabase: Client = create_client(supabase_url, supabase_key)

# NOTE(review): this rebinds the module-level `embeddings` (also read by
# `embed_text`) and leaves the model unspecified, unlike the earlier
# "text-embedding-3-small" instance — confirm this is intended.
embeddings = OpenAIEmbeddings()

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter

# Load the course brochure from the Colab filesystem.
loader = PyPDFLoader("/content/CrashCourse_Info_BuildFastwithAI_Cohort2.pdf")
documents = loader.load()

# CharacterTextSplitter cuts only on its single separator, so text without
# that separator can come out far longer than chunk_size. The recursive
# splitter falls back to finer separators and therefore honours the limit.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

In [None]:
# Number of chunks produced by the splitter.
len(docs)

In [None]:
# Build a SupabaseVectorStore from the PDF chunks, using the `embeddings`
# client and the `supabase` connection, targeting the "myvectorstore" table.
vector_store = SupabaseVectorStore.from_documents(
    docs,
    embeddings,
    client=supabase,
    table_name="myvectorstore",
    # NOTE(review): presumably the insert batch size rather than the text
    # splitter's chunk size — confirm against the SupabaseVectorStore docs.
    chunk_size=500,
)

In [None]:
# Ask the Supabase-backed store the same question used for Chroma above.
query = "Who is the founder of Build Fast with AI?"
# The result is only assigned here, so this cell displays nothing by itself.
matched_docs = vector_store.similarity_search(query)