In [27]:
from langchain_community.document_loaders import TextLoader # document loader
from langchain_text_splitters import CharacterTextSplitter #split text
from langchain_huggingface import HuggingFaceEmbeddings    #embedding model
from langchain_community.vectorstores import FAISS    #vector db
from langchain_chroma import Chroma


In [28]:
load_speech = TextLoader("aws.txt")
doc_speech = load_speech.load()
doc_speech

[Document(metadata={'source': 'aws.txt'}, page_content='Deployment on AWS with Laravel\nAWS offers a Free Tier that includes many services useful for deploying Laravel apps, such as AWS Lambda, API Gateway, and Lightsail. For example, AWS Lambda provides 1 million free requests and 400,000 GB-seconds of compute time per month under its free tier, which is suitable for serverless deployment of Laravel APIs.\n\nYou can deploy Laravel on AWS using services like AWS API Gateway and AWS Lambda with tools such as the Serverless Framework and Bref package, which help package and deploy Laravel applications serverlessly. This approach is cost-effective because you pay only for usage beyond the free tier, and the services scale automatically.\n\nAlternatively, AWS Lightsail offers simple VPS instances starting as low as $3.50/month, which can run a LAMP/LEMP stack for Laravel with MySQL, suitable for small projects or testing.\n\nFor easier deployment and management, you can also consider Larav

In [29]:
#split doc
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=30)
new_doc = text_splitter.split_documents(doc_speech)
new_doc

Created a chunk of size 352, which is longer than the specified 200
Created a chunk of size 330, which is longer than the specified 200


[Document(metadata={'source': 'aws.txt'}, page_content='Deployment on AWS with Laravel\nAWS offers a Free Tier that includes many services useful for deploying Laravel apps, such as AWS Lambda, API Gateway, and Lightsail. For example, AWS Lambda provides 1 million free requests and 400,000 GB-seconds of compute time per month under its free tier, which is suitable for serverless deployment of Laravel APIs.'),
 Document(metadata={'source': 'aws.txt'}, page_content='You can deploy Laravel on AWS using services like AWS API Gateway and AWS Lambda with tools such as the Serverless Framework and Bref package, which help package and deploy Laravel applications serverlessly. This approach is cost-effective because you pay only for usage beyond the free tier, and the services scale automatically.'),
 Document(metadata={'source': 'aws.txt'}, page_content='Alternatively, AWS Lightsail offers simple VPS instances starting as low as $3.50/month, which can run a LAMP/LEMP stack for Laravel with MyS

Before the pip installation of faiss-cpu package, the local machine(Mac) must installed the swig through the Homebrew command.
faiss-cpu package requires SWIG (a system tool) to build

#faiss_db = FAISS.from_documents(new_doc, embed_speech)
#faiss_db

In [30]:
embed_speech = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_data = embed_speech.embed_query(new_doc[0].page_content) # for first document
vector_data


[0.01854437030851841,
 0.009226388297975063,
 -0.06439439952373505,
 -0.0245850570499897,
 0.036156971007585526,
 -0.023718804121017456,
 -0.09348659217357635,
 -0.028422538191080093,
 0.10557043552398682,
 0.1258162260055542,
 0.06403977423906326,
 -0.04843844473361969,
 0.0687398612499237,
 -0.053926125168800354,
 0.14453360438346863,
 -0.02957627736032009,
 0.031208297237753868,
 -0.06834765523672104,
 -0.049515485763549805,
 -0.03156919777393341,
 -0.03671207278966904,
 -0.004911673720926046,
 -0.04592547565698624,
 0.058826595544815063,
 -0.04486639052629471,
 -0.054349955171346664,
 -0.04481207951903343,
 -0.019061723724007607,
 0.02737344428896904,
 -0.046819381415843964,
 -0.027922142297029495,
 0.020237497985363007,
 -0.04860681667923927,
 0.047602780163288116,
 -0.06128402426838875,
 0.06752786785364151,
 0.004921605810523033,
 -0.014854413457214832,
 -0.03446182236075401,
 0.011667510494589806,
 -0.02928311377763748,
 -0.046448059380054474,
 -0.08591816574335098,
 -0.1154704

In [31]:
vector_data[6]

-0.09348659217357635

In [32]:
# vector for the list of documents
texts = [doc.page_content for doc in new_doc]
vector_list_data = embed_speech.embed_documents(texts)
vector_list_data

[[0.01854434609413147,
  0.009226378984749317,
  -0.06439441442489624,
  -0.024585047736763954,
  0.03615698218345642,
  -0.02371879667043686,
  -0.09348659962415695,
  -0.028422564268112183,
  0.10557043552398682,
  0.125816211104393,
  0.06403978168964386,
  -0.048438455909490585,
  0.0687398836016655,
  -0.05392614006996155,
  0.14453360438346863,
  -0.029576275497674942,
  0.031208299100399017,
  -0.06834765523672104,
  -0.049515463411808014,
  -0.03156919777393341,
  -0.036712054163217545,
  -0.004911709111183882,
  -0.04592546448111534,
  0.05882660299539566,
  -0.0448664128780365,
  -0.05434997007250786,
  -0.04481206461787224,
  -0.019061727449297905,
  0.027373462915420532,
  -0.04681937023997307,
  -0.027922125533223152,
  0.020237507298588753,
  -0.04860678315162659,
  0.04760276898741722,
  -0.06128402426838875,
  0.06752785295248032,
  0.004921621643006802,
  -0.014854398556053638,
  -0.0344618521630764,
  0.011667518876492977,
  -0.029283126816153526,
  -0.046448059380054

In [33]:
# store into chroma
vectodb = Chroma.from_documents(documents=new_doc, embedding=embed_speech)
vectodb

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


<langchain_chroma.vectorstores.Chroma at 0x12ff943d0>

In [34]:
# check whether the vector db is created
vectodb._collection.get()
vectodb._collection.count()#

18

In [42]:
#query the vectors
query = "What can AWS Lambda provides?"
docs = vectodb.similarity_search(query)
docs[0].page_content

'Deployment on AWS with Laravel\nAWS offers a Free Tier that includes many services useful for deploying Laravel apps, such as AWS Lambda, API Gateway, and Lightsail. For example, AWS Lambda provides 1 million free requests and 400,000 GB-seconds of compute time per month under its free tier, which is suitable for serverless deployment of Laravel APIs.'

In [47]:
test2 = vectodb.similarity_search("How much is the AWS Lightsail cost?")
test2[0].page_content

'Alternatively, AWS Lightsail offers simple VPS instances starting as low as $3.50/month, which can run a LAMP/LEMP stack for Laravel with MySQL, suitable for small projects or testing.'

In [49]:
# retriever the vector db
retriever = vectodb.as_retriever()
retriever.invoke(test2[0].page_content)

[Document(id='93b632bb-293c-4429-b226-da03806f7d72', metadata={'source': 'aws.txt'}, page_content='Alternatively, AWS Lightsail offers simple VPS instances starting as low as $3.50/month, which can run a LAMP/LEMP stack for Laravel with MySQL, suitable for small projects or testing.'),
 Document(id='42a20dbc-0a97-4af9-8977-ee789d811f80', metadata={'source': 'aws.txt'}, page_content='Deployment on AWS with Laravel\nAWS offers a Free Tier that includes many services useful for deploying Laravel apps, such as AWS Lambda, API Gateway, and Lightsail. For example, AWS Lambda provides 1 million free requests and 400,000 GB-seconds of compute time per month under its free tier, which is suitable for serverless deployment of Laravel APIs.'),
 Document(id='00c2dba4-03b9-4e35-b863-82620b5fe001', metadata={'source': 'aws.txt'}, page_content='You can deploy Laravel on AWS using services like AWS API Gateway and AWS Lambda with tools such as the Serverless Framework and Bref package, which help pack