In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
loader=TextLoader("speech.txt")
docs=loader.load()
splitter=CharacterTextSplitter(chunk_size=1000,chunk_overlap=30)
documents=splitter.split_documents(docs)


Created a chunk of size 2625, which is longer than the specified 1000
Created a chunk of size 1304, which is longer than the specified 1000
Created a chunk of size 1401, which is longer than the specified 1000
Created a chunk of size 1131, which is longer than the specified 1000
Created a chunk of size 1041, which is longer than the specified 1000
Created a chunk of size 1262, which is longer than the specified 1000
Created a chunk of size 1427, which is longer than the specified 1000
Created a chunk of size 1062, which is longer than the specified 1000
Created a chunk of size 1067, which is longer than the specified 1000
Created a chunk of size 1101, which is longer than the specified 1000
Created a chunk of size 1121, which is longer than the specified 1000
Created a chunk of size 1103, which is longer than the specified 1000
Created a chunk of size 1585, which is longer than the specified 1000
Created a chunk of size 1088, which is longer than the specified 1000


In [4]:
embeddings=OllamaEmbeddings(model="gemma:2b")
db=FAISS.from_documents(documents,embeddings)
db

<langchain_community.vectorstores.faiss.FAISS at 0x27dd17bdbd0>

In [6]:
query="Hitler's vitriolic beer hall speeches began attracting regular audiences"
doccs=db.similarity_search(query)
doccs
docs_with_score=db.similarity_search_with_score(query)
docs_with_score

[(Document(id='2ff1ad9e-8dbf-42ed-aa8c-ac3c6d603849', metadata={'source': 'speech.txt'}, page_content='In February 1921, already highly effective at crowd manipulation, Hitler spoke to a crowd of over 6,000.[104] To publicise the meeting, two truckloads of party supporters drove around Munich waving swastika flags and distributing leaflets. Hitler soon gained notoriety for his rowdy polemic speeches against the Treaty of Versailles, rival politicians, and especially against Marxists and Jews.[105]'),
  np.float32(2878.5444)),
 (Document(id='f73d7bd4-d641-449a-b5ad-9e20dc147fee', metadata={'source': 'speech.txt'}, page_content="Hitler's German Workers' Party (DAP) membership card\nAfter the war, Hitler returned to Munich.[90] Without formal education or career prospects, he remained in the Army.[91] In July 1919, he was appointed Verbindungsmann (intelligence agent) of an Aufklärungskommando (reconnaissance unit) of the Reichswehr, assigned to influence other soldiers and to infiltrate 

RETRIEVER 
We can also covert vectorestore into a retriver class. this allows us to easily use it in other lanchain methods which largerly work with retriver 

In [7]:
retriver=db.as_retriever()
docs=retriver.invoke(query)
docs[0].page_content

'In February 1921, already highly effective at crowd manipulation, Hitler spoke to a crowd of over 6,000.[104] To publicise the meeting, two truckloads of party supporters drove around Munich waving swastika flags and distributing leaflets. Hitler soon gained notoriety for his rowdy polemic speeches against the Treaty of Versailles, rival politicians, and especially against Marxists and Jews.[105]'

In [None]:
#can also be done with embed query try here

In [8]:
db.save_local("faiss_index")

In [11]:
new_db=FAISS.load_local("faiss_index",embeddings,allow_dangerous_deserialization=True)
docs=new_db.similarity_search(query)

CHROMA

In [12]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_chroma import Chroma

In [13]:
loader=TextLoader("speech.txt")
docs=loader.load()
splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=30)
documents=splitter.split_documents(docs)

In [14]:
embeddings=OllamaEmbeddings(model="gemma:2b")
db=Chroma.from_documents(documents,embeddings)
db

<langchain_chroma.vectorstores.Chroma at 0x27deb531c90>

In [15]:
query="Hitler's vitriolic beer hall speeches began attracting regular audiences"
doccs=db.similarity_search_with_score(query)
doccs

[(Document(id='c12b856c-2f4d-431b-94ef-aa7ad1aaf031', metadata={'source': 'speech.txt'}, page_content='Hitler driven through a crowd in Cheb (German: Eger), in the Sudetenland, October 1938'),
  2845.81298828125),
 (Document(id='8ac3d7a1-a946-4794-9203-e3dc0ae16246', metadata={'source': 'speech.txt'}, page_content='In February 1921, already highly effective at crowd manipulation, Hitler spoke to a crowd of over 6,000.[104] To publicise the meeting, two truckloads of party supporters drove around Munich waving swastika flags and distributing leaflets. Hitler soon gained notoriety for his rowdy polemic speeches against the Treaty of Versailles, rival politicians, and especially against Marxists and Jews.[105]'),
  2878.544189453125),
 (Document(id='c283acd7-dfa1-4156-ad9a-37048564ee05', metadata={'source': 'speech.txt'}, page_content="Hitler's German Workers' Party (DAP) membership card\nAfter the war, Hitler returned to Munich.[90] Without formal education or career prospects, he remain

In [19]:
vectordb=Chroma.from_documents(documents,embeddings,persist_directory="./chroma_db")