In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS

loader = TextLoader('speech.txt')
docs = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=30,chunk_overlap=30)
result = text_splitter.split_documents(docs)
result


Created a chunk of size 636, which is longer than the specified 30
Created a chunk of size 306, which is longer than the specified 30
Created a chunk of size 662, which is longer than the specified 30


[Document(metadata={'source': 'speech.txt'}, page_content='Rajiv Gandhi University of Knowledge Technologies (RGUKT) Basar is unique university which actively uses Information and Communication Technology (ICT) in teaching. It is perhaps the first of its kind in the country with an educational model that is intensely ICT based. Established by the Government of erstwhile Andhra Pradesh vide a special act of legislation, this campus is loacated at the holy land of Basar (the abode of Gnyana Saraswathi, Goddess of knowledge) in Nirmal District (Telangana State). The campus is set in about 272 acres of salubrious and serene surrounding just a short distance from the banks of river Godavari.'),
 Document(metadata={'source': 'speech.txt'}, page_content='The primary objective of establishing RGUKT is to provide high quality educational opportunities for the rural youth of the state. The selection process follows approved rules and has very high competition where only the top rural graduates (

In [3]:
embeddings = OllamaEmbeddings(model="gemma2:2b")
db = FAISS.from_documents(result,embeddings)
db

<langchain_community.vectorstores.faiss.FAISS at 0x70aa0016aa50>

In [4]:
### querying 
query="Tell Something About RGUKT BASAR"
docs=db.similarity_search(query)
docs[0].page_content

'The University houses about 6000 students along with 250 faculty members and 120 support staff within its campus. The residential university comprises of Academic blocks with more than 140 ICT equipped classrooms, well – equipped laboratories, libraries with more than 1,00,000 volumes, boys’ and girls’ hostels and mess blocks that provide quality and nutritious food. The campus is more or less self-sustained with a Laundromat, bank, ATM, Shopping Complex, post office, primary health center etc. It also has indoor and outdoor recreational facilities, well equipped gymnasium, courts for basketball, badminton, table tennies, cricket, other sports and games.'

### As a Retriever
We can also convert the vectorstore into a Retriever class. This allows us to easily use it in other LangChain methods, which largely work with retrievers

In [5]:
retriever = db.as_retriever()
docs = retriever.invoke(query)
docs[0].page_content

'The University houses about 6000 students along with 250 faculty members and 120 support staff within its campus. The residential university comprises of Academic blocks with more than 140 ICT equipped classrooms, well – equipped laboratories, libraries with more than 1,00,000 volumes, boys’ and girls’ hostels and mess blocks that provide quality and nutritious food. The campus is more or less self-sustained with a Laundromat, bank, ATM, Shopping Complex, post office, primary health center etc. It also has indoor and outdoor recreational facilities, well equipped gymnasium, courts for basketball, badminton, table tennies, cricket, other sports and games.'

### Similarity Search with score
There are some FAISS specific methods. One of them is similarity_search_with_score, which allows you to return not only the documents but also the distance score of the query to them. The returned distance score is L2 distance. Therefore, a lower score is better.

In [6]:
docs_and_score=db.similarity_search_with_score(query)
docs_and_score

[(Document(metadata={'source': 'speech.txt'}, page_content='The University houses about 6000 students along with 250 faculty members and 120 support staff within its campus. The residential university comprises of Academic blocks with more than 140 ICT equipped classrooms, well – equipped laboratories, libraries with more than 1,00,000 volumes, boys’ and girls’ hostels and mess blocks that provide quality and nutritious food. The campus is more or less self-sustained with a Laundromat, bank, ATM, Shopping Complex, post office, primary health center etc. It also has indoor and outdoor recreational facilities, well equipped gymnasium, courts for basketball, badminton, table tennies, cricket, other sports and games.'),
  40288.96),
 (Document(metadata={'source': 'speech.txt'}, page_content='The primary objective of establishing RGUKT is to provide high quality educational opportunities for the rural youth of the state. The selection process follows approved rules and has very high competi

### Embedding with vector

In [7]:
embedding_vector = embeddings.embed_query(query)
embedding_vector

[-1.2868715524673462,
 -1.0910780429840088,
 -0.5898383259773254,
 -2.9663524627685547,
 -0.4146234393119812,
 -0.40166211128234863,
 2.1355395317077637,
 0.4111773669719696,
 -4.7124810218811035,
 3.0257465839385986,
 0.6325780749320984,
 1.1223584413528442,
 1.0946496725082397,
 -1.525760531425476,
 0.4721527099609375,
 -1.2026245594024658,
 -0.059056323021650314,
 -1.1547613143920898,
 -0.03388197720050812,
 -0.6863831877708435,
 1.4111027717590332,
 0.27020713686943054,
 1.980574131011963,
 1.2165603637695312,
 2.9490840435028076,
 -2.3305819034576416,
 -3.2949130535125732,
 -2.859347105026245,
 1.1418023109436035,
 -1.2280017137527466,
 -0.1611720621585846,
 1.4093890190124512,
 0.26571425795555115,
 0.3033965229988098,
 -1.0965750217437744,
 2.5310704708099365,
 -1.4606208801269531,
 -1.4536550045013428,
 -3.46095871925354,
 0.07846242934465408,
 1.8316504955291748,
 0.3990134298801422,
 1.9033749103546143,
 1.551784634590149,
 -0.3259612023830414,
 -0.7922623753547668,
 -1.10150

In [8]:
docs_score = db.similarity_search_by_vector(embedding_vector)
docs_score

[Document(metadata={'source': 'speech.txt'}, page_content='The University houses about 6000 students along with 250 faculty members and 120 support staff within its campus. The residential university comprises of Academic blocks with more than 140 ICT equipped classrooms, well – equipped laboratories, libraries with more than 1,00,000 volumes, boys’ and girls’ hostels and mess blocks that provide quality and nutritious food. The campus is more or less self-sustained with a Laundromat, bank, ATM, Shopping Complex, post office, primary health center etc. It also has indoor and outdoor recreational facilities, well equipped gymnasium, courts for basketball, badminton, table tennies, cricket, other sports and games.'),
 Document(metadata={'source': 'speech.txt'}, page_content='The primary objective of establishing RGUKT is to provide high quality educational opportunities for the rural youth of the state. The selection process follows approved rules and has very high competition where only

### Saving and Loading

In [9]:
db.save_local("faiss_index")

In [14]:
new_db = FAISS.load_local("faiss_index",embeddings,allow_dangerous_deserialization=True)
docs = new_db.similarity_search(query)
docs

[Document(metadata={'source': 'speech.txt'}, page_content='The University houses about 6000 students along with 250 faculty members and 120 support staff within its campus. The residential university comprises of Academic blocks with more than 140 ICT equipped classrooms, well – equipped laboratories, libraries with more than 1,00,000 volumes, boys’ and girls’ hostels and mess blocks that provide quality and nutritious food. The campus is more or less self-sustained with a Laundromat, bank, ATM, Shopping Complex, post office, primary health center etc. It also has indoor and outdoor recreational facilities, well equipped gymnasium, courts for basketball, badminton, table tennies, cricket, other sports and games.'),
 Document(metadata={'source': 'speech.txt'}, page_content='The primary objective of establishing RGUKT is to provide high quality educational opportunities for the rural youth of the state. The selection process follows approved rules and has very high competition where only