<a href="https://colab.research.google.com/github/advik-7/Deep_Learning_projects/blob/main/Vector_database_nd_retriverchain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install faiss-gpu

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


In [2]:
import faiss
import numpy as np
import time
from sklearn.feature_extraction.text import TfidfVectorizer


In [22]:


def create_faiss_index(vectors):
    """Build an exact (brute-force) L2 FAISS index over ``vectors``.

    Args:
        vectors: 2-D array-like of shape ``(n_vectors, dim)``. A single 1-D
            vector is accepted and treated as one row.

    Returns:
        A ``faiss.IndexFlatL2`` of dimension ``dim`` containing every row of
        ``vectors``, in input order.

    Raises:
        ValueError: if ``vectors`` is not 1-D/2-D or its rows have zero width.
    """
    # Convert *before* reading the shape so plain Python lists work too.
    # FAISS operates on row-major float32 matrices.
    data = np.ascontiguousarray(vectors, dtype=np.float32)
    if data.ndim == 1:
        data = data.reshape(1, -1)
    if data.ndim != 2 or data.shape[1] == 0:
        raise ValueError(
            f"vectors must have shape (n_vectors, dim) with dim > 0, got {data.shape}"
        )

    index = faiss.IndexFlatL2(data.shape[1])
    index.add(data)
    return index

In [29]:
import faiss
import numpy as np
import time
from sentence_transformers import SentenceTransformer

def read_text_file(file_path, encoding="utf-8"):
    """Read a text file and return its lines.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A list of strings, one per line, each keeping its trailing newline
        (exactly what ``file.readlines()`` yields).
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.readlines()

def vectorize_text(text_data, model):
    """Embed ``text_data`` with ``model`` and return the result.

    Args:
        text_data: The texts to embed; passed to ``model.encode`` unchanged.
        model: Any object exposing ``encode(texts, convert_to_numpy=...)``.

    Returns:
        Whatever ``model.encode`` returns when called with
        ``convert_to_numpy=True``.
    """
    embeddings = model.encode(text_data, convert_to_numpy=True)
    return embeddings


def adjust_query_vector(query_vector, required_dim):
    """Zero-pad or truncate ``query_vector`` so each row has ``required_dim`` columns.

    This only fixes the *shape*; it does not make embeddings produced by
    different models comparable. When query and corpus come from the same
    model the dimensions already match and the input is returned untouched.

    Args:
        query_vector: 2-D array-like of shape ``(n_queries, dim)``. A single
            1-D vector is accepted and treated as one row, matching what
            ``query_faiss_index`` accepts.
        required_dim: Number of columns the result must have.

    Returns:
        A 2-D array with ``required_dim`` columns: the input itself when the
        width already matches, a zero-padded copy when it is too narrow, or a
        column-sliced view when it is too wide.
    """
    query_vector = np.asarray(query_vector)
    if query_vector.ndim == 1:
        # Promote a lone vector to a batch of one so shape[1] is defined.
        query_vector = query_vector.reshape(1, -1)

    current_dim = query_vector.shape[1]
    if current_dim == required_dim:
        return query_vector
    if current_dim < required_dim:
        padding = np.zeros(
            (query_vector.shape[0], required_dim - current_dim), dtype=np.float32
        )
        return np.hstack((query_vector, padding))
    return query_vector[:, :required_dim]
def query_faiss_index(index, query_vector, k):
    """Search ``index`` for the ``k`` nearest neighbours of ``query_vector``.

    Args:
        index: Object exposing ``search(queries, k)``.
        query_vector: One query (1-D) or a batch of queries (2-D); converted
            to a float32 array before searching.
        k: Number of neighbours requested per query.

    Returns:
        The ``(distances, indices)`` pair produced by ``index.search``.
    """
    queries = np.array(query_vector, dtype=np.float32)
    # The search call takes a batch; wrap a single vector as a batch of one.
    batch = queries.reshape(1, -1) if queries.ndim == 1 else queries
    dists, neighbour_ids = index.search(batch, k)
    return dists, neighbour_ids

if __name__ == "__main__":
    # Demo: embed every line of a text file, index the embeddings with FAISS,
    # then retrieve the lines closest to a query typed by the user.
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    file_path = "/content/practisedefce3v.txt"
    text_data = read_text_file(file_path)
    if not text_data:
        # Without at least one line there is nothing to embed or index.
        raise ValueError(f"No text lines found in {file_path}")

    vectors = vectorize_text(text_data, model)

    faiss_index = create_faiss_index(vectors)
    query_text = input("Enter a query text: ")
    query_vector = vectorize_text([query_text], model)

    # Query and corpus share one model, so this is normally a no-op safeguard.
    required_dim = vectors.shape[1]
    query_vector_adjusted = adjust_query_vector(query_vector, required_dim)

    # Number of nearest neighbors to retrieve. Capped at the index size:
    # FAISS fills missing results with label -1 when k exceeds the number of
    # stored vectors, and text_data[-1] would silently print the last line.
    k = min(5, faiss_index.ntotal)

    # perf_counter is the monotonic, high-resolution clock meant for timing
    # short intervals such as this sub-millisecond search.
    start_time = time.perf_counter()

    distances, indices = query_faiss_index(faiss_index, query_vector_adjusted, k)

    # End timing the search process
    end_time = time.perf_counter()

    print("Indices of nearest neighbors:", indices)
    print("Distances of nearest neighbors:", distances)

    print(f"Time taken for retrieval: {end_time - start_time:.4f} seconds")

    print("\nRetrieved nearest neighbors:")
    for idx in indices[0]:
        if idx < 0:
            # Defensive: -1 marks "no result" and must not be used as an index.
            continue
        print(f"- {text_data[idx].strip()}")


Enter a query text: cats
Indices of nearest neighbors: [[10 14 11 12 19]]
Distances of nearest neighbors: [[ 5.7380977 39.493248  42.1793    42.25738   43.512363 ]]
Time taken for retrieval: 0.0001 seconds

Retrieved nearest neighbors:
- Cats:
- Cats are expert hunters, often catching small animals like mice and birds.
- Cats are graceful animals that are known for their agility and speed.
- Domestic cats are often kept as pets due to their independent nature.
- Cats have excellent night vision, allowing them to hunt in low-light conditions.
