# Hybrid Search

In [5]:
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Bi-encoder used for the dense (embedding-based) side of the search.
dense_model = SentenceTransformer('all-MiniLM-L6-v2')

# Toy corpus. The notes describe how each pair of sentences relates to the
# query's wording; the actual scores are whatever the two retrievers print.
documents = [
    # Reuse most of the query's terms ("AI", "healthcare", "diagnostics").
    "AI is used in healthcare diagnostics to analyze patient data.",
    "The role of AI in healthcare diagnostics is crucial.",

    # On-topic, but phrased with few of the query's terms.
    "Advanced machine learning techniques help in identifying diseases from medical images.",
    "Deep learning models improve the accuracy of disease detection in healthcare.",

    # On-topic paraphrases that share only some of the query's terms.
    "AI technology is revolutionizing the way healthcare diagnoses are made.",
    "Healthcare diagnostics benefit greatly from artificial intelligence applications.",

    # Largely off-topic; at most an incidental mention of healthcare.
    "Machine learning enhances the efficiency of various sectors, including finance.",
    "IoT devices contribute to smart city infrastructure and healthcare monitoring.",
]

query = "How is AI used in healthcare diagnostics?"

# Dense Retrieval: cosine similarity between the query and document embeddings.
query_embedding = dense_model.encode(query, convert_to_tensor=True)
document_embeddings = dense_model.encode(documents, convert_to_tensor=True)
# Move the result to host memory before converting: the tensors may live on
# the model's device, and Tensor.numpy() raises for tensors on a GPU.
dense_scores = util.cos_sim(query_embedding, document_embeddings).cpu().numpy()[0]

# Sparse Retrieval (TF-IDF): the vocabulary is fitted on the corpus only, so
# query terms that never occur in a document contribute nothing.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(documents)
query_vector = vectorizer.transform([query])
# Use the sparse-aware matmul operator; np.dot is not defined for SciPy sparse
# operands and only works by accident of operator fallback.
sparse_scores = (query_vector @ tfidf_matrix.T).toarray()[0]

# Hybrid-Search: unweighted sum of the two per-document score vectors.
combined_scores = dense_scores + sparse_scores
sorted_indices = np.argsort(-combined_scores)


def _print_ranking(title, order, scores, label="Score"):
    """Print *title*, then one line per document following *order*.

    *order* is a sequence of indices into ``documents`` and *scores*;
    *label* is the caption placed in front of each formatted score.
    """
    print(title)
    for idx in order:
        print(f"Document: {documents[idx]} - {label}: {scores[idx]:.4f}")


# Report each ranking, best match first (argsort on the negated scores).
_print_ranking("Dense Retrieval Results:", np.argsort(-dense_scores), dense_scores)
_print_ranking("\nSparse Retrieval Results:", np.argsort(-sparse_scores), sparse_scores)
_print_ranking("\nHybrid-Search Results:", sorted_indices, combined_scores,
               label="Combined Score")

Dense Retrieval Results:
Document: AI is used in healthcare diagnostics to analyze patient data. - Score: 0.9189
Document: The role of AI in healthcare diagnostics is crucial. - Score: 0.8867
Document: AI technology is revolutionizing the way healthcare diagnoses are made. - Score: 0.8040
Document: Healthcare diagnostics benefit greatly from artificial intelligence applications. - Score: 0.7690
Document: Advanced machine learning techniques help in identifying diseases from medical images. - Score: 0.4874
Document: Deep learning models improve the accuracy of disease detection in healthcare. - Score: 0.4621
Document: Machine learning enhances the efficiency of various sectors, including finance. - Score: 0.3148
Document: IoT devices contribute to smart city infrastructure and healthcare monitoring. - Score: 0.3135

Sparse Retrieval Results:
Document: AI is used in healthcare diagnostics to analyze patient data. - Score: 0.6821
Document: The role of AI in healthcare diagnostics is cruci