In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# Free-text search string that every document is scored against.
query = "blue sky"

# Toy corpus to rank; each string is treated as one document.
documents = [
    "The sky is blue and beautiful.",
    "Love this blue and bright sky!",
    "The quick brown fox jumps over the lazy dog.",
    "A king's breakfast has sausages, ham, bacon, eggs, toast and beans.",
    "I love green eggs, ham, sausages and bacon!",
    "The brown fox is quick and the blue dog is lazy!",
    "The sky is very blue and the sky is very beautiful today."
]

# Single list with the query at position 0 followed by the documents in their
# original order, so the query can later be addressed as the first entry.
all_texts = [query, *documents]

# Turn every text (query + documents) into a TF-IDF weighted row vector.
# NOTE(review): the query is part of the fitted corpus here, so it contributes to
# the vocabulary and IDF weights. Fitting on `documents` only and calling
# `transform` on the query is the more common setup — confirm this is intended.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(all_texts)

# Row 0 is the query (it was placed first in `all_texts`); the rest are the documents.
query_vector = tfidf_matrix[:1]
document_vectors = tfidf_matrix[1:]

# Score the query against each document only (not documents against each other),
# then collapse the single-row result into a 1-D array with one score per document.
cosine_sim = cosine_similarity(query_vector, document_vectors).flatten()

# Build the ranking table: one row per document, highest similarity first.
# The original positional index is kept, so each row can be traced back to
# its position in `documents`.
df_similarity = pd.DataFrame({
    'Document': documents,
    'Similarity Score': cosine_sim
}).sort_values(by='Similarity Score', ascending=False)

print("\nRanking documents based on query match:\n")
print(df_similarity)

# Top match
# argmax() returns the first index on ties, so when every score is 0 it would
# report documents[0] as the "best" match even though nothing matched, and it
# raises ValueError on an empty array. Only announce a winner when at least one
# document actually scored above zero.
if cosine_sim.size > 0 and cosine_sim.max() > 0:
    best_match_idx = cosine_sim.argmax()
    print(f"\nBest matching document:\n- {documents[best_match_idx]}\nSimilarity Score: {cosine_sim[best_match_idx]:.2f}")
else:
    # No usable winner; make that explicit instead of pointing at index 0.
    best_match_idx = None
    print("\nNo document matches the query.")



Ranking documents based on query match:

                                            Document  Similarity Score
0                     The sky is blue and beautiful.          0.525751
1                     Love this blue and bright sky!          0.442080
6  The sky is very blue and the sky is very beaut...          0.388444
5   The brown fox is quick and the blue dog is lazy!          0.133401
2       The quick brown fox jumps over the lazy dog.          0.000000
4        I love green eggs, ham, sausages and bacon!          0.000000
3  A king's breakfast has sausages, ham, bacon, e...          0.000000

Best matching document:
- The sky is blue and beautiful.
Similarity Score: 0.53
