In [3]:
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Load the spaCy pipeline once at module level; preprocess_spacy() reuses it
# for every review and every query.
nlp = spacy.load("en_core_web_sm")

# Upper bound on the number of reviews kept for indexing.
SAMPLE_SIZE = 1000

df = pd.read_csv("Reviews.csv")

# Keep only the review body and its rating, and drop rows with no text so
# that preprocessing never receives a missing value.
df = df[['Text', 'Score']]
df = df.dropna(subset=['Text'])

# DataFrame.sample raises ValueError when n exceeds the number of rows
# (sampling without replacement), so cap n at the rows actually available.
# The index is reset so row labels run 0..n-1 after sampling.
df = df.sample(min(SAMPLE_SIZE, len(df)), random_state=42).reset_index(drop=True)
def preprocess_spacy(text):
    """Return *text* reduced to a space-separated string of lemmas.

    The text is lower-cased and passed through the module-level spaCy
    pipeline ``nlp``. Tokens that are not purely alphabetic, or that are
    stop words, are discarded; the lemma of every remaining token is kept
    in its original order.
    """
    lemmas = []
    for tok in nlp(text.lower()):
        # Skip punctuation, numbers, mixed tokens and stop words.
        if not tok.is_alpha or tok.is_stop:
            continue
        lemmas.append(tok.lemma_)
    return " ".join(lemmas)


# Normalize every review once and keep the result alongside the raw text.
cleaned_reviews = df['Text'].map(preprocess_spacy)
df['clean_text'] = cleaned_reviews

# Fit the TF-IDF vocabulary on the cleaned reviews and vectorize them in one
# step; row i of tfidf_matrix corresponds to row i of df.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(cleaned_reviews)


def search_reviews(query, top_k=5):
    """Return the reviews most similar to *query*, best match first.

    The query is cleaned with ``preprocess_spacy``, projected into the
    fitted TF-IDF space, and compared against every row of ``tfidf_matrix``
    by cosine similarity.

    Args:
        query: Free-text search string.
        top_k: Maximum number of reviews to return. Values of zero or less
            yield an empty list.

    Returns:
        A list of at most ``top_k`` dicts with the keys ``"Review"`` (the
        original text), ``"Score"`` (the rating stored with the review) and
        ``"Similarity"`` (cosine similarity rounded to 4 decimals), ordered
        by descending similarity. Reviews with a similarity of 0 are still
        returned when fewer than ``top_k`` reviews match the query.
    """
    # The original slice ``[-top_k:]`` turns into ``[-0:]`` for top_k == 0,
    # which selects *every* review; reject non-positive sizes up front.
    if top_k <= 0:
        return []

    query_clean = preprocess_spacy(query)
    query_vec = vectorizer.transform([query_clean])

    similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()
    # Reverse first, then truncate: same ranking as before for top_k > 0.
    top_indices = similarities.argsort()[::-1][:top_k]

    # Matrix row numbers are positions, so look rows up positionally rather
    # than by index label; this stays correct even if df's index is not 0..n-1.
    texts = df["Text"]
    scores = df["Score"]
    return [
        {
            "Review": texts.iloc[i],
            "Score": scores.iloc[i],
            # Plain float instead of a NumPy scalar, for clean printing/serialization.
            "Similarity": round(float(similarities[i]), 4),
        }
        for i in top_indices
    ]

# Demo: run one example query and print the best-matching reviews.
query = "great taste and fast delivery"
top_k = 5            # used for both the search and the header, so they cannot drift apart
preview_chars = 300  # maximum number of review characters shown per result
results = search_reviews(query, top_k=top_k)

print(f"\nTop {top_k} Relevant Reviews:")
for r in results:
    print(f"\nScore: {r['Score']} | Similarity: {r['Similarity']}")
    # NOTE: the trailing "..." is printed unconditionally, even when the
    # review is shorter than preview_chars and nothing was cut off.
    print(f"Review: {r['Review'][:preview_chars]}...")


Top 5 Relevant Reviews:

Score: 5 | Similarity: 0.5823
Review: good price & fast delivery. i have gout so this stuff works great for me....

Score: 5 | Similarity: 0.3511
Review: Super easy to use and makes just the right amount of popcorn for two of us. Price is right and delivery was fast. (Got here before the popcorn machine....;o)...

Score: 5 | Similarity: 0.283
Review: These are the best blueberries I have ever eaten.  They are better than the "fresh" things you get in the grocery store.  They are full, slightly chewy, and taste like high quality berries.  The price is excellent ($5.00/lb), and delivery is fast.  I am very happy with my ten-pound stash of wonderfu...

Score: 5 | Similarity: 0.2616
Review: Great seller, fast shipping! I love these cookies! I have many allergies and I have no problems when eating these. I will be ordering more....

Score: 5 | Similarity: 0.2253
Review: I drank this tea at least 4 days a week during the last 6 weeks of my pregnancy.  I wish I did i