In [None]:
# --- MODULE 2: VECTORIZATION & SIMILARITY ---
# Goal: Turn text into numeric TF-IDF vectors, then rank documents by cosine similarity to a query.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

In [None]:
# 1. THE KNOWLEDGE BASE (cleaned corpus)
# One string per document; this is everything the search step can match a
# query against. The text is already cleaned, so it is used as-is.
corpus = [
    "ai future technology",
    "students learn artificial intelligence",
    "machine learning transforming education",
]

In [None]:
# 2. THE USER QUERY
# The question a user puts to the AI agent. It is wrapped in a list because
# the vectorizer expects an iterable of documents, not a bare string.
query = ["learning about ai"]

# 3. VECTORIZATION (TF-IDF)
# Learn the vocabulary and IDF weights from the knowledge base only ...
vectorizer = TfidfVectorizer().fit(corpus)

# ... then project both the knowledge base and the query into that same
# vector space so the two can be compared directly.
tfidf_matrix = vectorizer.transform(corpus)
query_vector = vectorizer.transform(query)

In [None]:
# 4. SIMILARITY CALCULATION (The AI's Decision)
# Cosine similarity between the query vector and every knowledge-base vector.
# The result is 2-D (one row per query, one column per document).
similarities = cosine_similarity(query_vector, tfidf_matrix)

# 5. DISPLAY RESULTS
# There is a single query, so row 0 holds its score against each sentence,
# in the same order as `corpus`.
results = pd.DataFrame({
    'Knowledge Base Sentence': corpus,
    'Similarity Score': similarities[0]
})

# The banner emoji is written as a named escape so the source stays ASCII and
# cannot be corrupted into mojibake by a wrong file encoding again.
print("\N{SPARKLES} AI SEARCH RESULTS (Similarity Based):")
# Best match first.
print(results.sort_values(by='Similarity Score', ascending=False))