In [None]:
# Text Summarization with Latent Semantic Analysis (SVD)
import numpy as np
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import normalize

# Tiny toy corpus: each string is treated as one sentence/document.
docs = [
    "Linear algebra is essential for AI agents and machine learning.",
    "Agents use vector similarity and matrix operations.",
    "Singular value decomposition helps in summarization and topic extraction.",
    "Matrix factorization is widely applied in recommendation systems.",
]


def truncate_svd(U, S, Vt, k):
    """Keep the ``k`` leading singular triplets of an SVD.

    Args:
        U: Left singular vectors, one column per singular value.
        S: 1-D array of singular values in descending order, as returned
            by ``np.linalg.svd``.
        Vt: Right singular vectors, one row per singular value.
        k: Number of latent topics to keep. Values larger than ``len(S)``
            are clamped to ``len(S)``.

    Returns:
        Tuple ``(U_k, S_k, Vt_k)`` where ``S_k`` is a ``k x k`` diagonal
        matrix, so ``U_k @ S_k @ Vt_k`` is the rank-``k`` approximation.

    Raises:
        ValueError: If ``k`` is negative. A negative slice bound would
            silently drop trailing components instead of keeping the top ones.
    """
    if k < 0:
        raise ValueError("k must be non-negative")
    k = min(k, len(S))
    return U[:, :k], np.diag(S[:k]), Vt[:k, :]


def rank_sentences(reconstruction):
    """Score every row by its Euclidean norm and rank rows by that score.

    Args:
        reconstruction: 2-D array with one row per sentence.

    Returns:
        Tuple ``(scores, order)``. ``scores[i]`` is the L2 norm of row ``i``;
        ``order`` lists row indices from highest to lowest score. Ties keep
        their original row order because a stable sort is requested.
    """
    scores = np.linalg.norm(reconstruction, axis=1)
    order = np.argsort(-scores, kind="stable")
    return scores, order


if __name__ == "__main__":
    # Step 1: Build the count matrix.
    # NOTE: CountVectorizer.fit_transform returns one ROW per document and one
    # COLUMN per term (document-term orientation). The row-wise scoring below
    # relies on that; the printed label is kept unchanged for output
    # compatibility.
    vectorizer = CountVectorizer(stop_words="english")
    X = vectorizer.fit_transform(docs).toarray()
    # Column index -> vocabulary word; not used further in the visible code.
    terms = vectorizer.get_feature_names_out()

    print("Term-Document Matrix:\n", X)

    # Step 2: Apply SVD (thin form: only min(n_docs, n_terms) components).
    U, S, Vt = np.linalg.svd(X, full_matrices=False)

    # Keep top k topics
    k = 2
    U_k, S_k, Vt_k = truncate_svd(U, S, Vt, k)

    # Rank-k approximation of X built from the retained topics.
    Xk = U_k @ S_k @ Vt_k

    # Step 3: Sentence scores = norm of each sentence's row in the approximation.
    sentence_strength, ranked = rank_sentences(Xk)

    # Select the top-scoring sentences as the summary (highest score first).
    n_summary = 2
    summary = [docs[i] for i in ranked[:n_summary]]

    print("\nSummary:\n", "\n".join(summary))