In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD

# Sample documents (you can replace them with real text data)
documents = [
    "Cat and dog are common pets.",
    "AI and machine learning are transforming industries.",
    "Dogs are loyal animals.",
    "Artificial Intelligence (AI) is the future of technology.",
    "Cats love to sleep a lot.",
    "Machine learning enables AI models to improve.",
]

# 🔹 Step 1: Convert Text to TF-IDF Matrix
# English stop words are dropped before weighting. The result has one row per
# document and one column per vocabulary term.
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(documents)  # Document-Term Matrix

# 🔹 Step 2: Apply Truncated SVD for LSA
num_concepts = 2  # Number of latent topics to extract
# TruncatedSVD uses a randomized solver by default; pinning the seed keeps the
# extracted concepts (values and signs) reproducible from run to run.
svd = TruncatedSVD(n_components=num_concepts, random_state=42)
X_lsa = svd.fit_transform(X)  # One row per document, one column per concept

# 🔹 Step 3: Display Top Words in Each Concept
# Vocabulary terms, aligned with the columns of X and of svd.components_.
terms = vectorizer.get_feature_names_out()

def print_top_words_per_concept(top_n=5, components=None, feature_names=None):
    """Print the highest-weighted terms of every LSA concept.

    For each row of the component matrix, terms are ranked by their signed
    weight (largest first) and the leading ``top_n`` are printed as a list
    under a ``Concept <k>:`` heading, followed by a blank line.

    NOTE: ranking uses the signed weight, so terms with large *negative*
    weights are never surfaced even if they load strongly on a concept.

    Args:
        top_n: How many terms to show per concept. Defaults to 5.
        components: Iterable of per-concept weight rows, one weight per
            term. Defaults to the module-level ``svd.components_``.
        feature_names: Term labels aligned with the entries of each row in
            ``components``. Defaults to the module-level ``terms``.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        # A negative slice bound would silently drop terms from the wrong end.
        raise ValueError(f"top_n must be non-negative, got {top_n}")
    if components is None:
        components = svd.components_
    if feature_names is None:
        feature_names = terms

    for index, weights in enumerate(components, start=1):
        # sorted() is stable, so tied weights keep their vocabulary order.
        ranked = sorted(
            zip(feature_names, weights),
            key=lambda pair: pair[1],
            reverse=True,
        )
        print(f"Concept {index}:")
        print([term for term, _ in ranked[:top_n]])
        print("")

# 🔹 Output Results
# First the strongest terms of each concept, then each document's coordinates
# in concept space next to its original text.
print("\n🔹 Top Words in Each Concept (LSA Results):")
print_top_words_per_concept()

print("\n🔹 Document-Concept Matrix:")
concept_labels = [f"Concept {k}" for k in range(1, num_concepts + 1)]
df = pd.DataFrame(X_lsa, columns=concept_labels).assign(Document=documents)
print(df)


🔹 Top Words in Each Concept (LSA Results):
Concept 1:
['ai', 'learning', 'machine', 'industries', 'transforming']

Concept 2:
['cats', 'lot', 'love', 'sleep', 'future']


🔹 Document-Concept Matrix:
      Concept 1     Concept 2  \
0  1.406707e-16  9.107667e-17   
1  8.192129e-01 -2.463892e-15   
2  3.545092e-16 -6.305926e-01   
3  3.762879e-01  1.062870e-14   
4  3.170959e-16  7.761140e-01   
5  8.139025e-01 -2.558177e-15   

                                            Document  
0                       Cat and dog are common pets.  
1  AI and machine learning are transforming indus...  
2                            Dogs are loyal animals.  
3  Artificial Intelligence (AI) is the future of ...  
4                          Cats love to sleep a lot.  
5     Machine learning enables AI models to improve.  
