In [1]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

def bow_and_tfidf_representation(texts):
    """Build Bag-of-Words and TF-IDF representations of a text collection.

    Two independent vectorizers (``CountVectorizer`` and ``TfidfVectorizer``,
    both with default settings) are fitted on ``texts``. Because ``texts`` is
    handed to ``fit_transform`` twice, it should be re-iterable (e.g. a list).

    Args:
        texts: Collection of raw text documents.

    Returns:
        A 4-tuple ``(bow_array, bow_features, tfidf_array, tfidf_features)``:
        the dense count matrix, the feature names of the count vectorizer,
        the dense TF-IDF matrix, and the feature names of the TF-IDF
        vectorizer. Each matrix has one row per document.
    """
    # --- Bag of Words: fit, densify, and read back the vocabulary ---
    counter = CountVectorizer()
    bow_array = counter.fit_transform(texts).toarray()
    bow_features = counter.get_feature_names_out()

    # --- TF-IDF: same pipeline with its own, separately fitted vectorizer ---
    weighter = TfidfVectorizer()
    tfidf_array = weighter.fit_transform(texts).toarray()
    tfidf_features = weighter.get_feature_names_out()

    return bow_array, bow_features, tfidf_array, tfidf_features

# Demo: vectorize a three-sentence toy corpus and print both representations.
texts = [
    "I love machine learning",
    "Machine learning is fun",
    "Deep learning is amazing",
]
bow, bow_feat, tfidf, tfidf_feat = bow_and_tfidf_representation(texts)

# Count matrix (one row per sentence) followed by its column labels.
print("Bag of Words Representation:")
print(bow)
print("Features:", bow_feat)

# TF-IDF matrix in the same layout; the leading newline separates the sections.
print("\nTF-IDF Representation:")
print(tfidf)
print("Features:", tfidf_feat)


Bag of Words Representation:
[[0 0 0 0 1 1 1]
 [0 0 1 1 1 0 1]
 [1 1 0 1 1 0 0]]
Features: ['amazing' 'deep' 'fun' 'is' 'learning' 'love' 'machine']

TF-IDF Representation:
[[0.         0.         0.         0.         0.42544054 0.72033345
  0.54783215]
 [0.         0.         0.63174505 0.4804584  0.37311881 0.
  0.4804584 ]
 [0.5844829  0.5844829  0.         0.44451431 0.34520502 0.
  0.        ]]
Features: ['amazing' 'deep' 'fun' 'is' 'learning' 'love' 'machine']
