In [34]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Toy corpus: each string is treated as one document.
sentences = [
    "This is a sample sentence",
    "I am interested in politics",
    "You are a very good software engineer, engineer.",
]

# Build the vectorizer and learn the vocabulary from the corpus in one step;
# fit() returns the fitted vectorizer itself, so the call can be chained.
# stop_words="english" removes common English words (e.g. "this", "is", "very")
# before the vocabulary is built.
vectorizer = TfidfVectorizer(stop_words="english").fit(sentences)

# Mapping of every retained term to its column index in the document-term matrix.
vectorizer.vocabulary_

{'sample': 4,
 'sentence': 5,
 'interested': 2,
 'politics': 3,
 'good': 1,
 'software': 6,
 'engineer': 0}

In [35]:
# Transform the sentences into a TF-IDF document-term matrix.
# Rows follow the order of `sentences`; columns follow the term indices stored
# in vectorizer.vocabulary_ (the vocabulary learned by the earlier fit() call).
vector_spaces = vectorizer.transform(sentences)
# Show the matrix as a dense array so every entry (including zeros) is visible.
# Note: "engineer" occurs twice in the last sentence, so its weight in that row
# is twice the weight of "good" and "software".
vector_spaces.toarray()

array([[0.        , 0.        , 0.        , 0.        , 0.70710678,
        0.70710678, 0.        ],
       [0.        , 0.        , 0.70710678, 0.70710678, 0.        ,
        0.        , 0.        ],
       [0.81649658, 0.40824829, 0.        , 0.        , 0.        ,
        0.        , 0.40824829]])

In [36]:
# Print every sentence followed by the sparse TF-IDF row computed for it.
#
# Each printed entry has the form "(A, B) C":
# A : Row index inside the printed slice. Iterating the matrix yields one
#     single-row slice per document, so A is always 0 here; it is NOT the
#     document's position in `sentences`.
# B : Column index of the term (the value stored in vectorizer.vocabulary_).
# C : TF-IDF score of that term in the sentence.
for sentence, row in zip(sentences, vector_spaces):
    print(sentence)
    print(row)

This is a sample sentence
  (0, 5)	0.7071067811865476
  (0, 4)	0.7071067811865476
I am interested in politics
  (0, 3)	0.7071067811865476
  (0, 2)	0.7071067811865476
You are a very good software engineer, engineer.
  (0, 6)	0.40824829046386296
  (0, 1)	0.40824829046386296
  (0, 0)	0.8164965809277259
