In [8]:
# Source paragraph describing the Aston Martin DB11. It is kept under its own
# name so the raw string stays available after the split below.
raw_paragraph = """The Aston Martin DB11 is a two-door grand touring car. 
It was manufactured as both a coupe and a convertible, the latter known as the Volante. 
The British carmaker Aston Martin produced the DB11 from 2016 to 2023 when it was replaced by the DB12. 
The DB11 succeeded the DB9, which the company made between 2004 and 2016. 
Designed by Marek Reichman, who became lead designer in May 2005, the DB11 debuted at the Geneva Motor Show in March 2016. 
The first model of Aston Martin's "second-century plan",[2][3] the DB11—like its predecessor and its platform siblings—incorporates aluminium extensively throughout its body.
Official manufacture of the DB11 began at the Aston Martin facility in Gaydon, Warwickshire, in September 2016. 
Two engine configurations of the DB11 were available: a 4.0-litre V8-engine model produced by Mercedes-AMG and a 5.2-litre V12-engine model produced by Aston Martin. 
The Volante version of the DB11 was introduced in October 2017. In 2018, Aston Martin and its racing division replaced the DB11 V12 with the DB11 V12 AMR, which brought an increased engine output. The V8-powered model also received an enhancement in engine performance in 2021."""

# One list entry per physical line of the literal. The last line holds several
# sentences, so entries are lines rather than individual sentences.
text = raw_paragraph.split("\n")

In [9]:
import re

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK data packages this notebook relies on: the stop-word lists
# and the WordNet database used by the lemmatizer.
for resource in ('stopwords', 'wordnet'):
    nltk.download(resource)

# Single lemmatizer instance reused by the preprocessing cell.
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package stopwords to C:\Users\Abhi K
[nltk_data]     Thakkar\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Abhi K
[nltk_data]     Thakkar\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [10]:
# Build the cleaned corpus: one lower-cased, stop-word-free, lemmatized string
# per entry of `text`.

# Load the English stop-word list once. The previous version rebuilt
# set(stopwords.words('english')) for every single word in every line.
stop_words = set(stopwords.words('english'))

corpus = []
for sentence in text:
    # Replace every character that is not an ASCII letter with a space.
    # NOTE(review): this also removes digits, so "DB11", "DB9" and "DB12" all
    # collapse to "db" and "V8"/"V12" to "v" — confirm that is intended.
    letters_only = re.sub('[^a-zA-Z]', ' ', sentence)
    tokens = letters_only.lower().split()
    # Drop stop words first, then lemmatize the remaining tokens.
    lemmas = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    corpus.append(' '.join(lemmas))
corpus

['aston martin db two door grand touring car',
 'manufactured coupe convertible latter known volante',
 'british carmaker aston martin produced db replaced db',
 'db succeeded db company made',
 'designed marek reichman became lead designer may db debuted geneva motor show march',
 'first model aston martin second century plan db like predecessor platform sibling incorporates aluminium extensively throughout body',
 'official manufacture db began aston martin facility gaydon warwickshire september',
 'two engine configuration db available litre v engine model produced mercedes amg litre v engine model produced aston martin',
 'volante version db introduced october aston martin racing division replaced db v db v amr brought increased engine output v powered model also received enhancement engine performance']

In [11]:
# Create TF-IDF model
from sklearn.feature_extraction.text import TfidfVectorizer

# Cap on the vocabulary size handed to the vectorizer.
MAX_FEATURES = 100
tfidf = TfidfVectorizer(max_features=MAX_FEATURES)

In [12]:
# Fit the vectorizer on the cleaned corpus and transform it in one step.
tfidf_sparse = tfidf.fit_transform(corpus)

# Dense array form: one row per corpus entry, one column per vocabulary term.
X = tfidf_sparse.toarray()
X

array([[0.        , 0.        , 0.        , 0.        , 0.22304468,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.42900567, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.18172723, 0.        ,
        0.        , 0.        , 0.        , 0.42900567, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.42900567, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.22304468, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.42900567,
        0.36234502, 0.        , 0.        , 0.  

In [13]:
# Display the fitted term -> column-index mapping; each index is the column
# of X that holds that term's TF-IDF weight (e.g. 'aston' is column 4).
tfidf.vocabulary_

{'aston': np.int64(4),
 'martin': np.int64(45),
 'db': np.int64(18),
 'two': np.int64(70),
 'door': np.int64(23),
 'grand': np.int64(31),
 'touring': np.int64(69),
 'car': np.int64(11),
 'manufactured': np.int64(42),
 'coupe': np.int64(17),
 'convertible': np.int64(16),
 'latter': np.int64(36),
 'known': np.int64(35),
 'volante': np.int64(72),
 'british': np.int64(9),
 'carmaker': np.int64(12),
 'produced': np.int64(58),
 'replaced': np.int64(62),
 'succeeded': np.int64(67),
 'company': np.int64(14),
 'made': np.int64(40),
 'designed': np.int64(20),
 'marek': np.int64(44),
 'reichman': np.int64(61),
 'became': np.int64(6),
 'lead': np.int64(37),
 'designer': np.int64(21),
 'may': np.int64(46),
 'debuted': np.int64(19),
 'geneva': np.int64(30),
 'motor': np.int64(49),
 'show': np.int64(65),
 'march': np.int64(43),
 'first': np.int64(28),
 'model': np.int64(48),
 'second': np.int64(63),
 'century': np.int64(13),
 'plan': np.int64(54),
 'like': np.int64(38),
 'predecessor': np.int64(57),
