**The following python code would build a TF-IDF model for the given corpus:**

In [0]:
# Convert a collection of raw documents to a matrix of TF-IDF features.
from sklearn.feature_extraction.text import TfidfVectorizer
import operator
# Example data set
corpus=["this car got the excellence award",\
         "good car gives good mileage",\
         "this car is very expensive",\
         "the company is growing with very high production",\
         "this company is financially good"]


In [35]:
# Formming vocabulary set from the example data set using pure python
vocabulary = set()
for doc in corpus:
  vocabulary.update(doc.split())

vocabulary = list(vocabulary)
print(vocabulary)

['good', 'the', 'excellence', 'got', 'growing', 'award', 'gives', 'very', 'expensive', 'high', 'this', 'car', 'is', 'with', 'financially', 'mileage', 'production', 'company']


In [48]:
tfidf = TfidfVectorizer(vocabulary=vocabulary)

tfidf.build_preprocessor().  #Return a function to preprocess the text before tokenization.
tfidf.build_tokenizer(). # Return a function that splits a string into a sequence of tokens.
tfidf.fit(corpus) # Learn vocabulary and idf from training set.

print(tfidf.get_feature_names())  # Array mapping from feature integer indices to feature name.




['good', 'the', 'excellence', 'got', 'growing', 'award', 'gives', 'very', 'expensive', 'high', 'this', 'car', 'is', 'with', 'financially', 'mileage', 'production', 'company']
<built-in method findall of _sre.SRE_Pattern object at 0x7fa7feab1288>


In [46]:
for doc in corpus:
  score = {}
  print(doc)
  X = tfidf.transform([doc])  # Transform documents to document-term matrix. Uses the vocabulary and document frequencies (df) learned by fit (or fit_transform).
  for word in doc.split():
    score[word] = X[0, tfidf.vocabulary_[word]]
  sorted_score = sorted(score.items(), key=operator.itemgetter(1), reverse=True)
  print("\t", sorted_score)



this car got the excellence award
	 [('got', 0.4689132131547637), ('excellence', 0.4689132131547637), ('award', 0.4689132131547637), ('the', 0.3783162278555838), ('this', 0.3140366438234139), ('car', 0.3140366438234139)]
good car gives good mileage
	 [('good', 0.7178821805115433), ('gives', 0.4448982295027494), ('mileage', 0.4448982295027494), ('car', 0.2979535293877717)]
this car is very expensive
	 [('expensive', 0.5776914793752232), ('very', 0.4660778481185906), ('this', 0.38688671647327205), ('car', 0.38688671647327205), ('is', 0.38688671647327205)]
the company is growing with very high production
	 [('growing', 0.39524574252810757), ('with', 0.39524574252810757), ('high', 0.39524574252810757), ('production', 0.39524574252810757), ('the', 0.31888177640211135), ('company', 0.31888177640211135), ('very', 0.31888177640211135), ('is', 0.26470068018333703)]
this company is financially good
	 [('financially', 0.5591166343026757), ('company', 0.4510917800707943), ('good', 0.45109178007079