In [None]:
# Print, for every class of the fitted linear classifier `clf`, the `top_n`
# tokens with the largest coefficients, together with each token's weights
# across all classes.

# token -> feature index dictionary is already stored in the tfidf model
token2idx = tfidf_vectorizer.vocabulary_
# inverse of the dictionary: feature index -> token
idx2token = {idx: token for token, idx in token2idx.items()}

# clf.coef_ is a matrix with classes as rows and tokens/features as columns.
# Iterating over its transpose yields one vector per token, holding that
# token's weight in every row (class) of clf.coef_.
idx2weight = dict(enumerate(clf.coef_.T))
# each weight: vector with one entry per row of clf.coef_

top_n = 6

# argsort on clf.coef_ sorts each row (axis=1) increasingly and yields
# feature indices instead of the actual values
argsorted_cls = np.argsort(clf.coef_, axis=1)
# argsorted_cls: matrix of size C x D (C: rows of clf.coef_, D: number of features)

# Class labels come from the classifier itself, NOT from the token vocabulary:
# looking the class index up in idx2token would print an unrelated token.
class_labels = clf.classes_
if clf.coef_.shape[0] == 1 and len(class_labels) == 2:
    # scikit-learn linear models keep a single coefficient row for binary
    # problems; that row describes the second (positive) class.
    class_labels = class_labels[1:]

# we loop over the sorted indices, keeping the row number (the class index)
for class_index, sorted_tokens in enumerate(argsorted_cls):
    print(f"Class {class_labels[class_index]} ({class_index}) and it's top {top_n} tokens:")

    # the argsort order is increasing, so reverse it to get the largest
    # weights first and keep only the first top_n feature indices
    for token in sorted_tokens[::-1][:top_n]:
        # idx2weight gives back the token's weight in every class, so the
        # output lets us check both that:
        #   1) tokens are really ranked from top 1 to top_n within the class
        #   2) how the token's weight in this class compares with its weights
        #      in the other classes
        reformatted_weights = ', '.join(f"{x:.4f}" for x in idx2weight[token].tolist())
        # idx2token maps the feature index back to the token's actual text
        print(f"Token {idx2token[token]} ({token}) has a weight:\n\t[{reformatted_weights}]")
    print()