# Abilities texts
This file shows how to get access to abilities texts.

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF
from stemming.porter2 import stem

from atod import Abilities, Heroes

In [2]:
# create stop-words list
heroes = Heroes.all()
heroes_names = [h.name for h in heroes]
words_in_heroes_names = [word.lower() for name in heroes_names for word in name.split(' ')]

eng_stop_words = TfidfVectorizer(stop_words='english').get_stop_words()
stop_words = set(words_in_heroes_names + list(eng_stop_words) 
                 + ['font', 'color', '7998b5'])

No abilities for this HeroID == 16


In [3]:
# get all texts for all abilities
texts = Abilities.all().get_texts()
# get descriptions
descriptions = texts[['description', 'name']]
corpus = [a.replace('\\n', ' ').replace('%%', '%') for a in descriptions['description']]
corpus = [stem(word) for doc in corpus for word in doc.split(' ')]
corpus.extend(['stun', 'silence', 'blink'])

In [6]:
tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2,  
                                   stop_words=stop_words,
                                   ngram_range=(1,3))
tfidf_vectorizer.fit(corpus)
tf_corpus = tfidf_vectorizer.transform(corpus)
tf_features_names = tfidf_vectorizer.get_feature_names()

tf_corpus.shape

(14143, 758)

In [7]:
nmf = NMF(n_components=30, init='nndsvd').fit(tf_corpus)

for topic_idx, topic in enumerate(nmf.components_):
    print("Topic %d:" % (topic_idx))
    print(", ".join([tf_features_names[i]
                    for i in topic.argsort()[:-10 - 1:-1]]))

Topic 0:
damag, zombi, everi, explod, expired, expire, experience, experi, exort, exist
Topic 1:
enemi, everi, explod, expired, expire, experience, experi, exort, exist, exhal
Topic 2:
unit, zombi, evasion, expired, expire, experience, experi, exort, exist, exhal
Topic 3:
attack, zombi, everi, explod, expired, expire, experience, experi, exort, exist
Topic 4:
deal, zombi, everi, explod, expired, expire, experience, experi, exort, exist
Topic 5:
target, duration, zombi, everi, expired, expire, experience, experi, exort, exist
Topic 6:
speed, units, creat, level, effect, grant, summon, duration, hit, type
Topic 7:
scepter, cast, level, summon, power, enemies, armor, energi, creep, current
Topic 8:
aghanim, magic, heal, effect, strike, duration, spell, friend, type, path
Topic 9:
upgradabl, units, heal, effect, strike, grant, hit, reduc, type, use
Topic 10:
hero, creat, cast, level, grant, summon, power, enemies, spell, type
Topic 11:
damage, cast, magic, effect, heal, power, enemies, dur

In [None]:
test = tfidf_vectorizer.transform([descriptions['description'][1]])
weights = nmf.transform(test)

print(weights)
print(weights.argsort())

topic_index = weights.argsort()[0][-1]

for i in nmf.components_[topic_index].argsort()[-1:]:
    print(tf_features_names[i])