## TODO
1. (One day) Write distance function which is going to use nmf classification of abilities texts.
2. write a method to binarize columns by given column name
3. would be nice to use some anchor abilities as centers
4. try KMeans without numeric variables

In [1]:
import numpy as np
import pandas as pd
from pprint import pprint
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer

from atod import Abilities

In [2]:
specs = pd.read_csv('../data/702/abilities_specs.csv')
specs.head()

Unnamed: 0,2_times,3_times,4_times,AbilityBehavior,AbilityCastPoint,AbilityCastRange,AbilityCastRangeBuffer,AbilityChannelTime,AbilityCooldown,AbilityDamage,...,wolf_bat,wolf_damage,wolf_duration,wolf_hp,wolf_index,zombie_interval,ID,damage_per_burn,lvl,per_hit
0,,,,DOTA_ABILITY_BEHAVIOR_PASSIVE,,,,,,,...,,,,,,,5003.0,0.6,2.5,46.0
1,,,,DOTA_ABILITY_BEHAVIOR_POINT | DOTA_ABILITY_BEH...,0.4,1037.5,,,8.25,,...,,,,,,,5004.0,,2.5,
2,,,,DOTA_ABILITY_BEHAVIOR_PASSIVE,,,,,,,...,,,,,,,5005.0,,2.5,
3,,,,DOTA_ABILITY_BEHAVIOR_UNIT_TARGET | DOTA_ABILI...,0.3,600.0,,,70.0,,...,,,,,,,5006.0,,2.5,
4,,,,DOTA_ABILITY_BEHAVIOR_NO_TARGET | DOTA_ABILITY...,0.4,,,,13.0,0.0,...,,,,,,,5007.0,,2.5,


In [3]:
columns_to_drop = ['lvl', 'AbilityCastRangeBuffer', 'levelkey', 
                   'HotKeyOverride', 'AbilityTextureName',
                   'LevelsBetweenUpgrades', '_sa_instance_state',
                   'OnCastbar', 'OnLearnbar', 'LinkedSpecialBonus',
                   'ID', 'pk']
specs = specs.drop(columns_to_drop, axis=1)
specs = specs.dropna(axis=1, thresh=4)
specs.shape

(519, 97)

In [4]:
# encode AbilityBehavior
# get all possible values
behavior = specs.AbilityBehavior
behavior = behavior.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(behavior)
binary_behavior = pd.DataFrame(mlb.transform(behavior),
                               columns=mlb.classes_).drop([''], axis=1)

X = pd.concat([specs.drop(['AbilityBehavior'], axis=1), binary_behavior], axis=1)

In [5]:
flags = X.AbilityUnitTargetFlags
flags = flags.fillna(value='DOTA_UNIT_TARGET_FLAG_NONE')
flags = flags.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(flags)
binary_flags = pd.DataFrame(mlb.transform(flags), columns=mlb.classes_)

X = pd.concat([X.drop(['AbilityUnitTargetFlags'], axis=1), binary_flags], axis=1)

In [6]:
target_team = X.AbilityUnitTargetTeam
target_team = target_team.fillna(value='DOTA_UNIT_TARGET_TEAM_NONE')
target_team = target_team.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(target_team)
binary_team = pd.DataFrame(mlb.transform(target_team), 
                           columns=mlb.classes_)

X = pd.concat([X.drop(['AbilityUnitTargetTeam'], axis=1), binary_team], axis=1)

In [7]:
target_type = X.AbilityUnitTargetType
target_type = target_type.fillna(value='DOTA_UNIT_TARGET_NONE')
target_type = target_type.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(target_type)
binary_type = pd.DataFrame(mlb.transform(target_type), 
                           columns=mlb.classes_)

X = pd.concat([X.drop(['AbilityUnitTargetType'], axis=1), binary_type], axis=1)

In [8]:
immunity = X.SpellImmunityType
immunity = immunity.fillna(value='SPELL_IMMUNITY_NONE')
immunity = immunity.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(immunity)
binary_immunity = pd.DataFrame(mlb.transform(immunity), 
                           columns=mlb.classes_)

X = pd.concat([X.drop(['SpellDispellableType'], axis=1), 
               binary_immunity], axis=1)

In [9]:
dispel = X.SpellImmunityType
dispel = dispel.fillna(value='DOTA_UNIT_TARGET_NONE')
dispel = dispel.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(dispel)
binary_dispel = pd.DataFrame(mlb.transform(dispel), 
                           columns=mlb.classes_)

X = pd.concat([X.drop(['SpellImmunityType'], axis=1), 
               binary_dispel], axis=1)

In [10]:
X['AbilityType'] = X['AbilityType'].apply(
                    lambda x: 1 if x == 'DOTA_ABILITY_TYPE_ULTIMATE' else 0)

X['isUltimate'] = X['AbilityType']
X = X.drop(['AbilityType'], axis=1)

In [11]:
X['AbilityUnitDamageType'] = X['AbilityUnitDamageType'].apply(
                              lambda x: 'DAMAGE_TYPE_NONE' if x is np.NaN else x)

lb = LabelBinarizer().fit(X['AbilityUnitDamageType'])
dmg_type = pd.DataFrame(lb.transform(X['AbilityUnitDamageType']),
                        columns=lb.classes_)

X = pd.concat([X.drop(['AbilityUnitDamageType'], axis=1), dmg_type], axis=1)

In [12]:
X['MaxLevel'] = X['MaxLevel'].fillna(value=3)
X = X.fillna(value=0)

In [13]:
km = KMeans(n_clusters=40, tol=1e-6)
km.fit(X.drop(['name'], axis=1))

ability_by_label = dict()
for name, label in zip(X['name'], km.labels_):
    ability_by_label.setdefault(str(label), [])
    ability_by_label[str(label)].append(name)

pprint(ability_by_label)

{'0': ['silence',
       'shadowraze3',
       'dream_coil',
       'silence',
       'arc_lightning',
       'crystal_nova',
       'midnight_pulse',
       'chronosphere',
       'split_earth',
       'lightning_storm',
       'ion_shell',
       'purification',
       'liquid_fire',
       'sticky_napalm',
       'scorched_earth',
       'chilling_touch',
       'call_down',
       'acid_spray',
       'unstable_concoction',
       'curse_of_the_silent',
       'chaos_meteor',
       'sanity_eclipse',
       'soul_catcher',
       'decay',
       'telekinesis',
       'fade_bolt',
       'thunder_strike',
       'kinetic_field',
       'static_storm',
       'mystic_snake',
       'overwhelming_odds',
       'suicide',
       'sleight_of_fist',
       'firestorm',
       'pit_of_malice',
       'false_promise',
       'winters_curse',
       'magnetic_field',
       'mind_over_matter'],
 '1': ['activate_fire_remnant'],
 '10': ['ball_lightning', 'whirling_axes_melee'],
 '11': ['power

In [14]:
print(len(list(specs.columns)))

97


In [15]:
dbscan = DBSCAN(eps=.2, min_samples=3, metric='cityblock')
dbscan.fit(X.drop(['name'], axis=1))

ability_by_label = dict()
for name, label in zip(X['name'], km.labels_):
    ability_by_label.setdefault(str(label), [])
    ability_by_label[str(label)].append(name)
    
pprint(ability_by_label)

{'0': ['silence',
       'shadowraze3',
       'dream_coil',
       'silence',
       'arc_lightning',
       'crystal_nova',
       'midnight_pulse',
       'chronosphere',
       'split_earth',
       'lightning_storm',
       'ion_shell',
       'purification',
       'liquid_fire',
       'sticky_napalm',
       'scorched_earth',
       'chilling_touch',
       'call_down',
       'acid_spray',
       'unstable_concoction',
       'curse_of_the_silent',
       'chaos_meteor',
       'sanity_eclipse',
       'soul_catcher',
       'decay',
       'telekinesis',
       'fade_bolt',
       'thunder_strike',
       'kinetic_field',
       'static_storm',
       'mystic_snake',
       'overwhelming_odds',
       'suicide',
       'sleight_of_fist',
       'firestorm',
       'pit_of_malice',
       'false_promise',
       'winters_curse',
       'magnetic_field',
       'mind_over_matter'],
 '1': ['activate_fire_remnant'],
 '10': ['ball_lightning', 'whirling_axes_melee'],
 '11': ['power