## TODO
1. (One day) Write distance function which is going to use nmf classification of abilities texts.
2. write a method to binarize columns by given column name
3. would be nice to use some anchor abilities as centers
4. try KMeans without numeric variables

In [1]:
import numpy as np
import pandas as pd
from pprint import pprint
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer

from atod import Abilities

In [2]:
specs = pd.read_csv('../atod/data/702/abilities_specs.csv')
specs.head()

Unnamed: 0,2_times,3_times,4_times,AbilityBehavior,AbilityCastPoint,AbilityCastRange,AbilityCastRangeBuffer,AbilityChannelTime,AbilityCooldown,AbilityDamage,...,wolf_bat,wolf_damage,wolf_duration,wolf_hp,wolf_index,zombie_interval,ID,damage_per_burn,lvl,per_hit
0,,,,DOTA_ABILITY_BEHAVIOR_PASSIVE,,,,,,,...,,,,,,,5003.0,0.6,2.5,46.0
1,,,,DOTA_ABILITY_BEHAVIOR_POINT | DOTA_ABILITY_BEH...,0.4,1037.5,,,8.25,,...,,,,,,,5004.0,,2.5,
2,,,,DOTA_ABILITY_BEHAVIOR_PASSIVE,,,,,,,...,,,,,,,5005.0,,2.5,
3,,,,DOTA_ABILITY_BEHAVIOR_UNIT_TARGET | DOTA_ABILI...,0.3,600.0,,,70.0,,...,,,,,,,5006.0,,2.5,
4,,,,DOTA_ABILITY_BEHAVIOR_NO_TARGET | DOTA_ABILITY...,0.4,,,,13.0,0.0,...,,,,,,,5007.0,,2.5,


In [3]:
columns_to_drop = ['lvl', 'AbilityCastRangeBuffer', 'levelkey', 
                   'HotKeyOverride', 'AbilityTextureName',
                   'LevelsBetweenUpgrades', '_sa_instance_state',
                   'OnCastbar', 'OnLearnbar', 'LinkedSpecialBonus',
                   'ID', 'pk']
specs = specs.drop(columns_to_drop, axis=1)
specs = specs.dropna(axis=1, thresh=4)
specs.shape

(519, 97)

In [4]:
# encode AbilityBehavior
# get all possible values
behavior = specs.AbilityBehavior
behavior = behavior.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(behavior)
binary_behavior = pd.DataFrame(mlb.transform(behavior),
                               columns=mlb.classes_).drop([''], axis=1)

X = pd.concat([specs.drop(['AbilityBehavior'], axis=1), binary_behavior], axis=1)

In [5]:
flags = X.AbilityUnitTargetFlags
flags = flags.fillna(value='DOTA_UNIT_TARGET_FLAG_NONE')
flags = flags.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(flags)
binary_flags = pd.DataFrame(mlb.transform(flags), columns=mlb.classes_)

X = pd.concat([X.drop(['AbilityUnitTargetFlags'], axis=1), binary_flags], axis=1)

In [6]:
target_team = X.AbilityUnitTargetTeam
target_team = target_team.fillna(value='DOTA_UNIT_TARGET_TEAM_NONE')
target_team = target_team.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(target_team)
binary_team = pd.DataFrame(mlb.transform(target_team), 
                           columns=mlb.classes_)

X = pd.concat([X.drop(['AbilityUnitTargetTeam'], axis=1), binary_team], axis=1)

In [7]:
target_type = X.AbilityUnitTargetType
target_type = target_type.fillna(value='DOTA_UNIT_TARGET_NONE')
target_type = target_type.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(target_type)
binary_type = pd.DataFrame(mlb.transform(target_type), 
                           columns=mlb.classes_)

X = pd.concat([X.drop(['AbilityUnitTargetType'], axis=1), binary_type], axis=1)

In [8]:
immunity = X.SpellImmunityType
immunity = immunity.fillna(value='SPELL_IMMUNITY_NONE')
immunity = immunity.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(immunity)
binary_immunity = pd.DataFrame(mlb.transform(immunity), 
                           columns=mlb.classes_)

X = pd.concat([X.drop(['SpellDispellableType'], axis=1), 
               binary_immunity], axis=1)

In [9]:
dispel = X.SpellImmunityType
dispel = dispel.fillna(value='DOTA_UNIT_TARGET_NONE')
dispel = dispel.str.split('|').apply(lambda x: [y.strip() for y in x])

mlb = MultiLabelBinarizer().fit(dispel)
binary_dispel = pd.DataFrame(mlb.transform(dispel), 
                           columns=mlb.classes_)

X = pd.concat([X.drop(['SpellImmunityType'], axis=1), 
               binary_dispel], axis=1)

In [10]:
X['AbilityType'] = X['AbilityType'].apply(
                    lambda x: 1 if x == 'DOTA_ABILITY_TYPE_ULTIMATE' else 0)

X['isUltimate'] = X['AbilityType']
X = X.drop(['AbilityType'], axis=1)

In [11]:
X['AbilityUnitDamageType'] = X['AbilityUnitDamageType'].apply(
                              lambda x: 'DAMAGE_TYPE_NONE' if x is np.NaN else x)

lb = LabelBinarizer().fit(X['AbilityUnitDamageType'])
dmg_type = pd.DataFrame(lb.transform(X['AbilityUnitDamageType']),
                        columns=lb.classes_)

X = pd.concat([X.drop(['AbilityUnitDamageType'], axis=1), dmg_type], axis=1)

In [12]:
X['MaxLevel'] = X['MaxLevel'].fillna(value=3)
X = X.fillna(value=0)

In [13]:
km = KMeans(n_clusters=40, tol=1e-6)
km.fit(X.drop(['name'], axis=1))

ability_by_label = dict()
for name, label in zip(X['name'], km.labels_):
    ability_by_label.setdefault(str(label), [])
    ability_by_label[str(label)].append(name)

pprint(ability_by_label)

{'0': ['mana_break',
       'spell_shield',
       'culling_blade',
       'thirst',
       'trueshot',
       'enchant_totem',
       'aftershock',
       'blade_dance',
       'tidebringer',
       'fiery_soul',
       'invis',
       'morph',
       'morph_agi',
       'morph_str',
       'morph_replicate',
       'necromastery',
       'juxtapose',
       'phantom_edge',
       'ethereal_jaunt',
       'phase_shift',
       'dismember',
       'unstable_current',
       'mortal_strike',
       'great_cleave',
       'gods_strength',
       'electric_vortex',
       'overload',
       'craggy_exterior',
       'grow',
       'thundergods_wrath',
       'sprint',
       'bash',
       'kraken_shell',
       'brilliance_aura',
       'permanent_invisibility',
       'black_hole',
       'rearm',
       'headshot',
       'take_aim',
       'sadist',
       'golem_permanent_immolation',
       'call_of_the_wild',
       'hawk_invisibility',
       'boar_poison',
       'poison_sting',


In [14]:
print(len(list(specs.columns)))

97


In [15]:
dbscan = DBSCAN(eps=.2, min_samples=3, metric='cityblock')
dbscan.fit(X.drop(['name'], axis=1))

ability_by_label = dict()
for name, label in zip(X['name'], km.labels_):
    ability_by_label.setdefault(str(label), [])
    ability_by_label[str(label)].append(name)
    
pprint(ability_by_label)

{'0': ['mana_break',
       'spell_shield',
       'culling_blade',
       'thirst',
       'trueshot',
       'enchant_totem',
       'aftershock',
       'blade_dance',
       'tidebringer',
       'fiery_soul',
       'invis',
       'morph',
       'morph_agi',
       'morph_str',
       'morph_replicate',
       'necromastery',
       'juxtapose',
       'phantom_edge',
       'ethereal_jaunt',
       'phase_shift',
       'dismember',
       'unstable_current',
       'mortal_strike',
       'great_cleave',
       'gods_strength',
       'electric_vortex',
       'overload',
       'craggy_exterior',
       'grow',
       'thundergods_wrath',
       'sprint',
       'bash',
       'kraken_shell',
       'brilliance_aura',
       'permanent_invisibility',
       'black_hole',
       'rearm',
       'headshot',
       'take_aim',
       'sadist',
       'golem_permanent_immolation',
       'call_of_the_wild',
       'hawk_invisibility',
       'boar_poison',
       'poison_sting',


In [16]:
a = specs[specs['name'] == 'silence'].dropna(axis=1, how='all')
a.head()

Unnamed: 0,AbilityBehavior,AbilityCastPoint,AbilityCastRange,AbilityCooldown,AbilityDamage,AbilityDuration,AbilityManaCost,FightRecapLevel,SpellDispellableType,SpellImmunityType,duration,name,radius
16,DOTA_ABILITY_BEHAVIOR_AOE | DOTA_ABILITY_BEHAV...,0.4,900.0,14.5,0.0,4.5,90.0,1.0,SPELL_DISPELLABLE_YES,SPELL_IMMUNITY_ENEMIES_NO,4.5,silence,300.0
82,DOTA_ABILITY_BEHAVIOR_AOE | DOTA_ABILITY_BEHAV...,0.5,900.0,13.5,,4.5,80.0,,SPELL_DISPELLABLE_YES,SPELL_IMMUNITY_ENEMIES_NO,4.5,silence,425.0


In [17]:
print(specs['duration'].describe())
print(specs['AbilityDuration'].describe())


count    228.000000
mean      17.807346
std       46.467843
min        0.500000
25%        4.000000
50%        6.500000
75%       15.000000
max      600.000000
Name: duration, dtype: float64
count    80.000000
mean      9.689812
std       9.111374
min       0.000000
25%       3.402500
50%       5.750000
75%      13.250000
max      40.000000
Name: AbilityDuration, dtype: float64


In [18]:
wow = specs[specs['duration'] == 600].dropna(axis=1, how='all')
wow.head()

Unnamed: 0,AbilityBehavior,AbilityCastPoint,AbilityCastRange,AbilityCooldown,AbilityDamage,AbilityManaCost,AbilityType,AbilityUnitDamageType,HasScepterUpgrade,SpellImmunityType,duration,name,radius,vision_duration,vision_radius
459,DOTA_ABILITY_BEHAVIOR_POINT | DOTA_ABILITY_BEH...,1.0,500.0,10.0,450.0,246.666667,DOTA_ABILITY_TYPE_ULTIMATE,DAMAGE_TYPE_MAGICAL,1.0,SPELL_IMMUNITY_ENEMIES_NO,600.0,remote_mines,425.0,3.0,500.0


In [19]:
corr = specs.corr()
# there are 2 very similar columns AbilityDuration and duration.
# they there left as is because some abilities have 2 durations:
# duration of stun and duration of slow for example.
print(corr['AbilityDuration']['duration'])
corr.head(10)

0.946385036281


Unnamed: 0,AbilityCastPoint,AbilityCastRange,AbilityChannelTime,AbilityCooldown,AbilityDamage,AbilityDuration,AbilityManaCost,AbilityModifierSupportBonus,AbilityModifierSupportValue,CalculateSpellDamageTooltip,...,stun,stun_duration,tick_interval,tick_rate,total_damage,transformation_time,vision,vision_duration,vision_radius,width
AbilityCastPoint,1.0,-0.05762,-0.035359,0.109001,0.074305,-0.039983,0.161782,-0.528439,-0.038064,-0.278236,...,0.966788,0.169499,-0.137262,0.052527,0.84027,,0.510858,-0.206439,0.017562,0.266958
AbilityCastRange,-0.05762,1.0,0.093588,-0.054303,0.007771,-0.057877,0.025149,-0.975417,-0.117079,-0.34641,...,0.693253,-0.255013,0.091893,0.039942,0.968309,,0.448456,0.110587,-0.208976,-0.231706
AbilityChannelTime,-0.035359,0.093588,1.0,0.338586,-0.490522,,0.349292,,-0.998033,,...,,,,1.0,-1.0,,,,0.987829,
AbilityCooldown,0.109001,-0.054303,0.338586,1.0,0.108005,0.3251,0.435086,-0.215375,-0.15753,-0.04934,...,0.640529,-0.08894,-0.107227,-0.592137,0.254605,-0.418323,0.864358,0.151425,0.260934,0.959416
AbilityDamage,0.074305,0.007771,-0.490522,0.108005,1.0,-0.190768,0.436528,0.812392,-0.293812,-0.917273,...,0.439186,-0.089634,-0.946729,-0.341644,,,0.736366,-0.075727,0.47065,0.56169
AbilityDuration,-0.039983,-0.057877,,0.3251,-0.190768,1.0,0.128627,-0.528604,-0.293121,,...,,-0.249354,-1.0,,,,,,,0.943483
AbilityManaCost,0.161782,0.025149,0.349292,0.435086,0.436528,0.128627,1.0,-0.19649,-0.338138,-0.247456,...,0.210674,0.003476,-0.026331,-0.62376,0.792128,-0.489887,0.546979,0.168106,0.378375,0.735394
AbilityModifierSupportBonus,-0.528439,-0.975417,,-0.215375,0.812392,-0.528604,-0.19649,1.0,,,...,,,,,,,,,,
AbilityModifierSupportValue,-0.038064,-0.117079,-0.998033,-0.15753,-0.293812,-0.293121,-0.338138,,1.0,-0.684186,...,,0.216217,-1.0,1.0,,,,,-0.188982,
CalculateSpellDamageTooltip,-0.278236,-0.34641,,-0.04934,-0.917273,,-0.247456,,-0.684186,1.0,...,,,,,,,,,,
