In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

In [2]:
def encode(data):
    res = []
    for i in range(0,len(data)):
        t0 = data.iloc[i][0]
        t1 = data.iloc[i][1]
        tdict0 = {x: 1 for x in t0}
        tdict0.update({y+135: 1 for y in t1})
        res += [tdict0]
    return pd.DataFrame.from_dict(res)

In [3]:
# Data
df = pd.read_csv("data.zip")

In [4]:
# Seperate Outcome Col
radiant_win = df["radiant_win"]
del df
radiant_win

0          False
1           True
2          False
3          False
4          False
           ...  
5861247    False
5861248    False
5861249    False
5861250     True
5861251     True
Name: radiant_win, Length: 5861252, dtype: bool

In [5]:
training_data_encoded = pd.read_csv("training_data_encoded_270.zip")
training_data_encoded.columns = training_data_encoded.columns.astype(int)

In [6]:
training_data_encoded

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,248,249,254,255,256,258,261,263,264,270
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5861247,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
5861248,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5861249,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5861250,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Create models
models = [
    #GaussianNB(),
    LogisticRegression()
]

In [8]:
res = pd.DataFrame(columns = ['LogisticRegression_ACC', 'LogisticRegression_AUC'])
res

Unnamed: 0,LogisticRegression_ACC,LogisticRegression_AUC


In [9]:
for i in range(5):
    # Split Data -> use same set of split data for each model for even comparisons
    X_train, X_test, out_train, out_test = train_test_split(training_data_encoded, radiant_win, test_size=0.1)
    acc = []
    auc = []
    for model in models:
        print("Iteration: ", i, "  Model: ",model)
        name = str(type(model)).split('.')[-1][:-2]

        model.fit(X_train, out_train)
        out_pred = model.predict(X_test)
        
        acc.append(accuracy_score(out_test, out_pred))
        auc.append(roc_auc_score(out_test, out_pred))
    
    # Store in res dataframe
    res.loc[i]=[acc,auc]
    print("ACC: ", acc, "\nAUC: ", auc)

Iteration:  0   Model:  LogisticRegression()
ACC:  [0.5636108959507] 
AUC:  [0.5576283478908055]
Iteration:  1   Model:  LogisticRegression()
ACC:  [0.564769349934997] 
AUC:  [0.5590444439965836]
Iteration:  2   Model:  LogisticRegression()
ACC:  [0.5648546558248567] 
AUC:  [0.5589720567010418]
Iteration:  3   Model:  LogisticRegression()
ACC:  [0.5650610960783177] 
AUC:  [0.558967740528767]
Iteration:  4   Model:  LogisticRegression()
ACC:  [0.5648819537096119] 
AUC:  [0.5587613754208247]


In [10]:
res

Unnamed: 0,LogisticRegression_ACC,LogisticRegression_AUC
0,[0.5636108959507],[0.5576283478908055]
1,[0.564769349934997],[0.5590444439965836]
2,[0.5648546558248567],[0.5589720567010418]
3,[0.5650610960783177],[0.558967740528767]
4,[0.5648819537096119],[0.5587613754208247]


In [11]:
import json
import requests
heroStats = 'https://api.opendota.com/api/heroStats'
r = requests.get(heroStats)
data = json.loads(r.text)
df = pd.DataFrame(data)
# Reference
hero_name_number = df[['id','localized_name']].set_index('id')
print(hero_name_number.to_string())

          localized_name
id                      
1              Anti-Mage
2                    Axe
3                   Bane
4            Bloodseeker
5         Crystal Maiden
6            Drow Ranger
7            Earthshaker
8             Juggernaut
9                 Mirana
10             Morphling
11          Shadow Fiend
12        Phantom Lancer
13                  Puck
14                 Pudge
15                 Razor
16             Sand King
17          Storm Spirit
18                  Sven
19                  Tiny
20       Vengeful Spirit
21            Windranger
22                  Zeus
23                Kunkka
25                  Lina
26                  Lion
27         Shadow Shaman
28               Slardar
29            Tidehunter
30          Witch Doctor
31                  Lich
32                  Riki
33                Enigma
34                Tinker
35                Sniper
36             Necrophos
37               Warlock
38           Beastmaster
39         Queen of Pain


In [12]:
vals = pd.DataFrame([
        [[14,53,77,19,38],[108,82,55,48,68]],  # Bad vs Good
        [[53,77,19,14,38],[55,68,82,48,108]],  # Bad reshuffled vs Good reshuffled
        [[108,82,55,48,68],[14,53,77,19,38]],  # Good vs Bad
        [[81,27,53,29,111],[26,109,96,74,64]], # Ck,ss,np,tide,oracle vs Lion,tb,centar, invoker,jakiro
        [[26,109,96,74,64],[81,27,53,29,111]], # Lion,tb,centar, invoker,jakiro vs Ck,ss,np,tide,oracle 
        [[1,4,6,8,12],[81,27,53,29,111]],      # am,bs,drow,jugg,pl vs Ck,ss,np,tide,oracle 
        [[81,27,53,29,111],[1,4,6,8,12]],      # reverse
        [[3,5,79,83,102],[1,4,6,8,12]],        # cm,bane,sd,tree,aba vs carries
        [[3,5,79,83,102],[81,27,53,29,111]],   # supports vs regular lineup
        [[14,105,82,53,21],[81,27,53,29,111]], # pudge,techies,meepo,np,wr vs regular
        [[81,27,53,29,111],[14,105,82,53,21]], # reverse 
       ])
vals_encoded = encode(vals)
other_cols = training_data_encoded.iloc[40:42]
vals_encoded = pd.concat([other_cols, vals_encoded]).fillna(value=0)
vals_encoded = vals_encoded.drop([40,41])
#print(test)
vals.columns=['Radiant','Dire']
vals

Unnamed: 0,Radiant,Dire
0,"[14, 53, 77, 19, 38]","[108, 82, 55, 48, 68]"
1,"[53, 77, 19, 14, 38]","[55, 68, 82, 48, 108]"
2,"[108, 82, 55, 48, 68]","[14, 53, 77, 19, 38]"
3,"[81, 27, 53, 29, 111]","[26, 109, 96, 74, 64]"
4,"[26, 109, 96, 74, 64]","[81, 27, 53, 29, 111]"
5,"[1, 4, 6, 8, 12]","[81, 27, 53, 29, 111]"
6,"[81, 27, 53, 29, 111]","[1, 4, 6, 8, 12]"
7,"[3, 5, 79, 83, 102]","[1, 4, 6, 8, 12]"
8,"[3, 5, 79, 83, 102]","[81, 27, 53, 29, 111]"
9,"[14, 105, 82, 53, 21]","[81, 27, 53, 29, 111]"


In [13]:
hero_names = []
for i,r in vals.iterrows():
    rad = r[0]
    rad_names = []
    dire = r[1]
    dire_names = []
    for rhero,dhero in zip(rad,dire):
        rad_names.append(hero_name_number.loc[rhero]['localized_name'])
        dire_names.append(hero_name_number.loc[dhero]['localized_name'])
    hero_names += [[rad_names, dire_names]]
        
hero_names = pd.DataFrame(hero_names)
hero_names.columns = ['Radiant Heroes', 'Dire Heroes']
hero_names

Unnamed: 0,Radiant Heroes,Dire Heroes
0,"[Pudge, Nature's Prophet, Lycan, Tiny, Beastma...","[Underlord, Meepo, Dark Seer, Luna, Ancient Ap..."
1,"[Nature's Prophet, Lycan, Tiny, Pudge, Beastma...","[Dark Seer, Ancient Apparition, Meepo, Luna, U..."
2,"[Underlord, Meepo, Dark Seer, Luna, Ancient Ap...","[Pudge, Nature's Prophet, Lycan, Tiny, Beastma..."
3,"[Chaos Knight, Shadow Shaman, Nature's Prophet...","[Lion, Terrorblade, Centaur Warrunner, Invoker..."
4,"[Lion, Terrorblade, Centaur Warrunner, Invoker...","[Chaos Knight, Shadow Shaman, Nature's Prophet..."
5,"[Anti-Mage, Bloodseeker, Drow Ranger, Juggerna...","[Chaos Knight, Shadow Shaman, Nature's Prophet..."
6,"[Chaos Knight, Shadow Shaman, Nature's Prophet...","[Anti-Mage, Bloodseeker, Drow Ranger, Juggerna..."
7,"[Bane, Crystal Maiden, Shadow Demon, Treant Pr...","[Anti-Mage, Bloodseeker, Drow Ranger, Juggerna..."
8,"[Bane, Crystal Maiden, Shadow Demon, Treant Pr...","[Chaos Knight, Shadow Shaman, Nature's Prophet..."
9,"[Pudge, Techies, Meepo, Nature's Prophet, Wind...","[Chaos Knight, Shadow Shaman, Nature's Prophet..."


In [14]:
for model in models: 
    print(model)
    #res+=[model.predict(test)]
    prob=pd.DataFrame(model.predict_proba(vals_encoded))
    prob.columns=['Dire_Win','Radiant_Win']
    prob=prob[['Radiant_Win','Dire_Win']]

prob

LogisticRegression()


Unnamed: 0,Radiant_Win,Dire_Win
0,0.1421,0.8579
1,0.1421,0.8579
2,0.893269,0.106731
3,0.46276,0.53724
4,0.580274,0.419726
5,0.690464,0.309536
6,0.364739,0.635261
7,0.432592,0.567408
8,0.600911,0.399089
9,0.551503,0.448497


In [22]:
pred_res = pd.concat([vals, hero_names,prob],axis=1)
pd.set_option('max_colwidth', 400)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  
    #pred_res.style.set_properties(subset=['Radiant Heroes', 'Dire Heroes'], **{'width': '1000px'})
    display(pred_res)

Unnamed: 0,Radiant,Dire,Radiant Heroes,Dire Heroes,Radiant_Win,Dire_Win
0,"[14, 53, 77, 19, 38]","[108, 82, 55, 48, 68]","[Pudge, Nature's Prophet, Lycan, Tiny, Beastmaster]","[Underlord, Meepo, Dark Seer, Luna, Ancient Apparition]",0.1421,0.8579
1,"[53, 77, 19, 14, 38]","[55, 68, 82, 48, 108]","[Nature's Prophet, Lycan, Tiny, Pudge, Beastmaster]","[Dark Seer, Ancient Apparition, Meepo, Luna, Underlord]",0.1421,0.8579
2,"[108, 82, 55, 48, 68]","[14, 53, 77, 19, 38]","[Underlord, Meepo, Dark Seer, Luna, Ancient Apparition]","[Pudge, Nature's Prophet, Lycan, Tiny, Beastmaster]",0.893269,0.106731
3,"[81, 27, 53, 29, 111]","[26, 109, 96, 74, 64]","[Chaos Knight, Shadow Shaman, Nature's Prophet, Tidehunter, Oracle]","[Lion, Terrorblade, Centaur Warrunner, Invoker, Jakiro]",0.46276,0.53724
4,"[26, 109, 96, 74, 64]","[81, 27, 53, 29, 111]","[Lion, Terrorblade, Centaur Warrunner, Invoker, Jakiro]","[Chaos Knight, Shadow Shaman, Nature's Prophet, Tidehunter, Oracle]",0.580274,0.419726
5,"[1, 4, 6, 8, 12]","[81, 27, 53, 29, 111]","[Anti-Mage, Bloodseeker, Drow Ranger, Juggernaut, Phantom Lancer]","[Chaos Knight, Shadow Shaman, Nature's Prophet, Tidehunter, Oracle]",0.690464,0.309536
6,"[81, 27, 53, 29, 111]","[1, 4, 6, 8, 12]","[Chaos Knight, Shadow Shaman, Nature's Prophet, Tidehunter, Oracle]","[Anti-Mage, Bloodseeker, Drow Ranger, Juggernaut, Phantom Lancer]",0.364739,0.635261
7,"[3, 5, 79, 83, 102]","[1, 4, 6, 8, 12]","[Bane, Crystal Maiden, Shadow Demon, Treant Protector, Abaddon]","[Anti-Mage, Bloodseeker, Drow Ranger, Juggernaut, Phantom Lancer]",0.432592,0.567408
8,"[3, 5, 79, 83, 102]","[81, 27, 53, 29, 111]","[Bane, Crystal Maiden, Shadow Demon, Treant Protector, Abaddon]","[Chaos Knight, Shadow Shaman, Nature's Prophet, Tidehunter, Oracle]",0.600911,0.399089
9,"[14, 105, 82, 53, 21]","[81, 27, 53, 29, 111]","[Pudge, Techies, Meepo, Nature's Prophet, Windranger]","[Chaos Knight, Shadow Shaman, Nature's Prophet, Tidehunter, Oracle]",0.551503,0.448497


In [16]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  
    display(training_data_encoded.iloc[561:562])

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,119,120,121,123,126,128,129,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,254,255,256,258,261,263,264,270
561,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
