# Skill Estimation Project (using pyGM)

In [95]:
import pyGM as gm
import numpy as np
import matplotlib.pyplot as plt
import pickle
%matplotlib inline  

### Games and Outcomes

In [96]:
import csv
global players
players = 0
player_dict = dict()
with open('train.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for row in csv_reader:
        if str(row[1]) not in player_dict.values():
            player_dict[players] = str(row[1])
            players += 1
print(player_dict)

{0: 'MC', 1: 'Life', 2: 'sOs', 3: 'Polt', 4: 'TY', 5: 'INnoVation', 6: 'Mvp', 7: 'ByuN', 8: 'MMA', 9: 'PartinG', 10: 'Snute', 11: 'Zest', 12: 'Maru', 13: 'Nerchio', 14: 'TaeJa', 15: 'NesTea', 16: 'Stephano', 17: 'Dark', 18: 'Neeb', 19: 'HerO', 20: 'Leenock', 21: 'Bomber', 22: 'DongRaeGu', 23: 'Stats', 24: 'HyuN', 25: 'Rain', 26: 'Solar', 27: 'Jaedong', 28: 'herO', 29: 'Classic', 30: 'MarineKing', 31: 'Scarlett', 32: 'aLive', 33: 'soO', 34: 'NaNiwa', 35: 'viOLet', 36: 'ShoWTimE', 37: 'MaNa', 38: 'Sen', 39: 'ForGG', 40: 'Dear', 41: 'Soulkey', 42: 'PuMa', 43: 'GuMiho', 44: 'HuK', 45: 'Hydra', 46: 'SpeCial', 47: 'TRUE', 48: 'Jim', 49: 'Elazer', 50: 'jjakji', 51: 'XiGua', 52: 'MacSed', 53: 'Patience', 54: 'StarDust', 55: 'San', 56: 'TooDming', 57: 'VortiX', 58: 'FruitDealer', 59: 'Creator', 60: 'YoDa', 61: 'ByuL', 62: 'ThorZaIN', 63: 'iAsonu', 64: 'Trap', 65: 'Rogue', 66: 'TLO', 67: 'Kelazhur', 68: 'Losira', 69: 'uThermal', 70: 'Symbol', 71: 'Genius', 72: 'Bly', 73: 'Lilbow', 74: 'Bunny', 7

In [97]:
def get_id(val):
    for key, value in player_dict.items():
        if val == value:
             return key

games = []
playerInfo = {}
with open('train.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for row in csv_reader:
        player1 = get_id(str(row[1]))
        player2 = get_id(str(row[4]))
        
        if player1 in playerInfo.keys():
            playerInfo[player1][0] += 1
        else:
            # player1 is not in keys
            playerInfo[player1] = [1,0] # [number of games played, games won]
        
        if player2 is None:
            global players
            player_dict[players] = str(row[4])
            player2 = players
            players += 1
        if row[2] == "[loser]":
            score = -1
        else:
            score = +1
            playerInfo[player1][1] += 1
        games.append((player1, player2, score))
        
for player in playerInfo:
    playerInfo[player].append(playerInfo[player][1]/playerInfo[player][0])
#print(games) #it works

### Win probability and graphical model

In [98]:
nplayers = max( [max(g[0],g[1]) for g in games] )+1
nlevels = 10   # let's say 10 discrete skill levels
scale = .3     # this scales how skill difference translates to win probability

# Make variables for each player; value = skill level
X = [None]*nplayers
for i in range(nplayers):
    X[i] = gm.Var(i, nlevels)   

# Information from each game: what does Pi winning over Pj tell us?
#    Win probability  Pr[win | Xi-Xj]  depends on skill difference of players
Pwin = np.zeros( (nlevels,nlevels) )
for i in range(nlevels):
    for j in range(nlevels):
        diff = i-j                   # find the advantage of Pi over Pj, then 
        Pwin[i,j] = (1./(1+np.exp(-scale*diff)))  # Pwin = logistic of advantage

# before any games, uniform belief over skill levels for each player:
factors = [ gm.Factor([X[i]],1./nlevels) for i in range(nplayers) ]

# Now add the information from each game:
for g in games:
    P1,P2,win = g[0],g[1],g[2]
    if P1 != P2:
        if P1>P2: P1,P2,win=P2,P1,-win  # (need to make player IDs sorted...)
        inside = gm.Factor([X[P1],X[P2]], Pwin if win>0 else 1-Pwin)
        factors.append(inside)

In [99]:
model = gm.GraphModel(factors)
model.makeMinimal()  # merge any duplicate factors (e.g., repeated games)
print("Finished here")

Finished here


In [101]:
if model.nvar < 0:       # for very small models, we can do brute force inference:
    jt = model.joint()
    jt /= jt.sum()       # normalize the distribution and marginalize the table
    bel = [jt.marginal([i]) for i in range(nplayers)] 
else:                    # otherwise we need to use some approximate inference:
    from pyGM.messagepass import LBP, NMF
    #lnZ,bel = LBP(model, maxIter=10, verbose=True)   # loopy BP
    lnZ,bel = NMF(model, maxIter=10, verbose=True)  # Mean field

Iter 0: -165566.42189881988
Iter 1: -117917.47443641079
Iter 2: -115262.19936072383
Iter 3: -115009.01531640673
Iter 4: -114967.01834690094
Iter 5: -114953.65604426115
Iter 6: -114945.85968243488
Iter 7: -114939.50959295909
Iter 8: -114935.04759279983
Iter 9: -114932.13796941792
Iter 10: -114930.62859932064


### Ranking players by predicted skill

In [102]:
print("Mean skill estimates: ")
print([ bel[i].table.dot(np.arange(nlevels)) for i in range(nplayers)] )

Mean skill estimates: 
[8.999987601243307, 9.0, 8.999999999999146, 8.999999999975401, 8.999999999994477, 9.0, 8.999853280094044, 9.0, 8.999999994893512, 9.0, 8.999999999999677, 8.999999999999998, 8.999999999989864, 9.0, 9.0, 8.457180140819501, 8.238531768895225, 8.999999999964691, 9.0, 8.001825278762476, 8.999999580849416, 8.999845420060897, 8.999999999569809, 8.999999999999684, 9.0, 8.999999999919082, 9.0, 8.999998500892437, 9.0, 8.999995798595755, 8.999999999899465, 8.999414912061884, 8.831972567915255, 8.999999999999954, 8.361673695419762, 8.02370213417712, 8.999986434506198, 6.999999990010956, 7.977114668405733, 8.999999937667372, 8.999999957046832, 8.99999973597217, 8.277684333972074, 9.0, 6.999999148895726, 8.999999999999808, 8.005035616420875, 8.999999998842107, 6.999761499307689, 7.9826055122057165, 8.99999999997659, 6.6161084826444805, 5.999972471353922, 8.67143214192991, 8.999998105784258, 8.999999845164677, 6.892956168317656, 8.93639545332672, 5.489211422546491, 8.8542731123

### Predicting match outcomes using our algorithm

In [114]:
outcomes = []
with open('valid.csv', newline='') as csv_file2:
    csv_reader2 = csv.reader(csv_file2, delimiter=',')
    for row in csv_reader2:
        player1 = get_id(str(row[1]))
        player2 = get_id(str(row[4]))
        if player2 is None:
            global players
            player_dict[players] = str(row[4])
            player2 = players
            players += 1
        i,j = player1, player2
        # print("Estimated probability P{} beats P{} next time:".format(i,j))
        # Expected value (over skill of P0, P1) of Pr[win | P0-P1]
        if i<j:
            win_percent = (bel[i]*bel[j]*gm.Factor([X[i],X[j]],Pwin)).table.sum()
            if win_percent < 0.5:
                outcomes.append(-1)
            else:
                outcomes.append(1)
        else:
            win_percent = (bel[i]*bel[j]*gm.Factor([X[i],X[j]],1-Pwin)).table.sum()
            if win_percent < 0.5:
                outcomes.append(-1)
            else:
                outcomes.append(1)
print(outcomes)

[1, 1, 1, 1, 1, 1, -1, -1, 1, 1, 1, -1, 1, -1, 1, 1, -1, -1, 1, 1, 1, -1, 1, 1, 1, -1, 1, 1, -1, 1, -1, 1, 1, -1, 1, -1, 1, -1, -1, -1, 1, -1, 1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, 1, 1, -1, 1, 1, 1, 1, 1, -1, 1, 1, -1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, 1, -1, 1, 1, 1, 1, 1, -1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, -1, -1, 1, 1, 1, 1, 1, 1, -1, 1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, 1, -1, -1, 1, 1, -1, 1, 1, -1, 1, 1, -1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, 1, 1, 1, -1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, -1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [115]:
correct = []
with open('valid.csv', newline='') as csv_file2:
    csv_reader2 = csv.reader(csv_file2, delimiter=',')
    for row in csv_reader2:
        if row[2] == "[loser]":
            correct.append(-1)
        else:
            correct.append(1)

In [116]:
right = 0
index = 0
for i in outcomes:
    if outcomes[index] == correct[index]:
        right+=1
    index+=1
print(right)

63826


In [127]:
print("Our algorithm estimated the correct outcome ", 63826/94007, "% of the time.")

Our algorithm estimated the correct outcome  0.678949439935324 % of the time.


### Predicting match outcomes using simple method

In [124]:
outcomes = []
with open('valid.csv', newline='') as csv_file2:
    csv_reader2 = csv.reader(csv_file2, delimiter=',')
    for row in csv_reader2:
        player1 = get_id(str(row[1]))
        player2 = get_id(str(row[4]))
        if player2 is None:
            global players
            player_dict[players] = str(row[4])
            player2 = players
            players += 1
        i = playerInfo.get(player1)[2]
        j = 0
        if playerInfo.get(player2) is not None:
            j = playerInfo.get(player2)[2]
        if i > j:
            outcomes.append(1)
        else:
            outcomes.append(-1)

In [125]:
right = 0
index = 0
for i in outcomes:
    if outcomes[index] == correct[index]:
        right+=1
    index+=1
print(right)

61868


In [128]:
print("The simple approach estimated the correct outcome ", 61868/94007, "% of the time.")

The simple approach estimated the correct outcome  0.658121203740147 % of the time.
