# CS 179 Project

In [None]:
import pyGM as gm
import numpy as np 
import matplotlib.pyplot as plt
%matplotlib inline
import networkx as nx
from pyGM.messagepass import LBP, NMF
import pickle

# Load the training and validation game records, one CSV row per list entry.
with open('train.csv') as f: lines = f.read().split('\n')
with open('valid.csv') as f: lines2 = f.read().split('\n')

# Give every player name a numeric id, in order of first appearance.
# Training rows are scanned first, so ids of training players are the same
# as if only train.csv were read.  Validation rows are scanned as well so a
# player that appears only in valid.csv still gets an id; without this, the
# later playerid[...] lookups on validation rows raise KeyError.
playerid = {}
for rows in (lines, lines2):
    for row in rows:
        fields = row.split(',')
        if len(fields) != 10: continue   # parse error or blank line
        # Fields 1 and 4 hold the two player names of the game.
        for name in (fields[1], fields[4]):
            if name not in playerid:
                playerid[name] = len(playerid)

nplayers = len(playerid)

# Inverse lookup: playername[id] -> name.
playername = [''] * nplayers
for name, pid in playerid.items():
    playername[pid] = name

In [None]:
nlevels = 15   # number of discrete skill levels per player
scale = 0.3    # steepness of the logistic win-probability curve


def _ordered_game(row):
    """Parse one CSV row into ``(P1, P2, win)`` with ``P1 < P2``.

    Fields 1 and 4 are the two player names; field 2 equal to '[winner]'
    marks the first-listed player as the winner.  ``win`` is +1 when the
    player with the smaller id won and -1 otherwise.

    Returns None for rows that do not have exactly 10 fields (parse error
    or blank line) and for games where both names map to the same id.
    Raises KeyError if a player name is missing from ``playerid``.
    """
    fields = row.split(',')
    if len(fields) != 10:
        return None
    P1, P2 = playerid[fields[1]], playerid[fields[4]]
    if P1 == P2:
        return None
    win = 1 if fields[2] == '[winner]' else -1
    if P1 > P2:
        # Keep the pair in id order and flip the outcome to match.
        P1, P2, win = P2, P1, -win
    return P1, P2, win


# One discrete skill variable per player.
X = [gm.Var(i, nlevels) for i in range(nplayers)]

# Pwin[i,j] = 1/(1+exp(-scale*(i-j))): win probability of skill level i
# against skill level j, built for all level pairs at once by broadcasting.
skill = np.arange(nlevels)
Pwin = 1.0 / (1.0 + np.exp(-scale * (skill[:, None] - skill[None, :])))

# Uniform single-variable factor for every player.
factors = [ gm.Factor([X[i]], 1.0/nlevels) for i in range(nplayers) ]
pKeep = 0.15  # keep a fraction of the 193k games

# Add one pairwise factor per retained training game.  A random number is
# drawn only for usable rows, so the sampling stream matches row order.
for row in lines:
    game = _ordered_game(row)
    if game is None: continue
    P1, P2, win = game
    if np.random.rand() < pKeep:
        factors.append(gm.Factor([X[P1],X[P2]], Pwin if win>0 else 1-Pwin) )

model = gm.GraphModel(factors)
model.makeMinimal()

lnZ, bel = NMF(model, maxIter=5)

# Score the beliefs on the validation games.
totalGames = 0
correctlyPredictedGames = 0

for row in lines2:
    game = _ordered_game(row)
    if game is None: continue
    P1, P2, win = game
    totalGames += 1
    # Sum of bel[P1] * bel[P2] * Pwin over all level pairs: the predicted
    # probability that P1 beats P2.
    predictedWin = (bel[P1]*bel[P2]*gm.Factor([X[P1],X[P2]],Pwin)).table.sum()
    # A prediction of exactly 0.5 is counted as incorrect.
    if (predictedWin > 0.5 and win == 1) or (predictedWin < 0.5 and win == -1):
        correctlyPredictedGames += 1

# Fraction of validation games predicted correctly; nan when there were no
# usable validation games (instead of a ZeroDivisionError).
print(correctlyPredictedGames / totalGames if totalGames else float('nan'))

In [None]:
# Expected skill level of each player under its belief: sum_k k * bel_i(k).
# The list is filled here so the sort/print below has data to show; before,
# the append was commented out and the final print always showed [].
skillValues = np.arange(nlevels)
levels = []
for i in range(nplayers):
    expectedLevel = bel[i].table.dot(skillValues)
    levels.append(expectedLevel)
    print(expectedLevel)

# All expected levels, strongest first.
levels.sort(reverse=True)
print(levels)

In [None]:
# Sweep over the fraction of training games kept.  For each fraction this
# fits the model, measures the fraction of validation games predicted
# correctly, and records every player's expected skill level.


def _ordered_game(row):
    """Parse one CSV row into ``(P1, P2, win)`` with ``P1 < P2``.

    Fields 1 and 4 are the two player names; field 2 equal to '[winner]'
    marks the first-listed player as the winner.  ``win`` is +1 when the
    player with the smaller id won and -1 otherwise.

    Returns None for rows that do not have exactly 10 fields (parse error
    or blank line) and for games where both names map to the same id.
    Raises KeyError if a player name is missing from ``playerid``.
    """
    fields = row.split(',')
    if len(fields) != 10:
        return None
    P1, P2 = playerid[fields[1]], playerid[fields[4]]
    if P1 == P2:
        return None
    win = 1 if fields[2] == '[winner]' else -1
    if P1 > P2:
        # Keep the pair in id order and flip the outcome to match.
        P1, P2, win = P2, P1, -win
    return P1, P2, win


pKeepList = [0.01, 0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.20]

# Everything below up to the loop does not depend on the kept fraction, so
# it is built once instead of once per sweep setting.
nlevels = 15
scale = 0.3

X = [gm.Var(i, nlevels) for i in range(nplayers)]

# Pwin[i,j] = 1/(1+exp(-scale*(i-j))): win probability of skill level i
# against skill level j.
skill = np.arange(nlevels)
Pwin = 1.0 / (1.0 + np.exp(-scale * (skill[:, None] - skill[None, :])))

# Reset the results file to an empty list before the sweep starts.
dataList = []
with open('data.pickle', 'wb') as handle:
    pickle.dump(dataList, handle)

for n in pKeepList:

    # Uniform single-variable factor per player, plus one pairwise factor
    # for each training game retained with probability n.
    factors = [ gm.Factor([X[i]], 1.0/nlevels) for i in range(nplayers) ]

    for row in lines:
        game = _ordered_game(row)
        if game is None: continue
        P1, P2, win = game
        if np.random.rand() < n:
            factors.append(gm.Factor([X[P1],X[P2]], Pwin if win>0 else 1-Pwin) )

    model = gm.GraphModel(factors)
    model.makeMinimal()

    lnZ, bel = NMF(model, maxIter=5, verbose=True)

    totalGames = 0
    correctlyPredictedGames = 0

    for row in lines2:
        game = _ordered_game(row)
        if game is None: continue
        P1, P2, win = game
        totalGames += 1
        # Predicted probability that P1 beats P2 under the current beliefs.
        predictedWin = (bel[P1]*bel[P2]*gm.Factor([X[P1],X[P2]],Pwin)).table.sum()
        # A prediction of exactly 0.5 is counted as incorrect.
        if (predictedWin > 0.5 and win == 1) or (predictedWin < 0.5 and win == -1):
            correctlyPredictedGames += 1

    # nan rather than ZeroDivisionError when no validation game was usable.
    pCorrect = correctlyPredictedGames / totalGames if totalGames else float('nan')
    print(pCorrect)

    # Expected skill level per player name.
    levels = {playername[i]: bel[i].table.dot(skill) for i in range(nplayers)}

    # Rewrite the results file after every setting, so the file always
    # holds the results of all settings finished so far.
    dataList.append({'pKeep': n, 'pPredicted': pCorrect, 'SkillLevel': levels})
    with open('data.pickle', 'wb') as handle:
        pickle.dump(dataList, handle)

In [None]:
# Read back the sweep results: a list of dicts, one per kept fraction.
with open('data.pickle', 'rb') as handle:
    dataList = pickle.load(handle)

# Pull out the x (fraction kept) and y (fraction predicted correctly) series.
pKeep = [record['pKeep'] for record in dataList]
pPredicted = [record['pPredicted'] for record in dataList]

# Red circle markers, one per sweep setting.
plt.plot(pKeep, pPredicted, 'ro')
plt.show()