In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from os import listdir
from os.path import join

import matplotlib.style as style
style.use('fivethirtyeight')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### Goal: create data set for Bayesian modeling of win probability based on team kills, deaths, and assists

In [28]:
team_data = pd.read_csv('../data/team_matches_master.csv')
print(team_data.shape)
team_data.head()

(128680, 9)


Unnamed: 0,map_id,map,date,team_id,team_name,team_score,team_kills,team_deaths,team_assists
0,12838,,1347562800000,4411,NiP,16,91.0,46.0,0.0
1,12839,,1347562800000,4411,NiP,16,83.0,34.0,0.0
2,12840,,1347562800000,4444,Blight,16,81.0,37.0,0.0
3,12841,,1347562800000,4444,Blight,16,97.0,58.0,0.0
4,12848,,1348401600000,4411,NiP,16,102.0,86.0,0.0


In [41]:
# Drop rows where team assists is zero
team_data = team_data[team_data['team_assists'] != 0]
print(team_data.shape)

# Drop rows where NaN in team_kills
team_data = team_data[~team_data['team_kills'].isna()]
print(team_data.shape)

# Collect maps where match went to overtime and drop
drop_maps = team_data.loc[team_data['team_score'] > 16, 'map_id'].values
team_data = team_data[~team_data['map_id'].isin(drop_maps)]
print(team_data.shape)

# Create dependent feature
team_data['won'] = 0
team_data.loc[team_data['team_score'] == 16, 'won'] = 1
team_data = team_data.reset_index()
team_data.head()

(117442, 10)
(117442, 10)
(117442, 10)


Unnamed: 0,index,map_id,map,date,team_id,team_name,team_score,team_kills,team_deaths,team_assists,won
0,233,13417,,1353348000000,4411,NiP,16,82.0,42.0,19.0,1
1,234,13419,,1353438329000,4411,NiP,16,89.0,65.0,24.0,1
2,235,13420,,1353574800000,4411,NiP,16,88.0,55.0,16.0,1
3,236,13421,,1353574800000,4465,mousesports,16,89.0,60.0,18.0,1
4,237,13422,,1353582000000,4411,NiP,16,98.0,104.0,21.0,1


In [108]:
# Write out dataset for bayesian analysis
bayes_set = team_data[['team_kills', 'team_assists', 'won']].sample(1000, random_state=42)

In [109]:
bayes_set['team_kills'].values

array([ 96.,  31.,  50.,  96.,  82.,  92., 100.,  82.,  96.,  87., 112.,
        88.,  98.,  76.,  81.,  82., 101.,  94., 106., 106.,  85.,  99.,
       102.,  55., 100.,  81.,  59.,  86.,  81.,  84.,  83.,  97.,  94.,
       105.,  87.,  78.,  37., 104.,  92.,  80.,  83., 107.,  39.,  83.,
        98.,  88., 100.,  88.,  45.,  90.,  74.,  89.,  73., 101.,  76.,
        89.,  49.,  72.,  89., 106., 104.,  92.,  92.,  78.,  94., 110.,
        99.,  98., 105.,  99.,  81., 101., 109.,  46.,  93., 100.,  83.,
        90.,  54.,  98.,  79., 101., 103.,  68.,  96.,  82.,  54.,  85.,
        77.,  72.,  33., 106.,  90.,  91.,  84.,  87., 102.,  85., 104.,
       102.,  87.,  99.,  97., 102.,  81.,  58., 108.,  65., 108., 102.,
       101.,  89.,  64.,  89.,  42.,  95.,  70.,  97.,  83.,  82.,  99.,
        49.,  84.,  66.,  82., 108.,  34.,  96.,  90.,  57., 112.,  73.,
       101.,  94.,  93., 109.,  98.,  81.,  86.,  87., 112.,  95.,  93.,
       108.,  84., 100.,  59.,  37.,  70.,  69.,  9

In [110]:
bayes_set['team_assists'].values

array([20.,  4.,  6., 18., 26., 23., 26., 26., 16., 24., 22., 15., 20.,
       14., 16., 16., 13., 13., 26., 20., 18., 18., 18.,  6., 18.,  6.,
       12., 12., 21., 21., 18., 12., 19., 24., 22., 16.,  5., 32., 13.,
       16., 14., 20., 10., 11., 24., 16., 17.,  9., 11., 16., 15., 21.,
       18., 17., 13., 21., 10., 15., 22., 21., 25., 14., 17.,  6., 14.,
       12., 17., 13., 20., 17.,  9., 18., 20.,  3., 19., 14., 12., 20.,
       13., 13., 10., 23., 18., 15., 18., 17.,  7., 20., 13., 19.,  7.,
       25., 21., 19., 13., 16., 29., 11., 21., 19., 17., 20., 24., 14.,
       20., 14., 20., 15., 17., 21., 20., 17., 18., 22.,  6., 15., 12.,
       18., 18., 16., 16.,  5., 12.,  9., 15., 28.,  8., 13., 17.,  8.,
       27., 18., 14., 18., 19., 22., 20., 18., 13., 23., 21., 17., 17.,
       27., 18., 17., 10.,  6., 16., 13., 27., 10., 18., 17., 11., 21.,
       17., 17., 22.,  7., 14., 18., 15.,  8.,  8., 11., 17., 17., 21.,
       19., 10., 12., 23., 29., 17.,  7., 16., 14., 14., 15., 12

In [111]:
bayes_set['won'].values

array([1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
       0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1,

In [113]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

train_x = team_data[['team_kills', 'team_assists']].sample(10000, random_state=42)
train_y = team_data['won'].sample(10000, random_state=42)

clf = RandomForestClassifier(random_state=42)
clf2 = LogisticRegression()
clf.fit(train_x, train_y)
clf2.fit(train_x, train_y)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [125]:
test = [
    [101, 19],
    [105, 31],
    [89, 27]
]

In [126]:
clf.predict_proba(test)

array([[0.29157437, 0.70842563],
       [0.27511905, 0.72488095],
       [0.        , 1.        ]])

In [127]:
clf2.predict_proba(test)

array([[0.16119896, 0.83880104],
       [0.1354065 , 0.8645935 ],
       [0.48362853, 0.51637147]])

In [119]:
print('Logistic Regression Params:')
print(clf2.intercept_)
print(clf2.coef_)

Logistic Regression Params:
[-9.77814406]
[[ 0.11729006 -0.02204302]]


In [121]:
team_data[(team_data['team_kills'] > team_data['team_deaths']) & (team_data['won'] == 0)]

Unnamed: 0,index,map_id,map,date,team_id,team_name,team_score,team_kills,team_deaths,team_assists,won
9,242,13427,,1353594600000,4613,gosu,15,101.0,99.0,19.0,0
25,258,13443,,1353682500000,4429,Dynamic,15,105.0,93.0,31.0,0
26,259,13444,,1353682500000,4413,Lemondogs,15,107.0,96.0,30.0,0
56,289,13474,,1354212900000,4548,CPH Wolves,15,105.0,104.0,26.0,0
61,294,13479,,1354356000000,4548,CPH Wolves,13,107.0,103.0,29.0,0
106,339,13525,,1355421600000,4490,n!faculty,14,101.0,100.0,25.0,0
203,442,13643,,1358877600000,4547,Anexis,15,111.0,99.0,26.0,0
238,477,13679,,1359914400000,4494,mousesports,12,97.0,96.0,23.0,0
279,520,13725,,1360782900000,4683,3DMAX,14,99.0,97.0,23.0,0
345,588,13803,,1361390400000,4467,VeryGames,12,99.0,97.0,21.0,0
