# Splatoonのブキの強さ推定

Date: 2017-03-10  
Author: hagino3000

Bradley-Terryモデルでブキの強さを推定する

## 素性

- チーム間の各ブキの人数の差
- エリアバトルに限定する


In [4]:
import tqdm

In [5]:
datapath = '/Users/tnishibayashi/dev/workspace/ika-mining/data/battles_20160517.tsv'

In [6]:
df = pd.read_csv(datapath, sep='\t', index_col='id', usecols=[
        'id', 'is_win', 'rule_name',
        'friend1_weapon',
        'friend2_weapon',
        'friend3_weapon',
        'friend4_weapon',
        'enemy1_weapon',
        'enemy2_weapon',
        'enemy3_weapon',
        'enemy4_weapon',
    ])

In [18]:
df.head()

Unnamed: 0_level_0,rule_name,is_win,friend1_weapon,friend2_weapon,friend3_weapon,friend4_weapon,enemy1_weapon,enemy2_weapon,enemy3_weapon,enemy4_weapon
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
500000,area,0,wakaba,wakaba,52gal,dynamo,barrelspinner_deco,dynamo,rapid,carbon
500002,area,1,sshooter_collabo,96gal_deco,barrelspinner,dynamo,splatspinner_collabo,wakaba,splatcharger_wakame,wakaba
500003,area,1,wakaba,hissen,hydra_custom,96gal_deco,octoshooter_replica,jetsweeper_custom,bucketslosher_deco,splatscope
500007,area,1,sshooter_collabo,wakaba,splatscope,52gal,splatscope,sshooter_collabo,pablo,liter3k_scope
500008,area,0,wakaba,52gal,sshooter_collabo,sshooter_collabo,96gal,octoshooter_replica,splatscope,octoshooter_replica


In [17]:
df.groupby('rule_name').count().is_win

rule_name
area        144084
hoko         63282
nawabari    119547
yagura       73984
Name: is_win, dtype: int64

In [21]:
# エリアバトルに限定する
df = df.query('rule_name == "area"')

In [22]:
df.describe()

Unnamed: 0,is_win
count,144084.0
mean,0.554593
std,0.497012
min,0.0
25%,0.0
50%,1.0
75%,1.0
max,1.0


## 前処理

対戦毎に、各ブキ使用人数をカウント

In [27]:
buki_names = df.friend2_weapon.unique()

In [28]:
buki_names

array(['wakaba', '96gal_deco', 'hissen', '52gal', 'longblaster_custom',
       'liter3k_scope', 'barrelspinner', 'sshooter_collabo', 'prime',
       '96gal', 'splatscope_wakame', 'hotblaster_custom', 'rapid',
       'liter3k', 'bamboo14mk1', 'splatcharger', 'dualsweeper',
       'barrelspinner_deco', 'octoshooter_replica', 'nova_neo', 'nzap85',
       'splatscope', 'bold_neo', 'nzap89', 'splatroller_collabo',
       'screwslosher', 'splatspinner_collabo', 'jetsweeper_custom',
       'promodeler_rg', 'dynamo', 'splatcharger_wakame',
       'liter3k_scope_custom', 'carbon', '52gal_deco', 'squiclean_a',
       'rapid_deco', 'dualsweeper_custom', 'jetsweeper', 'carbon_deco',
       'dynamo_tesla', 'bucketslosher', 'rapid_elite_deco', 'sharp',
       'nova', 'sharp_neo', 'promodeler_mg', 'bucketslosher_deco',
       'splatroller', 'hokusai', 'hydra_custom', 'heroroller_replica',
       'l3reelgun_d', 'hotblaster', 'prime_collabo', 'l3reelgun',
       'screwslosher_neo', 'hissen_hue', 'heros

In [18]:
def count_buki(team, buki):
    def fn(df):
        count = 0
        for i in range(1, 5):
            count += 1 if df['{0}{1}_weapon'.format(team, i)] == buki else 0
        return count
    return fn

In [19]:
for buki in tqdm.tqdm(buki_names):
    df['friend_{0}_num'.format(buki)] = df.apply(count_buki('friend', buki), axis=1)
    df['enemy_{0}_num'.format(buki)] = df.apply(count_buki('enemy', buki), axis=1)
    df['{0}_diff'.format(buki)] = df['friend_{0}_num'.format(buki)] - df['enemy_{0}_num'.format(buki)]

100%|██████████| 82/82 [29:57<00:00, 23.01s/it]


In [37]:
#df.to_pickle('./df_buki_area_only.pickle')

In [24]:
df = pd.read_pickle('./df_buki_area_only.pickle')

In [29]:
y_train = df.is_win
X_train = df[['{0}_diff'.format(buki) for buki in buki_names]]

In [30]:
X_train.head()

Unnamed: 0_level_0,wakaba_diff,96gal_deco_diff,hissen_diff,52gal_diff,longblaster_custom_diff,liter3k_scope_diff,barrelspinner_diff,sshooter_collabo_diff,prime_diff,96gal_diff,...,h3reelgun_d_diff,splatspinner_diff,sshooter_wasabi_diff,prime_berry_diff,squiclean_g_diff,splatspinner_repair_diff,bucketslosher_soda_diff,dynamo_burned_diff,bamboo14mk3_diff,pablo_permanent_diff
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
500000,2,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
500002,-2,1,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
500003,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
500007,1,0,0,1,0,-1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
500008,1,0,0,1,0,0,0,2,0,-1,...,0,0,0,0,0,0,0,0,0,0


## 学習

In [34]:
from sklearn.cross_validation import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LogisticRegression

ためしに適当なパラメータで

In [35]:
def cross_val(X, y, K, random_state=0, clf=None):
    cv = KFold(len(y), K, shuffle=True, random_state=random_state)
    scores = cross_val_score(clf, X, y, cv=cv)
    print('Mean Score: {0:.3f} (+/-{1:.3f})'.format(scores.mean(), scores.std()*2))
    return scores

In [36]:
print("Logistic Regression")
# 切片項ナシ
cross_val(X_train, y_train, 10, clf=LogisticRegression(penalty='l2', fit_intercept=False))

Logistic Regression
Mean Score: 0.569 (+/-0.009)


array([ 0.57290582,  0.56249566,  0.56589631,  0.57304462,  0.57003054,
        0.57232093,  0.56454747,  0.5742643 ,  0.56343698,  0.57322321])

In [37]:
print("Logistic Regression")
# 切片項アリ
cross_val(X_train, y_train, 10, clf=LogisticRegression(penalty='l2', fit_intercept=True))

Logistic Regression
Mean Score: 0.580 (+/-0.009)


array([ 0.5841488 ,  0.5716566 ,  0.57901312,  0.58623083,  0.58189895,
        0.58453637,  0.57537479,  0.58058023,  0.57690172,  0.58460577])

In [38]:
def plot_confusion_matrix(cm):
    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    ax.set_title('Confusion Matrix')
    fig.colorbar(im)

    target_names = ['lose', 'win']

    tick_marks = np.arange(len(target_names))
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(target_names, rotation=45)
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(target_names)
    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')
    fig.tight_layout()

In [41]:
def calc_classifier(X_train, y_train):
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, train_size=0.90, random_state=13)
    print('Num of Training Samples: {}'.format(len(X_train)))
    print('Num of Validation Samples: {}'.format(len(X_val)))
    clf = LogisticRegression(penalty='l2',  fit_intercept=True).fit(X_train, y_train)
    y_train_pred = clf.predict(X_train)
    y_val_pred = clf.predict(X_val)
    print('Accuracy on Training Set: {:.3f}'.format(accuracy_score(y_train, y_train_pred)))
    print('Accuracy on Validation Set: {:.3f}'.format(accuracy_score(y_val, y_val_pred)))
    return clf

In [42]:
clf = calc_classifier(X_train, y_train)

Num of Training Samples: 129675
Num of Validation Samples: 14409
Accuracy on Training Set: 0.580
Accuracy on Validation Set: 0.588


In [43]:
clf.intercept_

array([ 0.20306183])

In [44]:
# ブキ毎のweight (= 強さ)
clf.coef_

array([[ 0.18018242,  0.25142752,  0.00592982,  0.03783039,  0.24603016,
         0.05508258,  0.11093777,  0.14554919,  0.01314596,  0.09819135,
         0.10638543,  0.20948723,  0.17261688, -0.2941865 , -0.05508368,
        -0.10772989,  0.00418737,  0.23292959,  0.17393033,  0.16643551,
        -0.06187636,  0.05629517, -0.02721749,  0.0023595 ,  0.09694812,
         0.12409824,  0.31845461,  0.08823171, -0.0568244 ,  0.39630291,
        -0.00635362,  0.12740184, -0.03373788, -0.02041767, -0.15994799,
         0.18436529, -0.00362864, -0.14791132, -0.07706324,  0.18975996,
         0.0206101 , -0.2627034 ,  0.07078551, -0.00653488,  0.01478794,
        -0.1378751 ,  0.09493076, -0.12964205,  0.05042128,  0.12509522,
        -0.17079448,  0.24331676, -0.24836649,  0.03662587, -0.08372524,
         0.0293295 , -0.13055083, -0.00711717, -0.06226956, -0.07387833,
        -0.10098862, -0.14831846, -0.16860983, -0.32409888, -0.12319601,
        -0.03680876, -0.33043251, -0.0762858 , -0.1

In [45]:
buki_result_df = pd.DataFrame({'buki':buki_names, 'weight':clf.coef_[0]})

In [47]:
# 弱いブキ
buki_result_df.sort_values(by='weight').head()

Unnamed: 0,buki,weight
66,longblaster,-0.330433
63,hydra,-0.324099
71,momiji,-0.314735
13,liter3k,-0.294187
41,rapid_elite_deco,-0.262703


In [48]:
# 強いブキ
buki_result_df.sort_values(by='weight').tail()

Unnamed: 0,buki,weight
51,l3reelgun_d,0.243317
4,longblaster_custom,0.24603
1,96gal_deco,0.251428
26,splatspinner_collabo,0.318455
29,dynamo,0.396303
