# 種族値からポケモン対面の勝敗を予測する

In [0]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

## データ読み込み
### Kaggle「pokemon-challenge」データセット(pokemon.csv / combats.csv)
https://www.kaggle.com/terminus7/pokemon-challenge

In [0]:
# Load the two CSV files used by this notebook: the per-Pokemon table
# (types and base stats) and the table of recorded battles.
pokemon, combats = (
    pd.read_csv(csv_name) for csv_name in ('pokemon.csv', 'combats.csv')
)

## 前処理

各ポケモンのタイプ、種族値の結合

In [0]:
# For every battle, fetch the Pokemon row of the first combatant, in battle order.
# The "- 1" maps the IDs stored in combats onto pokemon's default 0-based row
# labels (assumes IDs run 1..N in file order — TODO confirm against pokemon['#']).
# reset_index() renumbers the rows 0..n-1 and keeps the old labels in an 'index' column.
df_first = pokemon.loc[combats['First_pokemon'] - 1].reset_index()

# Same lookup for the second combatant of every battle.
df_second = pokemon.loc[combats['Second_pokemon'] - 1].reset_index()

In [0]:
# Attach both combatants' types and base stats to the battle table.
# The same source columns are used for each side; only the prefix differs,
# so the column list is written once instead of two hand-copied rename maps.
stat_columns = ['Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']

# Start from the three original battle columns so that re-running this cell
# rebuilds the table instead of appending a second, duplicate set of
# First_*/Second_* columns. The joins align on the 0..n-1 row index that
# combats, df_first and df_second share.
combats = (
    combats[['First_pokemon', 'Second_pokemon', 'Winner']]
    .join(df_first[stat_columns].add_prefix('First_'))
    .join(df_second[stat_columns].add_prefix('Second_'))
)

In [5]:
# Turn the winner ID into a binary label: 0 when the first Pokemon won,
# 1 when the second Pokemon won.
# NOTE: not idempotent — once 'Winner' holds 0/1, running this cell again
# compares those labels against Pokemon IDs and produces different values.
second_won = combats['Winner'].eq(combats['Second_pokemon'])
combats['Winner'] = second_won.astype(int)
combats

Unnamed: 0,First_pokemon,Second_pokemon,Winner,First_Type 1,First_Type 2,First_HP,First_Attack,First_Defense,First_Sp. Atk,First_Sp. Def,First_Speed,Second_Type 1,Second_Type 2,Second_HP,Second_Attack,Second_Defense,Second_Sp. Atk,Second_Sp. Def,Second_Speed
0,266,298,1,Rock,Ground,50,64,50,45,50,41,Grass,Dark,70,70,40,60,40,60
1,702,701,1,Grass,Fighting,91,90,72,90,129,108,Rock,Fighting,91,129,90,72,90,108
2,191,668,1,Fairy,Flying,55,40,85,80,105,40,Psychic,,75,75,75,125,95,40
3,237,683,1,Fire,,40,40,40,70,40,20,Dragon,,77,120,90,60,90,48
4,151,231,0,Rock,Water,70,60,125,115,70,55,Bug,Rock,20,10,230,10,230,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,707,126,0,Dragon,Fire,100,120,100,150,120,90,Water,,30,40,70,70,25,60
49996,589,664,0,Ground,,60,85,40,30,45,68,Electric,,35,55,40,45,40,60
49997,303,368,1,Water,Flying,60,50,100,85,70,65,Normal,,73,115,60,60,60,90
49998,109,89,0,Electric,,40,30,50,55,55,100,Electric,Steel,25,35,70,95,55,45


### タイプを配列に変換

例えばノーマル単タイプだと[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]、

ゴーストフェアリーだと[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]


In [0]:
# Fixed order of the 18 types: position i of every encoded vector refers to
# _TYPE_ORDER[i].
_TYPE_ORDER = ('Normal', 'Fighting', 'Poison', 'Ground', 'Flying', 'Bug', 'Rock',
               'Ghost', 'Steel', 'Fire', 'Water', 'Electric', 'Grass', 'Ice',
               'Psychic', 'Dragon', 'Dark', 'Fairy')
_TYPE_INDEX = {type_name: position for position, type_name in enumerate(_TYPE_ORDER)}


def type_to_list(series1, series2):
    """Encode (primary, secondary) type pairs as 18-element 0/1 vectors.

    Parameters
    ----------
    series1 : iterable of str
        Primary type name of each Pokemon.
    series2 : iterable
        Secondary type name of each Pokemon. Any non-string value (for
        example NaN for an empty CSV cell) means "no secondary type".

    Returns
    -------
    list of list of int
        One vector per input pair, in input order. Each vector has a 1 at the
        position of every type the Pokemon has and 0 elsewhere.

    Raises
    ------
    ValueError
        If a type name is not one of the 18 known types. Raising here keeps
        every vector the same length; silently adding a new slot would
        produce ragged rows.
    """
    tp_list = []
    for tp1, tp2 in zip(series1, series2):
        encoded = [0] * len(_TYPE_ORDER)
        # isinstance (rather than an exact type comparison) also accepts str
        # subclasses such as numpy.str_, which is what iterating a NumPy
        # string array yields.
        present = [tp1, tp2] if isinstance(tp2, str) else [tp1]
        for type_name in present:
            if type_name not in _TYPE_INDEX:
                raise ValueError('unknown Pokemon type: {!r}'.format(type_name))
            encoded[_TYPE_INDEX[type_name]] = 1
        tp_list.append(encoded)
    return tp_list

### 特徴量、ラベルの作成

In [0]:
def get_features(df):
    """Build the feature matrix for a frame of battles.

    Each row holds the twelve base-stat columns (six for the first Pokemon,
    then six for the second), followed by the type encoding of the first
    Pokemon and then of the second, as returned by ``type_to_list``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the ``First_*`` / ``Second_*`` stat and type columns
        referenced below.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per row of ``df``.
    """
    # Continuous features.
    features_ana = ['First_HP', 'First_Attack', 'First_Defense', 'First_Sp. Atk', 'First_Sp. Def', 'First_Speed'
                    , 'Second_HP', 'Second_Attack', 'Second_Defense', 'Second_Sp. Atk', 'Second_Sp. Def', 'Second_Speed']
    # Categorical features. The types are read from the frame that was passed
    # in (not from a notebook-level global) so they stay aligned row-for-row
    # with the stats taken from the same frame.
    f_type = type_to_list(df['First_Type 1'], df['First_Type 2'])
    s_type = type_to_list(df['Second_Type 1'], df['Second_Type 2'])
    X = np.concatenate(
        [df[features_ana].values, f_type, s_type],
        axis=1)
    return X

def get_label(df):
    """Return the ``Winner`` column of ``df`` to use as the training label."""
    return df.Winner

In [8]:
# Build the feature matrix and the label vector from the battle table,
# then display the first feature row as a sanity check.
X, y = get_features(combats), get_label(combats)
X[0]

array([50, 64, 50, 45, 50, 41, 70, 70, 40, 60, 40, 60,  0,  0,  0,  1,  0,
        0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  1,  0],
      dtype=int64)

### 訓練用データと評価用データの分割
ハイパーパラメータを変更する場合は、更に訓練用データから検証用データを分割すること

In [0]:
# Hold out 30% of the battles for evaluation; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

## 学習
アルゴリズムはKNNを使用する

In [0]:
from sklearn import neighbors

### ハイパーパラメータ設定





In [0]:
# Number of neighbours passed to the KNN classifier
n_neighbors = 3

### モデル設定

In [0]:
# Create the KNN classifier; every other setting keeps its library default.
clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)

In [13]:
# Fit the classifier on the training split.
clf.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

## 評価
### 訓練データに対する正答率

In [14]:
# Accuracy on the training split, for comparison with the test-set accuracy.
clf.score(X_train, y_train)

0.9320571428571428

### 評価データに対する正答率

In [15]:
# Accuracy on the held-out test split.
clf.score(X_test, y_test)

0.8672666666666666

## 勝敗予測

In [16]:
# Try a hand-built matchup: Dragapult (first) versus Tyranitar (second).
# The vector follows the training layout: first stats, second stats,
# first type flags, second type flags.
first_stats = [88, 120, 75, 100, 75, 142]
second_stats = [100, 134, 110, 95, 100, 61]
# Type flags in the order used by type_to_list: Ghost (7) and Dragon (15).
first_types = [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
# Rock (6) and Dark (16).
second_types = [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
combat = first_stats + second_stats + first_types + second_types
clf.predict([combat])

array([0], dtype=int64)