### <a href="https://zh.wikipedia.org/wiki/%E6%9C%80%E8%BF%91%E9%84%B0%E5%B1%85%E6%B3%95" style="text-decoration:none ; font-family:Microsoft JhengHei;">KNN (K-Nearest Neighbors) 最近鄰居法</a>

* <font face = 'Microsoft JhengHei'>屬於機器學習中的監督式學習(Supervised learning)</font>
* <font face = 'Microsoft JhengHei'>目的：利用一群已經標記好類別的資料，對尚未分類的資料進行分類</font>

<td bgcolor = #FFDC00><font face = 'Microsoft JhengHei' size = 3.5><strong>本次應用：透過KNN演算法來判斷遊戲角色設定是否公平</strong></font></td>

In [1]:
import os
import pickle as pk
import pandas as pd
import numpy as np

In [2]:
# Directory that holds the pickled hero-stat files loaded below.
# Edit this path to point at your data; '.' keeps the current working directory.
# (The original call `os.chdir()` had no argument and raised TypeError.)
DATA_DIR = '.'
os.chdir(DATA_DIR)

In [3]:
# Load the full hero-stat table from its pickle file.
# WARNING: pickle.load can execute arbitrary code -- only open files you trust.
# The `with` block closes the file handle as soon as loading finishes.
with open('傳說對決全英雄數值.dat', 'rb') as stats_file:
    data_all = pk.load(stats_file)
data_all.head()

Unnamed: 0,角色,基礎生命,15級生命,基礎回血 5/S,15級回血,基礎魔力,15級魔力,基礎回魔 5/S,15級回魔,基礎物攻,15級物攻,基礎物防,15級物防,基礎魔防,15級魔防,15級攻速,跑速,射程
牛魔王,坦克,3558,9328,59,128,420,1708,15,36,163,286,132,514,50,169,14,370,200.0
薩尼,坦克,3535,8857,58,121,420,1694,15,36,157,306,123,494,50,169,14,380,200.0
美娜,坦克,3510,8638,58,119,420,1666,14,34,158,288,120,430,50,169,14,380,200.0
歐米茄,坦克,3364,8581,56,118,420,1694,15,36,178,295,139,459,50,169,14,380,200.0
朗博,輔助,3537,8476,58,117,470,1926,17,42,156,273,109,394,50,169,14,380,200.0


In [4]:
# Load the trimmed hero-stat table and drop the '跑速' (movement speed) column.
# WARNING: pickle.load can execute arbitrary code -- only open files you trust.
# The `with` block closes the file handle as soon as loading finishes.
with open('傳說對決全英雄數值整理.dat', 'rb') as stats_file:
    data_1 = pk.load(stats_file).drop('跑速', axis=1)
data_1.head()

Unnamed: 0,角色,基礎生命,基礎回血 5/S,基礎魔力,基礎回魔 5/S,基礎物攻,基礎物防,基礎魔防
牛魔王,坦克,3558,59,420,15,163,132,50
薩尼,坦克,3535,58,420,15,157,123,50
美娜,坦克,3510,58,420,14,158,120,50
歐米茄,坦克,3364,56,420,15,178,139,50
朗博,輔助,3537,58,470,17,156,109,50


In [5]:
from sklearn.model_selection import train_test_split

# Training data: column 0 ('角色') is the class label, every other column is a feature.
y = data_1.iloc[:, 0]   # labels
X = data_1.iloc[:, 1:]  # features

# 30% of the rows are held out for testing, 70% are used for training.
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

In [6]:
# Classify with KNN
from sklearn.neighbors import KNeighborsClassifier

# Library defaults written out: 5 neighbours, each with an equal (uniform) vote.
knn = KNeighborsClassifier(n_neighbors=5, weights='uniform')
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform')

In [7]:
# Mean accuracy of the baseline model on the held-out rows.
score_1 = knn.score(X_test, y_test)
# Predictions carry y_test's index so they can be lined up with the true labels.
y_pred = pd.Series(data=knn.predict(X_test), index=y_test.index, name='predict')
print('Score(精準度)：{0:.2f}'.format(score_1))

Score(精準度)：0.29


In [8]:
# True label next to the predicted one; display only the rows where they disagree.
predict_1 = pd.concat([y_test, y_pred], axis='columns')
predict_1.loc[predict_1['角色'].ne(predict_1['predict'])]

Unnamed: 0,角色,predict
超人,戰士,刺客
洛克,戰士,射手
盧米亞,法師,射手
娜塔亞,法師,戰士
安奈特,輔助,射手
史蘭茲,射手,戰士
摩恩,射手,法師
莫拉,刺客,射手
佩娜,輔助,射手
瑟斐斯,戰士,射手


### <font face = 'Microsoft JhengHei'>加入距離權重</font>

In [9]:
# Training data: same label / feature columns and same seeded 70/30 split as before.
y = data_1.iloc[:, 0]
X = data_1.iloc[:, 1:]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Add distance weighting: closer neighbours get a larger say in the vote.
knn = KNeighborsClassifier(weights='distance').fit(X_train, y_train)

score_weighted = knn.score(X_test, y_test)
y_pred = pd.Series(data=knn.predict(X_test), index=y_test.index, name='predict')
print('Score(精準度)：{0:.2f}'.format(score_weighted))

Score(精準度)：0.38


In [10]:
# True vs. predicted label for the distance-weighted model; show only the mismatches.
predict_weighted = pd.concat([y_test, y_pred], axis='columns')
predict_weighted.loc[predict_weighted['角色'].ne(predict_weighted['predict'])]

Unnamed: 0,角色,predict
洛克,戰士,射手
娜塔亞,法師,戰士
安奈特,輔助,射手
史蘭茲,射手,戰士
摩恩,射手,法師
莫拉,刺客,射手
佩娜,輔助,射手
瑟斐斯,戰士,射手
歐米茄,坦克,戰士
塔拉,坦克,戰士


### <font face = 'Microsoft JhengHei'>加入15等的特徵</font>

In [11]:
# Keep the base and level-15 stats; leave out '跑速' (movement speed) and '射程' (range).
data_2 = data_all.drop(columns=['跑速', '射程'])
data_2.head()

Unnamed: 0,角色,基礎生命,15級生命,基礎回血 5/S,15級回血,基礎魔力,15級魔力,基礎回魔 5/S,15級回魔,基礎物攻,15級物攻,基礎物防,15級物防,基礎魔防,15級魔防,15級攻速
牛魔王,坦克,3558,9328,59,128,420,1708,15,36,163,286,132,514,50,169,14
薩尼,坦克,3535,8857,58,121,420,1694,15,36,157,306,123,494,50,169,14
美娜,坦克,3510,8638,58,119,420,1666,14,34,158,288,120,430,50,169,14
歐米茄,坦克,3364,8581,56,118,420,1694,15,36,178,295,139,459,50,169,14
朗博,輔助,3537,8476,58,117,470,1926,17,42,156,273,109,394,50,169,14


In [12]:
# train data
X = data_2.iloc[: , 1:]
y = data_2.iloc[: , 0]

X_train , X_test , y_train , y_test = train_test_split(X , y , test_size = 0.3 , random_state = 0)

knn = KNeighborsClassifier(weights = 'distance')
knn.fit(X_train , y_train)

y_pred = pd.Series(knn.predict(X_test) , index = y_test.index , name = 'predict')
score_2 = knn.score(X_test, y_test)
print('Score(精準度)：{0:.2f}'.format(score_2))

Score(精準度)：0.67


In [13]:
# True vs. predicted label with level-15 features included; show only the mismatches.
predict_2 = pd.concat([y_test, y_pred], axis='columns')
predict_2.loc[predict_2['角色'].ne(predict_2['predict'])]

Unnamed: 0,角色,predict
安奈特,輔助,射手
史蘭茲,射手,刺客
莫拉,刺客,法師
佩娜,輔助,法師
歐米茄,坦克,輔助
塔拉,坦克,戰士
渥馬爾,戰士,輔助
提米,輔助,法師


### <font face = 'Microsoft JhengHei'>加入射程及跑速</font>

In [14]:
# Use the complete table, '跑速' and '射程' columns included.
# Note: this binds a second name to the same DataFrame object; it is not a copy.
data_3 = data_all
data_3.head()

Unnamed: 0,角色,基礎生命,15級生命,基礎回血 5/S,15級回血,基礎魔力,15級魔力,基礎回魔 5/S,15級回魔,基礎物攻,15級物攻,基礎物防,15級物防,基礎魔防,15級魔防,15級攻速,跑速,射程
牛魔王,坦克,3558,9328,59,128,420,1708,15,36,163,286,132,514,50,169,14,370,200.0
薩尼,坦克,3535,8857,58,121,420,1694,15,36,157,306,123,494,50,169,14,380,200.0
美娜,坦克,3510,8638,58,119,420,1666,14,34,158,288,120,430,50,169,14,380,200.0
歐米茄,坦克,3364,8581,56,118,420,1694,15,36,178,295,139,459,50,169,14,380,200.0
朗博,輔助,3537,8476,58,117,470,1926,17,42,156,273,109,394,50,169,14,380,200.0


In [15]:
# Full table (same object as data_all): label in column 0, every stat as a feature.
data_3 = data_all
y = data_3.iloc[:, 0]
X = data_3.iloc[:, 1:]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Distance-weighted KNN, fitted on all available features.
knn = KNeighborsClassifier(weights='distance').fit(X_train, y_train)

score_3 = knn.score(X_test, y_test)
y_pred = pd.Series(data=knn.predict(X_test), index=y_test.index, name='predict')
print('Score(精準度)：{0:.2f}'.format(score_3))

Score(精準度)：0.71


In [16]:
# True vs. predicted label with every feature included; show only the mismatches.
predict_3 = pd.concat([y_test, y_pred], axis='columns')
predict_3.loc[predict_3['角色'].ne(predict_3['predict'])]

Unnamed: 0,角色,predict
安奈特,輔助,射手
莫拉,刺客,法師
佩娜,輔助,法師
歐米茄,坦克,輔助
塔拉,坦克,戰士
渥馬爾,戰士,輔助
提米,輔助,法師


### <span style = 'font-family : Microsoft JhengHei;'>使用<a href = "https://zh.wikipedia.org/wiki/%E4%BA%A4%E5%8F%89%E9%A9%97%E8%AD%89" style = 'color:dimgray;text-decoration:none;'>交叉驗證 (Cross Validation)</a></span>

<font face = 'Microsoft JhengHei' size = 3>將訓練集分割成K個子樣本，每次保留其中一個子樣本作為驗證模型的數據，其餘K-1個子樣本用來訓練；如此重複K次，讓每個子樣本都恰好驗證一次，最後取K次結果的平均。</font>

In [17]:
from sklearn.model_selection import cross_val_score

# Cross-validate over several folds so the score does not hinge on one particular split.
K = 3  # number of folds
scores = cross_val_score(knn, X, y, cv=K, scoring='accuracy')
average_accuracy = scores.mean() * 100
print("平均準確率為 {0:.1f}%".format(average_accuracy))

平均準確率為 48.9%


In [18]:
# Sweep n_neighbors over 1..14 and record the K-fold accuracy of each setting.
# NOTE(review): these estimators use the default uniform weights, whereas the final
# model below is built with weights='distance' -- confirm the chosen k carries over.
# NOTE(review): the sweep sees all of X / y, including rows later used as the test split.
parameter_values = list(range(1, 15))
all_scores = []   # per-fold accuracies for each k
avg_scores = []   # mean accuracy for each k
for n_neighbors in parameter_values:
    estimator = KNeighborsClassifier(n_neighbors=n_neighbors)
    scores = cross_val_score(estimator, X, y, cv=K, scoring='accuracy')
    all_scores.append(scores)
    avg_scores.append(scores.mean())

In [19]:
# plotly.plotly moved to the separate chart-studio package in plotly 4, where this
# import raises ImportError. Nothing in this cell uses `py`, so fall back to None
# rather than aborting the cell on newer plotly releases.
try:
    import plotly.plotly as py
except ImportError:
    py = None
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Mean cross-validated accuracy (y) against the number of neighbours tried (x).
trace = [go.Scatter(
    x = parameter_values,
    y = avg_scores
)]

layout = dict(
    xaxis = dict(title = 'neighbor個數'),
    yaxis = dict(title = '準確率'),
)

fig = dict(data = trace , layout = layout)

# Render the figure inline in the notebook (offline mode).
init_notebook_mode(connected=True)
iplot(fig)
# Per the plot, accuracy is highest at 9 neighbours (value read off the figure by hand).
k = 9

In [20]:
# Full table again (same object as data_all): label in column 0, every stat as a feature.
data_set_k = data_all
y = data_set_k.iloc[:, 0]
X = data_set_k.iloc[:, 1:]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Distance-weighted KNN using the neighbour count k picked from the plot.
knn = KNeighborsClassifier(n_neighbors=k, weights='distance').fit(X_train, y_train)

score_set_k = knn.score(X_test, y_test)
y_pred = pd.Series(data=knn.predict(X_test), index=y_test.index, name='predict')
print('Score(精準度)：{0:.2f}'.format(score_set_k))

Score(精準度)：0.67


In [21]:
# True vs. predicted label for the model with the chosen k; show only the mismatches.
predict_set_k = pd.concat([y_test, y_pred], axis='columns')
predict_set_k.loc[predict_set_k['角色'].ne(predict_set_k['predict'])]

Unnamed: 0,角色,predict
安奈特,輔助,射手
史蘭茲,射手,法師
莫拉,刺客,法師
佩娜,輔助,法師
塔拉,坦克,戰士
渥馬爾,戰士,坦克
提米,輔助,法師
蝙蝠俠,刺客,法師


In [22]:
# Recap: test accuracy of every experiment, one line each, formatted to two decimals.
for label, score in [
    ('只有基礎數據的準確率：', score_1),
    ('加入距離權重的準確率：', score_weighted),
    ('加入15等特徵的準確率：', score_2),
    ('加入跑速、射程等特徵的準確率：', score_3),
    ('給定k的準確率：', score_set_k),
]:
    print(label, format(score, '.2f'))

只有基礎數據的準確率： 0.29
加入距離權重的準確率： 0.38
加入15等特徵的準確率： 0.67
加入跑速、射程等特徵的準確率： 0.71
給定k的準確率： 0.67
