# 5. KNN

###  取data_new.csv，進行KNN 分析

###  可針對所需的模型進行屬性挑選

In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

In [7]:
# Load the player table; the first CSV column becomes the row index.
# NOTE(review): the notebook text mentions "data_new.csv" while the code reads
# 'datanew.csv' — confirm which filename is correct.
data = pd.read_csv('datanew.csv', index_col=0)

# Attribute columns from 'Crossing' through 'GKReflexes' (label slice, both ends
# inclusive; the original note says this is 34 attributes).
# .copy() yields an independent frame, so adding the 'label' column below is not
# an assignment into a slice of `data` (avoids SettingWithCopyWarning).
df = data.loc[:, 'Crossing':'GKReflexes'].copy()

# Overall reference value: the mean of the per-column means.
array_data = np.array(df)
column_data_mean = np.mean(array_data, axis=0)
all_data_mean = np.mean(column_data_mean)

# Label each player by comparing the player's own mean over these attributes
# with the overall mean. The per-player mean is kept as a local Series instead
# of a temporary column, so no in-place drop is needed afterwards.
all_mean = df.mean(axis=1)
df.loc[all_mean > all_data_mean, 'label'] = 'Above-average Players'
df.loc[all_mean <= all_data_mean, 'label'] = 'Below-average Players'

data['label'] = df['label']

# Feature matrix: columns at positions 6..42 of `data`, plus 'Skill Moves'.
# NOTE(review): the positional slice depends on the CSV column order — confirm
# it still selects the intended columns if the file layout changes.
# .copy() again makes the column assignment below act on an independent frame.
feature = data.iloc[:, 6:43].copy()
feature['Skill Moves'] = data['Skill Moves']

# target_names is defined for use when displaying charts.
target_names = ['Above-average Players', 'Below-average Players']

### (a) 推薦與 "Neymar Jr" 相像的前五名足球選手

In [8]:
from sklearn.neighbors import NearestNeighbors

# Locate Neymar Jr. Both the index label and the row position are recorded:
# kneighbors() works with row positions, which only coincide with index labels
# when the index happens to be 0..n-1.
is_Neymar_Jr = (data['Name'] == 'Neymar Jr').to_numpy()
assert is_Neymar_Jr.any(), "no player named 'Neymar Jr' found in data"
Neymar_Jr_pos = int(np.flatnonzero(is_Neymar_Jr)[0])  # first match, as before
Neymar_Jr_index = data.index[Neymar_Jr_pos]

# Neymar Jr's feature row, kept as a one-row DataFrame (selected by position).
Neymar_Jr = feature.iloc[[Neymar_Jr_pos]]

# Standardize the features: shift so Neymar Jr sits at the origin and scale
# each column by its standard deviation. The mean of a one-row frame is just
# that row as a Series, so this subtracts Neymar Jr's values column by column.
normalized_feature = (feature - Neymar_Jr.mean()) / feature.std()

# Neymar Jr's row after standardization (the query point).
normalized_Neymar_Jr = normalized_feature.iloc[[Neymar_Jr_pos]]

# Ask for 6 neighbours: the query row is part of the fitted data, so the
# nearest hit is Neymar Jr himself (distance 0), leaving five other players.
nbrs = NearestNeighbors(n_neighbors=6).fit(normalized_feature)
distances, indices = nbrs.kneighbors(normalized_Neymar_Jr)

# `indices` holds row positions within the fitted data, so look names up with
# .iloc rather than by index label.
for neighbor_positions in indices:
    print(data['Name'].iloc[neighbor_positions])
distances

2         Neymar Jr
5         E. Hazard
65    Douglas Costa
15        P. Dybala
25        K. Mbappé
68          M. Reus
Name: Name, dtype: object


array([[0.        , 2.51869119, 3.00039668, 3.03490117, 3.29433496,
        3.32595121]])

### (b) 推薦與 "L. Messi" 相像的前五名足球選手

In [9]:
from sklearn.neighbors import NearestNeighbors

# Locate L. Messi. Both the index label and the row position are recorded:
# kneighbors() works with row positions, which only coincide with index labels
# when the index happens to be 0..n-1.
is_L_Messi = (data['Name'] == 'L. Messi').to_numpy()
assert is_L_Messi.any(), "no player named 'L. Messi' found in data"
L_Messi_pos = int(np.flatnonzero(is_L_Messi)[0])  # first match, as before
L_Messi_index = data.index[L_Messi_pos]

# L. Messi's feature row, kept as a one-row DataFrame (selected by position).
L_Messi = feature.iloc[[L_Messi_pos]]

# Standardize the features: shift so L. Messi sits at the origin and scale
# each column by its standard deviation. The mean of a one-row frame is just
# that row as a Series, so this subtracts L. Messi's values column by column.
normalized_feature = (feature - L_Messi.mean()) / feature.std()

# L. Messi's row after standardization (the query point).
normalized_L_Messi = normalized_feature.iloc[[L_Messi_pos]]

# Ask for 6 neighbours: the query row is part of the fitted data, so the
# nearest hit is L. Messi himself (distance 0), leaving five other players.
nbrs = NearestNeighbors(n_neighbors=6).fit(normalized_feature)
distances, indices = nbrs.kneighbors(normalized_L_Messi)

# `indices` holds row positions within the fitted data, so look names up with
# .iloc rather than by index label.
for neighbor_positions in indices:
    print(data['Name'].iloc[neighbor_positions])
distances

0       L. Messi
2      Neymar Jr
5      E. Hazard
15     P. Dybala
236       Malcom
84     R. Mahrez
Name: Name, dtype: object


array([[0.        , 3.35772582, 3.36771563, 3.43750872, 3.88127403,
        3.98333701]])