# K-means
https://scikit-learn.org/stable/
<img src="https://mofanpy.com/static/results/sklearn/2_1_1.png">

## K-means是一種分群方法，為非監督式學習

### 1. 設定n群
### 2. K-means隨機給予n個群心
### 3. 每個點用距離公式計算並分類給最近的群
### 4. 用每一群的點重新計算群心
### 5. 重複3、4步驟直到收斂

In [None]:
%matplotlib inline

import random
import numpy as np
from sklearn import cluster, metrics
import matplotlib.pyplot as plt

feature = np.array([2, 2])
for i in range(3000):
    if i%3 == 0:
        x = 3 + random.normalvariate(0, 1.2)
        y = 3 + random.normalvariate(0, 1.2)
        feature = np.vstack((feature, [x, y]))
        plt.scatter(x, y , color='b', s=2)
    elif i%3 == 1:
        x = 7 + random.normalvariate(0, 1)
        y = 7 + random.normalvariate(0, 1)
        feature = np.vstack((feature, [x, y]))
        plt.scatter(x, y , color='r', s=2)
    else:
        x = 8 + random.normalvariate(0, 0.7)
        y = 2 + random.normalvariate(0, 0.7)
        feature = np.vstack((feature, [x, y]))
        plt.scatter(x, y , color='g', s=2)
feature = feature[1:]

plt.xlim(0, 10)
plt.ylim(0, 10)
plt.show()

In [None]:
feature

### K-means官方文件
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html

In [None]:
# 迴圈
silhouette_avgs = []
ks = range(2, 7)
for k in ks:
    kmeans_fit = cluster.KMeans(n_clusters = k).fit(feature)
    cluster_labels = kmeans_fit.labels_
    silhouette_avg = metrics.silhouette_score(feature, cluster_labels) # -1 ~ 1
    silhouette_avgs.append(silhouette_avg)

# 作圖並印出 k = 2 到 10 的績效
plt.bar(ks, silhouette_avgs)
plt.show()
print(silhouette_avgs)

In [None]:
print(cluster_labels)

In [None]:
from IPython.display import HTML
HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/0DGtyMBOZ-c" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>')
# 出處: https://chih-sheng-huang821.medium.com/%E6%A9%9F%E5%99%A8%E5%AD%B8%E7%BF%92-%E9%9B%86%E7%BE%A4%E5%88%86%E6%9E%90-k-means-clustering-e608a7fe1b43

# KNN(k nearest neighbors)
## KNN可以做分類或回歸，為監督式學習
### 1. 設定k值
### 2. 計算距離公式找出k個最相近的特徵
### 3. 分類: k個特徵投票、回歸: 平均k個特徵
<img src="https://ww2.mathworks.cn/matlabcentral/mlc-downloads/downloads/03faee64-e85e-4ea0-a2b4-e5964949e2d1/d99b9a4d-618c-45f0-86d1-388bdf852c1d/images/screenshot.gif">

### 蒐集資料
python MLGame.py -i ml_play_template.py -f 200 -r arkanoid NORMAL 3

In [15]:
import pickle
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import  classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedShuffleSplit

#試取資料
file = open("../log/n3 (1).pickle", "rb")
data = pickle.load(file)
file.close()

print(data)

FileNotFoundError: [Errno 2] No such file or directory: '../log/n3 (1).pickle'

In [64]:
game_info = data['scene_info']
game_command = data['command']
# print(game_info)
print(game_command[1][1])

MOVE_RIGHT


In [65]:
game_info=[]
game_command=[]
import os
dir_path = "..\log39"
for file_path in os.listdir(dir_path):
    with open('..\\log\\' + file_path ,'rb') as f:
        data = pickle.load(f)
    game_info = game_info + data['scene_info']
    game_command = game_command + data['command']
print(len(game_info))
print(len(game_command))

6765
6765


### 特徵整理

In [66]:
g = game_info[1]
x = g['ball'][0]
y = g['ball'][1]
xs = g['ball_speed'][0]
ys = g['ball_speed'][0]
P1_plate = g['platform_1P'][0]
P2_plate = g['platform_2P'][0]
blocker_x = g['blocker'][0]
feature = np.array([x,y,xs,ys,P1_plate,P2_plate,blocker_x])
print(feature)

print(game_command[1][0],game_command[1][1])
game_command[1] = 0

[ 93 415   0   0  75  75 170]
MOVE_LEFT MOVE_LEFT


In [67]:
print (game_command[3][0],game_command[3][1])
for i in range(2, len(game_info) - 1):
    
    g = game_info[1]
    x = g['ball'][0]
    y = g['ball'][1]
    xs = g['ball_speed'][0]
    ys = g['ball_speed'][0]
    P1_plate = g['platform_1P'][0]
    P2_plate = g['platform_2P'][0]
    blocker_x = g['blocker'][0]
    feature = np.vstack((feature, [x,y,xs,ys,P1_plate,P2_plate,blocker_x]))
    if game_command[i] == "NONE": game_command[i] = 0
    elif game_command[i] == "MOVE_LEFT": game_command[i] = 1
    else: game_command[i] = 2
    
    
answer = np.array(game_command[1:-1])

# print(feature)
print (game_command[3])
print(feature.shape)
# for i in range(10000):
#     print(answer[i])    
print(answer)

MOVE_LEFT MOVE_LEFT
2
(6763, 7)
[0 2 2 ... 2 2 2]


### KNN官方文件
https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html
### 交叉驗證
https://chih-sheng-huang821.medium.com/%E4%BA%A4%E5%8F%89%E9%A9%97%E8%AD%89-cross-validation-cv-3b2c714b18db

In [60]:
#資料劃分
x_train, x_test, y_train, y_test = train_test_split(feature, answer, test_size=0.3, random_state=9)
#參數區間
param_grid = {'n_neighbors':[1, 2, 3]}
#交叉驗證 
cv = StratifiedShuffleSplit(n_splits=2, test_size=0.3, random_state=12)
grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=cv, verbose=10, n_jobs=-1) #n_jobs為平行運算的數量
grid.fit(x_train, y_train)
grid_predictions = grid.predict(x_test)

#儲存
file = open('my_model.pickle', 'wb')
pickle.dump(grid, file)
file.close()

Fitting 2 folds for each of 3 candidates, totalling 6 fits


ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.

### f1-score
https://medium.com/nlp-tsupei/precision-recall-f1-score%E7%B0%A1%E5%96%AE%E4%BB%8B%E7%B4%B9-f87baa82a47

In [51]:
#最佳參數
print(grid.best_params_)
#預測結果
#print(grid_predictions)
#混淆矩陣
print(confusion_matrix(y_test, grid_predictions))
#分類結果
print(classification_report(y_test, grid_predictions))

{'min_samples_leaf': 50}
[[12365   163   129]
 [  116  1805    19]
 [   96    21  1851]]
              precision    recall  f1-score   support

           0       0.98      0.98      0.98     12657
           1       0.91      0.93      0.92      1940
           2       0.93      0.94      0.93      1968

    accuracy                           0.97     16565
   macro avg       0.94      0.95      0.94     16565
weighted avg       0.97      0.97      0.97     16565



### 執行遊戲
python MLGame.py -i knn.py -f 50 arkanoid NORMAL 3