<a href="https://colab.research.google.com/github/chonholee/tutorial/blob/main/ml/ML04_1_knn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 演習４　K近傍回帰

mglearn のサンプルデータセットと scikit-learn（sklearn）を利用したｋ近傍回帰の例

※ｋ近傍回帰のアルゴリズムの実装ではありません

In [None]:
!pip install mglearn

In [None]:
import mglearn
import matplotlib.pyplot as plt

# Create an arbitrary 40-sample dataset and show it as a scatter plot
X, y = mglearn.datasets.make_wave(n_samples=40)
ax = plt.gca()
ax.plot(X, y, 'o')
ax.set_xlabel('Feature')
ax.set_ylabel('Target')

### まずは sklearn ライブラリで用意されているｋ近傍回帰のアルゴリズムを動かしてみる

※ｋ近傍回帰アルゴリズムの実装ではありません

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import euclidean_distances

# Three-colour palette (dark blue, red, light green); the plotting helper below
# picks entries by index via cm3(i).
cm3 = ListedColormap(['#0000aa', '#ff2020', '#50ff50'])

def plot_knn_regression(n_neighbors=1, X_train=None, y_train=None, X_query=None):
    """Fit a k-nearest-neighbours regressor and plot its predictions.

    For every query point the figure shows the predicted target (large star),
    line segments joining the prediction to the ``n_neighbors`` training
    samples closest to the query (Euclidean distance), a marker for the query
    at the bottom of the plot, and a dashed vertical guide line.

    Parameters
    ----------
    n_neighbors : int, default=1
        Number of neighbours used for the prediction and drawn as segments.
    X_train : array-like, optional
        2-D training features; only the first column is plotted. When omitted,
        the notebook-level variable ``X`` is used.
    y_train : array-like, optional
        Training targets. When omitted, the notebook-level ``y`` is used.
    X_query : array-like, optional
        2-D query features to predict for. When omitted, the notebook-level
        ``X_test`` is used.

    Returns
    -------
    y_pred
        The values returned by ``KNeighborsRegressor.predict`` for the query
        points.
    """
    # Fall back to the notebook globals so existing calls such as
    # plot_knn_regression(n_neighbors=3) keep working unchanged.
    X_train = np.asarray(X if X_train is None else X_train)
    y_train = np.asarray(y if y_train is None else y_train)
    X_query = np.asarray(X_test if X_query is None else X_query)

    # Column j of `closest` lists training indices ordered by distance to query j.
    dist = euclidean_distances(X_train, X_query)
    closest = np.argsort(dist, axis=0)

    # Fit and predict before opening a figure, so that a failing fit does not
    # leave an empty figure behind.
    reg = KNeighborsRegressor(n_neighbors=n_neighbors).fit(X_train, y_train)
    y_pred = reg.predict(X_query)

    plt.figure(figsize=(10, 6))

    # Join each prediction to the training samples that produced it.
    for x, y_, neighbors in zip(X_query, y_pred, closest.T):
        for neighbor in neighbors[:n_neighbors]:
            plt.arrow(x[0], y_,
                      X_train[neighbor, 0] - x[0], y_train[neighbor] - y_,
                      head_width=0, fc='k', ec='k')

    train, = plt.plot(X_train, y_train, 'o', c=cm3(0))
    # Query points are drawn on the y = -3 baseline because they have no target.
    test, = plt.plot(X_query, -3 * np.ones(len(X_query)), '*', c=cm3(2),
                     markersize=20)
    pred, = plt.plot(X_query, y_pred, '*', c=cm3(0), markersize=20)
    plt.vlines(X_query, -3.1, 3.1, linestyle="--")
    plt.legend([train, test, pred],
               ["training data/target", "test data", "test prediction"],
               ncol=3, loc=(.1, 1.025))
    plt.ylim(-3.1, 3.1)
    plt.xlabel("Feature")
    plt.ylabel("Target")

    return y_pred

In [None]:
# Training data: already created above
# X, y = make_wave(n_samples=40)

# Test data: three x values (whose y values will be predicted)
X_test = np.array([[-1.5], [0.9], [1.5]])

In [None]:
# Draw the k=1 figure. Note that, despite its name, y_test receives the
# predictions returned by plot_knn_regression, not ground-truth targets.
y_test = plot_knn_regression(n_neighbors=1)
print('予測値：', y_test)

In [None]:
# 上と同じくｋ＝３の予測値も求めて、図を見ながらなぜそのような予測がされたのか確認してください。

--- here ---


In [None]:
# 上と同じくｋ＝３の予測値も求めて、図を見ながらなぜそのような予測がされたのか確認してください。

--- here ---

### 学習モデルの精度を検証してみる

ｋの値によってモデルの精度が異なることを確認してください。

* 水色の線：学習データで学習したモデルによる予測値
* 青い上向き三角（▲）：学習データ
* 赤い下向き三角（▼）：テストデータ

In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split

X, y = mglearn.datasets.make_wave(n_samples=40)

# Split the dataset into training and test subsets (seeded via random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

fig, axes = plt.subplots(1, 3, figsize=(15, 4))
# 1,000 evenly spaced points between -3 and 3, used to draw each model's
# prediction curve
line = np.linspace(-3, 3, 1000).reshape(-1, 1)
for n_neighbors, ax in zip([1, 3, 9], axes):
    # Fit a fresh model for each neighbourhood size (1, 3 and 9 neighbours)
    reg = KNeighborsRegressor(n_neighbors=n_neighbors)
    reg.fit(X_train, y_train)
    ax.plot(line, reg.predict(line))
    ax.plot(X_train, y_train, '^', c=mglearn.cm2(0), markersize=8)
    ax.plot(X_test, y_test, 'v', c=mglearn.cm2(1), markersize=8)
    ax.set_title(
        "{} neighbor(s)\n train score: {:.2f} test score: {:.2f}".format(
            n_neighbors,
            reg.score(X_train, y_train),
            reg.score(X_test, y_test)))
    ax.set_xlabel("Feature")
    ax.set_ylabel("Target")
# Legend entries follow the order in which the three artists were plotted above
axes[0].legend(["Model predictions", "Training data/target", "Test data/target"], loc="best")

### ｋ近傍回帰アルゴリズムの実装

In [None]:
import numpy as np

class KNNRegression:
    """Minimal k-nearest-neighbours regressor using Euclidean distance.

    A prediction is the unweighted mean of the targets of the ``k`` training
    samples closest to the query point. If ``k`` exceeds the number of
    training samples, all of them are averaged.
    """

    def __init__(self, k):
        """Store the neighbour count; training data is supplied via ``fit``.

        Parameters
        ----------
        k : int
            Number of neighbours to average; must be a positive integer.

        Raises
        ------
        ValueError
            If ``k`` is not a positive integer.
        """
        # bool is a subclass of int, so reject it explicitly.
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        """Memorise the training set (k-NN has no other training step).

        Parameters
        ----------
        X_train : array-like
            Training features, one row per sample. A 1-D sequence is treated
            as a single feature column.
        y_train : array-like
            Training targets, one per sample.

        Returns
        -------
        KNNRegression
            ``self``, so that calls can be chained.

        Raises
        ------
        ValueError
            If the training set is empty or the number of samples in
            ``X_train`` and ``y_train`` differ.
        """
        # Convert up front so plain Python lists work in predict().
        features = np.asarray(X_train, dtype=float)
        if features.ndim == 1:
            features = features.reshape(-1, 1)
        targets = np.asarray(y_train)

        if features.shape[0] == 0:
            raise ValueError("X_train must contain at least one sample")
        if targets.ndim == 0 or targets.shape[0] != features.shape[0]:
            raise ValueError(
                "X_train and y_train must contain the same number of samples")

        self.X_train = features
        self.y_train = targets
        return self

    def predict(self, X_test):
        """Predict a target for every row of ``X_test``.

        Parameters
        ----------
        X_test : array-like
            Query points, one row per sample.

        Returns
        -------
        list
            One prediction per query point, in input order.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("fit() must be called before predict()")

        predictions = []
        for x in np.asarray(X_test, dtype=float):
            # Euclidean distance from the query to every training sample
            distances = np.sqrt(np.sum((self.X_train - x) ** 2, axis=1))
            # Indices of the k closest samples; a stable sort makes ties
            # resolve to the earlier training sample deterministically
            indices = np.argsort(distances, kind="stable")[:self.k]
            # The prediction is the mean target of those neighbours
            predictions.append(np.mean(self.y_train[indices]))
        return predictions

In [None]:
# Prepare the data
X_train = np.array([[1.1], [2.2], [3.3], [4.4], [5.5]])
y_train = np.array([2, 4, 6, 8, 10])
X_test = np.array([[2.5], [4.5]])

# Create and train the KNN regression model
--- here ---


# Predict for the test data
--- here ---


print(X_test)
print(predictions)

In [None]:
# Plot the training samples together with the predictions for the test points
ax = plt.gca()
ax.plot(X_train, y_train, 'o')
ax.plot(X_test, predictions, 'o')
ax.set_xlabel('Feature')
ax.set_ylabel('Target')