In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [4]:
# Load the breast-cancer dataset and split it into train/test partitions.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,  # stratify the split on the class labels
    random_state=66,         # fixed seed so the split is reproducible
)

In [11]:
# Sweep n_neighbors from 1 to 10 and record the accuracy on both splits.
neighbors_settings = range(1, 11)

training_accuracy = []
test_accuracy = []

for n_neighbors in neighbors_settings:
    # Build the model and fit it on the training split
    clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train)

    # Predict on the training and test splits
    y_train_hat = clf.predict(X_train)
    y_test_hat = clf.predict(X_test)

    # Training-set accuracy, then test-set accuracy
    training_accuracy.append(accuracy_score(y_train, y_train_hat))
    test_accuracy.append(accuracy_score(y_test, y_test_hat))

In [12]:
import pandas as pd
import numpy as np

In [18]:
# Tabulate train/test accuracy for each n_neighbors value of the sweep.
# The "k" column is taken from neighbors_settings (the range the sweep loop
# iterated over) instead of a second hard-coded range, so the table cannot
# drift out of sync with the values that were actually evaluated.
breast_df = pd.DataFrame({
    "k": list(neighbors_settings),
    "train": training_accuracy,
    "test": test_accuracy,
})
breast_df

Unnamed: 0,k,train,test
0,1,1.0,0.902098
1,2,0.976526,0.888112
2,3,0.957746,0.923077
3,4,0.955399,0.923077
4,5,0.948357,0.923077
5,6,0.946009,0.937063
6,7,0.943662,0.93007
7,8,0.941315,0.93007
8,9,0.934272,0.916084
9,10,0.938967,0.916084


In [20]:
# Sweep the Minkowski exponent p from 1 to 5 with n_neighbors fixed at 5.
p_settings = range(1, 6)

# Fresh accumulators: these names were also used by the n_neighbors sweep,
# so they are rebound to empty lists before this loop runs.
training_accuracy = []
test_accuracy = []

for p in p_settings:
    # Build the model and fit it on the training split
    clf = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=p).fit(
        X_train, y_train
    )

    # Predictions for the training set and the test set
    y_train_hat, y_test_hat = clf.predict(X_train), clf.predict(X_test)

    # Record the accuracy on each split
    training_accuracy.append(accuracy_score(y_train, y_train_hat))
    test_accuracy.append(accuracy_score(y_test, y_test_hat))

In [28]:
# Tabulate train/test accuracy for each Minkowski exponent p evaluated by the
# sweep (n_neighbors is fixed at 5 there). The first column is labelled "p"
# because it holds the p values, not a neighbour count, and it is built from
# p_settings so it always matches the values the loop iterated over.
breast_df2 = pd.DataFrame({
    "p": list(p_settings),
    "train": training_accuracy,
    "test": test_accuracy,
})

In [29]:
# Display the accuracy table built from the Minkowski-p sweep.
breast_df2

Unnamed: 0,k,train,test
0,1,0.964789,0.937063
1,2,0.948357,0.923077
2,3,0.943662,0.93007
3,4,0.943662,0.923077
4,5,0.943662,0.923077


In [33]:
# NOTE(review): the cell that installs mglearn comes after this one in the
# notebook; on a fresh kernel the install has to run before this import.
import mglearn
# Load mglearn's "wave" dataset with 40 samples; X and y are split and fed to
# a k-NN regressor in the cells that follow.
X, y = mglearn.datasets.make_wave(n_samples=40)

In [32]:
!pip install mglearn

Collecting mglearn
[?25l  Downloading https://files.pythonhosted.org/packages/65/38/8aced26fce0b2ae82c3c87cd3b6105f38ca6d9d51704ecc44aa54473e6b9/mglearn-0.1.9.tar.gz (540kB)
[K     |████████████████████████████████| 542kB 5.8MB/s 
Building wheels for collected packages: mglearn
  Building wheel for mglearn (setup.py) ... [?25l[?25hdone
  Created wheel for mglearn: filename=mglearn-0.1.9-py2.py3-none-any.whl size=582638 sha256=369cbe5422064977e10db98c21aa37fe0bec7ec25604a258e5ce89136ffa51bf
  Stored in directory: /root/.cache/pip/wheels/eb/a6/ea/a6a3716233fa62fc561259b5cb1e28f79e9ff3592c0adac5f0
Successfully built mglearn
Installing collected packages: mglearn
Successfully installed mglearn-0.1.9


In [34]:
from sklearn.model_selection import train_test_split
# Split the wave data into train/test partitions with a fixed seed.
# NOTE: this rebinds X_train / X_test / y_train / y_test, which previously
# held the breast-cancer split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

In [35]:
from sklearn.neighbors import KNeighborsRegressor
# Fit a 3-nearest-neighbours regressor on the training split.
reg = KNeighborsRegressor(n_neighbors=3).fit(X_train, y_train)
# Bare expression so the cell displays the fitted estimator.
reg

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
                    metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                    weights='uniform')

In [36]:
# Predict on the test split, then print the actual targets followed by the
# predicted ones (one array per line).
y_test_hat = reg.predict(X_test)
print(y_test, y_test_hat, sep="\n")

[ 0.37299129  0.21778193  0.96695428 -1.38773632 -1.05979555 -0.90496988
  0.43655826  0.7789638  -0.54114599 -0.95652133]
[-0.05396539  0.35686046  1.13671923 -1.89415682 -1.13881398 -1.63113382
  0.35686046  0.91241374 -0.44680446 -1.13881398]


In [37]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Compute the regression error metrics on the test split, then report them.
mae = mean_absolute_error(y_test, y_test_hat)
rmse = mean_squared_error(y_test, y_test_hat) ** 0.5  # square root of the MSE
r_square = r2_score(y_test, y_test_hat)

print('MAE :', mae)
print("RMSE :", rmse)
print('R_square:', r_square)

MAE : 0.2537184941556285
RMSE : 0.32966137736851764
R_square: 0.8344172446249605
