In [13]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [14]:
# Load the iris dataset and pull out the feature matrix and the class labels.
raw_iris = datasets.load_iris()
X, y = raw_iris.data, raw_iris.target

In [15]:
# Split into train/test with a fixed seed so the split is reproducible.
X_tn, X_te, y_tn, y_te = train_test_split(X, y, random_state=0)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
std_scale = StandardScaler().fit(X_tn)
X_tn_std, X_te_std = (std_scale.transform(split) for split in (X_tn, X_te))

In [16]:
# Try k = 1..10 for k-NN and remember the k with the highest test accuracy.
#
# NOTE(review): k is chosen using the test split (X_te_std / y_te), so
# best_accuracy is an optimistic estimate of generalization. Consider
# selecting k with a validation split or cross-validation on the training data.

# Start below any attainable accuracy so the first k always seeds final_k;
# with a starting value of 0 and a strict ">" comparison, final_k would stay
# undefined if every k scored 0.0.
best_accuracy = float("-inf")
for k in range(1, 11):
    clf_knn = KNeighborsClassifier(n_neighbors=k)
    clf_knn.fit(X_tn_std, y_tn)
    knn_pred = clf_knn.predict(X_te_std)
    accuracy = accuracy_score(y_te, knn_pred)
    # Strict ">" means ties keep the smallest k seen so far.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        final_k = {"k": k}

In [17]:
# Display the k selected by the search loop.
final_k

{'k': 3}

In [18]:
# Show the accuracy achieved by the selected k. `accuracy` only holds the
# score of the last loop iteration (k=10), not the best one.
best_accuracy

0.9736842105263158

분류 문제에서의 성능 평가

In [None]:
# Toy labels: positions 0 and 3 agree, positions 1 and 2 are swapped.
y_true = [0, 1, 2, 3]
y_pred = [0, 2, 1, 3]

# First line: fraction of correct predictions (default normalize=True).
# Second line: number of correct predictions (normalize=False).
print(
    accuracy_score(y_true, y_pred),
    accuracy_score(y_true, y_pred, normalize=False),
    sep="\n",
)

0.5
2.0


In [None]:
y_true = [2, 0, 2, 2, 0, 1]
y_pred = [0, 0, 2, 2, 0, 2]
# Per-class counts: row i is the true class, column j is the predicted class.
confusion_matrix(y_true=y_true, y_pred=y_pred)

array([[2, 0, 0],
       [0, 0, 1],
       [1, 0, 2]])

In [24]:
# Per-class precision / recall / F1 report on a toy 3-class example.
#   macro avg    : plain (unweighted) mean of the per-class scores
#   weighted avg : mean of the per-class scores weighted by support,
#                  i.e. the number of samples of each class in y_true
y_true = [0, 1, 2, 2, 0]
y_pred = [0, 0, 2, 1, 0]
target_names = [f"class_{i}" for i in range(3)]
print(classification_report(y_true, y_pred, target_names=target_names))

              precision    recall  f1-score   support

     class_0       0.67      1.00      0.80         2
     class_1       0.00      0.00      0.00         1
     class_2       1.00      0.50      0.67         2

    accuracy                           0.60         5
   macro avg       0.56      0.50      0.49         5
weighted avg       0.67      0.60      0.59         5



In [34]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

회귀 문제에서의 성능 평가

In [28]:
# Toy regression targets and predictions used by the metric cells that follow.
y_true, y_pred = [3, -0.5, 2, 7], [2.5, 0.0, 2, 8]

In [29]:
# Mean absolute error: average of |y_true - y_pred|.
# Here: (0.5 + 0.5 + 0 + 1) / 4 = 0.5
mean_absolute_error(y_true, y_pred)

0.5

In [30]:
# Mean squared error: average of (y_true - y_pred)^2.
# Here: (0.25 + 0.25 + 0 + 1) / 4 = 0.375
mean_squared_error(y_true, y_pred)

0.375

In [33]:
# Coefficient of determination: 1 - SS_res / SS_tot.
# Here: SS_res = 1.5, SS_tot = 29.1875 (mean of y_true is 2.875).
r2_score(y_true, y_pred)

0.9486081370449679

군집 문제에서의 성능 평가

In [36]:
from sklearn.metrics import silhouette_score

In [None]:
# Five 2-D points with a hand-assigned two-cluster labelling.
# NOTE(review): this rebinds X, which earlier held the iris feature matrix;
# re-running the train/test split cell after this one would use these points.
X = [[1, 2], [4, 5], [2, 1], [6, 7], [2, 3]]
labels = [0, 1, 0, 1, 0]
silhouette_score(X, labels=labels)

np.float64(0.5789497702625118)