In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_curve

# 1. 讀取 CSV 數據
df = pd.read_csv("data.csv")
df.pop('filename')
df.pop('label')
#df["tempo"] = df["tempo"].astype(int)

# 2. 特徵與標籤分離
xc = df.drop(columns=['beats'])  # 移除目標列，保留特徵
yc = df['beats']  # 提取目標列

# 3. 分割訓練集與測試集
xc_train, xc_test, yc_train, yc_test = train_test_split(xc, yc, test_size=0.2, random_state=None)
# 4. 標準化數據（可選，對 KNN 效果通常有幫助）
scaler = StandardScaler()
xc_train = scaler.fit_transform(xc_train)
xc_test = scaler.transform(xc_test)
# 5. 初始化 KNN 模型
knnClass = KNeighborsClassifier(n_neighbors=1, weights='uniform', metric='euclidean')

# 6. 訓練模型
knnClass.fit(xc_train, yc_train)


# 7. 預測
yc_pred = knnClass.predict(xc_test)


# 8. 評估模型
print(confusion_matrix(yc_test, yc_pred))


[[0 1 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [45]:
print(classification_report(yc_test, yc_pred, zero_division=0.0, digits=3))

              precision    recall  f1-score   support

          26      0.000     0.000     0.000         1
          29      0.000     0.000     0.000         0
          30      0.000     0.000     0.000         1
          32      0.000     0.000     0.000         1
          33      0.000     0.000     0.000         0
          34      0.333     0.500     0.400         2
          35      1.000     0.250     0.400         4
          36      0.000     0.000     0.000         2
          37      0.000     0.000     0.000         2
          38      0.000     0.000     0.000         3
          39      0.000     0.000     0.000         1
          40      0.000     0.000     0.000         1
          41      0.000     0.000     0.000         1
          42      0.000     0.000     0.000         4
          43      0.000     0.000     0.000         4
          44      0.000     0.000     0.000         6
          45      0.000     0.000     0.000         6
          46      0.143    

In [15]:
print(f1_score(yc_test, yc_pred, average='', zero_division=0.0))

0.055681818181818186


In [67]:
yc_scores = knnClass.predict_proba(xc_test)[:, 1]
yc_list = yc_test.values.tolist()
for i in range(200):
    if yc_list[i] < 50:
        yc_list[i] = 0
    else:
        yc_list[i] = 1
precision, recall, thresholds = precision_recall_curve(yc_list, yc_scores)
print(precision)
print(recall)
print(thresholds)

[0.67 1.  ]
[1. 0.]
[0.]
