In [1]:
from sklearn import metrics, datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
import numpy as np
%matplotlib inline

In [2]:
X, y = datasets.make_regression(n_features=1, random_state=42, noise=4) # 生成資料
model = LinearRegression() # 建立回歸模型
model.fit(X, y) # 將資料放進模型訓練
prediction = model.predict(X) # 進行預測
mae = metrics.mean_absolute_error(prediction, y) # 使用 MAE 評估
mse = metrics.mean_squared_error(prediction, y) # 使用 MSE 評估
r2 = metrics.r2_score(prediction, y) # 使用 r-square 評估
print("MAE: ", mae)
print("MSE: ", mse)
print("R-square: ", r2)

MAE:  2.8417972525655673
MSE:  12.488680067398239
R-square:  0.9916581036260311


In [3]:
cancer = datasets.load_breast_cancer() # 我們使用 sklearn 內含的乳癌資料集
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=50, random_state=0)

In [4]:
print(y_test) # 測試集中的 label

[0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 0 1 0 1 1 0 1 1 1 0]


In [5]:
y_pred = np.random.random((50,)) # 我們先隨機生成 50 筆預測值，範圍都在 0~1 之間，代表機率值

In [6]:
print(y_pred)

[2.73673394e-01 9.35415848e-01 7.94239735e-01 1.17207548e-01
 1.41560898e-01 1.36764660e-01 7.10961130e-01 7.83220838e-01
 2.90507126e-01 4.04086662e-01 6.86246689e-01 9.21569355e-02
 2.99342266e-01 4.35637328e-01 5.05619688e-02 2.68482016e-01
 6.21685486e-01 7.32816385e-01 6.30311745e-01 6.08545668e-01
 9.87198141e-01 3.72272153e-01 3.17301790e-01 4.92495898e-01
 6.54639889e-01 1.48334479e-01 3.79144307e-01 3.16413726e-04
 3.87957238e-01 2.76460943e-02 6.69687124e-01 9.67554323e-01
 4.48573326e-01 6.91595012e-01 5.97241329e-01 9.06481169e-01
 9.75750948e-01 7.46712585e-01 8.93535441e-01 4.96730681e-01
 5.30514591e-01 7.75861129e-01 2.74992608e-01 9.11492601e-01
 7.25106445e-01 2.19526479e-01 5.81608471e-01 8.01975254e-01
 9.08259034e-01 9.52096167e-01]


In [7]:
auc = metrics.roc_auc_score(y_test, y_pred) # 使用 roc_auc_score 來評估。 **這邊特別注意 y_pred 必須要放機率值進去!**
print("AUC: ", auc) # 得到結果約 0.5，與亂猜的結果相近，因為我們的預測值是用隨機生成的

AUC:  0.4940577249575552


In [8]:
threshold = 0.5 
y_pred_binarized = np.where(y_pred>threshold, 1, 0) # 使用 np.where 函數, 將 y_pred > 0.5 的值變為 1，小於 0.5 的為 0
f1 = metrics.f1_score(y_test, y_pred_binarized) # 使用 F1-Score 評估
precision = metrics.precision_score(y_test, y_pred_binarized) # 使用 Precision 評估
recall  = metrics.recall_score(y_test, y_pred_binarized) # 使用 recall 評估
print("F1-Score: ", f1) 
print("Precision: ", precision)
print("Recall: ", recall)

F1-Score:  0.5517241379310345
Precision:  0.5925925925925926
Recall:  0.5161290322580645


In [9]:
from sklearn.metrics import precision_score, recall_score, fbeta_score

In [10]:
y_pred = np.random.randint(2, size=100)
y_true = np.random.randint(2, size=100)

In [11]:
def custom_fbeta_score(y_true, y_pred, beta=1):
    precision = precision_score(y_true, y_pred) # 計算 Precision
    recall = recall_score(y_true, y_pred) # 計算 Recall
    
    fbeta = (1+ (beta)**2) * (precision*recall) / (((beta)**2*precision) + recall)
    return fbeta  

In [12]:
print(custom_fbeta_score(y_true, y_pred, beta=2))

0.5430711610486891


In [13]:
fbeta_score(y_true, y_pred, beta=2)

0.5430711610486891