In [1]:
from sklearn import metrics, datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
import numpy as np
%matplotlib inline

In [2]:
X, y = datasets.make_regression(n_features=1, random_state=42, noise=4) # 生成資料
model = LinearRegression() # 建立回歸模型
model.fit(X, y) # 將資料放進模型訓練
prediction = model.predict(X) # 進行預測
mae = metrics.mean_absolute_error(prediction, y) # 使用 MAE 評估
mse = metrics.mean_squared_error(prediction, y) # 使用 MSE 評估
r2 = metrics.r2_score(prediction, y) # 使用 r-square 評估
print("MAE: ", mae)
print("MSE: ", mse)
print("R-square: ", r2)

MAE:  2.841797252565566
MSE:  12.48868006739824
R-square:  0.9916581036260311


In [3]:
cancer = datasets.load_breast_cancer() # 我們使用 sklearn 內含的乳癌資料集
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=50, random_state=0)

In [4]:
print(y_test)

[0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 0 1 0 1 1 0 1 1 1 0]


In [5]:
y_pred = np.random.random((50,))
print(y_pred)

[9.86808600e-02 8.89714582e-01 8.82794627e-02 4.66587855e-01
 2.83004901e-01 7.04318903e-01 4.31305012e-01 8.87477022e-01
 8.43912229e-01 3.05456331e-01 8.11935982e-01 7.19530208e-01
 9.92316366e-01 9.84544244e-01 6.01094108e-04 1.13600939e-01
 2.18504157e-01 3.85291808e-01 3.15171225e-01 1.16722171e-01
 2.12452861e-01 9.93941383e-01 9.77895779e-01 3.21697590e-01
 5.31790751e-01 2.53579604e-01 4.71134843e-02 2.25049177e-01
 2.25844052e-01 3.31124453e-01 6.86886598e-01 8.24864783e-01
 9.88543055e-01 8.52420644e-01 4.12180387e-01 6.79250083e-01
 4.70309051e-01 4.97528299e-01 5.07076581e-02 1.68835598e-01
 2.67412972e-01 3.90462878e-03 4.57259494e-01 6.81647821e-01
 7.27880565e-01 8.24928135e-01 7.90952278e-01 4.58543782e-01
 8.19735635e-01 5.72703707e-01]


In [6]:
auc = metrics.roc_auc_score(y_test, y_pred) # 使用 roc_auc_score 來評估。 **這邊特別注意 y_pred 必須要放機率值進去!**
print("AUC: ", auc)

AUC:  0.5721561969439728


In [8]:
threshold = 0.5 

y_pred_binarized = np.where(y_pred>threshold, 1, 0) # 使用 np.where 函數, 將 y_pred > 0.5 的值變為 1，小於 0.5 的為 0
f1 = metrics.f1_score(y_test, y_pred_binarized) # 使用 F1-Score 評估
precision = metrics.precision_score(y_test, y_pred_binarized) # 使用 Precision 評估
recall  = metrics.recall_score(y_test, y_pred_binarized) # 使用 recall 評估
print("F1-Score: ", f1) 
print("Precision: ", precision)
print("Recall: ", recall)

F1-Score:  0.5660377358490567
Precision:  0.6818181818181818
Recall:  0.4838709677419355


In [10]:
f2 = ((5*precision*recall) / ((4*precision)+recall))
print("F2-Score: ", f2)

F2-Score:  0.5136986301369862
