分类模型评估

作者：谢文伟

邮件：jim.xie.cn@outlook.com

主页：https://github.com/jim-xie-cn/ai-cv

导入用到的开发库

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
from sklearn.metrics import precision_score,recall_score,f1_score,roc_curve,auc
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Build a synthetic binary-classification dataset: 500 samples, 50 features.
X, Y = make_classification(
    n_samples=500,
    n_features=50,
    n_classes=2,
)
# Hold out the last 20% of the rows as the test set (no shuffling in the split).
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    shuffle=False,
)
# Train a gradient-boosting classifier; fit() returns the fitted estimator itself.
model = GradientBoostingClassifier().fit(X_train, Y_train)
# Predicted class labels for the held-out samples.
pred_y = model.predict(X_test)

In [None]:
# Report the standard classification metrics on the held-out test set.
print("混淆矩阵(Confusion Matrix):\n", confusion_matrix(Y_test, pred_y))
print("准确率(Accuracy):", accuracy_score(Y_test, pred_y))
print("精准率(Precision):", precision_score(Y_test, pred_y))
print("召回率(Recall):", recall_score(Y_test, pred_y))
print("F1分值(F1-Score):", f1_score(Y_test, pred_y))
# target_names are matched to the labels in sorted order (0 first, then 1),
# so the display names must be listed as ['0', '1']; the reversed order
# would label the rows of the report with the wrong class.
print("分类报告:\n", classification_report(Y_test, pred_y, target_names=['0', '1']))

In [None]:
# Per-class probabilities predicted by the model for the test samples.
proba_y = model.predict_proba(X_test)
# Second column: probability of the positive class (label 1).
scores = proba_y[:, 1]
# False-positive rates, true-positive rates and the thresholds that produce them.
fpr, tpr, thresholds = roc_curve(Y_test, scores)
# AUC is the area under the ROC curve described by (fpr, tpr).
roc_auc = auc(fpr, tpr)

# Plot the ROC curve with the AUC shown in the legend.
plt.plot(fpr, tpr, label=f'AUC (area = {roc_auc:.2f})')
plt.title('ROC Curve')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.legend(loc="lower right")

# 回归模型评估

In [None]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np
import math
# Randomly generate 5000 regression samples with 10 features and one target.
X, Y = make_regression(n_samples=5000, n_features=10, n_targets=1, noise=0.25)
# Split into train/test: the last 10% of the rows become the test set,
# the first 90% the training set; the order is not shuffled.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
                                                    test_size=0.10,
                                                    shuffle=False)
# Fit a linear regression on the training set and predict on the test set.
model = LinearRegression()
model.fit(X_train, Y_train)
pred_y = model.predict(X_test)
# Evaluate the model; MSE is computed once and reused for RMSE.
mse = mean_squared_error(Y_test, pred_y)
print("平均绝对值误差(MAE):", mean_absolute_error(Y_test, pred_y))
print("均方误差(MSE):", mse)
print("均方根误差(RMSE):", math.sqrt(mse))
r_square = r2_score(Y_test, pred_y)
print("决定系数(R²)", r_square)
# r_square is measured on the test set, so the adjusted R² correction must use
# the test set's sample count n (and feature count p), not the training set's.
n, p = X_test.shape
print("校正决定系数(adjusted R square)", (1 - ((1 - r_square) * (n - 1)) / (n - p - 1)))