In [None]:
# 导入必要的库
import numpy as np
from sklearn.datasets import fetch_lfw_people
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import joblib

# 定义数据加载函数
def load_data(path, *, min_faces_per_person=60, resize=0.4):
    """Load the Labeled Faces in the Wild (LFW) people dataset.

    Thin wrapper around ``sklearn.datasets.fetch_lfw_people`` that exposes
    the filtering and resizing options instead of hard-coding them. Calling
    ``load_data(path)`` behaves exactly as before.

    Args:
        path: Directory forwarded to ``fetch_lfw_people`` as ``data_home``.
        min_faces_per_person: Forwarded unchanged to ``fetch_lfw_people``.
            Defaults to 60, the value this function previously hard-coded.
        resize: Forwarded unchanged to ``fetch_lfw_people``. Defaults to
            0.4, the value this function previously hard-coded.

    Returns:
        The object returned by ``fetch_lfw_people``; the rest of this
        script reads its ``data``, ``target`` and ``target_names``
        attributes.
    """
    return fetch_lfw_people(
        data_home=path,
        min_faces_per_person=min_faces_per_person,
        resize=resize,
    )

# Load the face dataset
faces = load_data("data/lfw_funneled")

# Feature matrix and label vector
X, y = faces.data, faces.target

# Hold out 25% of the samples for testing; the seed is fixed so the split
# is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Fit the scaler on the training split only, then apply the same fitted
# scaler to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the linear SVM classifier on the scaled training data
svm_clf = SVC(kernel="linear", class_weight="balanced", random_state=42)
svm_clf.fit(X_train_scaled, y_train)

# Evaluate on the held-out split
y_pred = svm_clf.predict(X_test_scaled)
report = classification_report(
    y_test,
    y_pred,
    target_names=faces.target_names,
)
accuracy = accuracy_score(y_test, y_pred)
print("分类报告:")
print(report)
print(f"准确率: {accuracy:.2%}")

# Persist the classifier and the scaler (both are needed at inference time
# to reproduce the same preprocessing + prediction)
for artifact, destination in (
    (svm_clf, "data/svm_clf_model.joblib"),
    (scaler, "data/scaler_model.joblib"),
):
    joblib.dump(artifact, destination)

print("模型已保存到 'data' 目录下。")