In [1]:
import os
import cv2
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm
from sklearn.decomposition import PCA

def load_data_from_folder(folder_path, image_size=(32, 32)):
    """Load grayscale images from the ``real`` and ``fake`` subfolders.

    Every file in ``<folder_path>/real`` and ``<folder_path>/fake`` is read
    with ``cv2.imread`` in grayscale mode, resized to ``image_size`` and
    flattened to a 1-D vector. Files that ``cv2.imread`` cannot decode
    (it returns ``None``) are skipped.

    Args:
        folder_path: Directory containing the ``real`` and ``fake``
            subdirectories.
        image_size: Two-element size passed to ``cv2.resize`` as ``dsize``.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` holds one flattened image
        per row; ``y`` holds the matching labels (0 for ``real``, 1 for
        ``fake``). If no image could be read, ``X`` has shape
        ``(0, image_size[0] * image_size[1])`` and ``y`` has shape ``(0,)``.

    Raises:
        OSError: Propagated from ``os.listdir`` when a subfolder is missing
            or unreadable (e.g. ``FileNotFoundError``).
    """
    X, y = [], []
    for label_name in ['real', 'fake']:
        label_dir = os.path.join(folder_path, label_name)
        label = 0 if label_name == 'real' else 1

        # os.listdir yields entries in arbitrary order; sorting keeps the
        # sample order (and anything order-sensitive downstream) reproducible.
        fnames = sorted(os.listdir(label_dir))
        for fname in tqdm(fnames, desc=f"Loading {label_name}"):
            fpath = os.path.join(label_dir, fname)
            img = cv2.imread(fpath, cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img = cv2.resize(img, image_size)
                X.append(img.flatten())  # flatten the image into a 1-D vector
                y.append(label)

    if not X:
        # np.array([]) would be a (0,) float array; return a proper empty
        # 2-D feature matrix so callers still see (n_samples, n_features).
        n_features = image_size[0] * image_size[1]
        return np.empty((0, n_features), dtype=np.uint8), np.empty((0,), dtype=int)
    return np.array(X), np.array(y)

# Dataset root; expected to contain 'Train' and 'Val' split folders.
BASE_DIR = r'/root/Project/datasets/Celeb_V2/IdentitySplit'

# NOTE: the "test" set used throughout this notebook is the 'Val' split.
train_dir, test_dir = [os.path.join(BASE_DIR, split) for split in ('Train', 'Val')]

# Load the training split first, then the evaluation split.
(X_train, y_train), (X_test, y_test) = [
    load_data_from_folder(split_dir) for split_dir in (train_dir, test_dir)
]
print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

Loading real: 100%|██████████| 33217/33217 [00:10<00:00, 3205.99it/s]
Loading fake: 100%|██████████| 31936/31936 [00:09<00:00, 3334.55it/s]
Loading real: 100%|██████████| 7071/7071 [00:02<00:00, 3306.22it/s]
Loading fake: 100%|██████████| 8600/8600 [00:02<00:00, 3257.53it/s]

Train shape: (65153, 1024), Test shape: (15671, 1024)





In [2]:
# Fixed seed so repeated runs of this cell give the same model and metrics.
RANDOM_STATE = 42

# Reduce the flattened pixel vectors to 100 principal components (tunable).
# random_state matters because PCA's default solver selection may pick the
# randomized SVD for inputs of this size.
pca = PCA(n_components=100, random_state=RANDOM_STATE)
X_train_pca = pca.fit_transform(X_train)
X_test_pca  = pca.transform(X_test)  # reuse the projection fitted on the training data

# dual=False: scikit-learn recommends solving the primal problem when
# n_samples > n_features, which is the case after PCA (100 features).
# random_state only affects the dual solver; it is kept so the model stays
# reproducible if `dual` is changed later.
clf = LinearSVC(dual=False, random_state=RANDOM_STATE)
clf.fit(X_train_pca, y_train)

# Predict on the held-out split
y_pred = clf.predict(X_test_pca)

# Evaluate
print("准确率：", accuracy_score(y_test, y_pred))
print("\n分类报告：\n", classification_report(y_test, y_pred))

准确率： 0.5298321740795099

分类报告：
               precision    recall  f1-score   support

           0       0.48      0.48      0.48      7071
           1       0.57      0.57      0.57      8600

    accuracy                           0.53     15671
   macro avg       0.53      0.53      0.53     15671
weighted avg       0.53      0.53      0.53     15671



In [3]:
import joblib

# Persist the trained classifier and the fitted PCA to the current working
# directory. Both are needed at inference time: the classifier was trained on
# PCA-projected features, so new data must go through the same PCA first.
# The last call is left as a bare expression, so its return value (the list of
# written filenames) is shown as the cell output.
joblib.dump(clf, 'svc_model.pkl')
joblib.dump(pca, 'pca_model.pkl')

['pca_model.pkl']