In [1]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem so that later cells can read
# and write files under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import zipfile
import os

# Location of the dataset archive on the mounted Drive. Adjust to the real
# path, e.g. '/content/drive/MyDrive/data/dataset.zip' if it is in a subfolder.
zip_path = '/content/drive/MyDrive/archive.zip'

# Extract into Colab's local scratch space (fast) rather than onto Drive.
extract_dir = '/content/dataset'

# Pure-Python equivalent of `mkdir -p`: unlike the shell magic, a failure here
# raises immediately instead of letting the notebook carry on.
os.makedirs(extract_dir, exist_ok=True)

# Unpack the whole archive into the extraction directory.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print("解压完成！文件列表：", os.listdir(extract_dir))


解压完成！文件列表： ['Celeb_V2']


In [4]:
import os
import warnings

import cv2
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, accuracy_score
from sklearn.svm import LinearSVC
from tqdm import tqdm

In [5]:
def load_data_from_folder(folder_path, image_size=(32, 32)):
    """Load the images under ``folder_path`` as flattened grayscale vectors.

    ``folder_path`` must contain two subdirectories, ``real`` and ``fake``.
    Every readable file in them is loaded as grayscale, resized to
    ``image_size`` and flattened to 1-D.

    Args:
        folder_path: Directory holding the ``real`` and ``fake`` subfolders.
        image_size: Target size handed to ``cv2.resize``, which interprets it
            as ``(width, height)``.

    Returns:
        A tuple ``(X, y)``: ``X`` has one row per loaded image and
        ``width * height`` columns; ``y`` holds the labels (0 = real,
        1 = fake) in the same order. If nothing could be loaded, ``X`` has
        shape ``(0, width * height)`` and ``y`` has shape ``(0,)``.

    Raises:
        OSError: Propagated from ``os.listdir`` when a label subfolder is
            missing or unreadable.
    """
    X, y = [], []
    skipped = 0
    for label_name in ['real', 'fake']:
        label_dir = os.path.join(folder_path, label_name)
        label = 0 if label_name == 'real' else 1

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # sample order (and anything order-sensitive downstream) reproducible.
        for fname in tqdm(sorted(os.listdir(label_dir)), desc=f"Loading {label_name}"):
            fpath = os.path.join(label_dir, fname)
            img = cv2.imread(fpath, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals "could not decode" by returning None.
                skipped += 1
                continue
            img = cv2.resize(img, image_size)
            X.append(img.flatten())  # flatten the image into a 1-D vector
            y.append(label)

    if skipped:
        # Report dropped files once instead of discarding them silently.
        warnings.warn(
            f"Skipped {skipped} file(s) under {folder_path!r} that could not be read as images",
            stacklevel=2,
        )

    if not X:
        # Keep the 2-D feature-matrix shape even when nothing was loaded, so
        # callers see (0, n_pixels) rather than a 1-D empty array.
        width, height = image_size
        return np.empty((0, width * height), dtype=np.uint8), np.empty((0,), dtype=int)
    return np.array(X), np.array(y)


In [8]:
# Root folder of the dataset; the two splits live directly beneath it.
BASE_DIR = r'/content/dataset/Celeb_V2'

# Build both split paths from the same root in one step.
train_dir, test_dir = (os.path.join(BASE_DIR, split) for split in ('Train', 'Test'))

In [7]:
# Load the train split first, then the test split, with the default image size.
(X_train, y_train), (X_test, y_test) = (
    load_data_from_folder(split_dir) for split_dir in (train_dir, test_dir)
)
print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

Loading real: 100%|██████████| 40288/40288 [00:19<00:00, 2077.44it/s]
Loading fake: 100%|██████████| 40536/40536 [00:45<00:00, 881.88it/s] 
Loading real: 100%|██████████| 5036/5036 [00:05<00:00, 971.31it/s] 
Loading fake: 100%|██████████| 5067/5067 [00:05<00:00, 895.29it/s] 

Train shape: (80824, 1024), Test shape: (10103, 1024)





In [9]:
# Reduce the flattened pixel vectors to 100 principal components (tunable).
# random_state is pinned because scikit-learn may choose the randomized SVD
# solver for inputs of this size, which would otherwise make the components
# (and every downstream metric) vary from run to run.
pca = PCA(n_components=100, random_state=42)

# Fit the projection on the training split only, then apply that same
# projection to the test split.
X_train_pca = pca.fit_transform(X_train)
X_test_pca  = pca.transform(X_test)

In [10]:
# Solve the primal problem: scikit-learn recommends dual=False when
# n_samples > n_features, which holds here (tens of thousands of training
# rows vs. 100 PCA components). The primal solver is also deterministic,
# so repeated runs give the same model.
clf = LinearSVC(dual=False)
clf.fit(X_train_pca, y_train)

# Predict on the held-out test split.
y_pred = clf.predict(X_test_pca)

# Evaluate: overall accuracy plus per-class precision / recall / F1.
print("准确率：", accuracy_score(y_test, y_pred))
print("\n分类报告：\n", classification_report(y_test, y_pred))


准确率： 0.601999406116995

分类报告：
               precision    recall  f1-score   support

           0       0.61      0.55      0.58      5036
           1       0.59      0.66      0.62      5067

    accuracy                           0.60     10103
   macro avg       0.60      0.60      0.60     10103
weighted avg       0.60      0.60      0.60     10103



In [11]:
import joblib

# Persist the fitted classifier and PCA transform directly into Google Drive.
# Writing straight to the destination (instead of dumping locally and then
# `!mv`-ing) means a missing or unmounted Drive folder raises an exception
# rather than only printing a shell error.
drive_dir = '/content/drive/MyDrive'
joblib.dump(clf, os.path.join(drive_dir, 'svc_model.pkl'))
# Trailing ';' keeps the notebook from echoing joblib's returned file list.
joblib.dump(pca, os.path.join(drive_dir, 'pca_model.pkl'));
