In [1]:
import os
import numpy as np
import cv2
import joblib
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

In [2]:
def load_images_from_folder(folder_path, label, img_size=(64, 64)):
    """Load every image in ``folder_path`` as a flattened RGB pixel vector.

    Only files whose name ends in .png/.jpg/.jpeg/.bmp (case-insensitive) are
    considered. Files that OpenCV cannot decode (``cv2.imread`` returns
    ``None``) are skipped silently; any other per-file failure is printed and
    the file is skipped, so one bad image never aborts the whole load.

    Args:
        folder_path: Directory to scan (not recursive).
        label: Label attached to every image loaded from this folder.
        img_size: Target size handed to ``cv2.resize``.

    Returns:
        ``(X, y)`` where ``X`` has one flattened image per row and ``y`` holds
        ``label`` repeated once per row. When no image could be loaded, ``X``
        has shape ``(0, img_size[0] * img_size[1] * 3)`` so that it can still
        be concatenated row-wise with the arrays of non-empty folders.

    Raises:
        OSError: If ``folder_path`` cannot be listed (propagated from
            ``os.listdir``).
    """
    data = []
    labels = []
    # Sort the listing: os.listdir returns entries in arbitrary order, which
    # would make the sample order differ between machines and runs.
    filenames = sorted(os.listdir(folder_path))
    for filename in tqdm(filenames, desc=f"Loading {label} from {os.path.basename(folder_path)}"):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            continue
        img_path = os.path.join(folder_path, filename)
        try:
            img = cv2.imread(img_path)  # decoded as a BGR image
            if img is None:
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # convert to RGB
            img_resized = cv2.resize(img, img_size)
            data.append(img_resized.flatten())
            labels.append(label)
        except Exception as e:
            # Best-effort loading: report the problem and keep going.
            print(f"Error loading {img_path}: {e}")

    if not data:
        # np.array([]) would be 1-D with shape (0,), which cannot be
        # concatenated along axis 0 with the 2-D arrays of other folders.
        n_features = img_size[0] * img_size[1] * 3  # 3 colour channels
        empty_X = np.empty((0, n_features), dtype=np.uint8)
        empty_y = np.empty((0,), dtype=np.asarray(label).dtype)
        return empty_X, empty_y
    return np.array(data), np.array(labels)

In [3]:
def load_dataset(base_path, img_size=(64, 64), sets=('Train', 'Val', 'Test'), classes=('real', 'fake')):
    """Load every split of a dataset laid out as ``base_path/<set>/<class>/``.

    Args:
        base_path: Root directory of the dataset.
        img_size: Target size forwarded to ``load_images_from_folder``.
        sets: Names of the split sub-directories to load. The default matches
            the original hard-coded ``Train`` / ``Val`` / ``Test`` layout.
        classes: Names of the class sub-directories inside each split. Each
            name is also used as the label of the images it contains. Samples
            are concatenated in this order.

    Returns:
        Dict mapping each split name to an ``(X, y)`` tuple, where ``X`` is the
        row-wise concatenation of the per-class feature arrays and ``y`` the
        matching concatenation of label arrays.
    """
    data = {}
    for set_name in sets:
        per_class = [
            load_images_from_folder(os.path.join(base_path, set_name, class_name), class_name, img_size)
            for class_name in classes
        ]
        X = np.concatenate([features for features, _ in per_class], axis=0)
        y = np.concatenate([targets for _, targets in per_class], axis=0)
        data[set_name] = (X, y)
    return data

In [7]:
# Dataset root. Set the CELEB_DATASET_DIR environment variable to point at
# your local copy instead of editing this cell; the original path is kept as
# the fallback so existing setups keep working.
dataset_path = os.environ.get('CELEB_DATASET_DIR', r'/root/Project/dataset/Celeb_V2')
data = load_dataset(dataset_path, img_size=(64, 64))

# Unpack the three splits returned by load_dataset.
X_train, y_train = data['Train']
X_val, y_val = data['Val']
X_test, y_test = data['Test']

# Fit the label encoder on the training labels only, then reuse the same
# mapping for the validation and test labels.
encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_val_enc = encoder.transform(y_val)
y_test_enc = encoder.transform(y_test)


Loading real from real:   1%|          | 182/33217 [00:00<00:18, 1813.20it/s]

Loading real from real: 100%|██████████| 33217/33217 [00:16<00:00, 1991.99it/s]
Loading fake from fake: 100%|██████████| 31936/31936 [00:16<00:00, 1923.53it/s]
Loading real from real: 100%|██████████| 7071/7071 [00:03<00:00, 1963.90it/s]
Loading fake from fake: 100%|██████████| 8600/8600 [00:04<00:00, 1957.19it/s]
Loading real from real: 100%|██████████| 5036/5036 [00:02<00:00, 1972.08it/s]
Loading fake from fake: 100%|██████████| 5067/5067 [00:02<00:00, 1985.15it/s]


In [None]:
# Linear-kernel SVM on the flattened pixel vectors.
# probability=True makes fitting run an internal cross-validation whose data
# shuffling is random, so random_state is fixed to keep results reproducible.
# NOTE(review): scikit-learn documents SVC fit time as scaling at least
# quadratically with the number of samples; if this cell is too slow on the
# full training set, consider LinearSVC or training on a subsample.
svc = SVC(kernel='linear', probability=True, random_state=42)
svc.fit(X_train, y_train_enc)

# Validation-set performance
y_val_pred = svc.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val_enc, y_val_pred))


In [None]:
# Persist the trained classifier and the label encoder side by side so that
# predictions can later be mapped back to the original class names.
os.makedirs("models", exist_ok=True)
for artifact, target_path in (
    (svc, "models/svc_model_cv2.joblib"),
    (encoder, "models/label_encoder_cv2.joblib"),
):
    joblib.dump(artifact, target_path)
print("模型和标签编码器已保存。")


In [None]:
# Reload the persisted artifacts to confirm the saved files are usable.
# joblib.load unpickles its input, so only load files you produced yourself.
loaded_svc, loaded_encoder = (
    joblib.load(artifact_path)
    for artifact_path in ("models/svc_model_cv2.joblib", "models/label_encoder_cv2.joblib")
)

# Evaluate the reloaded classifier on the held-out test split.
y_test_pred = loaded_svc.predict(X_test)
test_accuracy = accuracy_score(y_test_enc, y_test_pred)
print("Test Accuracy:", test_accuracy)
test_report = classification_report(
    y_test_enc,
    y_test_pred,
    target_names=loaded_encoder.classes_,
)
print("Classification Report:\n", test_report)


In [None]:
def plot_sample_predictions(X, y_true, y_pred, n=5, img_size=(64, 64), label_encoder=None):
    """Show the first ``n`` samples with their true and predicted labels.

    Args:
        X: Array with one flattened RGB image per row.
        y_true: Encoded true labels, aligned with ``X``.
        y_pred: Encoded predicted labels, aligned with ``X``.
        n: Maximum number of samples to show. Clamped to the number of
            available samples; nothing is drawn when that number is zero.
        img_size: Size that was used when the images were resized, in
            ``cv2.resize`` order, i.e. ``(width, height)``.
        label_encoder: Object whose ``inverse_transform`` maps encoded labels
            back to class names. Defaults to the notebook-level ``encoder``.
    """
    if label_encoder is None:
        label_encoder = encoder  # fall back to the encoder fitted in the notebook

    # Never index past the end of any of the three inputs.
    n = min(n, len(X), len(y_true), len(y_pred))
    if n <= 0:
        return

    true_labels = label_encoder.inverse_transform(np.asarray(y_true)[:n])
    pred_labels = label_encoder.inverse_transform(np.asarray(y_pred)[:n])

    # cv2.resize takes (width, height) but yields an array of shape
    # (height, width, channels), so the two entries are swapped here.
    width, height = img_size

    plt.figure(figsize=(15, 3))
    for i in range(n):
        img = X[i].reshape((height, width, 3))
        plt.subplot(1, n, i + 1)
        plt.imshow(img.astype(np.uint8))
        plt.title(f'True: {true_labels[i]}\nPred: {pred_labels[i]}')
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Show the first five test images with their true and predicted labels.
plot_sample_predictions(X_test, y_test_enc, y_test_pred, n=5)
