In [106]:
import os
import warnings

import matplotlib.pyplot as plt
import torch
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

warnings.filterwarnings("ignore")

plt.rcParams['font.family'] = 'Microsoft YaHei'


class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = os.listdir(root_dir)
        self.labels = [0 if 'cat' in img else 1 for img in self.images]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.images[idx])
        image = Image.open(img_name).convert("RGB")

        if self.transform:
            image = self.transform(image)

        label = self.labels[idx]

        return image, label


# 设置数据集路径
dataset_path = '../cc'

# 数据预处理
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# 加载图像
dataset = CustomDataset(root_dir=dataset_path, transform=transform)

In [107]:
# 划分训练集和测试集
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
print("train_size:{} and test_size:{}".format(train_size, test_size))
len(train_dataset[0][0][0])

train_size:2240 and test_size:560


224

In [108]:
# 创建 DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)
len(train_dataloader), len(test_dataloader)

(70, 18)

In [110]:
# 使用预训练的ResNet模型提取特征
model = models.resnet18(pretrained=True)
# 去掉模型的最后一层，用于分类任务的全连接层。
# 保留了 ResNet-18 模型的特征提取部分
model = torch.nn.Sequential(*(list(model.children())[:-1]))

# 提取特征
features = []
true_labels = []
model.eval()
with torch.no_grad():
    for batch_idx, (images, labels) in enumerate(train_dataloader):
        outputs = model(images)
        features.extend(outputs.squeeze().numpy())
        true_labels.extend(labels.numpy())

        # 输出每个批次的进度
        print(f'批次 {batch_idx + 1}/{len(train_dataloader)}, 提取特征中...')

print("特征提取完成。")

批次 1/70, 提取特征中...
批次 2/70, 提取特征中...
批次 3/70, 提取特征中...
批次 4/70, 提取特征中...
批次 5/70, 提取特征中...
批次 6/70, 提取特征中...
批次 7/70, 提取特征中...
批次 8/70, 提取特征中...
批次 9/70, 提取特征中...
批次 10/70, 提取特征中...
批次 11/70, 提取特征中...
批次 12/70, 提取特征中...
批次 13/70, 提取特征中...
批次 14/70, 提取特征中...
批次 15/70, 提取特征中...
批次 16/70, 提取特征中...
批次 17/70, 提取特征中...
批次 18/70, 提取特征中...
批次 19/70, 提取特征中...
批次 20/70, 提取特征中...
批次 21/70, 提取特征中...
批次 22/70, 提取特征中...
批次 23/70, 提取特征中...
批次 24/70, 提取特征中...
批次 25/70, 提取特征中...
批次 26/70, 提取特征中...
批次 27/70, 提取特征中...
批次 28/70, 提取特征中...
批次 29/70, 提取特征中...
批次 30/70, 提取特征中...
批次 31/70, 提取特征中...
批次 32/70, 提取特征中...
批次 33/70, 提取特征中...
批次 34/70, 提取特征中...
批次 35/70, 提取特征中...
批次 36/70, 提取特征中...
批次 37/70, 提取特征中...
批次 38/70, 提取特征中...
批次 39/70, 提取特征中...
批次 40/70, 提取特征中...
批次 41/70, 提取特征中...
批次 42/70, 提取特征中...
批次 43/70, 提取特征中...
批次 44/70, 提取特征中...
批次 45/70, 提取特征中...
批次 46/70, 提取特征中...
批次 47/70, 提取特征中...
批次 48/70, 提取特征中...
批次 49/70, 提取特征中...
批次 50/70, 提取特征中...
批次 51/70, 提取特征中...
批次 52/70, 提取特征中...
批次 53/70, 提取特征中...
批次

In [111]:
from sklearn.model_selection import GridSearchCV

# 使用PCA降维
n_components = 150
pca = PCA(n_components=n_components, svd_solver='randomized', whiten=True).fit(features)
features_pca = pca.transform(features)
features_pca

array([[ 1.18205336e+00, -7.29721801e-01, -1.12696956e+00, ...,
         7.32098361e-01, -5.25039198e-01, -6.22853315e-01],
       [ 1.48848156e+00,  6.36829829e-01, -2.84690868e-01, ...,
        -1.57586254e+00,  5.72960207e-01,  8.94853394e-01],
       [ 1.33407889e+00,  1.80455261e-03, -3.19030747e-01, ...,
        -4.09173442e-01, -6.78014844e-01,  2.56135942e-01],
       ...,
       [ 1.71640993e+00, -1.16847777e+00,  9.38286791e-01, ...,
         3.96090773e-01, -5.96484077e-01, -8.47643643e-01],
       [ 1.57049856e+00, -3.35172163e-01, -1.93209663e+00, ...,
        -1.88350876e+00,  1.08855698e-02, -8.49626104e-01],
       [-1.05756015e+00, -1.14169851e+00,  2.77216889e-01, ...,
        -1.18589317e+00, -1.47108229e+00,  3.64303314e-01]])

In [112]:
from time import time

# 训练SVM模型
print("训练SVM分类模型")
t0 = time()
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]}
clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
clf = clf.fit(features_pca, true_labels)
print("耗时 %0.3fs" % (time() - t0))

训练SVM分类模型
耗时 21.214s


In [113]:
print("网格搜索找到的最佳估计器:")
print(clf.best_estimator_)

网格搜索找到的最佳估计器:
SVC(C=1000.0, class_weight='balanced', gamma=0.001)


In [114]:
# 在测试集上预测
test_features = []
test_true_labels = []
model.eval()
with torch.no_grad():
    for images, labels in test_dataloader:
        outputs = model(images)
        test_features.extend(outputs.squeeze().numpy())
        test_true_labels.extend(labels.numpy())

# 使用PCA降维
test_features_pca = pca.transform(test_features)
# 在测试集上预测
test_pred_labels = clf.predict(test_features_pca)
test_pred_labels

array([0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0,

In [115]:
i = 0
true_labels_test = []
for batch_idx, (images, labels) in enumerate(test_dataset):
    print(i, labels)
    true_labels_test.append(labels)
    i += 1

len(true_labels_test)

0 0
1 0
2 0
3 1
4 0
5 1
6 1
7 1
8 0
9 0
10 1
11 1
12 0
13 0
14 0
15 0
16 1
17 0
18 0
19 1
20 1
21 0
22 0
23 0
24 0
25 1
26 1
27 1
28 0
29 0
30 0
31 1
32 0
33 0
34 1
35 1
36 0
37 0
38 0
39 0
40 1
41 1
42 0
43 0
44 0
45 1
46 1
47 0
48 1
49 1
50 0
51 1
52 1
53 0
54 1
55 0
56 0
57 0
58 0
59 0
60 1
61 0
62 1
63 1
64 1
65 1
66 1
67 1
68 1
69 1
70 1
71 0
72 1
73 1
74 1
75 0
76 0
77 1
78 0
79 0
80 1
81 0
82 0
83 0
84 1
85 1
86 0
87 1
88 1
89 0
90 1
91 1
92 1
93 0
94 1
95 1
96 0
97 1
98 1
99 0
100 1
101 0
102 1
103 0
104 1
105 1
106 0
107 1
108 1
109 1
110 0
111 0
112 0
113 0
114 1
115 1
116 0
117 1
118 1
119 1
120 1
121 1
122 1
123 1
124 1
125 0
126 1
127 0
128 0
129 1
130 1
131 1
132 1
133 0
134 0
135 1
136 0
137 0
138 1
139 1
140 1
141 0
142 1
143 1
144 1
145 0
146 0
147 0
148 0
149 1
150 1
151 1
152 0
153 1
154 0
155 0
156 0
157 0
158 1
159 1
160 1
161 0
162 0
163 1
164 0
165 0
166 0
167 1
168 1
169 1
170 0
171 0
172 0
173 0
174 0
175 1
176 1
177 0
178 1
179 1
180 0
181 1
182 1
183 1
184 0


560

In [116]:
import numpy as np

array_test = np.array(true_labels_test - test_pred_labels)
print(len(array_test))
len(array_test) - np.count_nonzero(array_test)  # 长度减去非0的数就是0的数，0就是说真实和预测一样

560


537

In [117]:
num_all = len(array_test)
is_true = len(array_test) - np.count_nonzero(array_test)
is_true / num_all

0.9589285714285715

In [118]:
# 输出分类报告和混淆矩阵
target_names = ['cat', 'dog']
print("分类报告：")
print(classification_report(test_true_labels, test_pred_labels, target_names=target_names))
print("混淆矩阵:")
print(confusion_matrix(test_true_labels, test_pred_labels))

分类报告：
              precision    recall  f1-score   support

         cat       0.96      0.96      0.96       277
         dog       0.96      0.96      0.96       283

    accuracy                           0.96       560
   macro avg       0.96      0.96      0.96       560
weighted avg       0.96      0.96      0.96       560

混淆矩阵:
[[266  11]
 [ 12 271]]


In [None]:
# 保存训练好的SVM模型
# torch.save(clf, 'svm_model.pth') # 0.9642857142857143

In [None]:
# 保存PCA模型
# torch.save(pca, 'pca_model.pth')

In [121]:
# 加载保存的SVM模型
saved_model_path = './pth/svm_model.pth'
clf = torch.load(saved_model_path)

# 读取新图像
new_image_path = r"C:\Users\lenovo\Downloads\archive\cat和dog\test\dog\dog.1487.jpg"
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

image = Image.open(new_image_path).convert("RGB")
image = transform(image).unsqueeze(0)

# 提取特征
model = models.resnet18(pretrained=True)
model = torch.nn.Sequential(*(list(model.children())[:-1]))
model.eval()

with torch.no_grad():
    outputs = model(image)

features = outputs.squeeze().numpy()

# 使用PCA降维
features_pca = pca.transform(features.reshape(1, -1))

# 预测
predicted_label = clf.predict(features_pca)[0]

# 映射预测标签到类别名称
predicted_class = 'cat' if predicted_label == 0 else 'dog'

print(f'新图像的预测类别是：{predicted_class}')

新图像的预测类别是：dog
