In [1]:
import os
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from PIL import Image

# Load a pretrained ResNet18 and use it as a fixed feature extractor.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=`; confirm the installed version before switching.
model = models.resnet18(pretrained=True)
# Rebuild the network from all child modules except the last one, which
# removes the final classification layer.
model = nn.Sequential(*list(model.children())[:-1])
model = model.to('cuda')
# Put the network in inference mode. Without this, BatchNorm layers normalise
# with the statistics of the current batch (and keep updating their running
# estimates), so the extracted features would depend on batch composition.
# torch.no_grad() alone does not change that behaviour.
model.eval()

# Image preprocessing pipeline, applied to every loaded image.
# Per-channel normalisation statistics (presumably the ImageNet values that
# match the pretrained weights -- confirm if the weights change).
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Steps run in list order.
preprocess_steps = [
    transforms.Grayscale(num_output_channels=3),  # grayscale, emitted as 3 channels
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
]
preprocess = transforms.Compose(preprocess_steps)

# Load the dataset through ImageFolder and apply the preprocessing.
class CustomImageFolder(ImageFolder):
    """ImageFolder subclass whose items are the transformed image only.

    The class label stored alongside each sample is ignored, so iterating
    the dataset yields images rather than (image, label) pairs.
    """

    def __init__(self, root, transform=None):
        """Forward ``root`` and ``transform`` to ``ImageFolder``."""
        super().__init__(root, transform=transform)

    def __getitem__(self, index):
        """Load sample ``index``, apply ``self.transform`` if set, and return it."""
        image_path, _ = self.samples[index]  # label is deliberately discarded
        image = self.loader(image_path)
        return image if self.transform is None else self.transform(image)

# Root folder containing the images.
folder_path = "E:/Data/JHA/CASIA_char_imgs/Gnttest"

# Build the dataset and wrap it in a DataLoader for batched loading.
# NOTE(review): with num_workers > 0 on Windows, worker processes may fail to
# import a dataset class defined inside a notebook -- confirm this runs, or
# move the class into a .py module.
dataset = CustomImageFolder(root=folder_path, transform=preprocess)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=False,  # keep the dataset's sample order
    num_workers=4,
)

# Extract image features batch by batch.
# Send each batch to whichever device the model's parameters live on, rather
# than hard-coding a device name here.
device = next(model.parameters()).device
all_features = []
with torch.no_grad():  # inference only; no gradients are needed
    for batch in dataloader:
        features = model(batch.to(device))
        # Flatten everything except the batch dimension. A bare .squeeze()
        # would also drop the batch dimension when only one image is present,
        # leaving a 1-D result and breaking per-image indexing downstream.
        features = torch.flatten(features, start_dim=1)
        all_features.append(features.cpu())  # collect on the CPU

# Concatenate the per-batch results into one (num_images, num_features) tensor.
all_features = torch.cat(all_features, dim=0)

# Convert to a NumPy array for plotting and later analysis.
features_np = all_features.numpy()
print(f"All features shape: {features_np.shape}")

# Visualise the feature vector of the first image as a line plot.
first_image_features = features_np[0].flatten()
fig, ax = plt.subplots()
ax.plot(first_image_features)
plt.show()


  from .autonotebook import tqdm as notebook_tqdm


In [10]:
from sklearn.decomposition import PCA


# Fit a 2-component PCA on the image features and project every image onto
# the two principal axes.
pca = PCA(n_components=2)
features_2d = pca.fit_transform(features_np)  # one 2-D point per image

# Scatter the projected images. `pca.components_` holds the principal axes
# themselves (one row per component, one column per feature), so plotting its
# rows against each other would show feature loadings, not the images.
plt.scatter(features_2d[:, 0], features_2d[:, 1])
plt.xlabel("PC 1")
plt.ylabel("PC 2")
plt.show()

NameError: name 'PCA' is not defined