In [7]:
import os
import shutil
import torch
from torchvision import models, transforms
from sklearn.cluster import KMeans
from PIL import Image

# 加载预训练的ResNet模型
model = models.resnet18(pretrained=True)
model = torch.nn.Sequential(*(list(model.children())[:-1]))
model.eval()

# 图像预处理
preprocess = transforms.Compose([
    # transforms.Resize(256),
    # transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# 加载图像文件夹中的图像
image_folder_path = "/local/data1/honzh073/data/8bit_downsample_256"
image_file_names = os.listdir(image_folder_path)
images = []
for image_file in image_file_names:
    image_path = os.path.join(image_folder_path, image_file)
    image = Image.open(image_path).convert("RGB")
    image_tensor = preprocess(image)
    images.append(image_tensor)

# 创建一个包含所有图像特征的张量
batch = torch.stack(images)

# 使用ResNet模型提取特征
with torch.no_grad():
    features = model(batch)

# 将特征转换为NumPy数组
features_array = features.squeeze().numpy()

# 使用K均值聚类将图像分成k个类别（这里假设你想要分成k个类别）
k = 2  # 你可以根据需要调整聚类的数量
kmeans = KMeans(n_clusters=k, random_state=0).fit(features_array)

# 创建目标文件夹（例如，cluster_0, cluster_1, ...）
output_folder = "/local/data1/honzh073/data/cluster"
os.makedirs(output_folder, exist_ok=True)

# 将图像复制到相应的文件夹
for i, cluster_label in enumerate(kmeans.labels_):
    cluster_folder = os.path.join(output_folder, f"cluster_{cluster_label}")
    os.makedirs(cluster_folder, exist_ok=True)
    image_file_name = image_file_names[i]
    shutil.copy(os.path.join(image_folder_path, image_file_name), cluster_folder)

print("图像已成功聚类并复制到相应的文件夹中。")


  super()._check_params_vs_input(X, default_n_init=10)


图像已成功聚类并复制到相应的文件夹中。
