In [1]:
# net.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models.resnet import resnet50
import os
from torchvision import transforms


# stage one ,unsupervised learning
class SimCLRStage1(nn.Module):
    """SimCLR stage-one network: a ResNet-50 based encoder plus an MLP projection head.

    The encoder ``f`` is built from the children of a freshly constructed
    ``resnet50()``, with three modifications visible in the constructor:

    * ``conv1`` is swapped for a 3x3, stride-1 convolution taking 1 input channel;
    * every ``nn.MaxPool2d`` child is dropped;
    * every ``nn.Linear`` child (the classification layer) is dropped.

    The projection head ``g`` maps the 2048-d encoder output to ``feature_dim``.

    Args:
        feature_dim: size of the projection-head output (default 128).
    """

    def __init__(self, feature_dim=128):
        super().__init__()

        # encoder: walk the backbone children in their original order
        backbone_layers = []
        for layer_name, layer in resnet50().named_children():
            if layer_name == 'conv1':
                # single-channel stem replaces the stock 3-channel 7x7 convolution
                layer = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)
            if isinstance(layer, (nn.Linear, nn.MaxPool2d)):
                continue
            backbone_layers.append(layer)
        self.f = nn.Sequential(*backbone_layers)

        # projection head
        self.g = nn.Sequential(
            nn.Linear(2048, 512, bias=False),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, feature_dim, bias=True),
        )

    def forward(self, x):
        """Return ``(feature, out)``, both L2-normalised along the last dimension.

        ``feature`` is the flattened encoder output; ``out`` is the projection
        head applied to the un-normalised flattened encoder output.
        """
        feature = torch.flatten(self.f(x), start_dim=1)
        projected = self.g(feature)
        return F.normalize(feature, dim=-1), F.normalize(projected, dim=-1)



class Loss(torch.nn.Module):
    """SimCLR-style contrastive loss over two batches of paired embeddings.

    For every embedding the positive is its partner in the other view and the
    negatives are all remaining ``2B - 2`` embeddings. With L2-normalised inputs
    this is the NT-Xent loss.
    """

    def __init__(self):
        super(Loss, self).__init__()

    def forward(self, out_1, out_2, batch_size, temperature=0.5):
        """Compute the mean contrastive loss.

        Args:
            out_1: ``[B, D]`` embeddings of the first view.
            out_2: ``[B, D]`` embeddings of the second view; row ``i`` pairs with
                row ``i`` of ``out_1``.
            batch_size: kept for backward compatibility only. The real batch size
                is read from ``out_1`` so that a smaller final batch no longer
                raises a shape error.
            temperature: softmax temperature dividing every similarity.

        Returns:
            Scalar tensor: the loss averaged over all ``2B`` embeddings.
        """
        n_pairs = out_1.shape[0]
        # [2*B, D]
        out = torch.cat([out_1, out_2], dim=0)
        # [2*B, 2*B] similarity logits
        logits = torch.mm(out, out.t().contiguous()) / temperature
        # Exclude each embedding's similarity with itself from the denominator.
        self_mask = torch.eye(2 * n_pairs, dtype=torch.bool, device=logits.device)
        logits = logits.masked_fill(self_mask, float('-inf'))

        # Work in log space: -log(exp(pos) / sum(exp(logits))) == logsumexp(logits) - pos.
        # This avoids the float overflow (and resulting NaN) of exp() at small temperatures.
        log_denominator = torch.logsumexp(logits, dim=-1)

        # Numerator: the elementwise product summed over D is the pairwise dot product.
        pos_logit = torch.sum(out_1 * out_2, dim=-1) / temperature
        # [2*B] - each pair contributes once per view
        pos_logit = torch.cat([pos_logit, pos_logit], dim=0)
        return (log_denominator - pos_logit).mean()


# Training-time preprocessing and augmentation applied to each cropped PIL image.
# NOTE(review): the single-element mean/std lists imply one-channel (grayscale) input - confirm
# against the dataset; the constants themselves are not derived anywhere in this notebook.
train_transform = transforms.Compose([
    transforms.Resize(256),  # first resize: Resize(256) scales the shorter side to 256 px (256x256 for square crops)
    transforms.CenterCrop(224),  # then crop the central 224x224 region
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
    transforms.ToTensor(),
    transforms.Normalize([0.4914], [0.2023])])

# Inference-time preprocessing: same resize / crop / normalisation, without the random augmentations.
test_transform = transforms.Compose([
    transforms.Resize(256),  # first resize: Resize(256) scales the shorter side to 256 px (256x256 for square crops)
    transforms.CenterCrop(224),  # then crop the central 224x224 region
    transforms.ToTensor(),
    transforms.Normalize([0.4914], [0.2023])])





In [2]:
import os
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt
import math
import random
import time



class TestImageDataset:
    """Map-style dataset yielding one random square crop per image, for feature extraction.

    File stems are parsed as ``<id>_<area_median>_...``: the second
    underscore-separated field is read as a float and sets the crop size.
    Every crop is also written to ``save_dir`` as ``<stem>_<pass>.jpg``, where
    ``<pass>`` is the number of completed passes over the dataset.

    Args:
        dataset_path: directory scanned (non-recursively) for image files.
        transform: callable applied to the PIL crop, or ``None`` to return the
            PIL image unchanged.
        save_dir: directory the crops are written to. Defaults to the path that
            was previously hard-coded.
    """

    def __init__(self, dataset_path, transform, save_dir='/root/project/test1/'):
        self.dataset_path = dataset_path
        self.image_files = self.load_image_files()
        self.transform = transform
        self.save_dir = save_dir
        # Number of items served so far; used to derive the pass index in saved file names.
        # NOTE(review): presumably only meaningful with sequential, single-process loading - confirm.
        self.count = 0

    def max_boundary(self, a, s):
        """Distance from point ``a = (row, col)`` to the nearest edge of an ``s`` x ``s`` image.

        Kept for backward compatibility; ``__getitem__`` uses ``_margins``, which
        also handles non-square images.
        """
        return min(s - a[0], a[0], s - a[1], a[1])

    @staticmethod
    def _margins(points, shape):
        """Distance to the nearest image edge for one ``(row, col)`` point or an ``(N, 2)`` array."""
        height, width = shape[:2]
        rows, cols = points[..., 0], points[..., 1]
        return np.minimum(np.minimum(rows, height - rows), np.minimum(cols, width - cols))

    def _pick_center(self, candidates, shape, min_boundary):
        """Uniformly sample a candidate whose edge margin exceeds ``min_boundary``.

        Raises:
            ValueError: if no candidate qualifies (including an empty mask).
        """
        valid = candidates[self._margins(candidates, shape) > min_boundary]
        if valid.shape[0] == 0:
            raise ValueError(
                f'no crop centre with margin > {min_boundary} among {candidates.shape[0]} mask pixels')
        return valid[np.random.choice(valid.shape[0])]

    def load_image_files(self):
        """Return the paths of all image files found directly inside ``dataset_path``."""
        return [
            os.path.join(self.dataset_path, filename)
            for filename in os.listdir(self.dataset_path)
            if filename.endswith(('.jpg', '.png', '.jpeg', '.bmp'))
        ]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, index):
        """Return ``(crop, file_stem)`` for the image at ``index``.

        Raises:
            ValueError: if the file cannot be decoded or no valid crop centre exists.
        """
        image_path = self.image_files[index]
        # Decode from raw bytes (works with non-ASCII paths) straight to grayscale,
        # so 1-, 3- and 4-channel files are all handled and match the training decode.
        image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise ValueError('cannot decode image: ' + image_path)
        image = (255 - image)

        # 15x15 rectangular structuring element
        kernel = np.ones((15, 15), np.uint8)
        # Dilate twice, then erode four times; any surviving non-zero pixel is a candidate centre.
        dilated_image = cv2.dilate(image, kernel, iterations=2)
        mask = cv2.erode(dilated_image, kernel, iterations=4)
        indices = np.argwhere(mask > 0)

        # File stem without directory or extension; its second '_' field drives the crop size.
        image_name_without_suffix = os.path.splitext(os.path.basename(image_path))[0]
        area_median = float(image_name_without_suffix.split('_')[1])
        min_boundary = int(math.sqrt(area_median) * 10)

        L_center = self._pick_center(indices, image.shape, min_boundary)
        row, col = int(L_center[0]), int(L_center[1])
        # Half-size of the crop: at least min_boundary, at most twice that or the edge margin.
        upper = min(int(self._margins(L_center, image.shape)), min_boundary * 2)
        radius = random.randint(min_boundary, upper)
        L_cropped_image = image[row - radius: row + radius, col - radius: col + radius]

        pass_index = self.count // len(self.image_files)
        cv2.imwrite(os.path.join(self.save_dir, f'{image_name_without_suffix}_{pass_index}.jpg'),
                    L_cropped_image)
        self.count += 1

        L_img = Image.fromarray(L_cropped_image)
        if self.transform is not None:
            L_img = self.transform(L_img)

        return L_img, image_name_without_suffix



class TrainImageDataset:
    """Map-style dataset yielding two random square crops of the same image (a positive pair).

    File stems are parsed as ``<id>_<area_median>_<value>``: the second
    underscore-separated field sets the crop size and the third is returned as
    a float alongside the two crops.

    Args:
        dataset_path: directory scanned (non-recursively) for image files.
        transform: callable applied to each PIL crop, or ``None`` to return the
            PIL images unchanged.
    """

    # Maximum pixel distance allowed between the two crop centres of a pair.
    MAX_PAIR_DISTANCE = 1024

    def __init__(self, dataset_path, transform):
        self.dataset_path = dataset_path
        self.image_files = self.load_image_files()
        self.transform = transform

    def max_boundary(self, a, s):
        """Distance from point ``a = (row, col)`` to the nearest edge of an ``s`` x ``s`` image.

        Kept for backward compatibility; ``__getitem__`` uses ``_margins``, which
        also handles non-square images.
        """
        return min(s - a[0], a[0], s - a[1], a[1])

    @staticmethod
    def _margins(points, shape):
        """Distance to the nearest image edge for one ``(row, col)`` point or an ``(N, 2)`` array."""
        height, width = shape[:2]
        rows, cols = points[..., 0], points[..., 1]
        return np.minimum(np.minimum(rows, height - rows), np.minimum(cols, width - cols))

    def _pick_center(self, candidates, shape, min_boundary, anchor=None):
        """Uniformly sample a candidate whose edge margin exceeds ``min_boundary``.

        When ``anchor`` is given, candidates farther than ``MAX_PAIR_DISTANCE``
        from it are excluded as well.

        Raises:
            ValueError: if no candidate qualifies (including an empty mask).
        """
        keep = self._margins(candidates, shape) > min_boundary
        if anchor is not None:
            keep &= np.linalg.norm(candidates - anchor, axis=1) <= self.MAX_PAIR_DISTANCE
        valid = candidates[keep]
        if valid.shape[0] == 0:
            raise ValueError(
                f'no crop centre with margin > {min_boundary} among {candidates.shape[0]} mask pixels')
        return valid[np.random.choice(valid.shape[0])]

    def _crop(self, image, center, min_boundary):
        """Cut a random-size square around ``center`` and run it through ``transform``."""
        row, col = int(center[0]), int(center[1])
        # Half-size of the crop: at least min_boundary, at most twice that or the edge margin.
        upper = min(int(self._margins(center, image.shape)), min_boundary * 2)
        radius = random.randint(min_boundary, upper)
        patch = Image.fromarray(image[row - radius: row + radius, col - radius: col + radius])
        return patch if self.transform is None else self.transform(patch)

    def load_image_files(self):
        """Return the paths of all image files found directly inside ``dataset_path``."""
        return [
            os.path.join(self.dataset_path, filename)
            for filename in os.listdir(self.dataset_path)
            if filename.endswith(('.jpg', '.png', '.jpeg', '.bmp'))
        ]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, index):
        """Return ``(left_crop, right_crop, value)`` for the image at ``index``.

        Raises:
            ValueError: if the file cannot be decoded or no valid crop centre exists.
        """
        image_path = self.image_files[index]
        # Decode once, from raw bytes (works with non-ASCII paths), straight to grayscale.
        # The previous code decoded twice and kept only the cv2.imread result, which is
        # None on platforms where imread cannot open non-ASCII paths.
        image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise ValueError('cannot decode image: ' + image_path)
        image = (255 - image)

        # 15x15 rectangular structuring element
        kernel = np.ones((15, 15), np.uint8)
        # Dilate twice, then erode four times; any surviving non-zero pixel is a candidate centre.
        dilated_image = cv2.dilate(image, kernel, iterations=2)
        mask = cv2.erode(dilated_image, kernel, iterations=4)
        indices = np.argwhere(mask > 0)

        # File stem without directory or extension; fields are separated by '_'.
        image_name_without_suffix = os.path.splitext(os.path.basename(image_path))[0]
        name_fields = image_name_without_suffix.split('_')
        area_median = float(name_fields[1])
        min_boundary = int(math.sqrt(area_median) * 10)

        # Left view first, then a right view centred within MAX_PAIR_DISTANCE of it.
        L_center = self._pick_center(indices, image.shape, min_boundary)
        L_img = self._crop(image, L_center, min_boundary)
        R_center = self._pick_center(indices, image.shape, min_boundary, anchor=L_center)
        R_img = self._crop(image, R_center, min_boundary)

        return L_img, R_img, float(name_fields[2])


# Usage example: build the training dataset and preview one positive pair.
dataset_path = '/root/project/建筑图像'  # replace with your dataset path
mask_path = '/root/project/建筑mask'
dataset = TrainImageDataset(dataset_path, train_transform)

# Report the dataset size
print(f'Number of images in the dataset: {len(dataset)}')

# Build a data loader
batch_size = 1  # batch size
shuffle = True   # whether to shuffle; train() also reads this module-level name
train_data = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


def tensor_to_image(img_tensor, mean=0.4914, std=0.2023):
    """Convert a normalised ``(C, H, W)`` tensor back into a displayable PIL image.

    The tensor is de-normalised (``x * std + mean``), clipped to ``[0, 1]`` and
    scaled to 8-bit. Casting the normalised floats directly to ``uint8`` - as
    the preview used to do - truncates almost every pixel to 0 or wraps
    negative values, so the picture shown was meaningless.

    Args:
        img_tensor: tensor produced by ``ToTensor`` followed by ``Normalize``.
        mean: mean passed to ``Normalize`` (default matches ``train_transform``).
        std: std passed to ``Normalize`` (default matches ``train_transform``).
    """
    pixels = img_tensor.detach().cpu().numpy()
    pixels = np.clip(pixels * std + mean, 0.0, 1.0)
    pixels = (pixels * 255).round().astype('uint8')
    if pixels.shape[0] == 1:
        # single channel -> (H, W)
        pixels = pixels.squeeze(0)
    else:
        # multi channel -> (H, W, C)
        pixels = pixels.transpose(1, 2, 0)
    return Image.fromarray(pixels)


# Show the first sample of the first batch only
for batch, (imgL, imgR, area_proportion) in enumerate(train_data):
    print(area_proportion)
    tensor_to_image(imgL[0]).show()
    tensor_to_image(imgR[0]).show()
    break  # only preview one batch


FileNotFoundError: [Errno 2] No such file or directory: '/root/project/建筑图像'

In [3]:
import gc
import torch



# train stage one
def train(batch_size, train_transform, max_epoch, dataset_path='/root/project/建筑图像'):
    """Train the stage-one SimCLR model on pairs of crops and return it.

    Args:
        batch_size: number of image pairs per optimisation step. Incomplete
            final batches are dropped.
        train_transform: transform handed to ``TrainImageDataset``.
        max_epoch: number of passes over the dataset.
        dataset_path: directory holding the training images. Defaults to the
            path that was previously hard-coded.

    Returns:
        The trained ``SimCLRStage1`` module, left on the training device.
    """
    gc.collect()
    torch.cuda.empty_cache()
    # Fall back to the CPU rather than failing when no GPU is present.
    DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("current deveice:", DEVICE)

    dataset = TrainImageDataset(dataset_path, train_transform)
    # shuffle is set explicitly instead of reading a notebook-level global;
    # drop_last replaces the manual "skip incomplete batch" check.
    train_data = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

    model = SimCLRStage1().to(DEVICE)
    lossLR = Loss().to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)

    for epoch in range(1, max_epoch + 1):
        model.train()
        total_loss = 0
        for imgL, imgR, _ in train_data:
            imgL, imgR = imgL.to(DEVICE), imgR.to(DEVICE)

            # Only the projection-head outputs feed the contrastive loss.
            _, pre_L = model(imgL)
            _, pre_R = model(imgR)

            loss = lossLR(pre_L, pre_R, batch_size)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Average over the batches actually processed; guard against an empty loader.
        print("epoch loss:", total_loss / max(len(train_data), 1))
    gc.collect()
    torch.cuda.empty_cache()
    return model


# Run stage-one training with batch size 4 for 30 epochs, using the module-level train_transform.
model = train(4, train_transform, 30)

# Persist the whole model object (a pickled module rather than only its state_dict).
torch.save(model, '/root/project/model1.pth')



current deveice: cuda:0
epoch loss: 1.4812226464339093
epoch loss: 1.260064524905879
epoch loss: 1.1165032447600851
epoch loss: 1.2702837632197579
epoch loss: 1.2144389591869615
epoch loss: 1.1259015231143956
epoch loss: 1.2411917000830102
epoch loss: 1.1560972156215543
epoch loss: 1.1476663341041373
epoch loss: 1.1389397055733532
epoch loss: 1.3169498028589182
epoch loss: 1.1188158407217028
epoch loss: 1.0487144016275027
epoch loss: 1.155376302165573
epoch loss: 1.2216583073210745
epoch loss: 1.1314116235540694
epoch loss: 1.1771064999581529
epoch loss: 1.1278295617859189
epoch loss: 1.0647910487752001
epoch loss: 1.041476203470814
epoch loss: 1.1701397674710525
epoch loss: 1.1069976735372646
epoch loss: 1.0219305803795822
epoch loss: 1.0541141383072623
epoch loss: 1.043828462781597
epoch loss: 1.0645004828580145
epoch loss: 1.0323889070913856
epoch loss: 1.1420774244460739
epoch loss: 1.075731297143224
epoch loss: 1.028908107961927


In [3]:
import gc
# Load a pickled full-model checkpoint and switch it to inference mode.
# NOTE(review): torch.load unpickles arbitrary objects - only load checkpoints you trust.
# NOTE(review): this path ('上海model1.pth') differs from the file written by the training
# cell ('model1.pth'); confirm which checkpoint is intended.
model = torch.load('/root/project/上海model1.pth')
model.eval()


def test(batch_size, test_transform, max_epoch):
    """Extract projection-head features for every test image, ``max_epoch`` times over.

    Uses the module-level ``model``. Each pass over the dataset goes through
    ``TestImageDataset`` again, so every image contributes ``max_epoch`` rows.

    Args:
        batch_size: number of crops per forward pass.
        test_transform: transform handed to ``TestImageDataset``.
        max_epoch: number of passes over the dataset.

    Returns:
        ``(features, id_to_name)``: ``features`` is a float64 array with one row
        per crop, and ``id_to_name`` maps each row index to ``"<stem>_<pass>"``.
    """
    model.eval()
    gc.collect()
    torch.cuda.empty_cache()
    # Follow the model's own device instead of assuming cuda:0.
    DEVICE = next(model.parameters()).device
    dataset_path = '/root/project/上海建筑图像'  # replace with your dataset path
    dataset = TestImageDataset(dataset_path, test_transform)
    test_data = DataLoader(dataset, batch_size=batch_size)

    id_to_name = {}
    # Collect per-batch arrays and join once; stacking inside the loop is quadratic.
    feature_chunks = []
    with torch.no_grad():
        for epoch in range(max_epoch):
            for batch, (img, imgName) in enumerate(test_data):
                print(imgName)
                _, pre = model(img.to(DEVICE))
                feature_chunks.append(pre.cpu().numpy())
                # Row index of this batch's first crop in the final feature matrix.
                first_row = epoch * len(dataset) + batch * batch_size
                for i in range(pre.shape[0]):
                    id_to_name[first_row + i] = imgName[i] + '_' + str(epoch)
    gc.collect()
    torch.cuda.empty_cache()

    if feature_chunks:
        # float64 matches the dtype of the array this function used to build.
        features = np.concatenate(feature_chunks, axis=0).astype(np.float64)
    else:
        features = np.zeros((0, 128))
    return features, id_to_name

# Extract features with batch size 4 over 2 passes of the test set (each pass draws a new
# random crop per image), then print the row-index -> crop-name mapping.
features, id_to_name = test(4, test_transform, 2)
print(id_to_name)



('上海1206_175_0.1313', '上海1207_249_0.2278', '上海1208_353_0.2164', '上海1209_229_0.2396')
('上海1210_210_0.1833', '上海1211_190_0.2935', '上海1212_241_0.2132', '上海1803_3240_0.3772')
('上海1804_105_0.0959', '上海1805_144_0.2', '上海1807_251_0.4668', '上海1808_126_0.1265')
('上海1864_176_0.0557', '上海1865_128_0.0754', '上海1866_296_0.1902', '上海1867_285_0.2903')
('上海1868_148_0.2781', '上海1888_96_0.0877', '上海1889_144_0.1204', '上海1890_312_0.0983')
('上海1891_144_0.0484', '上海1892_150_0.0775', '上海1893_121_0.1703', '上海1894_143_0.0897')
('上海1895_132_0.0626', '上海1896_117_0.1475', '上海1897_205_0.1034', '上海1898_56_0.0348')
('上海1899_105_0.0318', '上海1900_81_0.0261', '上海2048_165_0.2169', '上海2049_96_0.087')
('上海2050_187_0.1176', '上海2052_244_0.1224', '上海2055_79_0.1698', '上海2056_78_0.0487')
('上海2071_150_0.2392', '上海2072_190_0.2696', '上海2073_144_0.1286', '上海2074_135_0.1308')
('上海2075_169_0.1005', '上海2076_144_0.2262', '上海2077_157_0.1314', '上海2078_205_0.1212')
('上海2079_253_0.1452', '上海2080_144_0.2338', '上海2081_147_0.0949', '上海2082_15

In [None]:
from sklearn.cluster import KMeans
import shutil
import json

# Min-max scale every feature column to [0, 1]
data = features.copy()
col_min = data.min(axis=0)
col_max = data.max(axis=0)
col_range = col_max - col_min
# A constant column has zero range; divide by 1 so it scales to 0 instead of NaN.
col_range[col_range == 0] = 1
data = (data - col_min) / col_range

# Initialise the KMeans clusterer; a fixed seed makes the clustering reproducible
kmeans = KMeans(n_clusters=5, random_state=0)

# Fit the data
kmeans.fit(data)

# Cluster centres
centers = kmeans.cluster_centers_

# Cluster label of every data point
labels = kmeans.labels_

# Directory holding the crops written during feature extraction
source_directory = '/root/project/test1'

# # Alternative source directory
# source_directory = '/root/project/建筑图像'

for cluster_index in range(kmeans.n_clusters):
    # Distance from every point to this cluster centre
    distances = np.linalg.norm(data - centers[cluster_index], axis=1)

    # Indices of the 10 points closest to the centre
    closest_indices = np.argsort(distances)[:10]

    # Target directory for this cluster; create it if it does not exist yet
    target_directory = '/root/project/第一组' + str(cluster_index) + '类'
    os.makedirs(target_directory, exist_ok=True)

    for i in closest_indices:
        file_name = id_to_name[int(i)] + '.jpg'
        # Copy the crop into the cluster's directory
        shutil.copy(os.path.join(source_directory, file_name),
                    os.path.join(target_directory, file_name))
        print('bingo ' + file_name)

# cmbh_to_label = {}
# for i in range(len(labels) - 1):
#     cmbh = id_to_name[i].split('_')[0]
#     cmbh_to_label[cmbh] = labels[i]

# def convert_int32(obj):
#     if isinstance(obj, np.int32):
#         return int(obj)
#     raise TypeError

# # 使用json.dumps()时
# json_string = json.dumps(cmbh_to_label, default=convert_int32)
# print(json_string)

# # 保存到文件
# with open('data.json', 'w', encoding='utf-8') as f:
#     json.dump(cmbh_to_label, f, default=convert_int32)

bingo 上海3500_108_0.0708_0.jpg
bingo 上海4071_168_0.1796_0.jpg
bingo 上海3904_135_0.258_1.jpg
bingo 上海4233_95_0.0646_0.jpg
bingo 上海3500_108_0.0708_1.jpg
bingo 上海3695_75_0.1063_1.jpg
bingo 上海5265_104_0.0973_1.jpg
bingo 上海5017_115_0.1988_1.jpg
bingo 上海3631_130_0.1926_1.jpg
bingo 上海2048_165_0.2169_1.jpg
bingo 上海3732_78_0.1664_1.jpg
bingo 上海3692_126_0.0631_1.jpg
bingo 上海2072_190_0.2696_0.jpg
bingo 上海4317_131_0.0576_0.jpg
bingo 上海4326_104_0.1548_1.jpg
bingo 上海2935_81_0.1497_0.jpg
bingo 上海3503_96_0.0652_0.jpg
bingo 上海2940_136_0.0317_0.jpg
bingo 上海4593_72_0.0461_0.jpg
bingo 上海3690_96_0.1147_0.jpg
bingo 上海4387_125_0.0368_1.jpg
bingo 上海1212_241_0.2132_0.jpg
bingo 上海3016_147_0.2159_0.jpg
bingo 上海4130_42_0.005_1.jpg
bingo 上海2866_99_0.0907_0.jpg
bingo 上海4664_74_0.1138_0.jpg
bingo 上海4717_80_0.0693_0.jpg
bingo 上海4947_181_0.2608_0.jpg
bingo 上海2874_110_0.2701_1.jpg
bingo 上海4786_99_0.0724_1.jpg
bingo 上海5128_95_0.1271_1.jpg
bingo 上海4404_71_0.0375_0.jpg
bingo 上海4322_104_0.025_1.jpg
bingo 上海3911_147_0.1128_1.j

In [8]:
# Read 'data.json' back from the working directory.
# NOTE(review): this rebinds `data`, which the clustering cell uses for the scaled feature
# matrix, and relies on `json` having been imported by that earlier cell.
with open('data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Print the loaded data
print(data)

{'常州1000': 1, '常州1001': 0, '常州1002': 0, '常州1004': 4, '常州1003': 4, '常州1005': 1, '常州1006': 1, '常州1007': 1, '常州1008': 1, '常州1009': 4, '常州100': 3, '常州1010': 1, '常州1011': 2, '常州1012': 1, '常州1013': 4, '常州1014': 3, '常州1016': 3, '常州1017': 2, '常州1018': 0, '常州1019': 0, '常州101': 1, '常州1020': 4, '常州1021': 1, '常州1022': 4, '常州1023': 4, '常州1024': 1, '常州1025': 4, '常州1026': 3, '常州1027': 2, '常州1028': 4, '常州1029': 3, '常州102': 1, '常州1030': 1, '常州1031': 1, '常州1032': 1, '常州1033': 2, '常州1034': 1, '常州1035': 4, '常州1036': 2, '常州1037': 4, '常州1038': 4, '常州1039': 4, '常州103': 3, '常州1040': 4, '常州1041': 3, '常州1042': 4, '常州1043': 1, '常州1044': 1, '常州1045': 1, '常州1046': 1, '常州1047': 1, '常州1048': 1, '常州1049': 0, '常州104': 2, '常州1051': 3, '常州1052': 1, '常州1053': 4, '常州1054': 4, '常州1055': 3, '常州1056': 4, '常州1057': 4, '常州105': 0, '常州106': 4, '常州107': 4, '常州108': 0, '常州109': 0, '常州10': 0, '常州110': 3, '常州111': 0, '常州112': 4, '常州113': 4, '常州114': 0, '常州115': 2, '常州116': 2, '常州117': 0, '常州118': 3, '常州119': 0, '常州11': 0, '常州12': 1