# In [5]:
import itertools
import math
import os

def calculate_distance(coord1, coord2):
    """
    Return the Euclidean distance between two three-dimensional coordinates.

    Only the components at indices 0, 1 and 2 of each argument are read;
    anything beyond the third component is ignored.
    """
    delta_x = coord1[0] - coord2[0]
    delta_y = coord1[1] - coord2[1]
    delta_z = coord1[2] - coord2[2]
    squared_length = delta_x ** 2 + delta_y ** 2 + delta_z ** 2
    return math.sqrt(squared_length)

def _parse_saltbridge_lines(lines):
    """Group the usable records of one input file by their leading key.

    Lines starting with "RMSD" or "No", and blank lines, are skipped.
    Every other line is split on single spaces: field 0 is the key and
    field 9 is evaluated as the coordinate value. Lines for which that
    fails (too few fields, unparsable coordinate) are ignored.

    Returns a dict mapping key -> list of (stripped_line, coordinate),
    preserving first-seen key order and line order.
    """
    records_by_key = {}
    for line in lines:
        if line.startswith(("RMSD", "No")) or not line.strip():
            continue
        parts = line.split(" ")
        try:
            # NOTE(security): eval() executes whatever expression the input
            # file contains. It is kept to preserve the accepted input
            # format; if the field is always a plain tuple/list literal,
            # ast.literal_eval would be the safe replacement -- TODO confirm.
            bridge_com = eval(parts[9].strip())
        except Exception:
            # Best-effort parsing: skip malformed lines, but (unlike a bare
            # ``except:``) let KeyboardInterrupt / SystemExit propagate.
            continue
        records_by_key.setdefault(parts[0], []).append((line.strip(), bridge_com))
    return records_by_key


def _write_contacts(records_by_key, out_file, threshold):
    """Write every cross-key record pair closer than ``threshold``.

    Returns the set of line strings that took part in at least one pair.
    """
    matched = set()
    # combinations() yields each unordered pair of keys once, in the same
    # order as a nested "i, then everything after i" loop.
    for (key1, coords1), (key2, coords2) in itertools.combinations(
        records_by_key.items(), 2
    ):
        matched_lines = [
            (line1, line2)
            for line1, coord1 in coords1
            for line2, coord2 in coords2
            if calculate_distance(coord1, coord2) < threshold
        ]
        if not matched_lines:
            continue

        out_file.write(f"{key1} 与 {key2} :\n")
        for line1, line2 in matched_lines:
            out_file.write(f"{line1}\n{line2}\n\n")
            matched.update((line1, line2))
        # Two extra blank lines separate consecutive key pairs.
        out_file.write("\n\n")
    return matched


def _write_unmatched(records_by_key, out_file, matched):
    """Write, grouped by key, every record line that is not in ``matched``."""
    for key, coords in records_by_key.items():
        lines_to_write = [line for line, _ in coords if line not in matched]
        if lines_to_write:
            out_file.write(f"{key} :\n")
            out_file.writelines(f"{line}\n" for line in lines_to_write)
            # One blank line closes each key's section.
            out_file.write("\n")


def process_saltbridge(input_dir, output_dir1, output_dir2, threshold=9):
    """Split the records of every ``.txt`` file in ``input_dir`` into
    "in contact" and "not in contact" output files.

    For each input file, records are grouped by their first field. Records
    belonging to different keys are compared pairwise; a pair whose
    coordinates are closer than ``threshold`` is written to the file of the
    same name in ``output_dir1``. Every record that never took part in such
    a pair is written to the file of the same name in ``output_dir2``.
    Both output files are created even when nothing is written to them.

    Args:
        input_dir: Directory scanned (non-recursively) for ``.txt`` files.
        output_dir1: Directory for the matched pairs; created if missing.
        output_dir2: Directory for the unmatched records; created if missing.
        threshold: Exclusive distance cutoff, in the units of the input
            coordinates. Defaults to 9, the value that used to be
            hard-coded.
    """
    os.makedirs(output_dir1, exist_ok=True)
    os.makedirs(output_dir2, exist_ok=True)

    for file_name in os.listdir(input_dir):
        if not file_name.endswith('.txt'):
            continue

        with open(os.path.join(input_dir, file_name), 'r') as file:
            protein_name = _parse_saltbridge_lines(file)

        with open(os.path.join(output_dir1, file_name), 'w') as file1, \
                open(os.path.join(output_dir2, file_name), 'w') as file2:
            written_lines_to_folder1 = _write_contacts(protein_name, file1, threshold)
            _write_unmatched(protein_name, file2, written_lines_to_folder1)


# Folder that is scanned for the input .txt files.
input_directory = "/home/databank_70t/pengziyu/scop/All/super/saltbridge-all/"
# Output folders: the first is passed as output_dir1, the second as output_dir2.
# NOTE(review): both names say "10A" while process_saltbridge compares
# distances against 9 -- confirm which cutoff is intended.
output_directory1 = "/home/databank_70t/pengziyu/scop/All/super/con/10A-con-saltbridge/"
output_directory2 = "/home/databank_70t/pengziyu/scop/All/super/uncon/10A-uncon-saltbridge/"

# Run the comparison and save the results.
process_saltbridge(
    input_dir=input_directory,
    output_dir1=output_directory1,
    output_dir2=output_directory2,
)
