### ## 统计分析分子结构的元素个数

In [1]:
import os
import pandas as pd
from ase.io import read
from collections import defaultdict

In [2]:
# Define a function to read all COF files in a folder (定义读取文件夹中所有 COF 文件的函数)
def process_cof_files(directory):
    all_elements = set()  # Used to store all possible elements (用于存储所有可能出现的元素)
    file_data = []  # Stores element statistics for each file (存储每个文件的元素统计)
    
    # Iterate through all files in a folder(遍历文件夹中的所有文件)
    for filename in os.listdir(directory):
        if filename.endswith(".cif"):  # Process only .cif files(仅处理 .cif 文件)
            file_path = os.path.join(directory, filename)
            
            try:
                # Reading structures using ase(使用 ase 读取结构)
                atoms = read(file_path)
                
                # Extracting a list of elements(提取元素列表)
                elements = atoms.get_chemical_symbols()
                
                # Count the number of each element(统计每种元素的数量)
                element_count = defaultdict(int)
                for element in elements:
                    element_count[element] += 1
                    all_elements.add(element)
                
                # Add file names and element statistics to the data list(将文件名和元素统计结果添加到数据列表)
                file_data.append({
                    'Filename': filename,
                    **element_count  # Unpack element statistics dictionary and automatically assign to columns(解包元素统计字典，自动分配到列)
                })
            
            except Exception as e:
                # If the file reading fails, log the error message(如果文件读取失败，记录错误信息)
                file_data.append({'Filename': filename, 'Error': str(e)})
    
    return file_data, all_elements

# Write the processed data to an Excel file(将处理后的数据写入 Excel 文件)
def write_to_excel(data, all_elements, output_file):
    # 将所有元素转换为列表并排序（方便列展示）
    all_elements = sorted(all_elements)
    
    # 创建 DataFrame，填充缺失值为 0
    df = pd.DataFrame(data).fillna(0)
    
    # 确保 DataFrame 中所有元素都作为列存在
    for element in all_elements:
        if element not in df.columns:
            df[element] = 0  # 如果某些结构没有出现某个元素，则将其数量设为 0
    
    # 将数据写入 Excel
    df.to_excel(output_file, index=False)

# Main process主程序
if __name__ == "__main__":
    # COF 文件夹路径
    cof_directory = 'E:/COF和MOF数据库/Zr-MOFs-Database-master/阴离子柱撑MOF数据库/Database'
    
    # 处理 COF 文件夹中的文件并获取所有可能的元素
    processed_data, all_elements = process_cof_files(cof_directory)
    
    # 输出 Excel 文件路径
    output_excel = 'yinlizhumof_elements_detailed.xlsx'
    
    # 将数据写入 Excel
    write_to_excel(processed_data, all_elements, output_excel)
    
    print(f"处理完成，结果已写入 {output_excel}")


处理完成，结果已写入 yinlizhumof_elements_detailed.xlsx


### 统计分析分子结构的元素相对原子质量百分比

In [3]:
import os
from collections import defaultdict
import pandas as pd
from ase.io import read

# 定义元素相对原子质量（单位：g/mol）
atomic_weights = {
    "H": 1.008, "He": 4.0026, "Li": 6.94, "Be": 9.0122, "B": 10.81, "C": 12.011, "N": 14.007,
    "O": 15.999, "F": 18.998, "Ne": 20.180, "Na": 22.990, "Mg": 24.305, "Al": 26.982, "Si": 28.085,
    "P": 30.974, "S": 32.06, "Cl": 35.45, "Ar": 39.948, "K": 39.098, "Ca": 40.078, "Sc": 44.956,
    "Ti": 47.867, "V": 50.942, "Cr": 51.996, "Mn": 54.938, "Fe": 55.845, "Co": 58.933, "Ni": 58.693,
    "Cu": 63.546, "Zn": 65.38, "Ga": 69.723, "Ge": 72.630, "As": 74.922, "Se": 78.971, "Br": 79.904,
    "Kr": 83.798, "Rb": 85.468, "Sr": 87.62, "Y": 88.906, "Zr": 91.244, "Nb": 92.906, "Mo": 95.95,
    "Tc": 98, "Ru": 101.07, "Rh": 102.91, "Pd": 106.42, "Ag": 107.87, "Cd": 112.41, "In": 114.82,
    "Sn": 118.71, "Sb": 121.76, "Te": 127.60, "I": 126.90, "Xe": 131.29, "Cs": 132.91, "Ba": 137.33,
    "La": 138.91, "Ce": 140.12, "Pr": 140.91, "Nd": 144.24, "Pm": 145, "Sm": 150.36, "Eu": 151.96,
    "Gd": 157.25, "Tb": 158.93, "Dy": 162.50, "Ho": 164.93, "Er": 167.26, "Tm": 168.93, "Yb": 173.05,
    "Lu": 174.97, "Hf": 178.49, "Ta": 180.95, "W": 183.84, "Re": 186.21, "Os": 190.23, "Ir": 192.22,
    "Pt": 195.08, "Au": 196.97, "Hg": 200.59, "Tl": 204.38, "Pb": 207.2, "Bi": 208.98, "Po": 209,"At": 210, "Rn": 222
    # 根据需要添加更多元素及其相对原子质量
}

# 定义读取文件夹中所有 COF 文件并计算元素质量百分比的函数
def process_cof_files(directory):
    all_elements = set()  # 用于存储所有可能出现的元素
    file_data = []  # 存储每个文件的元素质量百分比

    # 遍历文件夹中的所有文件
    for filename in os.listdir(directory):
        if filename.endswith(".cif"):  # 仅处理 .cif 文件
            file_path = os.path.join(directory, filename)

            try:
                # 使用 ase 读取结构
                atoms = read(file_path)
                
                # 提取元素列表
                elements = atoms.get_chemical_symbols()
                
                # 统计每种元素的数量
                element_count = defaultdict(int)
                total_mass = 0  # 用于存储该结构的总质量
                
                for element in elements:
                    element_count[element] += 1
                    total_mass += atomic_weights[element]  # 累加总质量
                    all_elements.add(element)

                # 计算元素质量百分比
                element_mass_percent = {elem: (count * atomic_weights[elem] / total_mass) * 100
                                        for elem, count in element_count.items()}
                
                # 将文件名和元素质量百分比添加到数据列表
                file_data.append({
                    'Filename': filename,
                    **element_mass_percent  # 解包元素质量百分比字典，自动分配到列
                })

            except Exception as e:
                # 如果文件读取失败，记录错误信息
                file_data.append({'Filename': filename, 'Error': str(e)})

    return file_data, all_elements

# 将处理后的数据写入 Excel 文件
def write_to_excel(data, all_elements, output_file):
    all_elements = sorted(all_elements)  # 将所有元素转换为列表并排序
    df = pd.DataFrame(data).fillna(0)  # 创建 DataFrame，填充缺失值为 0

    # 确保 DataFrame 中所有元素都作为列存在
    for element in all_elements:
        if element not in df.columns:
            df[element] = 0  # 如果某些结构没有出现某个元素，则将其百分比设为 0

    df.to_excel(output_file, index=False)  # 将数据写入 Excel

# 主程序
if __name__ == "__main__":
    cof_directory = 'E:/COF和MOF数据库/Zr-MOFs-Database-master/阴离子柱撑MOF数据库/Database'
    processed_data, all_elements = process_cof_files(cof_directory)
    output_excel = 'yinlizhumof_elements_mass_percent-v2.xlsx'
    write_to_excel(processed_data, all_elements, output_excel)
    print(f"处理完成，结果已写入 {output_excel}")


处理完成，结果已写入 yinlizhumof_elements_mass_percent-v2.xlsx


In [4]:
import os
from collections import defaultdict
import pandas as pd
from ase.io import read

# 定义元素相对原子质量（单位：g/mol）
# 定义元素相对原子质量（单位：g/mol）
atomic_weights = {
    "H": 1.008, "He": 4.0026, "Li": 6.94, "Be": 9.0122, "B": 10.81, "C": 12.011, "N": 14.007,
    "O": 15.999, "F": 18.998, "Ne": 20.180, "Na": 22.990, "Mg": 24.305, "Al": 26.982, "Si": 28.085,
    "P": 30.974, "S": 32.06, "Cl": 35.45, "Ar": 39.948, "K": 39.098, "Ca": 40.078, "Sc": 44.956,
    "Ti": 47.867, "V": 50.942, "Cr": 51.996, "Mn": 54.938, "Fe": 55.845, "Co": 58.933, "Ni": 58.693,
    "Cu": 63.546, "Zn": 65.38, "Ga": 69.723, "Ge": 72.630, "As": 74.922, "Se": 78.971, "Br": 79.904,
    "Kr": 83.798, "Rb": 85.468, "Sr": 87.62, "Y": 88.906, "Zr": 91.244, "Nb": 92.906, "Mo": 95.95,
    "Tc": 98, "Ru": 101.07, "Rh": 102.91, "Pd": 106.42, "Ag": 107.87, "Cd": 112.41, "In": 114.82,
    "Sn": 118.71, "Sb": 121.76, "Te": 127.60, "I": 126.90, "Xe": 131.29, "Cs": 132.91, "Ba": 137.33,
    "La": 138.91, "Ce": 140.12, "Pr": 140.91, "Nd": 144.24, "Pm": 145, "Sm": 150.36, "Eu": 151.96,
    "Gd": 157.25, "Tb": 158.93, "Dy": 162.50, "Ho": 164.93, "Er": 167.26, "Tm": 168.93, "Yb": 173.05,
    "Lu": 174.97, "Hf": 178.49, "Ta": 180.95, "W": 183.84, "Re": 186.21, "Os": 190.23, "Ir": 192.22,
    "Pt": 195.08, "Au": 196.97, "Hg": 200.59, "Tl": 204.38, "Pb": 207.2, "Bi": 208.98, "Po": 209,"At": 210, "Rn": 222
    # 根据需要添加更多元素及其相对原子质量
}

# 定义读取文件夹中所有 COF 文件并计算元素质量百分比和晶胞参数的函数
def process_cof_files(directory):
    all_elements = set()  # 用于存储所有可能出现的元素
    file_data = []  # 存储每个文件的元素质量百分比及晶胞参数

    # 遍历文件夹中的所有文件
    for filename in os.listdir(directory):
        if filename.endswith(".cif"):  # 仅处理 .cif 文件
            file_path = os.path.join(directory, filename)

            try:
                # 使用 ase 读取结构
                atoms = read(file_path)
                
                # 提取元素列表
                elements = atoms.get_chemical_symbols()
                
                # 统计每种元素的数量
                element_count = defaultdict(int)
                total_mass = 0  # 用于存储该结构的总质量
                
                for element in elements:
                    element_count[element] += 1
                    total_mass += atomic_weights[element]  # 累加总质量
                    all_elements.add(element)

                # 计算元素质量百分比
                element_mass_percent = {elem: (count * atomic_weights[elem] / total_mass) * 100
                                        for elem, count in element_count.items()}
                
                # 获取晶胞的 a, b, c 长度和 α, β, γ 角度
                cell_lengths_and_angles = atoms.get_cell_lengths_and_angles()
                a, b, c, alpha, beta, gamma = cell_lengths_and_angles

                # 将文件名、晶胞参数和元素质量百分比添加到数据列表
                file_data.append({
                    'Filename': filename,
                    'a': a,
                    'b': b,
                    'c': c,
                    'alpha': alpha,
                    'beta': beta,
                    'gamma': gamma,
                    **element_mass_percent  # 解包元素质量百分比字典，自动分配到列
                })

            except Exception as e:
                # 如果文件读取失败，记录错误信息
                file_data.append({'Filename': filename, 'Error': str(e)})

    return file_data, all_elements

# 将处理后的数据写入 Excel 文件
def write_to_excel(data, all_elements, output_file):
    all_elements = sorted(all_elements)  # 将所有元素转换为列表并排序
    df = pd.DataFrame(data).fillna(0)  # 创建 DataFrame，填充缺失值为 0

    # 确保 DataFrame 中所有元素都作为列存在
    for element in all_elements:
        if element not in df.columns:
            df[element] = 0  # 如果某些结构没有出现某个元素，则将其百分比设为 0

    df.to_excel(output_file, index=False)  # 将数据写入 Excel

# 主程序
if __name__ == "__main__":
    cof_directory = 'E:/COF和MOF数据库/Zr-MOFs-Database-master/阴离子柱撑MOF数据库/Database'
    processed_data, all_elements = process_cof_files(cof_directory)
    output_excel = 'yinlizhumof_elements_mass_percent_with_cell.xlsx'
    write_to_excel(processed_data, all_elements, output_excel)
    print(f"处理完成，结果已写入 {output_excel}")


  cell_lengths_and_angles = atoms.get_cell_lengths_and_angles()


处理完成，结果已写入 yinlizhumof_elements_mass_percent_with_cell.xlsx
