In [3]:
from rdkit import Chem
from rdkit.Chem import BondType, HybridizationType

def smiles_to_graph(smiles, add_hs=False):
    """Convert a SMILES string into a simple molecular graph.

    Args:
        smiles: SMILES string to parse with RDKit.
        add_hs: If True, call ``Chem.AddHs`` on the parsed molecule before
            extracting features, so explicit hydrogens become graph nodes.

    Returns:
        ``None`` if RDKit cannot parse ``smiles``; otherwise a dict with:

        * ``'nodes'``: one feature list per atom, in RDKit atom order:
          ``[atomic_num, formal_charge, hybridization_code, total_num_hs,
          in_ring, is_aromatic]``.
        * ``'edges'``: ``(start_idx, end_idx, bond_feats)`` tuples, two per
          bond (one for each direction), where ``bond_feats`` is
          ``[bond_type_code, is_conjugated, in_ring]``. Each tuple owns its
          own ``bond_feats`` list.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    if add_hs:
        mol = Chem.AddHs(mol)  # add explicit hydrogen atoms

    # Hybridization -> integer code. Values missing from this table fall back
    # to 5 below, i.e. they share the code used for UNSPECIFIED.
    hybrid_mapping = {
        HybridizationType.SP: 0,
        HybridizationType.SP2: 1,
        HybridizationType.SP3: 2,
        HybridizationType.SP3D: 3,
        HybridizationType.SP3D2: 4,
        HybridizationType.UNSPECIFIED: 5,
        HybridizationType.S: 6,
    }

    # Per-atom features.
    nodes = [
        [
            atom.GetAtomicNum(),           # atomic number
            atom.GetFormalCharge(),        # formal charge
            hybrid_mapping.get(atom.GetHybridization(), 5),  # hybridization
            atom.GetTotalNumHs(),          # total hydrogen count
            int(atom.IsInRing()),          # ring membership (0 or 1)
            int(atom.GetIsAromatic()),     # aromaticity (0 or 1)
        ]
        for atom in mol.GetAtoms()
    ]

    # Bond type -> integer code; any other bond type is encoded as 0.
    bond_type_mapping = {
        BondType.SINGLE: 1,
        BondType.DOUBLE: 2,
        BondType.TRIPLE: 3,
        BondType.AROMATIC: 4,
    }

    # Per-bond features.
    edges = []
    for bond in mol.GetBonds():
        start = bond.GetBeginAtomIdx()
        end = bond.GetEndAtomIdx()
        bond_feats = [
            bond_type_mapping.get(bond.GetBondType(), 0),  # bond type
            int(bond.GetIsConjugated()),      # conjugated (0 or 1)
            int(bond.IsInRing()),             # ring membership (0 or 1)
        ]
        # Undirected graph: store the bond once per direction. The reverse
        # edge gets its own copy of the feature list so that mutating one
        # edge's features in place cannot silently change the other.
        edges.append((start, end, bond_feats))
        edges.append((end, start, list(bond_feats)))

    return {
        'nodes': nodes,   # list of per-atom feature lists
        'edges': edges,   # list of (start index, end index, features)
    }

# Example usage: build graphs for a couple of molecules and print a summary.
smiles_list = ['CCO', 'C1=CC=CC=C1']  # ethanol and benzene
graphs = []
for smi in smiles_list:
    graph = smiles_to_graph(smi)
    if not graph:
        # Nothing usable came back for this SMILES; skip it.
        continue
    graphs.append(graph)
    # One line per field, same order and text as separate print calls.
    print(
        f"SMILES: {smi}",
        f"节点数: {len(graph['nodes'])}",
        f"边数: {len(graph['edges'])}",
        f"nodes: {graph['nodes']}",
        sep="\n",
    )

SMILES: CCO
节点数: 3
边数: 4
nodes: [[6, 0, 2, 3, 0, 0], [6, 0, 2, 2, 0, 0], [8, 0, 2, 1, 0, 0]]
SMILES: C1=CC=CC=C1
节点数: 6
边数: 12
nodes: [[6, 0, 1, 1, 1, 1], [6, 0, 1, 1, 1, 1], [6, 0, 1, 1, 1, 1], [6, 0, 1, 1, 1, 1], [6, 0, 1, 1, 1, 1], [6, 0, 1, 1, 1, 1]]
