In [None]:
# spiro_compounds_analysis
from rdkit import Chem
from collections import defaultdict

def is_spiro_compound(smiles):
    """Report whether a SMILES string contains a spiro atom.

    An atom counts as a spiro atom here when it belongs to exactly two
    rings and is the only atom those two rings have in common.

    Args:
        smiles: SMILES string to parse.

    Returns:
        True if at least one such atom is found; False if none is found or
        if ``Chem.MolFromSmiles`` returns None for the string.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return False

    ring_info = molecule.GetRingInfo()
    for atom in molecule.GetAtoms():
        atom_idx = atom.GetIdx()
        if ring_info.NumAtomRings(atom_idx) != 2:
            continue
        containing = [ring for ring in ring_info.AtomRings() if atom_idx in ring]
        if len(containing) != 2:
            continue
        first_ring, second_ring = containing
        # The two rings must meet at this atom and nowhere else.
        if set(first_ring) & set(second_ring) == {atom_idx}:
            return True
    return False

def analyze_and_save_spiro_compounds(input_filename, output_spiro_filename):
    """Find spiro compounds in a SMILES file, save them, and print statistics.

    Reads one SMILES string per line from ``input_filename`` and writes every
    line accepted by ``is_spiro_compound`` to ``output_spiro_filename`` (one
    per line, repeated entries included). It then prints a count and
    percentage for each distinct spiro SMILES, followed by the overall totals.

    Args:
        input_filename: Path of the text file to read.
        output_spiro_filename: Path of the text file to write; an existing
            file is overwritten.
    """
    spiro_counts = defaultdict(int)
    total_count = 0

    with open(input_filename, 'r') as file, open(output_spiro_filename, 'w') as outfile:
        for line in file:
            smiles = line.strip()
            if not smiles:
                # Blank lines (e.g. a trailing newline) are not compounds and
                # must not inflate the denominator of the percentages.
                continue
            total_count += 1
            if is_spiro_compound(smiles):
                spiro_counts[smiles] += 1
                outfile.write(f"{smiles}\n")

    # Per-SMILES share of all compounds read. The report line belongs inside
    # the loop so every entry is printed and nothing is referenced when no
    # spiro compound was found.
    total_spiro_count = sum(spiro_counts.values())
    for smiles, count in spiro_counts.items():
        percentage = (count / total_count) * 100
        print(f"SMILES: {smiles}, Count: {count}, Percentage: {percentage:.2f}%")

    # Guard against an empty input file, which would otherwise divide by zero.
    overall_percentage = (total_spiro_count / total_count) * 100 if total_count else 0.0

    print(f"Total number of compounds: {total_count}")
    print(f"Total number of spiro compounds: {total_spiro_count}")
    print(f"Percentage of spiro compounds: {overall_percentage:.2f}%")

# input_filename: text file with one SMILES string per line.
# output_spiro_filename: text file that receives the SMILES identified as spiro compounds.
# Both paths are machine-specific; edit them to match your environment before running.
input_filename = '/home/sunhnayu/jupyterlab/XXI/img2smiles_xuexi/img2smiles/notebook/data_sample/USPTO_full_random_8196_smiles.txt'
output_spiro_filename = '/home/sunhnayu/jupyterlab/XXI/img2smiles_xuexi/img2smiles/notebook/data_sample/USPRO_full_spiro_compounds.txt'
analyze_and_save_spiro_compounds(input_filename, output_spiro_filename)

In [None]:
# bridged_compounds_analysis
from rdkit import Chem
from collections import defaultdict

def find_bridged_rings(smiles):
    """Look for a pair of rings that share three or more atoms.

    The molecule is treated as bridged as soon as any two distinct rings
    reported by RDKit's ring info have at least three atoms in common.

    Args:
        smiles: SMILES string to parse.

    Returns:
        A ``(found, sizes)`` tuple. ``found`` is True when such a ring pair
        exists, and ``sizes`` is then the ``(size_of_first, size_of_second)``
        tuple for the first pair encountered. Otherwise, including when
        ``Chem.MolFromSmiles`` returns None, the result is ``(False, None)``.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return False, None

    rings = molecule.GetRingInfo().AtomRings()
    for first in rings:
        first_atoms = set(first)
        for second in rings:
            if first is second:
                # Never compare a ring with itself.
                continue
            if len(first_atoms & set(second)) >= 3:
                return True, (len(first), len(second))

    return False, None

def analyze_file(filename):
    """Print statistics on bridged compounds found in a SMILES file.

    Reads one SMILES string per line from ``filename`` and groups those that
    ``find_bridged_rings`` flags by the reported pair of ring sizes. For each
    group it prints the count, the group's share of all bridged compounds,
    and the member SMILES. Overall totals are printed last.

    Args:
        filename: Path of the text file to read.
    """
    # Maps "<size1> and <size2>" to the number of matching compounds and the
    # SMILES strings that produced that ring-size pair.
    bridged_stats = defaultdict(lambda: {'count': 0, 'smiles': []})
    total_smiles = 0

    with open(filename, 'r') as file:
        for line in file:
            smiles = line.strip()
            if not smiles:
                # Blank lines (e.g. a trailing newline) are not compounds and
                # must not inflate the denominator of the percentage.
                continue
            total_smiles += 1
            result, ring_sizes = find_bridged_rings(smiles)
            if result:
                key = f"{ring_sizes[0]} and {ring_sizes[1]}"
                bridged_stats[key]['count'] += 1
                bridged_stats[key]['smiles'].append(smiles)

    print("Analysis of Bridged Compounds:")
    total_bridged = sum(stats['count'] for stats in bridged_stats.values())
    for key, stats in bridged_stats.items():
        # total_bridged is non-zero whenever this loop body runs.
        percentage = (stats['count'] / total_bridged) * 100
        print(f"Ring sizes {key}: {stats['count']} compounds ({percentage:.2f}%)")
        for smiles in stats['smiles']:
            print(f"  {smiles}")
        print()

    # Guard against an empty input file, which would otherwise divide by zero.
    bridged_percentage = (total_bridged / total_smiles) * 100 if total_smiles else 0.0

    print(f"Total number of smiles: {total_smiles}")
    print(f"Total number of bridged compounds: {total_bridged}")
    print(f"Percentage of bridged compounds: {bridged_percentage:.2f}%")

# Run the bridged-ring analysis on a text file with one SMILES string per line.
# The path is machine-specific; edit it to match your environment before running.
analyze_file('/home/sunhnayu/jupyterlab/XXI/img2smiles_xuexi/img2smiles/notebook/data_sample/final_unique_standard_merged.txt')