In [None]:
import os

import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem

def one_hot(x, allowable_set):
    """Encode ``x`` as a list of 0/1 indicators over ``allowable_set``.

    The result has one entry per element of ``allowable_set``, in the same
    order: 1 where the element compares equal to ``x``, 0 elsewhere. A value
    that matches nothing therefore yields an all-zero list.
    """
    encoding = []
    for candidate in allowable_set:
        encoding.append(int(candidate == x))
    return encoding

def get_atom_features(mol):
    """Get one-hot encoded atom features.

    Every atom returned by ``mol.GetAtoms()`` contributes one row made of,
    in order:

    * element symbol, one-hot over 14 slots (13 named elements + ``'Other'``),
    * degree, one-hot over 0..6,
    * hybridization, one-hot over SP, SP2, SP3, SP3D, SP3D2,
    * formal charge, one-hot over -1, 0, +1,
    * aromaticity flag (0 or 1).

    Symbols that are not in the named list (hydrogen included) are mapped to
    the ``'Other'`` slot. Degree, hybridization and formal-charge values
    outside their listed sets encode as all zeros.

    Args:
        mol: Molecule object exposing ``GetAtoms()`` (an RDKit ``Mol``).

    Returns:
        ``numpy.ndarray`` built from the per-atom feature lists, one row per
        atom.
    """
    # Vocabularies are loop-invariant, so build them once up front.
    symbols = ['C', 'N', 'O', 'S', 'F', 'Cl', 'Br', 'I', 'P', 'Si', 'Se',
               'B', 'Sn', 'Other']
    degrees = [0, 1, 2, 3, 4, 5, 6]
    hybridizations = [
        Chem.rdchem.HybridizationType.SP,
        Chem.rdchem.HybridizationType.SP2,
        Chem.rdchem.HybridizationType.SP3,
        Chem.rdchem.HybridizationType.SP3D,
        Chem.rdchem.HybridizationType.SP3D2,
    ]
    charges = [-1, 0, 1]

    features = []
    for atom in mol.GetAtoms():
        symbol = atom.GetSymbol()
        # Without this fallback the 'Other' slot could never be set, because
        # one_hot only matches exact values.
        if symbol not in symbols:
            symbol = 'Other'

        atom_feature = []
        atom_feature.extend(one_hot(symbol, symbols))
        atom_feature.extend(one_hot(atom.GetDegree(), degrees))
        atom_feature.extend(one_hot(atom.GetHybridization(), hybridizations))
        atom_feature.extend(one_hot(atom.GetFormalCharge(), charges))
        # Store the flag as an int so every entry in the row has one type.
        atom_feature.append(int(atom.GetIsAromatic()))
        features.append(atom_feature)
    return np.array(features)

def get_adjacency_matrix(mol):
    """Get adjacency matrix.

    Args:
        mol: Molecule object exposing ``GetNumAtoms()`` and ``GetBonds()``
            (an RDKit ``Mol``).

    Returns:
        Symmetric ``(num_atoms, num_atoms)`` integer ``numpy.ndarray`` with a
        1 at ``[i, j]`` and ``[j, i]`` for every bond between atoms ``i`` and
        ``j``, and 0 everywhere else.
    """
    num_atoms = mol.GetNumAtoms()
    adj_matrix = np.zeros((num_atoms, num_atoms), dtype=int)
    # Walk the bond list directly instead of probing every atom pair: the
    # work is proportional to the number of bonds rather than num_atoms**2.
    for bond in mol.GetBonds():
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()
        adj_matrix[i, j] = 1
        adj_matrix[j, i] = 1  # symmetric matrix
    return adj_matrix

def get_position_matrix(mol):
    """Collect the 3D coordinates of every atom into one array.

    The coordinates are read from the conformer returned by
    ``mol.GetConformer()``.

    Returns:
        ``(num_atoms, 3)`` float ``numpy.ndarray`` whose row ``i`` holds the
        x, y and z coordinates of atom ``i``.
    """
    conformer = mol.GetConformer()
    atom_count = mol.GetNumAtoms()
    coords = np.zeros((atom_count, 3))
    for idx in range(atom_count):
        point = conformer.GetAtomPosition(idx)
        coords[idx, 0] = point.x
        coords[idx, 1] = point.y
        coords[idx, 2] = point.z
    return coords

def get_heavy_atom_types(mol):
    """Return the element symbols of all non-hydrogen atoms.

    Atoms whose atomic number is 1 are skipped; the remaining symbols are
    returned as a ``numpy.ndarray`` in the order given by ``mol.GetAtoms()``.
    """
    symbols = []
    for atom in mol.GetAtoms():
        if atom.GetAtomicNum() == 1:
            continue  # hydrogen is not a heavy atom
        symbols.append(atom.GetSymbol())
    return np.array(symbols)

# Example SMILES string
smiles = "CC(C=C1)=CC=C1CN2C3=CC=C(O[Si](C)(C)C(C)(C)C)C=C3C=C2"

# Convert SMILES to RDKit molecule (MolFromSmiles returns None on a parse failure)
mol = Chem.MolFromSmiles(smiles)
if mol is None:
    raise ValueError(f"Could not parse SMILES string: {smiles!r}")
# Add hydrogens to the molecule
mol = Chem.AddHs(mol)
# Embed a 3D conformer to the molecule; a negative return value means the
# embedding failed and the molecule has no conformer to read positions from.
if AllChem.EmbedMolecule(mol) < 0:
    raise RuntimeError(f"Could not embed a 3D conformer for: {smiles!r}")

# Get molecular features
# NOTE(review): atom_features, adjacency_matrix and position_matrix cover every
# atom of the hydrogen-added molecule, while heavy_atom_types skips hydrogens,
# so their lengths differ -- confirm this is intended.
atom_features = get_atom_features(mol)
adjacency_matrix = get_adjacency_matrix(mol)
position_matrix = get_position_matrix(mol)
heavy_atom_types = get_heavy_atom_types(mol)

# Define the output directory and file names
output_dir = ""  # Replace with your desired output directory ("" = current directory)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
# os.path.join keeps an empty output_dir relative; plain "/" concatenation
# would turn it into a path at the filesystem root (e.g. "/smiles.txt").
smiles_file = os.path.join(output_dir, "smiles.txt")
features_file = os.path.join(output_dir, "atom_features.npy")
adjacency_file = os.path.join(output_dir, "adjacency_matrix.npy")
position_file = os.path.join(output_dir, "position_matrix.npy")
types_file = os.path.join(output_dir, "heavy_atom_types.npy")

# Save the SMILES string to a text file
with open(smiles_file, 'w', encoding="utf-8") as f:
    f.write(f"smiles: \"{smiles}\"\n")

# Save the atom features matrix to a .npy file
np.save(features_file, atom_features)
print(f"Atom features saved to {features_file}")

# Save the adjacency matrix to a .npy file
np.save(adjacency_file, adjacency_matrix)
print(f"Adjacency matrix saved to {adjacency_file}")

# Save the position matrix to a .npy file (currently disabled)
# np.save(position_file, position_matrix)
# print(f"Position matrix saved to {position_file}")

# Save the heavy atom types matrix to a .npy file
np.save(types_file, heavy_atom_types)
print(f"Heavy atom types saved to {types_file}")