In [1]:
from pprint import pprint
from convertmol import parse_sdf_file, bond_type_dict, single_bond_stereo_dict, double_bond_stereo_dict
from torch_geometric.data import Data
from torch_geometric.nn import *
from torch_geometric.utils import to_dense_adj, to_dense_batch, add_self_loops
from torch_geometric.nn.conv import MessagePassing
import torch
from torch import nn
import rdkit
from tqdm import tqdm
import itertools
from rdkit import Chem
import pandas as pd
from importlib import reload
import matplotlib.pyplot as plt
from rdkit import RDLogger
from copy import deepcopy
from torch.utils.data import Dataset, DataLoader
# Suppress RDKit warnings (done before sascorer is imported, as in the original cell order)
RDLogger.DisableLog('rdApp.*')
import sascorer

# Default device for newly created tensors; the original cell notes 'cuda' as the alternative.
DEFAULT_DEVICE = 'cpu:0'
torch.set_default_device(DEFAULT_DEVICE)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from neo4j import GraphDatabase

In [3]:
from neo4j import GraphDatabase, RoutingControl


import os

# Connection settings for the Neo4j server.
# Values are taken from the environment when present so that real credentials
# never have to be committed with the notebook; the fallbacks are the values
# this notebook originally hardcoded (a local development instance).
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
AUTH = (
    os.environ.get("NEO4J_USERNAME", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "password"),
)


def add_friend(driver, name, friend_name):
    """Record that the person `name` KNOWS the person `friend_name`.

    A single Cypher statement is sent through ``driver.execute_query`` against
    the "neo4j" database. Both ``Person`` nodes and the ``KNOWS`` relationship
    are written with MERGE.

    Args:
        driver: object exposing ``execute_query`` (a Neo4j driver).
        name: value of the ``name`` property of the source person.
        friend_name: value of the ``name`` property of the friend.
    """
    cypher = (
        "MERGE (a:Person {name: $name}) "
        "MERGE (friend:Person {name: $friend_name}) "
        "MERGE (a)-[:KNOWS]->(friend)"
    )
    # Query parameters plus the driver-level `database_` option, passed as keywords.
    call_kwargs = {
        "name": name,
        "friend_name": friend_name,
        "database_": "neo4j",
    }
    driver.execute_query(cypher, **call_kwargs)


def print_friends(driver, name):
    """Print the name of every node that the person `name` KNOWS, one per line.

    The query runs against the "neo4j" database with read routing and returns
    the names ordered by ``friend.name``.

    Args:
        driver: object exposing ``execute_query`` (a Neo4j driver).
        name: value of the ``name`` property of the person to look up.
    """
    cypher = (
        "MATCH (a:Person)-[:KNOWS]->(friend) WHERE a.name = $name "
        "RETURN friend.name ORDER BY friend.name"
    )
    # execute_query's result is unpacked as a 3-tuple; only the first element
    # (the records) is used here.
    rows, _summary, _keys = driver.execute_query(
        cypher,
        name=name,
        database_="neo4j",
        routing_=RoutingControl.READ,
    )
    for row in rows:
        print(row["friend.name"])



In [4]:
# Smoke test: build a driver from URI/AUTH, print a marker, then release the driver.
# NOTE(review): no query is issued here, so this may not prove the server is
# reachable — confirm, or consider calling driver.verify_connectivity().
driver = GraphDatabase.driver(URI, auth=AUTH)
try:
    print("A")
finally:
    driver.close()

A


In [34]:
import importlib

import convertmol

# Re-execute convertmol.py so edits made on disk take effect in this kernel.
convertmol = importlib.reload(convertmol)

# reload() only refreshes the module object itself. Names that were bound
# earlier with `from convertmol import ...` would keep pointing at the old
# objects, so they are re-bound here.
from convertmol import (
    parse_sdf_file,
    bond_type_dict,
    single_bond_stereo_dict,
    double_bond_stereo_dict,
)

# Keep the module as the cell's last expression so its path is still displayed.
convertmol

<module 'convertmol' from '/home/maru/Desktop/biocheminformatics/convertmol.py'>

In [47]:
import re

# Raw strings: `\d` must reach the regex engine untouched; in a normal string
# literal it is an invalid escape sequence.
atom_re = re.compile(r"^atom\d+$")  # keys such as "atom1", "atom12"
bond_re = re.compile(r"^bond\d+$")  # keys such as "bond0", "bond7"


def create_mol(tx, d, source):
    """Write one parsed molecule, with its atoms and bonds, through `tx`.

    The entries of `d` are split by key: keys matching ``atom<N>`` are atoms,
    keys matching ``bond<N>`` are bonds, everything else becomes a property of
    the ``Molecule`` node. The resulting graph is::

        (:Atom)-[:PART_OF]->(:Molecule)-[:PART_OF]->(:Source)
        (:Atom)-[:BOND]->(:Atom)

    All values are sent as query parameters, never spliced into the Cypher
    text, so quotes or other special characters in ids and names are safe.
    `d` and its nested dicts are not modified, which keeps the function safe to
    call again with the same input (for example when a transaction is retried).

    Nodes are written with CREATE (only the Source uses MERGE), so calling this
    twice for the same molecule id stores it twice.

    Args:
        tx: object exposing ``run(query, parameters)`` (a Neo4j transaction).
        d: mapping describing one molecule; must contain an ``"id"`` entry.
            Each bond value must contain ``"orig"`` and ``"dest"``, holding the
            keys of the atoms it connects.
        source: name of the ``Source`` node the molecule belongs to.

    Raises:
        AssertionError: if `source` is not a ``str``.
        KeyError: if `d` has no ``"id"`` entry, or a bond lacks ``"orig"`` or
            ``"dest"``.
    """
    assert isinstance(source, str), "source must be str"

    atoms = {}
    bonds = {}
    props = {}
    for key, value in d.items():
        if atom_re.match(key):
            atoms[key] = value
        elif bond_re.match(key):
            bonds[key] = value
        else:
            props[key] = value

    # Fail before anything is written if the molecule cannot be identified.
    mol_id = props["id"]

    tx.run("MERGE (:Source {name: $source})", {"source": source})

    # Passing the whole property map as one parameter avoids building the
    # property list by hand and works for any property name.
    tx.run("CREATE (m:Molecule) SET m = $props", {"props": dict(props)})

    for atom_id, atom in atoms.items():
        # The atom's dict key is its id; it overrides any "id" inside the atom
        # dict because bonds look their endpoints up by this value.
        atom_props = {**atom, "id": atom_id}
        tx.run(
            "MATCH (m:Molecule {id: $mol_id}) "
            "CREATE (a:Atom)-[:PART_OF]->(m) "
            "SET a = $props",
            {"mol_id": mol_id, "props": atom_props},
        )

    for bond_id, bond in bonds.items():
        orig = bond["orig"]
        dest = bond["dest"]
        # Copy instead of pop(): the caller's dict must stay intact.
        bond_props = {
            key: value
            for key, value in bond.items()
            if key not in ("orig", "dest")
        }
        bond_props["id"] = bond_id
        # Atom ids ("atom1", ...) repeat in every molecule, so the endpoints
        # are restricted to atoms that are PART_OF this molecule.
        tx.run(
            "MATCH (m:Molecule {id: $mol_id}) "
            "MATCH (orig:Atom {id: $orig})-[:PART_OF]->(m) "
            "MATCH (dest:Atom {id: $dest})-[:PART_OF]->(m) "
            "CREATE (orig)-[b:BOND]->(dest) "
            "SET b = $props",
            {"mol_id": mol_id, "orig": orig, "dest": dest, "props": bond_props},
        )

    tx.run(
        "MATCH (m:Molecule {id: $mol_id}), (s:Source {name: $source}) "
        "CREATE (m)-[:PART_OF]->(s)",
        {"mol_id": mol_id, "source": source},
    )
    
    #CREATE (d:Dataset {id: 'DS001', source: 'PubChem', description: 'Public chemical dataset'});
    #
    #CREATE (m:Molecule {id: 'MOL001', name: 'Carbon Dioxide', formula: 'CO2'})
    #CREATE (c:Atom {type: 'C', id: 'C1', atomicNumber: 6, valency: 4})
    #CREATE (o1:Atom {type: 'O', id: 'O1', atomicNumber: 8, valency: 2})
    #CREATE (o2:Atom {type: 'O', id: 'O2', atomicNumber: 8, valency: 2})
    #CREATE (c)-[:PART_OF]->(m)
    #CREATE (o1)-[:PART_OF]->(m)
    #CREATE (o2)-[:PART_OF]->(m);
    #
    #MATCH (c:Atom {id: 'C1'}), (o1:Atom {id: 'O1'})
    #CREATE (c)-[:BOND {type: 'DOUBLE'}]->(o1);
    #
    #MATCH (c:Atom {id: 'C1'}), (o2:Atom {id: 'O2'})
    #CREATE (c)-[:BOND {type: 'DOUBLE'}]->(o2);
    #
    #MATCH (m:Molecule {id: 'MOL001'}), (d:Dataset {id: 'DS001'})
    #CREATE (m)-[:PART_OF]->(d);
    #
    ## Dynamically build Cypher query
    #props = ", ".join([f"{key}: ${key}" for key in properties.keys()])
    #query = f"CREATE (n:{label} {{{props}}})"
    #tx.run(query, properties)

# Load every molecule parsed from the sample SDF file into Neo4j, using one
# write transaction (session.execute_write) per molecule. The driver and the
# session are both closed when the `with` block exits.
with GraphDatabase.driver(URI, auth=AUTH) as driver, driver.session() as session:
    for m in parse_sdf_file("./samples.sdf", data_items=True):
        print("processing")
        display(m)
        session.execute_write(create_mol, m, "sample_gdb9")

processing


{'id': 'gdb_1',
 'software': '-OEChem-03231823243D',
 'version': 'V2000',
 'atom1': {'x': -0.0127,
  'y': 1.0858,
  'z': 0.008,
  'symbol': 'C',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom2': {'x': 0.0022,
  'y': -0.006,
  'z': 0.002,
  'symbol': 'H',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom3': {'x': 1.0117,
  'y': 1.4638,
  'z': 0.0003,
  'symbol': 'H',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom4': {'x': -0.5408,
  'y': 1.4475,
  'z': -0.8766,
  'symbol': 'H',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom5': {'x': -0.5238,
  'y': 1.4379,
  'z': 0.9064,
  'symbol': 'H',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'bond0': {'bo

processing


{'id': 'gdb_2',
 'software': '-OEChem-03231823233D',
 'version': 'V2000',
 'atom1': {'x': -0.0404,
  'y': 1.0241,
  'z': 0.0626,
  'symbol': 'N',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom2': {'x': 0.0173,
  'y': 0.0125,
  'z': -0.0274,
  'symbol': 'H',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom3': {'x': 0.9158,
  'y': 1.3587,
  'z': -0.0288,
  'symbol': 'H',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom4': {'x': -0.5203,
  'y': 1.3435,
  'z': -0.7755,
  'symbol': 'H',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'bond0': {'bond_type': 'Single',
  'bond_stereo': 'Not_stereo',
  'orig': 'atom1',
  'dest': 'atom2'},
 'bond1': {'bond_type': 'Single',
  'bond_stereo': 'Not_stereo',
  'orig': 'atom1',
  'de

processing


{'id': 'gdb_3',
 'software': '-OEChem-03231823243D',
 'version': 'V2000',
 'atom1': {'x': -0.0344,
  'y': 0.9775,
  'z': 0.0076,
  'symbol': 'O',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom2': {'x': 0.0648,
  'y': 0.0206,
  'z': 0.0015,
  'symbol': 'H',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom3': {'x': 0.8718,
  'y': 1.3008,
  'z': 0.0007,
  'symbol': 'H',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'bond0': {'bond_type': 'Single',
  'bond_stereo': 'Not_stereo',
  'orig': 'atom1',
  'dest': 'atom2'},
 'bond1': {'bond_type': 'Single',
  'bond_stereo': 'Not_stereo',
  'orig': 'atom1',
  'dest': 'atom3'}}

processing


{'id': 'gdb_4',
 'software': '-OEChem-03231823253D',
 'version': 'V2000',
 'atom1': {'x': 0.5995,
  'y': 0.0,
  'z': 1.0,
  'symbol': 'C',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom2': {'x': -0.5995,
  'y': 0.0,
  'z': 1.0,
  'symbol': 'C',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom3': {'x': -1.6616,
  'y': 0.0,
  'z': 1.0,
  'symbol': 'H',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom4': {'x': 1.6616,
  'y': 0.0,
  'z': 1.0,
  'symbol': 'H',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'bond0': {'bond_type': 'Triple', 'orig': 'atom1', 'dest': 'atom2'},
 'bond1': {'bond_type': 'Single',
  'bond_stereo': 'Not_stereo',
  'orig': 'atom1',
  'dest': 'atom4'},
 'bond2': {'bond_type': 'Single',
  'bond_stere

processing


{'id': 'CO2',
 'software': 'ChemDraw2024',
 'version': 'V2000',
 'atom1': {'x': -0.72,
  'y': 0.0,
  'z': 0.0,
  'symbol': 'C',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom2': {'x': 0.72,
  'y': 0.0,
  'z': 0.0,
  'symbol': 'O',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'atom3': {'x': -1.44,
  'y': 0.0,
  'z': 0.0,
  'symbol': 'O',
  'mass_diff': '0',
  'charge': 'outside_limits',
  'stereo_parity': 'not_stereo',
  'valence': 'no marking'},
 'bond0': {'bond_type': 'Double',
  'bond_stereo': 'Use_coordinates',
  'orig': 'atom1',
  'dest': 'atom2'},
 'bond1': {'bond_type': 'Double',
  'bond_stereo': 'Use_coordinates',
  'orig': 'atom1',
  'dest': 'atom3'},
 'LogP': '0.83'}

In [32]:
# Scratch check of f-string brace escaping, as used when building Cypher maps:
# `{{` and `}}` emit literal braces while `{2}` is interpolated, giving '1{2}'.
f"1{{{2}}}"

'1{2}'

In [35]:
# Display the parser's result for the sample file: a list with one dict per
# molecule, holding molecule-level fields plus 'atomN' / 'bondN' entries.
parse_sdf_file("./samples.sdf",data_items=True)

[{'id': 'gdb_1',
  'software': '-OEChem-03231823243D',
  'version': 'V2000',
  'atom1': {'x': -0.0127,
   'y': 1.0858,
   'z': 0.008,
   'symbol': 'C',
   'mass_diff': '0',
   'charge': 'outside_limits',
   'stereo_parity': 'not_stereo',
   'valence': 'no marking'},
  'atom2': {'x': 0.0022,
   'y': -0.006,
   'z': 0.002,
   'symbol': 'H',
   'mass_diff': '0',
   'charge': 'outside_limits',
   'stereo_parity': 'not_stereo',
   'valence': 'no marking'},
  'atom3': {'x': 1.0117,
   'y': 1.4638,
   'z': 0.0003,
   'symbol': 'H',
   'mass_diff': '0',
   'charge': 'outside_limits',
   'stereo_parity': 'not_stereo',
   'valence': 'no marking'},
  'atom4': {'x': -0.5408,
   'y': 1.4475,
   'z': -0.8766,
   'symbol': 'H',
   'mass_diff': '0',
   'charge': 'outside_limits',
   'stereo_parity': 'not_stereo',
   'valence': 'no marking'},
  'atom5': {'x': -0.5238,
   'y': 1.4379,
   'z': 0.9064,
   'symbol': 'H',
   'mass_diff': '0',
   'charge': 'outside_limits',
   'stereo_parity': 'not_stereo',
