# Prototype queries: Compounds of [element], inorganic

In [None]:
from itertools import islice
import os
import sys

import rdkit
from rdkit import Chem

import sqlalchemy
from sqlalchemy import create_engine, Table, MetaData

sys.path.append('../..')
from camelid.env import CamelidEnv
from camelid.cmgroup import CMGroup, collect_to_json
from camelid.googlesheet import SheetManager
from camelid.hypertext import cids_to_html, directory
from camelid.query import get_query_results, substructure_query, get_element_inorganic

## Set up environment, Google access credentials, etc.

In [None]:
# Create the project environment named 'inorganic' and read its configuration.
env = CamelidEnv('inorganic')
config = env.read_config()

# Google Sheets: the manager is built from the config's 'title' and 'key_file'
# entries plus the literal 'inorganic' (presumably the worksheet name --
# confirm against SheetManager).
sheet = SheetManager(config['title'],
                     'inorganic',
                     config['key_file'])

# Database connection & metadata. The engine URL comes from
# config['database_url']; `con` is not closed in this cell because the query
# loop later in the notebook reuses it.
eng = create_engine(config['database_url'])
con = eng.connect()
meta = MetaData(con)

# Identify the table and columns used for DB queries.
# autoload=True asks SQLAlchemy to reflect the 'cpds' table definition from the
# database rather than declaring its columns here.
# NOTE(review): MetaData(<connection>) and autoload=True are legacy SQLAlchemy
# APIs -- confirm the pinned SQLAlchemy version before upgrading.
cpds = Table('cpds', meta, autoload=True)
mol = cpds.c.molecule

### Get compound group parameters from the Google Sheet

There is a worksheet containing all the groups that are "[Element] compounds, inorganic".

In [None]:
# Materialize every item yielded by sheet.get_cmgs(env) (presumably CMGroup
# objects -- confirm). A stop of None makes islice take all items; swap in an
# integer to limit the number of groups while prototyping.
cmgs = list(islice(sheet.get_cmgs(env), None))

## Define function to extract CIDs from query results

In [None]:
def result_cids(df):
    """Return the non-null entries of the ``cid`` column of *df*.

    The ``'cid'`` column is selected from *df* and ``dropna()`` is applied,
    so rows without a CID are left out of the returned object.
    """
    return df['cid'].dropna()

## Execute SQL queries and update `CMGroup` info

**Group population strategy:** Search for all compounds of an element but exclude compounds that contain certain patterns which make them "not inorganic". The `get_element_inorganic` function used here currently excludes C-C bonds *and* C-H bonds.

- This is documented in the query function's docstring, which can be used as part of the program output.

In [None]:
for cmg in cmgs:
    # Build the element query for this group from its structure, the molecule
    # column, and the CID column.
    query = get_element_inorganic(cmg.structure, mol, [cpds.c.cid])

    # Render the SQL with bound parameters inlined so it can be stored as
    # human-readable documentation of what was run.
    rendered_sql = str(query.compile(compile_kwargs={'literal_binds': True}))

    # Execute the query and keep only the non-null CIDs.
    result = get_query_results(query, con)
    cids = result_cids(result)

    # Attach the method documentation and a summary of the results to the CMG.
    cmg.add_info({
        'sql': rendered_sql,
        'method_doc': get_element_inorganic.__doc__,
        '# results': len(result),
        '# cids': len(cids),
    })

    # Write the results page as <results_path>/<cmg_id>.html.
    page_stem = os.path.join(cmg.results_path, cmg.cmg_id)
    cids_to_html(cids, '{}.html'.format(page_stem),
                 title=cmg.name, info=cmg.info)

## Dump all CMG objects to JSON

In [None]:
# Dump the collected groups to JSON via camelid.cmgroup.collect_to_json
# (the output location is determined by that function and `env`).
collect_to_json(cmgs, env)

## Create HTML directory of all CMGs

In [None]:
# Build the HTML directory of all groups via camelid.hypertext.directory.
# formats=['json'] presumably selects which per-group files are listed --
# confirm against that function.
directory(cmgs, env, formats=['json'])

# Why are some organics still there?

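Tackle this next. As a first check, the cells below test one tin-containing example (four C6F5 groups) against a carbon–carbon bond SMARTS pattern.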

In [None]:
# Example structure to investigate: the InChI formula layer is 4C6F5.Sn
# (four C6F5 units plus tin). MolFromInchi returns an RDKit molecule, or None
# if the InChI cannot be parsed.
contains_phenyl = Chem.MolFromInchi('InChI=1S/4C6F5.Sn/c4*7-2-1-3(8)5(10)6(11)4(2)9;')

In [None]:
# Does the example contain two carbon atoms (aliphatic C or aromatic c) joined
# by any bond (~)? The cell's value is the boolean match result.
contains_phenyl.HasSubstructMatch(Chem.MolFromSmarts('[C,c]~[C,c]'))