In [2]:
import sqlite3
import csv


class DatabaseManager():
    """Create and populate the SQLite database holding the assay and compound tables.

    The connection is opened when the manager is constructed; ``setup`` builds
    the schema and loads the CSV found at ``data_path`` into it.
    """

    def __init__(self, database_path, data_path):
        """Open the database connection and remember where the CSV lives.

        Args:
            database_path: Path handed to ``sqlite3.connect`` (``':memory:'`` works too).
            data_path: Path of the CSV file read by ``setup``.
        """
        self._database_path = database_path
        self._conn = sqlite3.connect(database_path)
        self.data_path = data_path

    def close(self):
        """Close the underlying SQLite connection."""
        self._conn.close()

    def setup(self):
        """Create the ``assays`` and ``compounds`` tables and load the CSV into them.

        The method can be run repeatedly against the same database (for
        example when the notebook cell is re-executed): tables are only
        created when missing, and rows whose ``cid`` is already stored are left
        untouched, so descriptor columns filled in later are not wiped.

        When the CSV contains the same ``CID`` more than once, the last row wins.

        Raises:
            OSError: if the CSV file cannot be opened.
            KeyError: if a row lacks one of the columns ``CID``, ``SMILES``,
                ``f_avg_IC50`` or ``r_avg_IC50``.
        """
        # Read the CSV before touching the database so that an unreadable or
        # malformed file does not leave freshly created, empty tables behind.
        assay_data = {}
        compound_data = {}
        # newline='' is what the csv module asks for, so that line breaks
        # embedded in quoted fields are parsed correctly.
        with open(self.data_path, mode='r', newline='') as file:
            for row in csv.DictReader(file):
                CID = row['CID']
                # Keyed by CID: a repeated CID overwrites the earlier entry.
                assay_data[CID] = (CID, row['f_avg_IC50'], row['r_avg_IC50'])
                compound_data[CID] = (CID, row['SMILES'])

        conn = self._conn
        # The connection context manager commits on success and rolls the
        # pending inserts back if any statement raises.
        with conn:
            conn.execute(
                '''
CREATE TABLE IF NOT EXISTS assays
(
    cid VARCHAR(20) PRIMARY KEY,
    f_avg_IC50 DECIMAL not null,
    r_avg_IC50 DECIMAL not null
)
'''
            )
            # NOTE: SQLite only enforces the FOREIGN KEY clause on connections
            # that have run ``PRAGMA foreign_keys = ON``; this class does not.
            conn.execute(
                '''
CREATE TABLE IF NOT EXISTS compounds
(
    cid VARCHAR(20) PRIMARY KEY,
    smiles VARCHAR(2000) not null,
    hbd DECIMAL,
    hba DECIMAL,
    c_logp DECIMAL,
    mw DECIMAL,
    FOREIGN KEY(cid) REFERENCES assays(cid)
)
'''
            )
            # OR IGNORE skips rows whose primary key already exists instead of
            # aborting the whole load on a re-run.
            conn.executemany(
                'INSERT OR IGNORE INTO assays (cid, f_avg_IC50, r_avg_IC50) VALUES (?, ?, ?)',
                list(assay_data.values()),
            )
            conn.executemany(
                'INSERT OR IGNORE INTO compounds (cid, smiles) VALUES (?, ?)',
                list(compound_data.values()),
            )



In [3]:
# Where the SQLite file is written and which CSV feeds it.
data_path = 'covid_submissions_all_info.csv'
database_path = 'database.db'

# Open the database, then create its tables and load the CSV rows into them.
manager = DatabaseManager(database_path, data_path)
manager.setup()

In [4]:
import sqlite3

from rdkit import Chem
from rdkit.Chem import Descriptors

# Connect to the SQLite database that holds the compounds table.
conn = sqlite3.connect('database.db')  # Replace with your database path
cursor = conn.cursor()

# Fetch the primary key together with each SMILES string so that every row can
# be updated through its key instead of by matching on the long SMILES text.
query = "SELECT cid, smiles FROM compounds"  # Replace with your query
cursor.execute(query)

update_query = "UPDATE compounds SET mw=?, hba=?, hbd=?, c_logp=? WHERE cid=?"
unparsed_cids = []  # compounds whose SMILES RDKit could not parse

# fetchall() materialises the result first, so reusing the same cursor for the
# UPDATE statements inside the loop does not disturb the iteration.
for row in cursor.fetchall():
    cid, smiles = row
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles returns None for a SMILES it cannot parse; skip the
        # compound (its descriptor columns stay NULL) rather than crash the
        # whole run on the first bad entry.
        unparsed_cids.append(cid)
        continue

    MW = Descriptors.MolWt(mol)       # molecular weight
    HBA = Descriptors.NOCount(mol)    # number of N and O atoms, used as the H-bond acceptor count
    HBD = Descriptors.NHOHCount(mol)  # number of NH and OH groups, used as the H-bond donor count
    LogP = Descriptors.MolLogP(mol)   # calculated logP

    cursor.execute(update_query, (MW, HBA, HBD, LogP, cid))

conn.commit()

if unparsed_cids:
    print("Skipped", len(unparsed_cids), "compound(s) with unparsable SMILES:", unparsed_cids)




In [None]:
from rdkit import Chem
from rdkit.Chem import Descriptors

def ro5_conditions(mw, hba, hbd, logp):
    """Return the four rule-of-five checks as a list of booleans.

    Order: molecular weight <= 500, H-bond acceptors <= 10,
    H-bond donors <= 5, logP <= 5.
    """
    return [mw <= 500, hba <= 10, hbd <= 5, logp <= 5]


def passes_ro5(mw, hba, hbd, logp, min_satisfied=3):
    """Return True when at least ``min_satisfied`` of the four checks hold.

    The default of 3 allows a single violation.
    """
    return sum(bool(ok) for ok in ro5_conditions(mw, hba, hbd, logp)) >= min_satisfied


# MW, HBA, HBD and LogP are left over from the descriptor loop in the previous
# cell, so these two names only describe the LAST compound that loop processed.
# They are kept unchanged for anything that already reads them.
conditions = ro5_conditions(MW, HBA, HBD, LogP)
pass_ro5 = conditions.count(True) >= 3

# Per-compound verdicts, read back from the descriptor columns stored in the
# database (uses the `conn` opened in the previous cell). Rows whose
# descriptors are still NULL are left out.
ro5_by_cid = {
    cid: passes_ro5(mw, hba, hbd, logp)
    for cid, mw, hba, hbd, logp in conn.execute(
        "SELECT cid, mw, hba, hbd, c_logp FROM compounds "
        "WHERE mw IS NOT NULL AND hba IS NOT NULL "
        "AND hbd IS NOT NULL AND c_logp IS NOT NULL"
    )
}

: 