In [None]:
import glob
import pandas as pd
import moles

import matplotlib.pyplot as plt

In [None]:
# Build one row of descriptors per .mol2 file and collect them into `molecules`.
# The order of values in each row must match COLUMN_NAMES below.
COLUMN_NAMES = ['Molecule ID', '# of Atoms', '# of Bonds',
                'Molar Mass', 'HB Donors', 'HB Acceptors',
                'Net Charge', 'Carbons', 'Hydrogens',
                'Nitrogens', 'Oxygens', 'Halogens',
                'Sulfurs', 'Phosph.', 'Other']

table = []

# glob.glob() returns paths in arbitrary order; sorting keeps the row order
# (and therefore the displayed table and exported CSV) stable across runs.
for i in sorted(glob.glob('mol2_files/*.mol2')):

    mol2 = moles.parse_mol2(i)
    db = moles.Datablocks(mol2)

    basic_info = db.mol_info()
    drug = moles.Chem_Info(db.atom_info(), db.bond_info())

    mol_info = [
        basic_info[0][0],   # -> 'Molecule ID'
        basic_info[1][0],   # -> '# of Atoms'
        basic_info[1][1],   # -> '# of Bonds'
        drug.molar_mass(),
        drug.HB_donor(),
        drug.HB_acceptor(),
        drug.net_charge(),
    ]

    # Call atom_count() once per molecule instead of once per element.
    # Positional indexing is kept because its return type is not visible here;
    # presumably it yields eight counts matching 'Carbons'..'Other' — TODO confirm.
    atom_counts = drug.atom_count()
    mol_info.extend(atom_counts[j] for j in range(len(atom_counts)))

    table.append(mol_info)

molecules = pd.DataFrame(table, columns=COLUMN_NAMES)

molecules

In [None]:
# Summary statistics for the molar-mass column
molecules.loc[:, 'Molar Mass'].describe()

In [None]:
# Export the descriptor table. DataFrame.to_csv() returns None when it is given
# a file path, so the result is deliberately not bound to a name.
molecules.to_csv('zdd_fda_approved.csv')

In [None]:
# Successive filters on molar mass, H-bond donors and H-bond acceptors.
# NOTE(review): the thresholds look like three of Lipinski's rule-of-five
# criteria — confirm that is the intent.
# Each mask is built from the frame it filters, so no step relies on pandas
# aligning a mask from the unfiltered `molecules` frame onto a smaller subset.
test1 = molecules.loc[molecules['Molar Mass'] <= 500]
test2 = test1.loc[test1['HB Donors'] <= 5]
test3 = test2.loc[test2['HB Acceptors'] <= 10]
test3

In [None]:
# Keep only the rows whose 'Phosph.' count is at least one
has_phosphorus = molecules['Phosph.'] >= 1
phos = molecules.loc[has_phosphorus]
phos

In [None]:
# Carbon count against molar mass, one marker per molecule
molecules.plot.scatter(
    x='Carbons',
    y='Molar Mass',
    c=['darkgray'],
    s=100,
)

In [None]:
# Hydrogen count against molar mass, one marker per molecule
molecules.plot.scatter(
    x='Hydrogens',
    y='Molar Mass',
    c=['darkgray'],
    s=100,
)

In [None]:
import numpy as np

# Data to plot: each tick label is paired with the column it totals, so the
# labels and the bar heights cannot drift out of step with each other.
element_columns = (
    ('C', 'Carbons'),
    ('H', 'Hydrogens'),
    ('N', 'Nitrogens'),
    ('O', 'Oxygens'),
    ('P', 'Phosph.'),
    ('S', 'Sulfurs'),
    ('Hal', 'Halogens'),  # was labelled 'H', duplicating the hydrogen tick
    ('Other', 'Other'),
)
labels = tuple(label for label, column in element_columns)
y_pos = np.arange(len(labels))  # bar positions along the x-axis
amount = [molecules[column].sum() for label, column in element_columns]

# Plot the total count of each element class across all molecules
plt.bar(y_pos, amount, width=0.8, align='center', alpha=1.0)
plt.xticks(y_pos, labels)
plt.ylabel('Usage')
plt.title('Atom Count')

plt.show()