Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#svl
//
// MACCSkeyCounts.svl MACCS key counts per molecule (1D molecular descriptors)
//
// 11-jul-2009 (exe) created
// 23-dec-2011 (exe) option to include endpoint
// 07-mar-2013 (exe) updated documentation
// 20-mar-2013 (exe) updated comments
// 16-sep-2014 (exe) option to use a column of compound names instead of cName Chains[]
// 22-oct-2014 (exe) use SMILES string if the compound's name is not present
// 18-may-2015 (exe) updated documentation
//
// Copyright (C) 2008-2015 Emilio Xavier Esposito. All rights reserved.
//
// exeResearch LLC | 32 University Drive | East Lansing, Michigan 48823 USA
// http://www.exeResearch.com
// emilio AT exeResearch DOT com
// emilio DOT esposito AT gmail DOT com
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// http://www.gnu.org/licenses/gpl-3.0.html
//
//
/*
Description
This function counts the occurrence of the 166 MACCS keys for each
molecule in a MOE database (mdb). The number of each key's occurrence
is written to a user-specified CSV file. The compound's name is always
written; whether the endpoint values are included, and which field the names
are read from, is controlled through the opts variable.
The default values are:
- Column (field) with molecule is 'mol' (molField:'mol')
- Include the endpoints if the endpoint column name is provided (endpointField:'')
- Compound name or SMILES string. If no compound name field is given
(nameField:''), the chain name (cName) of the entry's molecule is used. Whenever
the resulting name is empty, the compound's unique SMILES string is written instead.
- Column headers for MACCS keys will be returned in lower case letters (uppers:0)
Instructions
// load this function
load '~/code/svl/utilities/MACCSkeyCounts.svl';
// calculate the MACCS Key Counts for a MDB of compounds
MACCSkeyCounts['Selwood_dataset.mdb', 'Selwood_MACCSkeyCounts.csv', [molField:'mol', endpointField:'pEC50', nameField:'name']]
*/
#set title 'MACCSkeyCounts: MACCS key counts per molecule (1D molecular descriptors)'
#set class 'exeResearch:QSARmodeling'
#set version 'october.22.2014'
// Forward declaration only: MACCS_KeyCount is called by MACCSkeyCounts with
// the atom keys of one molecule, but it is not defined in the visible source.
// NOTE(review): presumably supplied by MOE's MACCS fingerprint code -- confirm.
function MACCS_KeyCount;
// Default option values for MACCSkeyCounts. The caller's opts are placed in
// front of these and only the first occurrence of each tag is kept, so any
// tag supplied by the caller overrides the value given here.
//   molField      - database field holding the molecule
//   endpointField - field whose value is written after the compound name;
//                   '' means no endpoint column is written
//   nameField     - field holding the compound name; '' means the chain
//                   name of the created molecule is used
//   uppers        - > 0 writes 'MACCS<n>' column headers, otherwise 'maccs<n>'
//   uniqNames     - NOTE(review): not read anywhere in the visible code
const DEFAULTS = [
molField:'mol',
endpointField:'',
nameField:'',
uppers:0,
uniqNames:0
];
//
// MACCSkeyCounts [mdbfile, CSVoutfile, opts]
//
// Writes one CSV row per entry of the MOE database 'mdbfile' to 'CSVoutfile'
// (opened with fopenx). Each row holds the compound name, optionally the
// endpoint value, and the values returned by MACCS_KeyCount for the entry's
// molecule. The header row names 166 key columns.
//
// opts is a tagged vector; tags that are not supplied are taken from DEFAULTS:
//   molField       field holding the molecule
//   endpointField  field holding the endpoint; '' omits the endpoint column
//   nameField      field holding the compound name; '' uses the chain name
//   uppers         > 0 writes 'MACCS<n>' headers instead of 'maccs<n>'
//
// Side effects: the current molecular system is closed before any molecule
// is created, and the merged options are printed.
//
// Exits with an error message (before the CSV file is created) if one of the
// requested fields does not exist in the database.
//
function MACCSkeyCounts [mdbfile, CSVoutfile, opts]

    //----- close the current system
    Close[];

    //----- merge the options: the caller's tags come first, so they win
    opts = cat [opts, DEFAULTS];
    opts = opts | m_uniq tags opts;

    //--- print the options
    print[opts];

    //----- the option tests do not change per entry: evaluate them once
    local useEndpoint  = (tok_length opts.endpointField) > 0;
    local useNameField = (tok_length opts.nameField) > 0;

    //----- open the database and verify the requested fields up front, so
    //      that a misspelled field name does not leave a half-written CSV
    local mdbkey = db_Open mdbfile;
    local fieldname = first db_Fields mdbkey;

    local wanted = [opts.molField, opts.endpointField, opts.nameField];
    wanted = wanted | ((tok_length wanted) > 0);
    local missing = wanted | m_diff [wanted, fieldname];

    if length missing then
        db_Close mdbkey;
        exit twrite ['MACCSkeyCounts: field(s) not found in {}: {}', mdbfile, missing];
    endif

    //----- open the csv file
    local asciiFileNum = fopenx CSVoutfile;

    //----- create the csv header
    //--- compound name column
    fwrite[asciiFileNum, 'compound'];

    //--- endpoint column
    if useEndpoint then
        fwrite[asciiFileNum, ',{}', opts.endpointField];
    endif

    //--- MACCS key columns
    if opts.uppers > 0 then
        apt fwrite[asciiFileNum, ',MACCS{}', igen[166] ];
    else
        apt fwrite[asciiFileNum, ',maccs{}', igen[166] ];
    endif

    //--- end of the header line
    fwrite[asciiFileNum, '\n'];

    //----- loop over the database...
    local moleculeCKs, moleculeAKs, moleculeName, MACCSKeyCount;
    local entry = 0;

    while entry = db_NextEntry [mdbkey, entry] loop

        //--- create the entry's molecule; work only with the chains (and
        //    their atoms) that mol_Create returns for this molecule
        moleculeCKs = mol_Create first db_ReadFields [mdbkey, entry, opts.molField];
        moleculeAKs = cat cAtoms moleculeCKs;

        //--- get the compound's name
        if useNameField then
            moleculeName = db_ReadFields [mdbkey, entry, opts.nameField];
        else
            moleculeName = cName moleculeCKs;
        endif

        //--- if the compound's name is not present, use the SMILES string
        if (tok_length moleculeName) == 0 then
            moleculeName = sm_ExtractUnique moleculeAKs;
        endif

        //--- write the compound's name to file
        fwrite[asciiFileNum, '{}', moleculeName];

        //--- write the compound's endpoint value to file if requested
        if useEndpoint then
            fwrite[asciiFileNum, ',{}', db_ReadFields [mdbkey, entry, opts.endpointField] ];
        endif

        //--- determine the MACCS key count and write one column per value
        MACCSKeyCount = MACCS_KeyCount moleculeAKs;
        apt fwrite[asciiFileNum, ',{}', MACCSKeyCount];
        fwrite[asciiFileNum, '\n'];

        //--- remove (delete) the current compound to get ready for the next one
        oDestroy moleculeCKs;
    endloop

    //----- close the database
    db_Close mdbkey;

    //----- close the ascii (csv) file
    fclose asciiFileNum;
endfunction
#eof