Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
203 lines (182 sloc) 5.28 KB
/****************************************************************************
* Copyright (C) 2009-2015 EPAM Systems
*
* This file is part of Indigo toolkit.
*
* This file may be distributed and/or modified under the terms of the
* GNU General Public License version 3 as published by the Free Software
* Foundation and appearing in the file LICENSE.GPL included in the
* packaging of this file.
*
* This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
* WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
***************************************************************************/
//
// This is a command line utility for producing canonical SMILES
// or layered code for molecules in MOL or SDF format
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "indigo.h"
void onError (const char *message, void *context)
{
fflush(stdout);
fprintf(stderr, "%s\n", message);
fflush(stderr);
exit(-1);
}
void usage ()
{
printf(
"Usage:\n"
" indigo-cano filename.{mol,smi,cml,sdf,sdf.gz,rdf,rdf.gz} [parameters]\n"
" indigo-cano - SMILES [parameters]\n"
"Parameters:\n"
" -smiles Output canonical SMILES (default)\n"
" -layered Output canonical layered code\n"
" -id <string> ID field in SDF file\n"
" -no-arom Do not aromatize molecules\n"
" -no-tetrahedral Ignore tetrahedral stereocenters\n"
" -no-cistrans Ignore cis-trans bonds information\n"
"Examples:\n"
" indigo-cano infile.sdf\n"
" indigo-cano infile.sdf.gz -id molregno > results.txt\n"
" indigo-cano infile.smi -layered -no-cistrans\n"
" indigo-cano - 'NC1C=CC(O)=CC=1'\n"
);
}
int processMolecule (int mol, int smiles, int no_arom, int no_cistrans, int no_tetra)
{
if (no_cistrans)
if (indigoClearCisTrans(mol) < 0)
return -1;
if (no_tetra)
if (!indigoClearStereocenters(mol))
return -1;
if (smiles && !no_arom)
if (indigoAromatize(mol) < 0)
return -1;
if (smiles)
{
const char *res = indigoCanonicalSmiles(mol);
if (res == 0)
return -1;
printf("%s\n", res);
}
else
{
const char *res = indigoLayeredCode(mol);
if (res == 0)
return -1;
printf("%s\n", res);
}
return 1;
}
int main (int argc, char *argv[])
{
int smiles = 1;
int no_cistrans = 0;
int no_tetra = 0;
int no_arom = 0;
int i = 2;
const char *idfield = 0;
const char *filename = 0;
const char *ext = 0;
if (argc < 2)
{
usage();
return -1;
}
if (strcmp(argv[1], "-") != 0)
filename = argv[1];
else if (argc >= 3 && strcmp(argv[1], "-") == 0)
i = 3;
else
{
usage();
return -1;
}
while (i < argc)
{
if (strcmp(argv[i], "-smiles") == 0)
smiles = 1;
else if (strcmp(argv[i], "-layered") == 0)
smiles = 0;
else if (strcmp(argv[i], "-no-cistrans") == 0)
no_cistrans = 1;
else if (strcmp(argv[i], "-no-arom") == 0)
no_arom = 1;
else if (strcmp(argv[i], "-no-tetrahedral") == 0)
no_tetra= 1;
else if (strcmp(argv[i], "-id") == 0)
{
if (++i >= argc)
{
fprintf(stderr, "expecting an identifier after -id\n");
return -1;
}
idfield = argv[i];
}
else
{
fprintf(stderr, "unknown parameter: %s\n", argv[i]);
return -1;
}
i++;
}
indigoSetErrorHandler(onError, 0);
if (filename == 0)
{
int mol = indigoLoadMoleculeFromString(argv[2]);
processMolecule(mol, smiles, no_arom, no_cistrans, no_tetra);
indigoFree(mol);
return 0;
}
if (strlen(filename) > 4 && filename[strlen(filename) - 4] == '.')
ext = filename + strlen(filename) - 3;
else if (strlen(filename) > 7 && filename[strlen(filename) - 7] == '.')
ext = filename + strlen(filename) - 6;
else
{
fprintf(stderr, "input file format not recognized\n");
return -1;
}
if (strcmp(ext, "mol") == 0)
{
int mol = indigoLoadMoleculeFromFile(filename);
processMolecule(mol, smiles, no_arom, no_cistrans, no_tetra);
indigoFree(mol);
return 0;
}
else if (strcmp(ext, "cml") == 0 ||
strcmp(ext, "sdf") == 0 || strcmp(ext, "sdf.gz") == 0 ||
strcmp(ext, "rdf") == 0 || strcmp(ext, "rdf.gz") == 0 ||
strcmp(ext, "smi") == 0 || strcmp(ext, "smi.gz") == 0)
{
int item, iter;
if ((strstr(ext, "cml") != NULL))
iter = indigoIterateCMLFile(filename);
else if (strstr(ext, "sdf") != NULL)
iter = indigoIterateSDFile(filename);
else if (strstr(ext, "rdf") != NULL)
iter = indigoIterateRDFile(filename);
else
iter = indigoIterateSmilesFile(filename);
while ((item = indigoNext(iter)))
{
indigoSetErrorHandler(0, 0);
if (processMolecule(item, smiles, no_arom, no_cistrans, no_tetra) == -1)
printf("%s\n", indigoGetLastError());
indigoSetErrorHandler(onError, 0);
indigoFree(item);
}
indigoFree(iter);
}
else
{
fprintf(stderr, "input file format not recognized\n");
return -1;
}
return 0;
}
You can’t perform that action at this time.