/
molget
executable file
·145 lines (129 loc) · 4.12 KB
/
molget
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env bash
# Original work by Jan Jensen (https://github.com/jensengroup/molget).
# Rewritten by Felipe Schneider.
#
#? molget 0.3
#?
#? Molget: bash script to get coordinates from chemical name using Cactus.
#?
#? Usage: molget chemical_name [chemical_name...]
#?
#? -h show this message
#? -v show version
#? -s assume chemical_name is a SMILES string
#? -o [TYPE] selects an output file type supported by OpenBabel (defaults to
#? xyz)
#?
#? Examples:
#?
#? ./molget methane
#? ./molget hexacyanoiron
#? ./molget water thf dmso dmf
#? ./molget "propylene carbonate"
#? ./molget -s "C(=O)N"
#? ./molget -s "C/C=C\C" "C/C=C/C"
#? ./molget -o gamin p-nitrobenzene
#? ./molget -o pdb nadh nad+
#? ./molget -o gamin -s "C#C"
#? (remember to "chmod 755 molget")
# Defaults; either may be overridden by the command-line flags handled below.
use_smiles=false
ob_out_type="xyz"

# The leading ":" in the option string selects getopts' silent mode, so
# unknown flags and missing arguments are reported by the "\?" and ":" arms.
while getopts ":hvso:" flag "$@"; do
  case "$flag" in
    h)
      # Help: print every "#?" line of this file minus its 3-character prefix.
      grep "^#?" "$0" | cut -c 4-
      exit 0
      ;;
    v)
      # Version: the "#?" line that starts with the program name.
      grep "^#? molget" "$0" | cut -c 4-
      exit 0
      ;;
    s)
      # Arguments are SMILES strings rather than chemical names.
      use_smiles=true
      ;;
    o)
      # Output file type handed to OpenBabel.
      ob_out_type=$OPTARG
      ;;
    \?)
      printf '%s\n' "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
    :)
      printf '%s\n' "Option -$OPTARG requires an argument." >&2
      exit 1
      ;;
  esac
done

# Drop the parsed flags so that "$@" holds only the molecule arguments.
shift "$((OPTIND - 1))"
# Convert a single molecule and report the outcome.
#
# Writes "<name> SUCCESS" or "<name> FAILED" to stdout. The structure is
# either written verbatim from a SMILES string or downloaded from Cactus in
# SDF representation, and is then converted with OpenBabel unless the
# requested output type equals the input type.
#
# Globals:
#   use_smiles   (read) "true" when the argument is a SMILES string
#   ob_out_type  (read) output file type passed to OpenBabel
# Arguments:
#   $1 - chemical name or SMILES string
# Returns:
#   0 on success and when the downloaded file contains "Page not found" (the
#   molecule is skipped). A failing curl or obabel terminates the whole
#   script with status 1 after printing the command's stderr.
_process_molecule() {
  local molecule=$1
  # Some preferences are preset, such as file types and options.
  local cactus_repr="sdf"
  local ob_inp_type="sdf"
  # Extra OpenBabel options live in an array so each one reaches obabel as a
  # separate word without relying on unquoted word splitting.
  local -a ob_ext_opts=(-c)
  local file_stem ob_inp_file ob_out_file url error check

  # printf instead of "echo -n": names such as "-n" or "-e" must be printed
  # literally, not interpreted as echo flags.
  printf '%s' "$molecule"

  if [[ "$use_smiles" == true ]]; then
    # We need to change the input type.
    ob_inp_type="smi"
    # OpenBabel must generate 3D coordinates when starting from SMILES.
    ob_ext_opts+=(--gen3d)
  fi

  # File names: spaces become underscores, "/" becomes "slash" (path
  # separator on Linux) and "\" becomes "backslash" (Windows/Wine).
  file_stem=${molecule// /_}
  file_stem=${file_stem//\//slash}
  file_stem=${file_stem//\\/backslash}
  ob_inp_file="${file_stem}.${ob_inp_type}"
  ob_out_file="${file_stem}.${ob_out_type}"

  # Generate the input file for OpenBabel.
  if [[ "$use_smiles" == true ]]; then
    printf '%s\n' "$molecule" > "$ob_inp_file"
  else
    url="https://cactus.nci.nih.gov/chemical/structure/${molecule}/${cactus_repr}"
    # Spaces in the URL should be escaped.
    url=${url// /%20}
    # Redirection order matters: "2>&1 >/dev/null" points stderr at the
    # command substitution and only then discards stdout, so curl's
    # diagnostics end up in $error. -sS hides the progress meter but keeps
    # error messages.
    error=$(curl -sS "$url" -o "$ob_inp_file" 2>&1 >/dev/null)
    check=$?
    # Download errors are critical, so exit the program.
    if (( check != 0 )); then
      echo " FAILED"
      printf "Exit status %s of command 'curl -sS %s -o %s':\n" \
        "$check" "$url" "$ob_inp_file" >&2
      printf '%s\n' "$error" >&2
      # -f: the file may never have been created if the download failed.
      rm -f -- "$ob_inp_file"
      exit 1
    fi
    # Check for 404 error: skip this molecule but keep processing the rest.
    if grep -q "Page not found" -- "$ob_inp_file"; then
      echo " FAILED"
      rm -f -- "$ob_inp_file"
      return 0
    fi
  fi

  # Avoid cases in which the output file type is the same as the input one.
  if [[ "$ob_inp_type" != "$ob_out_type" ]]; then
    # Same stderr-capturing redirection order as for curl above.
    error=$(obabel -i "$ob_inp_type" "$ob_inp_file" -o "$ob_out_type" \
      -O "$ob_out_file" "${ob_ext_opts[@]}" 2>&1 >/dev/null)
    check=$?
    # Conversion errors are critical, so exit the program.
    if (( check != 0 )); then
      echo " FAILED"
      printf '%s\n' "$ob_inp_file"
      printf "Exit status %s of command 'obabel -i %s %s -o %s -O %s %s':\n" \
        "$check" "$ob_inp_type" "$ob_inp_file" "$ob_out_type" \
        "$ob_out_file" "${ob_ext_opts[*]}" >&2
      printf '%s\n' "$error" >&2
      rm -f -- "$ob_inp_file"
      exit 1
    fi
    # Clean up the intermediate input file.
    rm -f -- "$ob_inp_file"
  fi

  echo " SUCCESS"
}

for molecule in "$@"; do
  _process_molecule "$molecule"
done