Permalink
Cannot retrieve contributors at this time
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
215 lines (182 sloc)
4.89 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Script for Biomaj PostProcess
# author : ofilangi, osallou
# date   : 19/06/2007
# update : 22/10/2010 fix bug in generated alias file + a few cleanups
#          23/12/2015 use makeblastdb for ncbi blast+
#
# -title    Title for database file [String]     Optional
# -in       Input file(s) for formatting [File In]  Optional
# -logfile  Logfile name: [File Out]             Optional
#           default = makeblastdb.log
# -dbtype nucl
# -parse_seqids
#
# Activate the "blast" environment before anything else runs.
# NOTE(review): presumably a conda environment that provides makeblastdb -
# confirm against the deployment.
source activate blast

#----------
#GLOBAL DEF
#----------
# Directory where the alias files are generated (needs $datadir from the
# environment).
BLASTDB_DIR="$datadir/index-blast"
if ! mkdir -p -- "$BLASTDB_DIR"; then
  echo "Cannot create $BLASTDB_DIR." 1>&2
  exit 1
fi

# Path to (or name of) the makeblastdb executable.
MAKEBLASTDB="makeblastdb"
#----------
# FUNCTIONS
#----------

#######################################
# createAlias: (re)writes a BLAST alias file.
# Arguments:
#   $1 - path of the alias file to write (may contain spaces)
#   $2 - bank name, written verbatim on the TITLE line
#   $3 - whitespace-separated list of db files for the DBLIST line
# Outputs:
#   Writes the alias file; prints a diagnostic on stderr on failure.
# Returns:
#   0 on success, 1 when the alias file cannot be written.
#######################################
createAlias() {
  local file=$1
  local nomBanque=$2
  local IFS=$' \t\n'
  local -a dbEntries=()

  # Split the list on whitespace WITHOUT pathname expansion, so an entry such
  # as "nr*" is written literally. Surrounding/repeated blanks are collapsed,
  # exactly as callers passing a list with a trailing space expect.
  # (read returns non-zero at end of input; the array is still filled.)
  read -r -d '' -a dbEntries <<< "$3"

  # Remove first so that a pre-existing symlink is replaced, not written
  # through.
  rm -f -- "$file"

  {
    echo "#"
    printf '# Alias file created %s\n' "$(date)"
    echo "#"
    echo "#"
    printf 'TITLE %s\n' "$nomBanque"
    echo "#"
    printf 'DBLIST %s\n' "${dbEntries[*]}"
    echo "#"
    echo "#GILIST"
    echo "#"
    echo "#OIDLIST"
    echo "#"
  } > "$file" || {
    echo "Cannot write alias file [$file]" 1>&2
    return 1
  }
}
#-----
# MAIN
#-----
# Positional arguments:
#   $1 - whitespace-separated list of input file patterns (shell globs),
#        relative to $datadir/$dirversion/future_release
#   $2 - working directory, relative to the same release directory
#   $3 - makeblastdb options (without -in)
#   $4 - bank name (also the makeblastdb -out name)
# Environment read: datadir, dirversion, PP_DEPENDENCE.

if [[ $# -ne 4 ]]; then
  {
    echo "arguments:"
    echo "1: input files"
    echo "2: working directory"
    echo "3: makeblastdb options (without -in for input file)"
    echo "4: bank name"
    "$MAKEBLASTDB" -help
  } 1>&2
  # Same status as the historical "exit -1".
  exit 255
fi

# The working directory is wiped with rm -rf below: refuse to build its path
# from an empty root.
if [[ -z "${datadir:-}" ]]; then
  echo "datadir must be set in the environment." 1>&2
  exit 1
fi

# Remove useless trailing slashes.
relWorkDir=$(printf '%s\n' "$2" | sed 's/\/*$//')
if [[ -z "$relWorkDir" ]]; then
  # An empty working directory would make rm -rf delete the whole release.
  echo "Working directory (argument 2) must not be empty." 1>&2
  exit 1
fi

releaseDir="$datadir/$dirversion/future_release"
workdir="$releaseDir/$relWorkDir"

rm -rf -- "$workdir"
if ! mkdir -p -- "$workdir"; then
  echo "Cannot create $workdir." 1>&2
  exit 1
fi
if ! cd -- "$workdir"; then
  echo "Cannot enter $workdir." 1>&2
  exit 1
fi

# Relative path from the working dir back up to the release dir:
# one "../" per non-empty component of relWorkDir.
back=""
IFS="/" read -r -a workDirParts <<< "$relWorkDir"
for part in "${workDirParts[@]}"; do
  [[ -n "$part" ]] || continue
  back="../$back"
done

# Split the makeblastdb options and the input patterns on whitespace with
# pathname expansion disabled, so nothing is expanded against the working dir.
set -f
mkdbOpts=($3)
expressions=($1)
set +f

# Drop leftovers of a previous run carrying the bank name.
rm -f -- "$4".p* "$4".n*

# Create links to the input files inside the working dir.
linkNames=()
for expression in "${expressions[@]}"; do
  # Expand the pattern against the release directory.
  inputs=()
  for f in "$releaseDir"/$expression; do
    [[ -f "$f" ]] && inputs+=("$f")
  done
  if [[ ${#inputs[@]} -eq 0 ]]; then
    echo "No input file found for [$expression] in $releaseDir." 1>&2
    exit 1
  fi

  baseFile=$(dirname -- "$expression")
  for f in "${inputs[@]}"; do
    name=$(basename -- "$f")
    # The link is named after the file name up to its first dot.
    nameLink=${name%%.*}
    linkTarget="${back}${baseFile}/${name}"
    if ! ln -s -- "$linkTarget" "$nameLink"; then
      echo "Cannot create link. [ln -s $linkTarget $nameLink]" 1>&2
      exit 1
    fi
    # Newest first, which is the order the list has always been built in.
    linkNames=("$nameLink" "${linkNames[@]}")
  done
done

listFile="${linkNames[*]}"
echo "Input sequence file list: $listFile"
if [[ -z "$listFile" ]]; then
  echo "No input file found." 1>&2
  exit 1
fi

nameB=$4
echo "Database name: $nameB"
echo "Working in $(pwd)"
echo "Launching makeblastdb [makeblastdb -in $listFile $3 -out $nameB]"

# Execute makeblastdb; all inputs are passed as one space-separated -in value.
"$MAKEBLASTDB" -in "$listFile" "${mkdbOpts[@]}" -out "$nameB"
makeblastdbResult=$?
if [[ $makeblastdbResult -ne 0 ]]; then
  echo "Makeblastdb failed with status $makeblastdbResult" 1>&2
  exit 1
fi

# Metadata line; the normalised working dir is used so that a missing
# trailing slash in argument 2 no longer glues directory and bank name.
echo "##BIOMAJ#blast###$relWorkDir/$nameB"

# Delete temp files and links
#-------------------------------------------------------------
ls -ltrh
rm -f -- "${linkNames[@]}"
rm -f makeblastdb.log

# Add generated files to biomaj postprocess dependance
echo "Generated files:"
for ff in *; do
  [[ -e "$ff" ]] || continue
  echo "${PP_DEPENDENCE}${PWD}/${ff}"
done

# future_release must be a symbolic link; its target is used to build the
# paths written into the shared alias files.
if ! goodPath=$(readlink -s -n -- "$releaseDir"); then
  echo "Failed to get version path: readlink returned with an error [$goodPath]" 1>&2
  exit 1
fi

# Search for nal/pal files which are sometimes generated by makeblastdb.
aliasFiles=()
for f in *.?al; do
  [[ -e "$f" ]] && aliasFiles+=("$f")
done

if [[ ${#aliasFiles[@]} -eq 0 ]]; then
  echo "No alias file found."

  # makeblastdb generated no alias file: create one in the working dir
  # (relative db names) and one in $BLASTDB_DIR (absolute db paths).
  hrFiles=()
  for f in *.?hr; do
    [[ -e "$f" ]] && hrFiles+=("$f")
  done
  if [[ ${#hrFiles[@]} -eq 0 ]]; then
    echo "No blast header file (*.?hr) found in $PWD." 1>&2
    exit 1
  fi

  # ".n" or ".p", taken from the last header file, gives ".nal" / ".pal".
  lastHr=${hrFiles[${#hrFiles[@]}-1]}
  ext="${lastHr: -4:2}al"
  echo "Creating alias file [$PWD/$4$ext]"

  relEntries=()   # blast db files, relative path
  absEntries=()   # blast db files, absolute path
  for f in "${hrFiles[@]}"; do
    dbName=${f%.?hr}
    relEntries=("$dbName" "${relEntries[@]}")
    absEntries=("$goodPath/$relWorkDir/$dbName" "${absEntries[@]}")
  done

  createAlias "$4$ext" "$nameB" "${relEntries[*]}"
  echo "${PP_DEPENDENCE}${PWD}/$4$ext"
  echo "Creating alias in [$BLASTDB_DIR/$4$ext]"
  createAlias "$BLASTDB_DIR/$4$ext" "$nameB" "${absEntries[*]}"
else
  echo "Generated alias files:"
  printf '%s\n' "${aliasFiles[@]}"

  # Mirror every generated alias file into $BLASTDB_DIR, with its DBLIST
  # entries prefixed by the release path.
  for fileIndexVirtuel in "${aliasFiles[@]}"; do
    echo "Found alias file: [$fileIndexVirtuel]"
    set -f
    dbWords=($(grep DBLIST -- "$fileIndexVirtuel"))
    set +f
    absEntries=()
    for f in "${dbWords[@]}"; do
      [[ "$f" != "DBLIST" ]] || continue
      absEntries=("$goodPath/$relWorkDir/$f" "${absEntries[@]}")
    done
    echo "Creating alias in [$BLASTDB_DIR/$fileIndexVirtuel]"
    createAlias "$BLASTDB_DIR/$fileIndexVirtuel" "$nameB" "${absEntries[*]}"
  done
fi

# Counterpart of the "source activate blast" at the top of the script.
# NOTE(review): confirm that a bare "deactivate" is what the environment
# manager in use expects.
deactivate