Skip to content

Commit

Permalink
added foldseek support
Browse files Browse the repository at this point in the history
  • Loading branch information
cnotred committed Mar 25, 2023
1 parent 8908d9f commit e7b039f
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 26 deletions.
40 changes: 35 additions & 5 deletions lib/dp_lib/util_dp_drivers.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,13 @@ Constraint_list *seq2list ( Job_TC *job)
A=fast_pair (job);
RCL=A->CL;
}
else if ( strm ( mode, "3di_pair"))
{
A=fast_pair (job);
RCL=A->CL;

}

else if ( strm ( mode, "proba_pair") )
{

Expand Down Expand Up @@ -537,6 +544,23 @@ Constraint_list *method2pw_cl (TC_method *M, Constraint_list *CL)
PW_CL->evaluate_residue_pair=evaluate_matrix_score;
PW_CL->extend_jit=0;
}
else if ( strm (mode, "3di_pair"))
{

PW_CL->maximise=1;
PW_CL->TG_MODE=1;
PW_CL->use_fragments=0;
sprintf (PW_CL->dp_mode, "proba_pair_wise");
sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
PW_CL->residue_index=NULL;
PW_CL->get_dp_cost=slow_get_dp_cost;
PW_CL->evaluate_residue_pair=evaluate_matrix_score;
PW_CL->extend_jit=0;
if (get_string_variable ("3dimatrix"))
PW_CL->M=read_matrice (get_string_variable ("3dimatrix"));
else
PW_CL->M=read_matrice (get_string_variable ("idmat"));
}
else if ( strm (mode, "proba_pair"))
{

Expand Down Expand Up @@ -1274,7 +1298,6 @@ Constraint_list * profile_pair (TC_method *M , char *in_seq, Constraint_list *CL
A1=seq2R_template_profile(CL->S,s1);
A2=seq2R_template_profile(CL->S,s2);


prf1_file=vtmpnam (NULL);
fp=vfopen (prf1_file, "w");

Expand All @@ -1291,6 +1314,7 @@ Constraint_list * profile_pair (TC_method *M , char *in_seq, Constraint_list *CL
}
else
{
HERE ("NO A1");
fprintf ( fp, ">%s\n%s%s\n",sn1, (CL->S)->seq[s1], PATCH_PRF);
}
vfclose (fp);
Expand Down Expand Up @@ -2511,14 +2535,20 @@ Alignment * fast_pair (Job_TC *job)
for ( a=0; a< n; a++)
{
s=seqlist[a+2];

if ( strm (M->seq_type, "G"))
{
buf[s]=S->seq[s];
S->seq[s]=((((S->T[s])->G)->VG)->S)->seq[0];
}
else
buf[s]=S->seq[s];

}
else if ( strm (M->seq_type, "E"))
{
buf[s]=S->seq[s];
S->seq[s]=((((S->T[s])->E)->VE)->S)->seq[0];
}
else
buf[s]=S->seq[s];

A->seq_al[a]=csprintf (A->seq_al[a], "%s", S->seq[s]);
A->name[a]=csprintf (A->name[a], "%s", (CL->S)->name[s]);

Expand Down
26 changes: 24 additions & 2 deletions lib/perl/lib/scripts/dynamic.pl
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
my_system ("t_coffee -other_pg seq_reformat -in $f -action +seq2dnd list ");
$do_exit=1;
}

if ($method2use eq "list")
{
my %ml;
Expand Down Expand Up @@ -229,7 +230,26 @@

if ($VERBOSE){print "\n![dynamic.pl] --- cmethod == $cmethod\n";}

if ($cmethod eq "tcoffee"|| $cmethod eq "t_coffee" )
# When the requested method names an existing file, that file is used as a
# user-supplied command template instead of one of the built-in aligners.
# The text returned by file2string() must contain the literal placeholders
# $input and $output; the first occurrence of each is replaced by the
# input and output file names before the command is handed to my_system().
if (-e $method2use)
{
    my $com=file2string($method2use);

    # s/// returns false when nothing was substituted, i.e. the placeholder
    # is missing from the template: both placeholders are mandatory.
    if (!($com=~s/\$input/$infile/))
    {
        print "ERROR - provided command [$com] should specify <input> [FATAL:dynamic.pl]\n";
        exit ($EXIT_FAILURE);
    }
    if (!($com=~s/\$output/$outfile/))
    {
        # Report the placeholder that is actually missing (<output>, not <input>).
        print "ERROR - provided command [$com] should specify <output> [FATAL:dynamic.pl]\n";
        exit ($EXIT_FAILURE);
    }

    my_system ($com);
}

elsif ($cmethod eq "tcoffee"|| $cmethod eq "t_coffee" )
{
my_system ("t_coffee -seq $infile -outfile $outfile -output fasta_aln $CL4tc>/dev/null $QUIET");
}
Expand Down Expand Up @@ -318,9 +338,11 @@
while (<F>)
{
my $l=$_;
if ( $VERBOSE || $l=~/WARNING/ || $l=~/ERROR/ || $l=~/INFORNATION/){print stderr "$l";}
print ("------ $l");
if ( $VERBOSE || $l=~/WARNING/ || $l=~/ERROR/ || $l=~/INFORMATION/){print stderr "$l";}
}
close (F);
if (-e $stderrF){unlink ($stderrF);}
}

my_exit ($CDIR,$EXIT_SUCCESS);
Expand Down
55 changes: 50 additions & 5 deletions lib/t_coffee_lib/t_coffee.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ int batch_main ( int argc, char **argv)
char **template_file_list;
int n_template_file;

char*template_dir_E_;

char **template_mode_list;
int n_template_mode;

Expand Down Expand Up @@ -291,6 +293,7 @@ int batch_main ( int argc, char **argv)
int len;
char *infile;
char *matrix;
char *threedimatrix;
char *dp_mode;
char *profile_mode;
char *profile_comparison;
Expand Down Expand Up @@ -1405,6 +1408,27 @@ if ( !do_evaluate)
);
if (n_template_file)cputenv ("template_file_4_TCOFFEE=%s",template_file_list[0]);

declare_name(template_dir_E_);
get_cl_param( \
/*argc*/ argc ,\
/*argv*/ argv ,\
/*output*/ &le ,\
/*Name*/ "-template_dir_E_" ,\
/*Flag*/ &garbage ,\
/*TYPE*/ "S" ,\
/*OPTIONAL?*/ OPTIONAL ,\
/*MAX Nval*/ 1000 ,\
/*DOC*/ "directory for _E_ templates (_R_ <dir> _P_ <dir>...",\
/*Parameter*/ &template_dir_E_ , \
/*Def 1*/ "./",\
/*Def 2*/ "./" ,\
/*Min_value*/ "any" ,\
/*Max Value*/ "any" \
);
set_string_variable ("template_dir_E_",template_dir_E_);



/*PARAMETER PROTOTYPE: VERSION */
setenv_list=declare_char (100, STRING);
n_setenv=get_cl_param(\
Expand Down Expand Up @@ -2197,6 +2221,27 @@ if ( !do_evaluate)
/*Min_value*/ "any" ,\
/*Max Value*/ "any" \
);

declare_name (threedimatrix);
get_cl_param( \
/*argc*/ argc ,\
/*argv*/ argv ,\
/*output*/ &le ,\
/*Name*/ "-threedimatrix" ,\
/*Flag*/ &garbage ,\
/*TYPE*/ "S" ,\
/*OPTIONAL?*/ OPTIONAL ,\
/*MAX Nval*/ 1 ,\
/*DOC*/ "Specifies the substitution matrix used on 3di.",\
/*Parameter*/ &threedimatrix ,\
/*Def 1*/ "idmat" ,\
/*Def 2*/ "default" ,\
/*Min_value*/ "any" ,\
/*Max Value*/ "any" \
);
set_string_variable ("3dimatrix",threedimatrix);


/*PARAMETER PROTOTYPE: TG_MODE */

get_cl_param(\
Expand Down Expand Up @@ -5092,8 +5137,7 @@ get_cl_param(\
*/
if ( (CL->S)->nseq>1 && CL->ne==0 && !CL->M &&!(do_convert && n_list>0))
{
fprintf ( stderr, "\n******************ERROR*****************************************\n");

fprintf ( stderr, "\n****************** ERROR *****************************************\n");
fprintf ( stderr, "\nYou have not provided any method or enough Sequences[FATAL]");
fprintf ( stderr, "\nIf you have used the '-in' Flag, ADD the methods you wish to use:");
fprintf ( stderr, "\n\t-in <your sequences> Mlalign_id_pair Mfast_pair\n");
Expand Down Expand Up @@ -7277,20 +7321,21 @@ Alignment * t_coffee_dpa (int argc, char **argv)
}
else if (strm (argv[a],"-expand") )
{
cputenv ("COMPACT_4_TCOFFEE=0");

cputenv ("COMPACT_4_TCOFFEE=0");
}
else if (strm (argv[a], "-method") || strm (argv[a], "-dpa_method") || strm (argv[a], "-reg_method"))
{
dpa_aligner=argv[++a];
if (isfile(dpa_aligner))
dpa_aligner=fname2abs(dpa_aligner);
}
else if (strm (argv[a], "-cache"))
{
cache=argv[++a];
}
else if (strm (argv[a],"-in") || strm (argv[a],"-infile"))
{
myexit (fprintf_error (stderr, "%s is not supported when using -dpa [FATAL:%s]", argv[a],PROGRAM));
myexit (fprintf_error (stderr, "%s is not supported when using -dpa, use -seq to input sequences [FATAL:%s]", argv[a],PROGRAM));
}

else if ( strstr (argv[a], "reg_homoplasy"))
Expand Down
35 changes: 24 additions & 11 deletions lib/util_lib/aln_convertion_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -6545,14 +6545,18 @@ Sequence * seq2template_seq ( Sequence *S, char *template_list, Fname *F)
for (a=0; a< S->nseq; a++)
{

if ( (p=strstr (template_list,"SELF_")))p=S->name[a];
if ( (p=strstr (template_list,"SELF_")))
{
p=S->name[a];
}

else if ( strstr (template_list, "SEQFILE_"))p=template_list;
else
{
fprintf ( stderr, "\nUnkown mode for Template [FATAL:%s]\n", PROGRAM);
myexit (EXIT_FAILURE);
}

if ( strstr (template_list, "_P_") && !(S->T[a])->P)(S->T[a])->P =fill_P_template ( S->name[a], p,S);//PDB
else if ( strstr (template_list, "_S_") && !(S->T[a])->S)(S->T[a])->S =fill_S_template ( S->name[a], p,S);//Sequence
else if ( strstr (template_list, "_R_" )&& !(S->T[a])->R)(S->T[a])->R =fill_R_template ( S->name[a], p,S);//pRofile
Expand Down Expand Up @@ -6891,18 +6895,18 @@ int seq2n_X_template ( Sequence *S, char *type)
struct X_template *fill_X_template ( char *name, char *p, char *token)
{
struct X_template *X;




char *k;

X=(X_template*)vcalloc (1, sizeof (X_template));
sprintf ( X->seq_name, "%s", name);
if ( (k=strstr (p, token)))sscanf (k+strlen(token), "%s",X->template_name);
else sprintf (X->template_name, "%s", p);







/*Add a Structure HERE*/
sprintf ( X->template_type, "%s", token);
if ( strm (token, "_P_"))X->VP=(P_template*)vcalloc (1, sizeof (P_template));
Expand Down Expand Up @@ -7288,7 +7292,8 @@ struct X_template *fill_R_template ( char *name,char *p, Sequence *S)
/*Profile template*/
struct X_template *R;




R=fill_X_template ( name, p, "_R_");
sprintf (R->template_format , "fasta_aln");

Expand Down Expand Up @@ -7395,12 +7400,21 @@ struct X_template *fill_E_template ( char *name,char *p, Sequence *S)
{
/*Profile template*/
struct X_template *E;
static char *tdir;

if (!tdir)tdir=get_string_variable ("template_dir_E_");


E=fill_X_template ( name, p, "_E_");
sprintf (E->template_format , "fasta_seq");

if (!is_aln(E->template_name) && !is_seq (E->template_name))
sprintf (E->template_file, "%s%s%s",(tdir)?tdir:"",(tdir)?"/":"",p);
if (!is_aln(E->template_file) && !is_seq (E->template_file))
sprintf (E->template_file, "%s%s%s._E_",(tdir)?tdir:"",(tdir)?"/":"",p);



if (!is_aln(E->template_file) && !is_seq (E->template_file))
{

add_information ( stderr, "_E_ Template %s Could not be found\n",E->template_name);
Expand All @@ -7409,8 +7423,7 @@ struct X_template *fill_E_template ( char *name,char *p, Sequence *S)
}
else
{
(E->VE)->S=main_read_seq (E->template_name);
sprintf ( E->template_file, "%s", E->template_name);
(E->VE)->S=main_read_seq (E->template_file);
}
return E;
}
Expand Down
2 changes: 1 addition & 1 deletion lib/util_lib/reformat.c
Original file line number Diff line number Diff line change
Expand Up @@ -2216,7 +2216,7 @@ char* is_pdb_struc ( char *iname)
a=0;
name[a]=csprintf ( name[a], "%s", iname);a++;
name[a]=csprintf ( name[a], "%s.pdb", iname);a++;
if (getenv ("PDB_DIR"))name[a]=csprintf ( name[a], "%s/%s", getenv("PBD_DIE"),iname);a++;
/* Candidate <PDB_DIR>/<iname>: read the same variable that the guard tests, so a NULL is never passed to %s. */
if (getenv ("PDB_DIR"))name[a]=csprintf ( name[a], "%s/%s", getenv("PDB_DIR"),iname);a++;
if (getenv ("PDB_DIR"))name[a]=csprintf ( name[a], "%s/%s.pdb",getenv("PDB_DIR"),iname);a++;
if (get_cache_dir())name[a]=csprintf ( name[a], "%s/%s", get_cache_dir(),iname);a++;
if (get_cache_dir())name[a]=csprintf ( name[a], "%s/%s.pdb", get_cache_dir(),iname);a++;
Expand Down
24 changes: 22 additions & 2 deletions lib/util_lib/util_constraints_list.c
Original file line number Diff line number Diff line change
Expand Up @@ -5367,8 +5367,28 @@ char *** produce_method_file ( char *method)
fprintf ( fp, "SUPPORTED NO");
vfclose (fp);}



/* pairwise method to align 3di sequences (i.e. structures turned into alphabet using foldseek)*/
sprintf (list[n][0], "fs_pair");
sprintf (list[n][1], "%s", vtmpnam(NULL));
n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w");
fprintf ( fp, "DOC Probabilistic pairwise alignment\n");
fprintf ( fp, "EXECUTABLE proba_pair\n");
fprintf ( fp, "ALN_MODE pairwise\n");
fprintf ( fp, "OUT_MODE fL\n");
fprintf ( fp, "IN_FLAG no_name\n");
fprintf ( fp, "OUT_FLAG no_name\n");
fprintf ( fp, "SEQ_TYPE E\n");
if ( strm ( retrieve_seq_type(), "DNA") || strm (retrieve_seq_type(), "RNA"))
{
fprintf ( fp, "GOP %d\n",CODE4DNA);//code for DNA
}
else
{
fprintf ( fp, "GOP %d\n",CODE4PROTEINS);//Code for Proteins
}
fprintf ( fp, "ADDRESS %s\n", ADDRESS_BUILT_IN);
fprintf ( fp, "PROGRAM %s\n", PROGRAM_BUILT_IN);
vfclose (fp);}

sprintf (list[n][0], "proba_pair");
sprintf (list[n][1], "%s", vtmpnam(NULL));
Expand Down

0 comments on commit e7b039f

Please sign in to comment.