Skip to content

Commit

Permalink
updated extract_from_pdb for an improved handling of the pdb_entry_ty…
Browse files Browse the repository at this point in the history
…pe.txt files and the unrealeased-xml file
  • Loading branch information
cnotred committed Jan 8, 2016
1 parent 5d45939 commit 4fa9bf6
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 62 deletions.
10 changes: 6 additions & 4 deletions lib/dp_lib/evaluate.c
Original file line number Diff line number Diff line change
Expand Up @@ -4123,8 +4123,7 @@ int cw_profile_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int co






if (last_tag!=A->random_tag)
{
int n1, n2;
Expand Down Expand Up @@ -6052,7 +6051,7 @@ float ** initialise_aa_physico_chemical_property_table (int *n)
}

/**
* Set the extension mode. Default seems to be \c very_fast_triplet (??)
* Set the extension mode. Default is very_fast_triplet
*
* Depending on the \c extend_mode, this function specifies how the evaluation
* step has to be performed. It assigns functions to the values
Expand Down Expand Up @@ -6170,7 +6169,10 @@ Constraint_list * choose_extension_mode ( char *extend_mode, Constraint_list *CL
}
else if ( strm ( extend_mode, "matrix"))
{


if (!CL->M)
printf_exit ( EXIT_FAILURE,stderr, "\nERROR: use -in <mat_name> to set the matrix to be used [FATAL]");

CL->evaluate_residue_pair=evaluate_matrix_score;
CL->get_dp_cost=cw_profile_get_dp_cost;
CL->normalise=1;
Expand Down
268 changes: 214 additions & 54 deletions lib/perl/lib/scripts/extract_from_pdb
Original file line number Diff line number Diff line change
Expand Up @@ -1152,6 +1152,121 @@ if (!$nodiagnostic){print STDERR $error;}
####################################################################################################################
# TEST FUNCTIONS #
####################################################################################################################
sub get_pdb_entry_type_file
  {
    # Return the path of a usable pdb_entry_type.txt file, or "" when none
    # can be found or downloaded.
    #
    # Lookup order:
    #   1. $ENV{PDB_ENTRY_TYPE_FILE}                      (user choice, never updated)
    #   2. $ENV{PDB_DIR}/derived_data/pdb_entry_type.txt  (local database, never updated)
    #   3. $cache/pdb_entry_type.txt                      (refreshed from the wwPDB ftp
    #      site when missing or more than one day old, unless
    #      $no_remote_pdb_dir is set to 1)
    #
    # Relies on the file-level variables $cache and $no_remote_pdb_dir and on
    # the helpers add_warning, vtmpnam and url2file defined elsewhere in this
    # script.
    my $url       ="ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_entry_type.txt";
    my $cache_file="$cache/pdb_entry_type.txt";
    my $env_file  =$ENV{"PDB_ENTRY_TYPE_FILE"};
    my $pdb_dir   =$ENV{'PDB_DIR'};
    my $pdb_file  =(defined $pdb_dir && $pdb_dir ne "")?"$pdb_dir/derived_data/pdb_entry_type.txt":"";

    if (-z $cache_file){unlink ($cache_file);}                  #will get updated
    if (!defined $env_file || -z $env_file){$env_file="";}      #cannot update
    if ($pdb_file ne "" && -z $pdb_file)   {$pdb_file="";}      #cannot update

    if ($env_file ne "" && -e $env_file){return $env_file;}     #env wins: user decides
    if ($pdb_file ne "" && -e $pdb_file){return $pdb_file;}     #local database wins: network file may be out of sync

    if ($no_remote_pdb_dir==1)
      {
        #no network access allowed: the cache is the last resort
        if (-e $cache_file){return $cache_file;}
        add_warning($$,$$,"PDB_ENTRY_TYPE_FILE must be set to the location of <pdb>/derived_data/pdb_entry_type.txt when using NO_REMOTE_PDB_DIR=1");
        return "";
      }

    #update can only take place if the file lives in cache
    #-M is the file age in days: a cache younger than one day is used as is
    if (-e $cache_file && (-M $cache_file)<=1){return $cache_file;}

    my $new_file=vtmpnam();
    url2file($url, $new_file);

    #-s is true only for an existing, non-empty file: a download that
    #created nothing is treated as a failure, exactly like an empty one.
    #list-form system avoids the shell, so paths with spaces are safe.
    if (-s $new_file && system ("mv", $new_file, $cache_file)==0)
      {
        return $cache_file;
      }
    unlink ($new_file);

    if (-e $cache_file)
      {
        my $age=sprintf ("%.1f", (-M $cache_file));
        add_warning($$,$$,"Could not update file $url. Older Version [$cache_file]($age day(s) old) will be used instead");
        return $cache_file;
      }

    add_warning($$,$$,"Could not download $url");
    return "";
  }



sub get_unrealeased_file
  {
    # Return the path of a usable list of unreleased PDB entries (XML as
    # served by the RCSB getUnreleased REST call), or "" when none can be
    # found or downloaded.
    #
    # Lookup order:
    #   1. $ENV{PDB_UNREALEASED_FILE}                  (user choice; an empty file
    #      is deleted so that it can be fetched again)
    #   2. $ENV{PDB_DIR}/derived_data/unrealeased.xml  (local database, never updated)
    #   3. $cache/unrealeased.xml                      (refreshed when missing or more
    #      than one day old, unless $no_remote_pdb_dir is set to 1)
    #
    # With $no_remote_pdb_dir==1 and no cached copy, the file is downloaded
    # into the location named by PDB_UNREALEASED_FILE when that is set.
    #
    # Relies on the file-level variables $cache and $no_remote_pdb_dir and on
    # the helpers add_warning, vtmpnam and url2file defined elsewhere in this
    # script.
    my $url       ="http://www.rcsb.org/pdb/rest/getUnreleased";
    my $cache_file="$cache/unrealeased.xml";
    my $env_file  =$ENV{"PDB_UNREALEASED_FILE"};
    my $pdb_dir   =$ENV{'PDB_DIR'};
    my $pdb_file  =(defined $pdb_dir && $pdb_dir ne "")?"$pdb_dir/derived_data/unrealeased.xml":"";

    if (!defined $env_file){$env_file="";}

    if (-z $cache_file){unlink ($cache_file);}                  #will get updated
    if ($env_file ne "" && -z $env_file){unlink($env_file);}    #will update
    if ($pdb_file ne "" && -z $pdb_file){$pdb_file="";}         #cannot update

    if ($env_file ne "" && -e $env_file){return $env_file;}     #env wins: user decides
    if ($pdb_file ne "" && -e $pdb_file){return $pdb_file;}     #local database wins: network file may be out of sync

    if ($no_remote_pdb_dir==1)
      {
        if (-e $cache_file){return $cache_file;}

        if ($env_file ne "")
          {
            #the user named a location that does not exist yet: fill it
            url2file($url,$env_file);
            if (-s $env_file){return $env_file;}
            add_warning($$,$$,"Could not download $url into [$env_file]");
            return "";
          }

        add_warning($$,$$,"PDB_UNREALEASED_FILE must be set to the location of your unrealeased.xml file as downloaded from http://www.rcsb.org/pdb/rest/getUnreleased when using NO_REMOTE_PDB_DIR=1");
        return "";
      }

    #update can only take place if the file lives in cache
    #-M is the file age in days: a cache younger than one day is used as is
    if (-e $cache_file && (-M $cache_file)<=1){return $cache_file;}

    my $new_file=vtmpnam ();
    url2file($url,$new_file);

    #-s is true only for an existing, non-empty file: a download that
    #created nothing is treated as a failure, exactly like an empty one.
    #list-form system avoids the shell, so paths with spaces are safe.
    if (-s $new_file && system ("mv", $new_file, $cache_file)==0)
      {
        return $cache_file;
      }
    unlink ($new_file);

    if (-e $cache_file)
      {
        my $age=sprintf ("%.1f", (-M $cache_file));
        add_warning($$,$$,"Could not update file $url. Older Version [$cache_file]($age day(s) old) will be used");
        return $cache_file;
      }

    add_warning($$,$$,"Could not download $url");
    return "";
  }

sub is_released
{
my ($r);
Expand All @@ -1162,81 +1277,85 @@ sub is_released
$r=($name && !$hold)?1:0;
return $r;
}

sub remote_is_pdb_name
{
{
my $in=@_[0];
my ($ref_file, $pdb);
my ($pdb);
my ($value,$value1,$value2);
my $max=2;

$ref_file="$cache/pdb_entry_type.txt";


my $ref_file=&get_pdb_entry_type_file();

if ( $in=~/[^\w\d\:\_]/){return 0;}
elsif ($no_remote_pdb_dir==1)
elsif (!-e $ref_file)
{
my $pdbdir=$ENV{'PDB_DIR'};

my $r1="$pdbdir/derived_data/pdb_entry_type.txt";
my $r2=$ref_file;
if (-e $r1){$ref_file=$r1;}
elsif (-e $r2){$ref_file=$r2;}
else
{
my $p=substr ($in,0, 4);
add_warning ("Cannot find pdb_entry_type.txt; $p is assumed to be valid; add ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_entry_type.txt in $cache to check name status");
}
add_warning ($$,$$,"Cannot find pdb_entry_type.txt; $in is assumed to be valid; add ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_entry_type.txt in $cache to automatically check name status");
return 1;
}
elsif ( !-e $ref_file || (-M $ref_file)>$max || -z $ref_file)
else
{
&url2file("ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_entry_type.txt", $ref_file);
$pdb=substr ($in,0, 4);
chomp(($value1=`grep -c $pdb $ref_file`));
$pdb=lc($pdb);
chomp(($value2=`grep -c $pdb $ref_file`));
$value=($value1 || $value2)?1:0;
$value=($value>0)?1:0;

return $value;
}
$pdb=substr ($in,0, 4);
chomp(($value1=`grep -c $pdb $ref_file`));
$pdb=lc($pdb);
chomp(($value2=`grep -c $pdb $ref_file`));
$value=($value1 || $value2)?1:0;
$value=($value>0)?1:0;

return $value;
}



sub pdb2model_type
{
my $in=@_[0];
my ($ref_file, $pdb);
my ($value, $ret);

if ( $in=~/[^\w\d\:\_]/){return 0;}
$ref_file="$cache/pdb_entry_type.txt";

if ( !-e $ref_file || (-M $ref_file)>2 || -z $ref_file)
$ref_file=&get_pdb_entry_type_file();
if (!-e $ref_file)
{
&url2file("ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_entry_type.txt", $ref_file);
add_warning ($$,$$,"Cannot find pdb_entry_type.txt; $in is assumed to be diffraction; add ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_entry_type.txt in $cache to check name status");
return "diffraction";
}
else
{
$pdb=substr ($in,0, 4);
$pdb=lc($pdb);

chomp(($value=`grep $pdb $ref_file`));

$value=~/^\S+\s+\S+\s+(\S+)/;
$ret=$1;
if ( $ret eq""){return "UNKNOWN";}

return $ret;
}
$pdb=substr ($in,0, 4);
$pdb=lc($pdb);

chomp(($value=`grep $pdb $ref_file`));

$value=~/^\S+\s+\S+\s+(\S+)/;
$ret=$1;
if ( $ret eq""){return "UNKNOWN";}

return $ret;
}
sub remote_is_on_hold
{
my $in=@_[0];
my ($ref_file, $pdb);
my ($value1, $value2,$value);




$ref_file=&get_unrealeased_file();

if ($no_rmote_pdb==1){return 0;}
elsif ( $in=~/[^\w\d\:\_]/){return 0;}
$ref_file="$cache/unreleased.xml";
if ( $in=~/[^\w\d\:\_]/){return 0;}

if ( !-e $ref_file || (-M $ref_file)>2 || -z $ref_file)
$ref_file=&get_unrealeased_file();
if (!-e $ref_file)
{
&url2file("http://www.rcsb.org/pdb/rest/getUnreleased",$ref_file);
add_warning ($$,$$,"Cannot find unrealeased.xml; $in is assumed to be released;");
return 1;
}

$pdb=substr ($in,0, 4);
Expand All @@ -1247,33 +1366,34 @@ sub remote_is_on_hold
$value=($value>0)?1:0;
return $value;
}

sub is_pdb_file
{
{
my @arg=@_;

if ( !-e $arg[0]){return 0;}

$F=vfopen ($arg[0], "r");
while ( <$F>)
{
{
if (/^HEADER/)
{
{
close $F;
return 1;
}
}
elsif ( /^SEQRES/)
{
{
close $F;
return 1;
}
}
elsif ( /^ATOM/)
{
{
close $F;
return 1;
}
}
}
}
return 0;
}
}
sub get_pdb_id
{
my $header_file=@_[0];
Expand Down Expand Up @@ -5896,3 +6016,43 @@ sub check_pg_is_installed
}


#####################Deprecated Functions
sub remote_is_pdb_name_deprecated
  {
    # DEPRECATED: kept for reference only.
    #
    # Return 1 when the first four characters of the given name occur in
    # pdb_entry_type.txt (either as given or lower-cased), 0 otherwise.
    # A name containing anything other than word characters or ':' is
    # rejected with 0 before any file is consulted.
    #
    # With $no_remote_pdb_dir==1 the reference file is looked up in
    # $ENV{PDB_DIR}/derived_data and then in $cache; if neither exists a
    # warning is issued and the name is accepted (returns 1), as the warning
    # states. Otherwise the cached copy is downloaded when it is missing,
    # empty or older than $max days.
    #
    # Relies on the file-level variables $cache and $no_remote_pdb_dir and on
    # the helpers add_warning and url2file defined elsewhere in this script.
    my $in=$_[0];
    my $max=2;    #maximum age of the cached file, in days (unit of -M)
    my $ref_file="$cache/pdb_entry_type.txt";

    if ( $in=~/[^\w\d\:\_]/){return 0;}

    if ($no_remote_pdb_dir==1)
      {
        my $pdbdir=$ENV{'PDB_DIR'};
        my $local="$pdbdir/derived_data/pdb_entry_type.txt";

        if (-e $local){$ref_file=$local;}
        elsif (!-e $ref_file)
          {
            my $p=substr ($in,0, 4);
            add_warning ($$, $$, "Cannot find pdb_entry_type.txt; $p is assumed to be valid; add ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_entry_type.txt in $cache to check name status");
            #nothing to check against: honour the warning instead of
            #searching a file that does not exist
            return 1;
          }
      }
    elsif ( !-e $ref_file || (-M $ref_file)>$max || -z $ref_file)
      {
        url2file("ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_entry_type.txt", $ref_file);
      }

    my $pdb=substr ($in,0, 4);
    my $lc_pdb=lc($pdb);
    my $found=0;

    #scan the file in-process: same substring semantics as the former
    #`grep -c`, without a shell and without failing on odd paths
    if (open (my $fh, "<", $ref_file))
      {
        while (my $line=<$fh>)
          {
            if (index ($line,$pdb)>=0 || index ($line,$lc_pdb)>=0)
              {
                $found=1;
                last;
              }
          }
        close ($fh);
      }

    return $found;
  }
2 changes: 1 addition & 1 deletion lib/perl/lib/scripts/tc_generic_method.pl
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ sub blast2pdb_template_test

if (!&pdb_is_released($pdbid))
{
print stdout "\t\t**$pdbid [PDB NOT RELEASED or WITHDRAWN]\n";
print stdout "\t\t**$pdbid [WARNIG: PDB NOT RELEASED or WITHDRAWN]\n";
$c++;
}
elsif (!&pdb_has_right_type ($pdbid,$type))
Expand Down
6 changes: 3 additions & 3 deletions lib/util_lib/util_constraints_list.c
Original file line number Diff line number Diff line change
Expand Up @@ -2261,9 +2261,9 @@ Constraint_list* read_constraint_list(Constraint_list *CL,char *in_fname,char *i
}
else if (strm(read_mode, "matrix"))
{
CL->residue_index=NULL;
CL->extend_jit=0;
CL->M=read_matrice ( fname);
CL->residue_index=NULL;
CL->extend_jit=0;
CL->M=read_matrice ( fname);
}
else if ( strm ( read_mode, "structure"))
{
Expand Down

0 comments on commit 4fa9bf6

Please sign in to comment.