Skip to content
Browse files

- Updated for IDL V6.1.

- Find additional linked keyword sections, by setting add_keywords, even
  when scanning Keywords Sections (not just syntax keywords).
- Allow linking keywords from routines for which we don't know the type
  (fun or pro).
- Can specify "only keys", separate from "has" for add_keywords (has
  only works if they show up in the Syntax section).
- Trim multi-line HTML tags.
- Leading multi-plexers like [XYZ], in addition to {X|Y|Z} have shown
  up.
- Allow parse callbacks to end processing for that section by returning
  -1.
- Remove space around underscores.
- Various new special matchers, lowercase keywords, a bogus DATAMINER
  match against Syntax, etc.
  • Loading branch information...
1 parent c9e8a63 commit 8d56c8df9854277d649ae74a53be0e417721bb5e jdsmith committed
Showing with 190 additions and 66 deletions.
  1. +190 −66 get_html_rinfo
View
256 get_html_rinfo
@@ -3,10 +3,10 @@
# Program to extract the information from the HTML version of the IDL
# manuals (v5.6 and on) and IDL itself, to support IDLWAVE.
#
-# This version supports IDL >v6.0
+# This version supports IDL v6.1
#
# (c) 1999, 2000 Carsten Dominik <dominik@astro.uva.nl>
-# (c) 2001-2003 J.D. Smith <jdsmith@as.arizona.edu>
+# (c) 2001-2004 J.D. Smith <jdsmith@as.arizona.edu>
#
# Requires the HTML documentation files distributed with IDL 6.0 or
# later, decompiled from HTMLHelp idl.chm with Microsoft's HTML Help
@@ -174,6 +174,22 @@
# The data which actually make it into the rinfo file include class,
# type, routine,"kwds" with links, and the "extra" file and kwds with
# links. Everything else is for internal linking.
+#
+#============================================================================
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This file is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
#============================================================================
require 5.004;
@@ -227,6 +243,7 @@ open RINFO,">$rinfofile" or die "Cannot open $rinfofile for writing: $!";
# Scan all of the files
FILE:
foreach $file (@files) {
+# next unless $file=~/^CDF.*\.html/;
open(FILE,"$path/$file");
local $/=undef; #Slurp mode
$file_contents=<FILE>;
@@ -320,7 +337,7 @@ foreach $file (@files) {
# Require a proper heading
if (!$syntax && $part=~/^Syntax/) {
$syntax=$parts{$part};
- } elsif ($part=~/^((?:[A-Z][a-z0-9_]+\s+){0,2}Keywords|
+ } elsif ($part=~/^((?:[A-Z][A-Za-z0-9_]+\s+){0,2}Keywords|
Keywords:\s*(?:[A-Z][a-z0-9_]+\s*){1,2})\s*$/x) {
$kwds.=$parts{$part}
unless $1=~/Thread Pool Keywords/; # Nothing useful in TPool
@@ -337,7 +354,6 @@ foreach $file (@files) {
# Apply the special syntax matchers
$old_syntax=$syntax;
&try_specials();
- diag("$syntax\n");
# See if there are reasons to reject or complain about this entry
if (@rejects) {
@@ -358,7 +374,7 @@ foreach $file (@files) {
# Parse the text keywords, and compare to syntax keywords.
@syntax_kwds=make_unique(@syntax_kwds);
%txt_kwds=parse_keywords($kwds);
-
+ #diag("GOT KWDS SECTION:\n >>>$kwds\n<<<\n\n");
# Complain strenuously
if (@complaints) {
@@ -367,8 +383,6 @@ foreach $file (@files) {
complain($old_syntax,$file,@complaints);
}
- diag(" SYNTAX KWDS FOUND: \n ".join("\n ",sort @syntax_kwds)."\n");
- diag(" TEXT KWDS FOUND: \n ".join("\n ",sort keys %txt_kwds)."\n");
diag("\n$file($title):<<<<<<<<<<<<<<<<<<\n\n\n");
# Normalize the class/routine cases
@@ -461,8 +475,6 @@ foreach $class (keys %properties) {
}
}
-diag("IDLitDataContainer:".Dumper($e{"IDLitDataContainer"})."\n");
-
foreach $class (keys %e) {
next unless $class;
$iname = case_name("method","Init");
@@ -539,18 +551,28 @@ foreach $class (keys %e) {
next unless defined($e{$class}{$type}{$name}{add_kwds});
foreach my $add (@{$e{$class}{$type}{$name}{add_kwds}}) {
- my (%nokeys,$file_to_add,$keys_to_add,$special_to_add);
- my ($has,$aname,$atype,$aclass,$nokeys,$special_sec,$getset)=@$add;
+ my (%nokeys,%only_keys,$file_to_add,$keys_to_add,$special_to_add);
+ my ($has,$aname,$atype,$aclass,$only_keys,$nokeys,
+ $special_sec,$getset)=@$add;
# Does it already have the keywords, just requiring a link
$has=$has eq "has";
- # Is it a real entry being asked for?
+
+ # Check if we didn't know the routine type at the time of
+ # addition; default to "pro" if it exists.
+ unless ($atype) {
+ if (defined($e{$aclass}{"pro"}{$aname})) { $atype="pro" }
+ elsif (defined($e{$aclass}{"fun"}{$aname})) { $atype="fun"};
+ }
+
+ # Is it a actual existing entry being asked for?
next unless defined($e{$aclass}{$atype}{$aname});
foreach (@$nokeys) {$nokeys{$_}++;}
-
+ foreach (@$only_keys) {$only_keys{$_}++;}
+
my $afile=$e{$aclass}{$atype}{$aname}{file};
- # Look for special section keywords only (e.g. "graphics
- # keywords accepted by PLOT")
+ # Look for "2 degree of separation" special section keywords
+ # only (e.g. "graphics keywords accepted by PLOT")
if($special_sec and defined($e{$aclass}{$atype}{$aname}{extra})) {
foreach $file (keys %{$e{$aclass}{$atype}{$aname}{extra}}) {
if($e{$aclass}{$atype}{$aname}{extra}{$file}{special} =~
@@ -580,8 +602,10 @@ foreach $class (keys %e) {
# Actually add the keys (moving unlinked ones to extra if necessary)
foreach (keys %$keys_to_add) {
- next if $nokeys{$_};
- if($has) { #exists in kwds already, move and link it
+ next if (@$only_keys && !$only_keys{$_}) || $nokeys{$_};
+ if($has) {
+ #demand that it exists in kwds already
+ # if so, move and link it in the extra kwds section
next if !defined($e{$class}{$type}{$name}{kwds}{$_}) or
$e{$class}{$type}{$name}{kwds}{$_}; # Leave an existing link
delete $e{$class}{$type}{$name}{kwds}{$_}; # Move to extra keywords
@@ -626,8 +650,8 @@ diag(sprintf("\nProblematic entries: %d rejected, %d complains.\n",
write_rinfo_header();
print RINFO "(defconst idlwave-system-routines\n";
print RINFO " '(\n";
-printf STDERR "\n Nr Class Npro Nfun Ntot Nkwd\n";
-printf STDERR "-----------------------------------------------\n";
+printf STDERR "\n Nr Class Npro Nfun Ntot Nkwd\n";
+printf STDERR "----------------------------------------------------------\n";
$classcnt = -1;
foreach $class (sort ignoring_case keys %e) {
$npro = scalar(keys %{$e{$class}{"pro"}});
@@ -647,7 +671,7 @@ foreach $class (sort ignoring_case keys %e) {
$nprotot += $npro;
$nfuntot += $nfun;
$nclass++;
- printf STDERR "%3d %-21s %4d %4d %4d %5d\n",
+ printf STDERR "%3d %-32s %4d %4d %5d %5d\n",
++$classcnt,$class,$npro,$nfun,$npro+$nfun,$nkwd;
foreach $type ("pro","fun") {
foreach $name (sort ignoring_case keys %{$e{$class}{$type}}) {
@@ -656,8 +680,8 @@ foreach $class (sort ignoring_case keys %e) {
}
}
-print STDERR "-" x 47,"\n";
-printf STDERR "Total %4d %4d %5d %5d\n",
+print STDERR "-" x 58,"\n";
+printf STDERR "Total %4d %4d %5d %5d\n",
$nprotot,$nfuntot,$nprotot+$nfuntot,$n_keywords_total;
printf STDERR "Routines ignored due to -xname: %4d\n",$ignore_name_cnt
if $ignore_name_re;
@@ -768,7 +792,7 @@ sub clean_up_syntax {
$syntax=~s|<h5\s+class="p?Heading4">.*?^</h5>\s*|or|msg; # And headings
$syntax=~s/and then,.*//s;
$syntax=~s/<br>/\n/g;
- $syntax=~s/<.*?>//g; # html tags
+ $syntax=~s/<.*?>//gs; # html tags
$syntax=~s/&nbsp;/ /g; # special chars
$syntax=~s/&#151;/ - /g;
$syntax=~s/&#160;/ /g;
@@ -779,7 +803,7 @@ sub clean_up_syntax {
$syntax=~s/{(?!(?:,|X\s*\|\s*Y))[^}]*}//g; # Internal notes in {}
$syntax=~s/&([gl])t;/$1=="g"?">":"<"/eg; # > and <
$syntax=~s|^\s*or\s*||si;
- diag("Now using cleaned syntax:>>>>>>>>\n$syntax\n<<<<<<<<<<\n");
+# diag("Now using cleaned syntax:>>>>>>>>\n$syntax\n<<<<<<<<<<\n");
}
sub parse_syntax {
@@ -807,7 +831,9 @@ sub parse_syntax {
} else {diag("parse_syntax: No CLASS found\n");}
# Clean out the remarks about BLABLA keywords
- $txt =~ s/^.*?\bkeywords\b.*?://gmi;
+ $txt =~ s/^.*?\bkeywords\b.*?:.*?this.*?accepts.*?keywords.*$//gmi;
+ $txt =~ s/^.*?\bkeywords\b.*?://gmi; #Some have keywords after them.
+
# Clean out the "only in..." stuff
$txt =~ s/\(only\s*in[^\)]*\)//gi;
@@ -820,8 +846,8 @@ sub parse_syntax {
# Get all keywords. Keywords are things with `/' before it or with `='
# after it.
- diag("TESTING:\n\n$txt\n\n");
- while ($txt =~m!(\{([/XYZ |]*)\})? # Leading XYZ multiplexer
+ diag("AFTER SYNTAX CLEANUP:\n\n$txt\n\n");
+ while ($txt =~m!(\{[/XYZ |]*\}|/?\[XYZ?\])? # Leading XYZ multiplexer
(\/)? # Possibly a boolean
\b
(_?[A-Z][A-Z0-9_]*) # The actual keyword
@@ -829,16 +855,16 @@ sub parse_syntax {
\s*
(=)?!gx
) {
- ($xyz,$slash,$identifier,$getset,$equal) = ($2,$3,$4,$6,$7);
+ ($xyz,$slash,$identifier,$getset,$equal) = ($1,$2,$3,$5,$6);
next unless $slash || $equal || ($xyz && $xyz=~m|/|);
-
# Everything before the first keyword is part of the calling sequence
$call = $` unless $call;
# Some keywords have a {X|Y|Z} in front which must be expanded
if ($xyz) {
- @ids = map {tr|/||d; $_ . $identifier} (split(/ *\| */,$xyz));
+ $xyz=~tr|XYZ||cd;
+ @ids=map {$_ . $identifier} split(/(?=[XYZ])/,$xyz);
} else {
@ids = ($identifier);
}
@@ -890,7 +916,8 @@ sub parse_syntax {
# Parse text into "Heading" Sections, optionally doing something with
# the parsed text in a callback. If the callback is passed, it's
# return value is tested, and, if a true list, that link is saved for
-# each name on the list for return.
+# each name on the list for return. If the return value is the scalar
+# -1, no more processing is done.
sub parse_heading {
my ($txt,$heading,$callback)=@_;
my %ret,@ret;
@@ -908,7 +935,9 @@ sub parse_heading {
while (my ($link,$name,$text)=splice(@sections,0,3)) {
if($callback) {
my @ret=&$callback($link,$name,$text);
- if (@ret and $ret[0]) { # More than one name returned
+ if(@ret) {
+ last if $ret[0]==-1;
+ next if $ret[0]==1;
map {$ret{$_}=$link} @ret;
}
} else { $ret{$name}=$link; }
@@ -927,18 +956,50 @@ sub parse_keywords {
sub {
my ($link,$kwd,$text) = @_;
my @ret;
- return 0 if ($kwd=~/Keywords?/);
- return 0 unless
+ if ($kwd=~/WIDGET_CONTROL\s+Keywords/i) {
+ push @complaints,
+ "WIDGET_CONTROL keywords listed as regular keyword.";
+ return -1; # Abort further processing
+ }
+ if ($kwd=~/Keywords?/) {
+ # Some "extra" keywords are annoyingly listed in
+ # amidst the regular keywords
+ my @all=($text=~m{[iI]n\s+addition.*?the\s+
+ ((?:[A-Z0-9_]+,\s+)+)
+ and\s+([A-Z0-9_]+)\s+keywords?\s+to\s+
+ the\s+([:A-Z0-9_]+)\s+(pro|fun)}xs);
+ diag("EXTRA KEYWORDS, Got: \n$text\n\n");
+ return 1 unless @all;
+
+ my $type=pop @all;
+ my ($class,$routine);
+ $routine=pop @all;
+ unshift @all,split(/,\s+/,shift @all);
+ push @complaints,
+ "Extra keywords to link listed as normal keyword: " .
+ "$routine: ". join(",",@all);
+ ($class,$routine)=($routine=~/(?:([a-z][a-z0-9_]+)::?)?
+ ([a-z][a-z0-9_]+)/ix);
+ push @add_keywords,["has",$routine,$type,$class];
+ return 1;
+ }
+
+ # Take care of space surrounding underscores here too
+ $kwd=~s/([A-Z])(?: +_|_ +)([A-Z])/$1_$2/g;
+ return 1 unless
$kwd =~
m{( # The entire keyword text
- (?:(\[XYZ?\])?(\!?[A-Z0-9-_]+)# opt. [XYZ]
- (?![a-z]) # No lowercase lets
- (?:\s*,\s*)? # optional comma
- )+) # >=1 of the above
+ (?:(\[XYZ?\](_?))? # opt. [XYZ]
+ (\!?[A-Z][A-Z0-9-_]*) # keyword
+ (?![a-z]) # No lowercase lets
+ (?:\s*,\s*)? # optional comma
+ )+) # >=1 of the above
(?:\s*\((Get|Set|Get\s*,\s*Set)\)\s*)? # (Get,Set)?
}gxs;
- my ($kwd,$xyz,$key_base,$getset)=($1,$2,$3,$4);
+ my ($kwd,$xyz,$underscore,$key_base,$getset)=
+ ($1,$2,$3,$4,$5);
if ($xyz) {
+ $key_base="_".$key_base if $underscore;
$xyz=~tr/XYZ//cd;
@ret=map {$_ . $key_base} split(/(?=[XYZ])/,$xyz);
} else {
@@ -948,7 +1009,7 @@ sub parse_keywords {
push @setkwds,@ret if $getset =~ /set/i;
return @ret;
});
-
+# print "GOT KWDS: ",Dumper(%kwds),"\n";
my (@missing_from_syntax,@missing_from_text,%syntax_kwds);
foreach $kwd (@syntax_kwds) {
@@ -967,6 +1028,9 @@ sub parse_keywords {
push @complaints,"Text keywords missing from syntax: " .
join(",",@missing_from_syntax) if @missing_from_syntax;
+ diag(" SYNTAX KWDS FOUND: \n ".join("\n ",sort @syntax_kwds)."\n");
+ diag(" TEXT KWDS FOUND: \n ". join("\n ",sort keys %kwds)."\n");
+
# No link found in text, but add the missing keywords from Syntax
# anyway.
map {$kwds{$_}=""} @missing_from_text;
@@ -1041,7 +1105,7 @@ sub make_lisp_reader_string {
my $call=$e{$class}{$type}{$name}{call};
my ($entry,$kwds);
- diag("STARTING $name CALL WITH: $call\n");
+# diag("STARTING $name CALL WITH: $call\n");
# In the calling sequence we want `%s' instead of name and class.
# The calling sequence will later be used as format string to make
# a calling sequence with the correct version of class and name.
@@ -1052,7 +1116,7 @@ sub make_lisp_reader_string {
$call =~ s/\b$name\b/%s/gi;
}
- diag("TESTING CALL WITH: $call\n");
+# diag("TESTING CALL WITH: $call\n");
# Now we make the string which can be parsed by the Lisp reader
# It looks like this:
# ("NAME" TYPE "CLASS" (system) "CALLING SEQUENCE"
@@ -1102,7 +1166,7 @@ sub write_rinfo_header {
print RINFO <<EOF;
;;; idlw-rinfo.el --- Routine Information for IDLWAVE
;; Copyright (c) 1999 Carsten Dominik
-;; Copyright (c) 1999, 2000, 2001, 2002, 2003 Free Software Foundation
+;; Copyright (c) 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation
;; Author: J.D. Smith <jdsmith\@as.arizona.edu>
;; Version: VERSIONTAG
@@ -1385,19 +1449,28 @@ sub try_specials {
# corresponding keywords to the routine description as well, using the
# @add_keywords array, with format:
#
-# [ ("needs"|"has"),name,type,class,no_keys_reference,specials_only,get_set ]
+# [ has,name,type,class,only_keys,allbut_keys,specials_only,get_set ]
+#
+# has: The string "needs" or "has" depending on whether the routine
+# being scanned "needs" keywords added, or already "has" them, but
+# just requires linking to the proper section.
#
-# "needs" or "has": depending on whether the routine being scanned
-# "needs" keywords added, or already "has" them, but just requires
-# linking to the proper section.
# name: The name of the add routine
+#
# type: The type of the add routine
+#
# class: The class (if any) of the add routine
-# no_keys_reference: A reference to a list of keywords *not* to add.
+#
+# only_keys: A reference to a list of keywords to add
+#
+# allbut_keys: A reference to a list of keywords *not* to add.
+#
# specials_only: A string mentioning one of the special keyword
# section names (see %special_sections below), which will limit
# added keywords to those with the named special section origin
-# (e.g. "Graphics").
+# (e.g. "Graphics"), which are actually separated by 2 degrees
+# from the original file (e.g. Surface->Plot->Graphics Keywords).
+#
# get_set: Either omit, or use one of "get","set", or "get,set" to
# specify we need to link that type of keyword from the named
# routine.
@@ -1602,7 +1675,7 @@ BEGIN {
sub {
if ($syntax =~ /ATAN.*ATAN/s) {
# Write a simpler calling sequence
- $syntax = "Result = ATAN([Y,] X)";
+ $syntax = "Result = ATAN([Y,] X [, /PHASE] )";
$act = 1;
} else {
0;
@@ -1745,6 +1818,20 @@ BEGIN {
# ------------------------------------------------------------------
# Attach or link keywords from other routines ----------------------
# ------------------------------------------------------------------
+ sname("ACCEPTS ALL XXX keywords") =>
+ sub {
+ my ($routine,$class);
+ while ($syntax=~/Accepts all ([A-Z_0-9:]+) keywords/gi) {
+ $act=1;
+ $routine=$1;
+ ($class,$routine)=
+ ($routine=~/(?:([a-z][a-z0-9_]+)::?)?([a-z][a-z0-9_]+)/i);
+ push @complaints,"Keyword list not complete ($2 keywords omitted).";
+ push @add_keywords,["needs",$routine,undef,$class];
+ }
+ return $act;
+ },
+
sname("SURFACE,CONTOUR,PLOT_3DBOX (ADD PLOT KEYWORDS)") =>
sub {
if ($syntax =~ /^\s*(SURFACE|CONTOUR|PLOT_3DBOX)/si) {
@@ -1765,18 +1852,6 @@ BEGIN {
}
},
- sname("IDLitComponent::Get|SetPropertyAttribute") =>
- sub {
- if ($syntax =~ /\s*IDLitComponent::\]?(Get|Set)PropertyAttribute/si) {
- # Must add all the get|set keywords from RegisterProperty
- push @add_keywords,["needs","RegisterProperty","pro","IDLitComponent",
- undef,undef,$1];
- $act=1;
- } else {
- 0;
- }
- },
-
sname("POLAR_CONTOUR (LINK CONTOUR KEYWORDS)") =>
sub {
if ($syntax =~ /^\s*POLAR_CONTOUR/si) {
@@ -1789,7 +1864,7 @@ BEGIN {
$nokeys =~ s/and//;
my @nokeys = (split(/[^A-Z0-9_]+/,$nokeys));
shift @nokeys unless $nokeys[0]; # Useless material
- push @add_keywords,["has","CONTOUR","pro","",\@nokeys];
+ push @add_keywords,["has","CONTOUR","pro",undef,undef,\@nokeys];
}
1;
} else {
@@ -1797,7 +1872,19 @@ BEGIN {
}
},
- sname("TVSCL (ADD TV)") =>
+ sname("IDLitComponent::Get|SetPropertyAttribute") =>
+ sub {
+ if ($syntax =~ /\s*IDLitComponent::\]?(Get|Set)PropertyAttribute/si) {
+ # Must add all the get|set keywords from RegisterProperty
+ push @add_keywords,["needs","RegisterProperty","pro","IDLitComponent",
+ undef,undef,undef,$1];
+ $act=1;
+ } else {
+ 0;
+ }
+ },
+
+ sname("TVSCL (ADD TV)") =>
sub {
if ($syntax =~ /^\s*TVSCL/si) {
# Most TV keywords need to be added later.
@@ -1813,11 +1900,11 @@ BEGIN {
sname("LINK SURFACE KEYWORDS") =>
sub {
if ($kwds =~ m|\s+</a>SURFACE Keywords|) {
- # All SURFACE keywords listed in syntax need to be added later.
+ # All SURFACE keywords listed in syntax need to be linked later.
$act = 1;
push @complaints,
"Keyword list not complete (SURFACE kwds omitted)";
- push @add_keywords,["has","SURFACE","pro",""];
+ push @add_keywords,["has","SURFACE","pro"];
1;
} else {
0;
@@ -1886,6 +1973,30 @@ BEGIN {
}
},
+ sname("SPACE(S) AROUND UNDERSCORE") =>
+ sub {
+ if ($syntax =~ /[A-Z]( +_|_ +)[A-Z]/) {
+ $act = ($syntax =~ s/([A-Z]) *_ *([A-Z])/$1_$2/g);
+ push @complaints,"Space surrounding underscore";
+ 1;
+ } else {
+ 0;
+ }
+ },
+
+
+ sname("LOWERCASE KEYWORD") =>
+ sub {
+ if ($syntax =~ /CDF_VARCREATE/) {
+ $act= ($syntax =~ s/VariableType/VARIABLETYPE/);
+ $kwds=~ s/VariableType/VARIABLETYPE/; # Ugghh
+ push @complaints,"Lowercase keyword found";
+ 1;
+ } else {
+ 0;
+ }
+ },
+
# ------------------------------------------------------------------
# Keyword omissions ------------------------------------------------
# ------------------------------------------------------------------
@@ -1961,6 +2072,19 @@ BEGIN {
} else {
0;
}
+ },
+
+
+ sname("KILL DATAMINER INTRO") =>
+ sub {
+ if ($syntax =~ /section shows the proper syntax for calling the method/)
+ {
+ $act=1;
+ $syntax="";
+ 1;
+ } else {
+ 0;
+ }
}
);

0 comments on commit 8d56c8d

Please sign in to comment.
Something went wrong with that request. Please try again.