Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 199 lines (177 sloc) 4.32 kB
b8b58f9 @cmungall new
authored
1 #!/usr/bin/perl -w
2
3 use strict;
4 my %tag_h=();
5 my $regexp = '';
6 my $noheader;
7 my $negate;
8 my $count;
9 while ($ARGV[0] =~ /^\-.+/) {
10 my $opt = shift @ARGV;
11 if ($opt eq '-h' || $opt eq '--help') {
12 print usage();
13 exit 0;
14 }
15 if ($opt eq '-r' || $opt eq '--regexp') {
16 $regexp = shift @ARGV;
17 }
18 if ($opt eq '-c' || $opt eq '--count') {
19 $count = 1;
20 }
21 if ($opt eq '--noheader') {
22 $noheader = 1;
23 }
24 if ($opt eq '-v' || $opt eq '--neg') {
25 $negate = 1;
26 }
27 }
28
29
30 $/ = "\n\n";
31
32 my %done = ();
5122c0e @cmungall report unused rels
authored
33 my %stanzatype=();
cef2b76 @cmungall -new
authored
34 my %name = ();
b8b58f9 @cmungall new
authored
35 my @flagged = ();
9b7bc35 @cmungall anon-check
authored
36 my %referenced;
a40fb72 @cmungall new
authored
37 my %refrel = ();
b8b58f9 @cmungall new
authored
38 my $n = 0;
becda5c @cmungall check xp dupes
authored
39 my %id_by_xp_h = ();
b8b58f9 @cmungall new
authored
40 if (!@ARGV) {
41 @ARGV=('-');
42 }
43 my $n_xps = 0;
44 while (@ARGV) {
45 my $f = pop @ARGV;
46 if ($f eq '-') {
47 *F=*STDIN;
48 }
49 else {
50 open(F,$f) || die $f;
51 }
52 my $hdr = 0;
9b7bc35 @cmungall anon-check
authored
53 my $stanza_type;
b8b58f9 @cmungall new
authored
54 while(<F>) {
55 my $id;
423979c @cmungall fix
authored
56 if (/^\[(\w+)\]/) {
9b7bc35 @cmungall anon-check
authored
57 $stanza_type = lc($1);
58 }
85d8736 @cmungall fix
authored
59 if (/\nid:\s*(\S+)/) {
b8b58f9 @cmungall new
authored
60 $id = $1;
9b7bc35 @cmungall anon-check
authored
61 if ($done{$id} && /\nid/) {
cef2b76 @cmungall -new
authored
62 flag("\"$name{$id}\" present twice: ",$_);
b8b58f9 @cmungall new
authored
63 }
3298917 @cmungall ws
authored
64 $done{$id} = 1;
5122c0e @cmungall report unused rels
authored
65 $stanzatype{$id} = $stanza_type;
3298917 @cmungall ws
authored
66 if (/id:\s*(.*)/) {
67 my $full = $1;
cef2b76 @cmungall -new
authored
68 $name{$id} = $full;
3298917 @cmungall ws
authored
69 $full =~ s/\s*\n.*//;
70 $full =~ s/\s*\!.*//;
71 $full =~ s/\S+\s*//;
72 if ($full) {
73 flag("ID: $id contains whitespace, followed by '$full'", $_);
74 }
75 }
b8b58f9 @cmungall new
authored
76 }
77 my @lines = split(/\n/,$_);
ceac129 @cmungall union
authored
78 foreach (@lines) {
a40fb72 @cmungall new
authored
79 if (/^(union_of|relationship):\s*(\S+)/) {
80 $referenced{$2} = 1;
ceac129 @cmungall union
authored
81 #print STDERR "U: $1\n";
82 }
a40fb72 @cmungall new
authored
83 elsif (/^(holds_over_chain|equivalent_to_chain):\s*(\S+)\s+(\S+)/) {
84 $referenced{$2} = 1;
85 $referenced{$3} = 1;
86 #print STDERR "U: $1\n";
87 }
88
89 if (/^relationship:\s*(\S+)/) {
90 $refrel{$1} .= "$_\n";
91 }
92
ceac129 @cmungall union
authored
93 }
b8b58f9 @cmungall new
authored
94 my @xps = grep {/^intersection_of:/} @lines;
95 if (@xps) {
96 if (@xps == 1) {
97 flag("single_xp: @xps",$_);
98 }
becda5c @cmungall check xp dupes
authored
99 my @xp_links = ();
b8b58f9 @cmungall new
authored
100 my @genii = ();
101 foreach (@xps) {
102 s/\s*\!.*//;
e8becbc @cmungall new
authored
103 s/\s*\{.*\}.*//;
b8b58f9 @cmungall new
authored
104 my @parts = split(' ',$_);
105 shift @parts;
becda5c @cmungall check xp dupes
authored
106 push(@xp_links,join(' ',@parts));
9b7bc35 @cmungall anon-check
authored
107 foreach (@parts) {
108 $referenced{$_} = 1;
109 }
b8b58f9 @cmungall new
authored
110 if (@parts == 1) {
111 push(@genii, $parts[0]);
112 }
a40fb72 @cmungall new
authored
113 else {
114 $refrel{$parts[0]} .= "$_\n";
115 }
b8b58f9 @cmungall new
authored
116 }
becda5c @cmungall check xp dupes
authored
117 my $xp_str = join('; ', sort {$a cmp $b} @xp_links);
118 if ($id_by_xp_h{$xp_str}) {
cef2b76 @cmungall -new
authored
119 if ($id eq $id_by_xp_h{$xp_str}) {
120 # already reported
121 }
122 else {
123 flag("duplicate xp def: '$xp_str' $id == $id_by_xp_h{$xp_str}", $_);
124 }
becda5c @cmungall check xp dupes
authored
125 }
126 $id_by_xp_h{$xp_str} = $id;
b8b58f9 @cmungall new
authored
127 if (@genii < 1) {
128 flag("single_genus: @genii", $_);
129 }
130 elsif (@genii > 1) {
423979c @cmungall fix
authored
131 flag("multiple_genus: @genii in", $_)
132 unless $stanza_type eq 'typedef';
b8b58f9 @cmungall new
authored
133 }
134 else {
135 if ($id eq $genii[0]) {
136 flag("id $id = genus", $_);
137 }
138 # ok
139 }
140 $n_xps++;
141 }
142 }
143 }
9b7bc35 @cmungall anon-check
authored
144
145 foreach (keys %done) {
146 if (/^_:/) {
becda5c @cmungall check xp dupes
authored
147 if (!$referenced{$_}) {
9b7bc35 @cmungall anon-check
authored
148 flag("unreferenced anon class", $_);
149 }
150 }
5122c0e @cmungall report unused rels
authored
151 if ($stanzatype{$_} eq 'typedef') {
a40fb72 @cmungall new
authored
152 $refrel{$_} = 0;
5122c0e @cmungall report unused rels
authored
153 if (!$referenced{$_}) {
154 flag("unreferenced relation", $_);
155 }
156
157 }
9b7bc35 @cmungall anon-check
authored
158 }
159
a40fb72 @cmungall new
authored
160 foreach my $k (%refrel) {
161 if ($refrel{$k}) {
162 flag("relation used but not defined","\'$k\' -- $refrel{$k}");
163 }
164 }
165
b8b58f9 @cmungall new
authored
166 print STDERR "n_xps: $n_xps\n";
167
168 exit(scalar(@flagged));
169
170 sub flag {
171 my $err = shift;
172 my $stanza = shift;
173 print STDERR "FLAG: $err\n$stanza\n\n";
174 push(@flagged, $err);
175 return;
176 }
177
178 sub scriptname {
179 my @p = split(/\//,$0);
180 pop @p;
181 }
182
183
184 sub usage {
185 my $sn = scriptname();
186
187 <<EOM;
188 $sn OBO-FILE [OBO-FILE2...]
189
190 performs syntactic check on intersection_of definitions
191
192 Example:
193
194 $sn mammalian_phenotype_xp.obo
195
196 EOM
197 }
198
Something went wrong with that request. Please try again.