Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100755 199 lines (177 sloc) 4.32 kb
b8b58f9 Chris Mungall new
authored
1 #!/usr/bin/perl -w
2
3 use strict;
4 my %tag_h=();
5 my $regexp = '';
6 my $noheader;
7 my $negate;
8 my $count;
9 while ($ARGV[0] =~ /^\-.+/) {
10 my $opt = shift @ARGV;
11 if ($opt eq '-h' || $opt eq '--help') {
12 print usage();
13 exit 0;
14 }
15 if ($opt eq '-r' || $opt eq '--regexp') {
16 $regexp = shift @ARGV;
17 }
18 if ($opt eq '-c' || $opt eq '--count') {
19 $count = 1;
20 }
21 if ($opt eq '--noheader') {
22 $noheader = 1;
23 }
24 if ($opt eq '-v' || $opt eq '--neg') {
25 $negate = 1;
26 }
27 }
28
29
30 $/ = "\n\n";
31
32 my %done = ();
5122c0e Chris Mungall report unused rels
authored
33 my %stanzatype=();
cef2b76 Chris Mungall -new
authored
34 my %name = ();
b8b58f9 Chris Mungall new
authored
35 my @flagged = ();
9b7bc35 Chris Mungall anon-check
authored
36 my %referenced;
a40fb72 Chris Mungall new
authored
37 my %refrel = ();
b8b58f9 Chris Mungall new
authored
38 my $n = 0;
becda5c Chris Mungall check xp dupes
authored
39 my %id_by_xp_h = ();
b8b58f9 Chris Mungall new
authored
40 if (!@ARGV) {
41 @ARGV=('-');
42 }
43 my $n_xps = 0;
44 while (@ARGV) {
45 my $f = pop @ARGV;
46 if ($f eq '-') {
47 *F=*STDIN;
48 }
49 else {
50 open(F,$f) || die $f;
51 }
52 my $hdr = 0;
9b7bc35 Chris Mungall anon-check
authored
53 my $stanza_type;
b8b58f9 Chris Mungall new
authored
54 while(<F>) {
55 my $id;
423979c Chris Mungall fix
authored
56 if (/^\[(\w+)\]/) {
9b7bc35 Chris Mungall anon-check
authored
57 $stanza_type = lc($1);
58 }
85d8736 Chris Mungall fix
authored
59 if (/\nid:\s*(\S+)/) {
b8b58f9 Chris Mungall new
authored
60 $id = $1;
9b7bc35 Chris Mungall anon-check
authored
61 if ($done{$id} && /\nid/) {
cef2b76 Chris Mungall -new
authored
62 flag("\"$name{$id}\" present twice: ",$_);
b8b58f9 Chris Mungall new
authored
63 }
3298917 Chris Mungall ws
authored
64 $done{$id} = 1;
5122c0e Chris Mungall report unused rels
authored
65 $stanzatype{$id} = $stanza_type;
3298917 Chris Mungall ws
authored
66 if (/id:\s*(.*)/) {
67 my $full = $1;
cef2b76 Chris Mungall -new
authored
68 $name{$id} = $full;
3298917 Chris Mungall ws
authored
69 $full =~ s/\s*\n.*//;
70 $full =~ s/\s*\!.*//;
71 $full =~ s/\S+\s*//;
72 if ($full) {
73 flag("ID: $id contains whitespace, followed by '$full'", $_);
74 }
75 }
b8b58f9 Chris Mungall new
authored
76 }
77 my @lines = split(/\n/,$_);
ceac129 Chris Mungall union
authored
78 foreach (@lines) {
a40fb72 Chris Mungall new
authored
79 if (/^(union_of|relationship):\s*(\S+)/) {
80 $referenced{$2} = 1;
ceac129 Chris Mungall union
authored
81 #print STDERR "U: $1\n";
82 }
a40fb72 Chris Mungall new
authored
83 elsif (/^(holds_over_chain|equivalent_to_chain):\s*(\S+)\s+(\S+)/) {
84 $referenced{$2} = 1;
85 $referenced{$3} = 1;
86 #print STDERR "U: $1\n";
87 }
88
89 if (/^relationship:\s*(\S+)/) {
90 $refrel{$1} .= "$_\n";
91 }
92
ceac129 Chris Mungall union
authored
93 }
b8b58f9 Chris Mungall new
authored
94 my @xps = grep {/^intersection_of:/} @lines;
95 if (@xps) {
96 if (@xps == 1) {
97 flag("single_xp: @xps",$_);
98 }
becda5c Chris Mungall check xp dupes
authored
99 my @xp_links = ();
b8b58f9 Chris Mungall new
authored
100 my @genii = ();
101 foreach (@xps) {
102 s/\s*\!.*//;
e8becbc Chris Mungall new
authored
103 s/\s*\{.*\}.*//;
b8b58f9 Chris Mungall new
authored
104 my @parts = split(' ',$_);
105 shift @parts;
becda5c Chris Mungall check xp dupes
authored
106 push(@xp_links,join(' ',@parts));
9b7bc35 Chris Mungall anon-check
authored
107 foreach (@parts) {
108 $referenced{$_} = 1;
109 }
b8b58f9 Chris Mungall new
authored
110 if (@parts == 1) {
111 push(@genii, $parts[0]);
112 }
a40fb72 Chris Mungall new
authored
113 else {
114 $refrel{$parts[0]} .= "$_\n";
115 }
b8b58f9 Chris Mungall new
authored
116 }
becda5c Chris Mungall check xp dupes
authored
117 my $xp_str = join('; ', sort {$a cmp $b} @xp_links);
118 if ($id_by_xp_h{$xp_str}) {
cef2b76 Chris Mungall -new
authored
119 if ($id eq $id_by_xp_h{$xp_str}) {
120 # already reported
121 }
122 else {
123 flag("duplicate xp def: '$xp_str' $id == $id_by_xp_h{$xp_str}", $_);
124 }
becda5c Chris Mungall check xp dupes
authored
125 }
126 $id_by_xp_h{$xp_str} = $id;
b8b58f9 Chris Mungall new
authored
127 if (@genii < 1) {
128 flag("single_genus: @genii", $_);
129 }
130 elsif (@genii > 1) {
423979c Chris Mungall fix
authored
131 flag("multiple_genus: @genii in", $_)
132 unless $stanza_type eq 'typedef';
b8b58f9 Chris Mungall new
authored
133 }
134 else {
135 if ($id eq $genii[0]) {
136 flag("id $id = genus", $_);
137 }
138 # ok
139 }
140 $n_xps++;
141 }
142 }
143 }
9b7bc35 Chris Mungall anon-check
authored
144
145 foreach (keys %done) {
146 if (/^_:/) {
becda5c Chris Mungall check xp dupes
authored
147 if (!$referenced{$_}) {
9b7bc35 Chris Mungall anon-check
authored
148 flag("unreferenced anon class", $_);
149 }
150 }
5122c0e Chris Mungall report unused rels
authored
151 if ($stanzatype{$_} eq 'typedef') {
a40fb72 Chris Mungall new
authored
152 $refrel{$_} = 0;
5122c0e Chris Mungall report unused rels
authored
153 if (!$referenced{$_}) {
154 flag("unreferenced relation", $_);
155 }
156
157 }
9b7bc35 Chris Mungall anon-check
authored
158 }
159
a40fb72 Chris Mungall new
authored
160 foreach my $k (%refrel) {
161 if ($refrel{$k}) {
162 flag("relation used but not defined","\'$k\' -- $refrel{$k}");
163 }
164 }
165
b8b58f9 Chris Mungall new
authored
166 print STDERR "n_xps: $n_xps\n";
167
168 exit(scalar(@flagged));
169
170 sub flag {
171 my $err = shift;
172 my $stanza = shift;
173 print STDERR "FLAG: $err\n$stanza\n\n";
174 push(@flagged, $err);
175 return;
176 }
177
178 sub scriptname {
179 my @p = split(/\//,$0);
180 pop @p;
181 }
182
183
184 sub usage {
185 my $sn = scriptname();
186
187 <<EOM;
188 $sn OBO-FILE [OBO-FILE2...]
189
190 performs syntactic check on intersection_of definitions
191
192 Example:
193
194 $sn mammalian_phenotype_xp.obo
195
196 EOM
197 }
198
Something went wrong with that request. Please try again.