Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

validation script for whitehead markers where there appears to be cru…

…ft giving 2 markers with same primers different RH positions in what should be the same map?

svn path=/bioperl-db/trunk/; revision=267
  • Loading branch information...
commit 7f0368dc6c779c1d78f6c921e038dfd79ad3e550 1 parent 108880d
@hyphaltip hyphaltip authored
Showing with 177 additions and 0 deletions.
  1. +177 −0 scripts/validate_whitehead_markers.pl
View
177 scripts/validate_whitehead_markers.pl
@@ -0,0 +1,177 @@
+#!/usr/local/bin/perl -w
+use strict;
+
+use Bio::DB::Map::SQL::DBAdaptor;
+use Bio::DB::Map::Marker;
+use Bio::DB::Map::Map;
+use Bio::SeqIO;
+use IO::File;
+use IO::String;
+use Carp;
+
+use LWP::UserAgent;
+use HTTP::Request::Common;
+
+BEGIN {
+ use vars qw($MARKERDATA $WHITEHEADSTS $WHITEHEADALIAS $WHITEHEADRHMAP
+ $WHITEHEADSTSFILE $WHITEHEADALIASFILE $WHITEHEADRHMAPDIR
+ @SOURCES $ONLINE $DEBUG);
+ $WHITEHEADSTS = 'http://carbon.wi.mit.edu:8000/ftp/pub/human_STS_releases/may97/5-97.STS.DATA.txt';
+ $WHITEHEADALIAS = 'http://carbon.wi.mit.edu:8000/ftp/pub/human_STS_releases/may97/5-97.ALIASES.txt';
+ $WHITEHEADRHMAP = 'http://carbon.wi.mit.edu:8000/ftp/pub/human_STS_releases/may97/rhmap';
+ $WHITEHEADALIASFILE = "/tmp/markers/whitehead/5-97.ALIASES.txt";
+ $WHITEHEADSTSFILE = '/tmp/markers/whitehead/5-97.STS.DATA.txt';
+ $WHITEHEADRHMAPDIR = '/tmp/markers/whitehead/rhmap';
+
+ $MARKERDATA = '/tmp/markers/whitehead';
+ $ONLINE = 0;
+ $DEBUG = 1;
+}
+
+# set proxy stuff here where applicable
+my $ua = new LWP::UserAgent();
+
+use Getopt::Long;
+
+my $host = 'localhost';
+my $port = 3306;
+my $dbname = 'markerdb';
+my $dbuser = 'root';
+my $dbpass = 'undef';
+my $module = 'Bio::DB::Map::SQL::DBAdaptor';
+
+&GetOptions(
+ 'online' => \$ONLINE,
+ 'debug' => \$DEBUG,
+ 'host:s' => \$host,
+ 'port:n' => \$port,
+ 'db|dbname:s' => \$dbname,
+ 'user|dbuser:s' => \$dbuser,
+ 'p|dbpass:s' => \$dbpass,
+ 'm|module:s' => \$module,
+ );
+
+my %props = ( '-host' => $host,
+ '-dbname' => $dbname,
+ '-user' => $dbuser);
+
+if( defined $dbpass ) {
+ $props{'-dbpass'} = $dbpass;
+}
+
+my %sts;
+my ($STS) = &get_whitehead_sts_data();
+
+exit if( ! $STS );
+# build STS mapping
+
+my %markers;
+my %primers;
+while(<$STS>) {
+ last if( /^SOURCE/ );
+ my ($locus,$assay,$chrom,$source,$genbank,$contig,$fwd,
+ $rev,$size) = split("\t", $_);
+ next if( ! defined $assay || ! defined $chrom );
+ $chrom =~ s/Chr(om)?\s*//i;
+ if( ! $fwd || $fwd eq 'NN' || $rev eq 'NN') {
+ # we could lookup these since we know genbank id often, but
+ # we're only talking 300 out of 67k
+
+ next;
+ }
+
+ $assay =~ s/(AFM\S+)P/$1/;
+ $assay =~ s/CHLC\.(\S+)(\.\d+)?/$1/;
+ push @{$primers{"$fwd\_$rev"}}, [ uc $assay, $locus, $chrom];
+}
+
+my $duplicate;
+my %dups;
+# read in the maps
+foreach my $chrom ( 1..23 ) {
+ my %info;
+ my $DATA = &get_data_for_chrom($chrom);
+ my (@requests,@requests_probes);
+ while(<$DATA>) {
+ my ($assay,$mappos,$lod) = split;
+ my $orig = $assay;
+ $assay =~ s/(AFM\S+)P/$1/;
+ $assay =~ s/CHLC\.(\S+)(\.\d+)?/$1/;
+ push @{ $markers{uc $assay}}, [ $orig, $mappos, $lod ];
+ }
+ close($DATA);
+}
+
+my $duplicates = 0;
+my $duplicate_positions = 0;
+my $missing_pos = 0;
+while( my($primers,$assayentry) = each %primers ) {
+ if( @$assayentry > 1 ) {
+ $duplicates++;
+ print "duplicate for $primers :\n";
+ foreach my $assayinfo ( @$assayentry ) {
+ my ($assay, $locus, $chrom) = @$assayinfo;
+ if( ! $markers{$assay} ) {
+ $missing_pos ++;
+ print STDERR "\t-assay $assay had markers but no map position\n";
+ next;
+ }
+
+ foreach my $position ( @{$markers{$assay}} ) {
+ print "\t $assay -> ", join(" ". @$position,
+ $locus, $chrom), "\n";
+ }
+ }
+ print "---\n";
+ }
+}
+print "Total duplicates (same primers different positions ) = $duplicates\n";
+print "number of missing map positions $missing_pos\n";
+sub get_data_for_chrom {
+ my ($chrom) = @_;
+ $chrom =~ s/23/X/;
+ my $fh;
+ if( $ONLINE ) {
+ my $url = sprintf('%s/Chr%s.rh', $WHITEHEADRHMAP,$chrom);
+ my $request = GET $url;
+ my $response = $ua->request($request);
+ if( $response->is_success ) {
+ $fh = new IO::String($response->content);
+ } else {
+ warn(sprintf"Error: Request was %s error was %s",
+ $request->as_string(),
+ $response->error_as_HTML);
+ $fh = undef;
+ }
+ } else {
+ my $file = sprintf('< %s/Chr%s.rh', $WHITEHEADRHMAPDIR,
+ $chrom);
+ $fh = new IO::File($file) or do {
+ warn("cannot open $file");
+ $fh = undef; };
+ }
+ return $fh;
+}
+
+sub get_whitehead_sts_data {
+ my ($STSFH);
+ if( $ONLINE ) {
+
+ my $request = GET $WHITEHEADSTS;
+ my $response = $ua->request($request);
+ if( $response->is_success ) {
+ $STSFH = new IO::String($response->content);
+ } else {
+ warn(sprintf"Error: Request was %s error was %s",
+ $request->as_string(),
+ $response->error_as_HTML);
+ $STSFH = undef;
+ }
+ } else {
+ $STSFH = new IO::File("< $WHITEHEADSTSFILE") or do {
+ warn("cannot open $WHITEHEADSTSFILE");
+ $STSFH = undef;
+ };
+ }
+ return ($STSFH);
+}
Please sign in to comment.
Something went wrong with that request. Please try again.