Skip to content
This repository
Browse code

pull introns out

  • Loading branch information...
commit aeac9a09d323dbf3bed0ff0922c2d0b408ca18a2 1 parent 70209f1
Jason Stajich authored

Showing 1 changed file with 114 additions and 0 deletions. Show diff stats Hide diff stats

  1. +114 0 seqfeature/extract_introns_gff3.pl
114 seqfeature/extract_introns_gff3.pl
... ... @@ -0,0 +1,114 @@
  1 +#!/usr/bin/perl -w
  2 +use strict;
  3 +use Bio::SeqIO;
  4 +use Bio::DB::SeqFeature::Store;
  5 +use Getopt::Long;
  6 +use Env qw(HOME);
  7 +my ($user,$pass,$dbname,$host);
  8 +$host ='localhost';
  9 +my $prefix;
  10 +my $debug = 0;
  11 +my $src = 'gene';
  12 +my $output;
  13 +
  14 +GetOptions(
  15 + 'v|verbose!' => \$debug,
  16 + 'u|user:s' => \$user,
  17 + 'p|pass:s' => \$pass,
  18 + 'host:s' => \$host,
  19 + 'db|dbname:s' => \$dbname,
  20 +
  21 + 's|src:s' => \$src,
  22 + 'o|output:s' => \$output,
  23 + );
  24 +
  25 +unless( defined $dbname ) {
  26 + die("no dbname provided\n");
  27 +}
  28 +
  29 +($user,$pass) = &read_cnf($user,$pass) unless $pass && $user;
  30 +my $dsn = sprintf('dbi:mysql:database=%s;host=%s',$dbname,$host);
  31 +my $dbh = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::mysql',
  32 + -dsn => $dsn,
  33 + -user => $user,
  34 + -password => $pass,
  35 + );
  36 +
  37 +$output ||= sprintf("%s-%s.intron.fa",$dbname,$src);
  38 +my $ofh = Bio::SeqIO->new(-format => 'fasta',
  39 + -file => ">$output");
  40 +my @names;
  41 +my $iter = $dbh->get_seq_stream(-type => 'scaffold:chromosome');
  42 +while( my $segment = $iter->next_seq ) {
  43 +# my $segment = $dbh->segment($seg->seq_id);
  44 + my $count = 0;
  45 + warn("seg is $segment\n");
  46 + # FIX ME HERE
  47 + for my $gene ($segment->get_SeqFeatures(-type => 'gene') ) { #(-type => "$src:") ) {
  48 + warn("gene is $gene\n");
  49 + my $gname = $gene->name;
  50 + my @mrna = $gene->get_SeqFeatures('mRNA');
  51 + for my $mRNA ( @mrna ) {
  52 + my ($id) = $mRNA->load_id;
  53 + my @exons = $mRNA->get_SeqFeatures('cds');
  54 + my $lastexon;
  55 + my $i = 1;
  56 + for my $exon ( sort { ($a->start*$a->strand) <=>
  57 + ($b->end*$b->strand) }
  58 + @exons ) {
  59 + if( $lastexon ) {
  60 + my ($seqid,$s,$e) = ($exon->seq_id);
  61 + my $loc;
  62 + if( $exon->strand > 0 ) {
  63 + ($s,$e) = ($lastexon->end+1, $exon->start-1);
  64 + $loc = Bio::Location::Simple->new(-start => $s,
  65 + -end => $e);
  66 + } else {
  67 + ($s,$e) = ($lastexon->start-1,$exon->end+1);
  68 + $loc = Bio::Location::Simple->new(-start => $e,
  69 + -end => $s,
  70 + -strand => $exon->strand
  71 + );
  72 + }
  73 + my $iseq = $dbh->segment($seqid,$s,$e);
  74 + if( ! $iseq ) {
  75 + warn("cannot get seq for $seqid\n");
  76 + } else {
  77 + $iseq = $iseq->seq->seq;
  78 + }
  79 +
  80 + my $intron = Bio::PrimarySeq->new
  81 + (-seq => $iseq,
  82 + -id => "$prefix:$id.i".$i++,
  83 + -desc => sprintf("%s %s:%s",
  84 + $gname,
  85 + $seqid,$loc->to_FTstring));
  86 + if( $intron->length < 4) {
  87 + warn("intron ",$intron->length, " too short for: ",
  88 + $intron->display_id,"\n");
  89 + } else {
  90 + $ofh->write_seq($intron);
  91 + }
  92 + }
  93 + $lastexon = $exon;
  94 + last if $debug && $count++ > 10;
  95 + }
  96 + }
  97 + }
  98 + last;
  99 +}
  100 +sub read_cnf {
  101 + my ($user,$pass) = @_;
  102 + if( -f "$HOME/.my.cnf") {
  103 + open(IN,"$HOME/.my.cnf");
  104 + while(<IN>) {
  105 + if(/user(name)?\s*=\s*(\S+)/ ) {
  106 + $user = $2;
  107 + } elsif( /pass(word)\s*=\s*(\S+)/ ) {
  108 + $pass = $2;
  109 + }
  110 + }
  111 + close(IN);
  112 + }
  113 + return ($user,$pass);
  114 +}

0 comments on commit aeac9a0

Please sign in to comment.
Something went wrong with that request. Please try again.