# BioPerl

## Manipulation de séquences

### Création d'un objet

In [24]:
use Bio::Seq;
my $seqobj = Bio::Seq->new(
    -seq => "ACTGTGTGTCC",
    -id => "Chlorella sorokiniana",
    -accession_number => "CAA41635"
);

Bio::Seq=HASH(0x7fd623b5e2c0)


### Méthodes

#### Manipulation de séquences

In [25]:
$seq = $seqobj->seq();
print "$seq\n";

ACTGTGTGTCC


1


In [26]:
$length = $seqobj->length();
print "$length\n";

11


1


In [27]:
$subseq = $seqobj->subseq(int($length/2), $length);
print "$subseq\n";

TGTGTCC


1


In [28]:
$new_seq = $seq.$subseq;
if($seqobj->validate_seq($new_seq)){
    $seqobj->seq($new_seq);
}
print $seqobj->seq()," ", $seqobj->length(), "\n"

ACTGTGTGTCCTGTGTCC 18


1


#### Traduction 

In [41]:
$translated_obj = $seq;
if( $seqobj->alphabet() == 'dna'){
    $translated_obj = $seqobj->translate();
}
print $translated_obj->seq(),"\n";

TVCPVS


1


## Manipulation de fichier de séquences
### Ecrire des séquences dans un fichier

In [2]:
use Bio::SeqIO;
use Bio::Seq;

my $seqio_obj = Bio::SeqIO->new(-file => '>sequence.fasta', 
    -format => 'fasta' );

my $seqobj = Bio::Seq->new(
    -seq => "ACTGTGTGTCC",
    -id => "Chlorella sorokiniana"
);

$seqio_obj->write_seq($seqobj);

my $seqobj = Bio::Seq->new(
    -seq => "ACTGTGTGTCCTGTGTCC",
    -id => "Modified Chlorella sorokiniana"
);

$seqio_obj->write_seq($seqobj);

1


Warning: 
--------------------- WARNING ---------------------
MSG: No whitespace allowed in FASTA ID [Chlorella sorokiniana]
---------------------------------------------------


--------------------- WARNING ---------------------
MSG: No whitespace allowed in FASTA ID [Chlorella sorokiniana]
---------------------------------------------------


--------------------- WARNING ---------------------
MSG: No whitespace allowed in FASTA ID [Modified Chlorella sorokiniana]
---------------------------------------------------


--------------------- WARNING ---------------------
MSG: No whitespace allowed in FASTA ID [Modified Chlorella sorokiniana]
---------------------------------------------------


### Lecture de séquences d'un fichier

In [3]:
use Bio::SeqIO;

$seqio_obj = Bio::SeqIO->new(-file => "sequence.fasta", 
    -format => "fasta" );
    
while ($seq_obj = $seqio_obj->next_seq){
    print $seq_obj->seq,"\n";
}

ACTGTGTGTCC
ACTGTGTGTCCTGTGTCC


## Accès aux bases de données
### Récupération d'une séquence dans une base de données

In [6]:
use Bio::DB::GenBank;
use Bio::Seq;

$db_obj = Bio::DB::GenBank->new;
 
$seq_obj = $db_obj->get_Seq_by_id(2);

print $seq_obj->display_id(),"\n";

A00002


1


### Récupération de plusieurs séquences

In [8]:
use Bio::DB::GenBank;
use Bio::DB::Query::GenBank;
 
$query = "Arabidopsis[ORGN] AND topoisomerase[TITL] and 0:3000[SLEN]";
$query_obj = Bio::DB::Query::GenBank->new(-db    => 'nucleotide',  -query => $query );
 
$gb_obj = Bio::DB::GenBank->new;
 
$stream_obj = $gb_obj->get_Stream_by_query($query_obj);
 
while ($seq_obj = $stream_obj->next_seq) {     
    print $seq_obj->display_id, "\t", $seq_obj->length, "\n";
}

NM_001203615	2099
NM_001125591	549
NM_128760	2775
NM_120360	1655
NM_001036386	2734
NM_112969	2212
NM_126949	704
ATHTOP2A	410
XM_002881164	2698
AF323679	1373
DQ446392	1152
AY142667	2140
AY133820	2113
AY090993	1830
AY080864	928
AY039947	2493
AY034972	2586
U12285	591
U12284	590
AK221851	1078
AK221766	1452
AK221174	2785
AJ297843	2212
AJ297842	1655
AJ251990	1655
AJ251989	1640
AK175923	2744
AK175789	2744


## Parser des rapports de recherche
### Parcours d'un fichier issu d'une requête Blast

In [None]:
use Bio::SearchIO;

my $in = new Bio::SearchIO(
    -format => "blast",
    -file => "report.bls");
while(my $result = $in->next_result){
    while(my $hit = $result->next_hit){
        while(my $hsp = $hit->next_hsp){
            print "Query=", $result->query_name,
                " Hit=", $hit->name,
                " Length=", $hsp->length('total'),
                " Percent_id=", $hsp->percent_identity,
                "\n";
        }
    }
}