Skip to content

Commit

Permalink
v2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
arthuryxt committed May 13, 2016
1 parent 1b6cc3a commit 0463c0e
Show file tree
Hide file tree
Showing 13 changed files with 289 additions and 166 deletions.
12 changes: 8 additions & 4 deletions ACF_MAKE.pl
Expand Up @@ -192,12 +192,16 @@
$command="perl ".$SPEC{"ACF_folder"}."/ACF_trans_splice_step2.pl unmap.trans.splicing ".$SPEC{"BWA_genome_folder"}."/ unmap.trans.splicing ".$SPEC{"Seq_len"}." $ts_minSSSum ".$SPEC{"Agtf"};
print OUT $command,"\n";
if (($do_blat_search eq "yes") and (-e $SPEC{"BWA_genome_Index"})) {
my $newlen=int(1.3*($SPEC{"Seq_len"}));
#increasing the -minScore parameter value beyond one-half of the query size has no further effect
#my $newlen=int(1.3*($SPEC{"Seq_len"}));
my $newlen=$SPEC{"Seq_len"};
$command=$blat_path." -minScore=".$newlen." -noHead ".$SPEC{"BWA_genome_Index"}." unmap.trans.splicing.tsloci.fa unmap.trans.splicing.tsloci.psl";
print OUT $command,"\n";
$command="perl -ne \'chomp; my \@a=split(\"\\t\",\$_); if(abs(\$a[11]-\$a[12]) > $newlen){print \$a[9],\"\\n\"\;}\' unmap.trans.splicing.tsloci.psl \> unmap.trans.splicing.tsloci.psl.badid";
print OUT $command,"\n";
$command="sort \-u unmap.trans.splicing.tsloci.psl.badid \> unmap.trans.splicing.tsloci.psl.badids";
#$command="perl -ne \'chomp; my \@a=split(\"\\t\",\$_); if(abs(\$a[11]-\$a[12]) > $newlen){print \$a[9],\"\\n\"\;}\' unmap.trans.splicing.tsloci.psl \> unmap.trans.splicing.tsloci.psl.badid";
#print OUT $command,"\n";
#$command="sort \-u unmap.trans.splicing.tsloci.psl.badid \> unmap.trans.splicing.tsloci.psl.badids";
#print OUT $command,"\n";
$command="perl ".$SPEC{"ACF_folder"}."/get_linear_id_from_psl.pl unmap.trans.splicing.tsloci.psl unmap.trans.splicing.tsloci.psl.badids";
print OUT $command,"\n";
$command="perl ".$SPEC{"ACF_folder"}."/get_selected_from_pool_singleline.pl unmap.trans.splicing.tsloci.psl.badids unmap.trans.splicing.tsloci unmap.trans.splicing.tsloci.good 0 -1";
print OUT $command,"\n";
Expand Down
21 changes: 11 additions & 10 deletions ACF_Step5m2.pl
Expand Up @@ -28,9 +28,10 @@
#}
}
if ($info ne "") {
$OK{$a[0]}=1000;
$OK{$a[0]}=0;
$OKinfo{$a[0]}=$a[0]."\t".int($ASsum/$ASc)."\t".$info;
}
else { $OK{$a[0]}=0; }
}
close IN0;

Expand Down Expand Up @@ -64,11 +65,11 @@
chomp;
my @a=split("\t",$_);
my @b=split(/\_\_\_/,$a[2]);
#if ((exists $OK{$a[0]}) and ($OK{$a[0]} eq 1000)) {
if ($OK{$a[0]} eq 0) {
if (exists $uniq3{$b[0]}) { $uniq3{$b[0]}=$uniq3{$b[0]}."\t".$a[0]; }
else { $uniq3{$b[0]}=$a[0]; }
$OK{$a[0]}++;
#}
}
}
close IN3;
open IN31,$filein3.".refFlat";
Expand All @@ -81,18 +82,18 @@

my %Anno;
open IN4, $filein4;
my $header=<IN4>;
chomp $header;
my @Header=split("\t",$header);
my $tmpid=$Header[0];
$Header[0]=$Header[0]."\tGname";
$Anno{$tmpid}=join("\t",@Header);
while(<IN4>) {
chomp;
my @a=split("\t",$_);
if ($a[0] eq "newid") { my $tmpid=$a[0]; $a[0]=$a[0]."\tGname"; $Anno{$tmpid}=join("\t",@a);}
#elsif ((exists $OK{$a[0]}) and ($OK{$a[0]} > 1000)) {
else{
$Anno{$a[0]}=join("\t",@a);
}
$Anno{$a[0]}=join("\t",@a);
}

my $header=$Anno{"newid"};
my @Header=split("\t",$header);
my $Nr=scalar(@Header);
my $template=0;
for(my $i=2; $i<$Nr; $i++) {$template=$template."\t0";}
Expand Down
80 changes: 2 additions & 78 deletions ACF_fusion_circRNAs.pl
Expand Up @@ -77,47 +77,9 @@
if ($debug > 0) {print OUT join("\t",@a),"\n",join("\t",@a2),"\n";}
}
}
elsif(($b[1] < $b2[4]) and ($b[4] > $b2[1])) {
if ($print_exp > 0) {
if ((exists $EXPR{$a[0]}) and (exists $EXPR{$a2[0]})){
$cnt++;
my @exp1=split("\t",$EXPR{$a[0]});
my @exp2=split("\t",$EXPR{$a2[0]});
my $expr=$exp1[2] > $exp2[2] ? $exp2[2] : $exp1[2];
for (my $i=3; $i<scalar(@exp1); $i++) {
$expr=$expr."\t".($exp1[$i] > $exp2[$i] ? $exp2[$i] : $exp1[$i]);
}
print OUT "fusion-circ-".$cnt,"\t",$a[2]."|".$a[3],"\t",$a[0],"\t",$a2[0],"\t",$expr,"\n";
if ($debug > 0) {print OUT join("\t",@a),"\n",join("\t",@a2),"\n";}
}
}
else{
print OUT "fusion-circ-".$cnt,"\t",$a[2]."|".$a[3],"\t",$a[0],"\t",$a2[0],"\n";
if ($debug > 0) {print OUT join("\t",@a),"\n",join("\t",@a2),"\n";}
}
}
}
elsif (($b[2] eq "-") and ($b[5] eq "-") ){
if (($b[1] > $b2[4]) and ($b[4] < $b2[1])) {
if ($print_exp > 0) {
if ((exists $EXPR{$a[0]}) and (exists $EXPR{$a2[0]})){
$cnt++;
my @exp1=split("\t",$EXPR{$a[0]});
my @exp2=split("\t",$EXPR{$a2[0]});
my $expr=$exp1[2] > $exp2[2] ? $exp2[2] : $exp1[2];
for (my $i=3; $i<scalar(@exp1); $i++) {
$expr=$expr."\t".($exp1[$i] > $exp2[$i] ? $exp2[$i] : $exp1[$i]);
}
print OUT "fusion-circ-".$cnt,"\t",$a[2]."|".$a[3],"\t",$a[0],"\t",$a2[0],"\t",$expr,"\n";
if ($debug > 0) {print OUT join("\t",@a),"\n",join("\t",@a2),"\n";}
}
}
else{
print OUT "fusion-circ-".$cnt,"\t",$a[2]."|".$a[3],"\t",$a[0],"\t",$a2[0],"\n";
if ($debug > 0) {print OUT join("\t",@a),"\n",join("\t",@a2),"\n";}
}
}
elsif(($b[1] < $b2[4]) and ($b[4] > $b2[1])) {
if(($b[1] < $b2[4]) and ($b[4] > $b2[1])) {
if ($print_exp > 0) {
if ((exists $EXPR{$a[0]}) and (exists $EXPR{$a2[0]})){
$cnt++;
Expand Down Expand Up @@ -157,47 +119,9 @@
if ($debug > 0) {print OUT join("\t",@a),"\n",join("\t",@a2),"\n";}
}
}
elsif(($b[1] > $b2[4]) and ($b[4] > $b2[1])) {
if ($print_exp > 0) {
if ((exists $EXPR{$a[0]}) and (exists $EXPR{$a2[0]})){
$cnt++;
my @exp1=split("\t",$EXPR{$a[0]});
my @exp2=split("\t",$EXPR{$a2[0]});
my $expr=$exp1[2] > $exp2[2] ? $exp2[2] : $exp1[2];
for (my $i=3; $i<scalar(@exp1); $i++) {
$expr=$expr."\t".($exp1[$i] > $exp2[$i] ? $exp2[$i] : $exp1[$i]);
}
print OUT "fusion-circ-".$cnt,"\t",$a[2]."|".$a[3],"\t",$a[0],"\t",$a2[0],"\t",$expr,"\n";
if ($debug > 0) {print OUT join("\t",@a),"\n",join("\t",@a2),"\n";}
}
}
else{
print OUT "fusion-circ-".$cnt,"\t",$a[2]."|".$a[3],"\t",$a[0],"\t",$a2[0],"\n";
if ($debug > 0) {print OUT join("\t",@a),"\n",join("\t",@a2),"\n";}
}
}
}
elsif (($b[2] eq "+") and ($b[5] eq "-") ){
if (($b[1] < $b2[4]) and ($b[4] < $b2[1])) {
if ($print_exp > 0) {
if ((exists $EXPR{$a[0]}) and (exists $EXPR{$a2[0]})){
$cnt++;
my @exp1=split("\t",$EXPR{$a[0]});
my @exp2=split("\t",$EXPR{$a2[0]});
my $expr=$exp1[2] > $exp2[2] ? $exp2[2] : $exp1[2];
for (my $i=3; $i<scalar(@exp1); $i++) {
$expr=$expr."\t".($exp1[$i] > $exp2[$i] ? $exp2[$i] : $exp1[$i]);
}
print OUT "fusion-circ-".$cnt,"\t",$a[2]."|".$a[3],"\t",$a[0],"\t",$a2[0],"\t",$expr,"\n";
if ($debug > 0) {print OUT join("\t",@a),"\n",join("\t",@a2),"\n";}
}
}
else{
print OUT "fusion-circ-".$cnt,"\t",$a[2]."|".$a[3],"\t",$a[0],"\t",$a2[0],"\n";
if ($debug > 0) {print OUT join("\t",@a),"\n",join("\t",@a2),"\n";}
}
}
elsif(($b[1] > $b2[4]) and ($b[4] > $b2[1])) {
if(($b[1] > $b2[4]) and ($b[4] > $b2[1])) {
if ($print_exp > 0) {
if ((exists $EXPR{$a[0]}) and (exists $EXPR{$a2[0]})){
$cnt++;
Expand Down
16 changes: 9 additions & 7 deletions ACF_trans_splice_step3.pl
Expand Up @@ -60,22 +60,24 @@

my %Anno;
open IN4, $filein4;
my $header=<IN4>;
chomp $header;
my @Header=split("\t",$header);
my $tmpid=$Header[0];
$Header[0]=$Header[0]."\tGname";
$Anno{$tmpid}=join("\t",@Header);
while(<IN4>) {
chomp;
my @a=split("\t",$_);
if ($a[0] eq "newid") { my $tmpid=$a[0]; $a[0]=$a[0]."\tGname"; $Anno{$tmpid}=join("\t",@a);}
#elsif ((exists $OK{$a[0]}) and ($OK{$a[0]} > 1000)) {
else{
$Anno{$a[0]}=join("\t",@a);
}
$Anno{$a[0]}=join("\t",@a);
}

my $header=$Anno{"newid"};
my @Header=split("\t",$header);
my $Nr=scalar(@Header);
my $template=0;
for(my $i=2; $i<$Nr; $i++) {$template=$template."\t0";}



open OUT1,">".$filein1.".expr";
open OUT11,">".$filein1.".newid";
print OUT1 $header,"\n";
Expand Down
27 changes: 16 additions & 11 deletions README.md
Expand Up @@ -3,12 +3,13 @@ Accurate CircRNA Finder Suite. Discovering circRNAs from RNA-Seq data.


# Overview
CircRNAs are generated through splicing, or to be precise, back-splicing where the downstream splice donor attact an upstream splice acceptor. Identifying the exact site of back-splice lies in the heart of circRNA discovery.
CircRNAs are generated through splicing, or to be precise, back-splicing where the downstream splice donor attacks an upstream splice acceptor. Identifying the exact site of back-splice lies in the heart of circRNA discovery.

ACFS first examines and pinpoints the back-splice site from RNA-Seq alignment using a maximal entropy model. The expression of circRNAs is estimated from a second round of alignment to the inferred pseudo circular sequences.

No prior knowledge of gene annotation is needed for ACFS, but reading the coordinates is far less interesting than reading gene names, so circRNAs are annotated using the gene annotation if provided.

ACFS is designed for Single-end RNA-Seq reads. Seeing is believing, we would like to see read directly spanning the back-splice sites. Paired-end data is also supported, albeit with lower sensitivity (if neither of the read ends crosses the back-splice, but the read-pair does).

# Change Log
- Update on 2016-05-06 :
Expand Down Expand Up @@ -97,6 +98,11 @@ Simply unpack the ACFS package.
- If the reads are actually stransless or pair-ended, please run in parallel original reads and reverse-complemented reads.
- No pair-end information is used, as the exact junction-site must be supported by a single read. Seeing is believing.

6. For Paired-end data, it is highly recommended to align the reads to genome+transcriptome first (e.g. using Tophat2), and extract the unmapped read (read pairs) using the following script (the fasta header line is also modified)
```
perl convert_unmapped_SAM_to_fa_for_acfs.pl <output_file_name> <unmapped.sam> <sample_id>
```


# Parameters
There are nine mandatory parameters to run ACFS in a basic mode, searching for fusion-circRNAs need to be enabled. Modify the config file "SPEC_example.txt" accordingly.
Expand Down Expand Up @@ -144,14 +150,13 @@ Optional parameters, the values in below are set as default:

# run ACFS
1. make Pipeline Bash file
```
perl ACF_MAKE.pl SPEC_example.txt BASH_example.sh
```

```
perl ACF_MAKE.pl SPEC_example.txt BASH_example.sh
```
2. find circles
```
bash BASH_example.sh
```
```
bash BASH_example.sh
```


# Results
Expand Down Expand Up @@ -195,11 +200,11 @@ bash BASH_example.sh
perl -e 'open IN,"UNMAP_expr1"; my $line=<IN>; print $line; while(<IN>){chomp; my @a=split("\t",$_); print join("\t",@a),"\n"; $a[0]="rc".$a[0]; print join("\t",@a),"\n";}' > UNMAP_expr
```
1C. **_for single-end and paired-end RNA-Seq unmapped reads_**.
First align raw RNA-Seq reads to some reference with tools (such as Bowtie/BWA/STAR), obtain a SAM file containing all unmapped reads. For example if you choose Tophat, please convert <unmapped.bam> to <unmapped.sam>. Then run the following command:
First align raw RNA-Seq reads to some reference with tools (such as Bowtie/BWA/STAR/Tophat2), obtain a SAM file <unmapped.sam> containing all unmapped reads. For example if you choose Tophat, please convert <unmapped.bam> to <unmapped.sam>. Then run the following command:
```
perl convert_unmapped_SAM_to_fa_for_acfs.pl unmapped.sam newName newNAME.fa
perl convert_unmapped_SAM_to_fa_for_acfs.pl <newNAME.fa> <unmapped.sam> <sample_id>
```
where <newName> is the new sample ID that is used to change the fasta/fastq header to ACF style. It could be "Ctrl12" or "TreatA.rep2", your choice. Just remember, there should be NO underscore as "_" in it.
where <sample_id> is the new sample ID that is used to change the fasta/fastq header to ACF style. It could be "Ctrl12" or "TreatA.rep2", your choice. Just remember, there should be NO underscore as "_" in it.
<newNAME.fa> is the name of the converted file. Name it whatever you like.
```
perl Truseq_merge_unique_fa.pl UNMAP newid newNAME.fa
Expand Down

0 comments on commit 0463c0e

Please sign in to comment.