Permalink
Browse files

update20150727

  • Loading branch information...
1 parent c005305 commit 226774cd2d0a735afd6be75866479dd285884159 BGI2015 committed Jul 27, 2015
Showing with 27 additions and 71 deletions.
  1. +9 −2 RES-Scanner/RES-Scanner_identification/RES-Scanner_identification.pl
  2. +0 −6 RES-Scanner/RES-Scanner_identification/bin/Amino_acid_change.pl
  3. +9 −11 RES-Scanner/RES-Scanner_identification/bin/RNA_edit_site_table.pl
  4. +1 −1 RES-Scanner/RES-Scanner_identification/bin/bigTable.pl
  5. +1 −1 RES-Scanner/RES-Scanner_identification/bin/filter_abnormal_alignment_forBWA.pl
  6. +7 −7 RES-Scanner/RES-Scanner_identification/bin/filter_edit_site_table.pl
  7. +0 −5 RES-Scanner/RES-Scanner_identification/bin/findOverlap.pl
  8. +0 −5 RES-Scanner/RES-Scanner_identification/bin/findOverlap_sameStrand.pl
  9. +0 −33 RES-Scanner/RES-Scanner_identification/bin/gtf2gff.pl
  10. BIN testData/fastq/DNA/{DNA_Ae322_gynes/DNA_Ae322_gynes_1.fq.gz → DNA_gynes/DNA_gynes_1.fq.gz}
  11. BIN testData/fastq/DNA/{DNA_Ae322_gynes/DNA_Ae322_gynes_2.fq.gz → DNA_gynes/DNA_gynes_2.fq.gz}
  12. BIN ...e322_large_workers/DNA_Ae322_large_workers_1.fq.gz → DNA_large_workers/DNA_large_workers_1.fq.gz}
  13. BIN ...e322_large_workers/DNA_Ae322_large_workers_2.fq.gz → DNA_large_workers/DNA_large_workers_2.fq.gz}
  14. BIN ...e322_small_workers/DNA_Ae322_small_workers_1.fq.gz → DNA_small_workers/DNA_small_workers_1.fq.gz}
  15. BIN ...e322_small_workers/DNA_Ae322_small_workers_2.fq.gz → DNA_small_workers/DNA_small_workers_2.fq.gz}
  16. BIN ...q/RNA/{RNA_Ae322_gyne_heads/RNA_Ae322_gyne_heads_1.fq.gz → RNA_gyne_heads/RNA_gyne_heads_1.fq.gz}
  17. BIN ...q/RNA/{RNA_Ae322_gyne_heads/RNA_Ae322_gyne_heads_2.fq.gz → RNA_gyne_heads/RNA_gyne_heads_2.fq.gz}
  18. BIN ...ads/RNA_Ae322_large_worker_heads_1.fq.gz → RNA_large_worker_heads/RNA_large_worker_heads_1.fq.gz}
  19. BIN ...ads/RNA_Ae322_large_worker_heads_2.fq.gz → RNA_large_worker_heads/RNA_large_worker_heads_2.fq.gz}
  20. BIN ...ads/RNA_Ae322_small_worker_heads_1.fq.gz → RNA_small_worker_heads/RNA_small_worker_heads_1.fq.gz}
  21. BIN ...ads/RNA_Ae322_small_worker_heads_2.fq.gz → RNA_small_worker_heads/RNA_small_worker_heads_2.fq.gz}
@@ -44,6 +44,7 @@
"phred:s"=>\$phred,
"DNAdepth:s"=>\$DNAdepth,
"RNAdepth:s"=>\$RNAdepth,
+ "editDepth:s"=>\$readType,
"posdir:s"=>\$posdir,
"editLevel:s"=>\$editLevel,
"editPvalue:s"=>\$pvalue,
@@ -387,6 +388,10 @@
print STEP4RE "perl $Amino_acid_change $OutDir/RES_final_result.annotation $OutDir/codon.database > $OutDir/RES_final_result.annotation.temp\n";
print STEP4RE "mv -f $OutDir/RES_final_result.annotation.temp $OutDir/RES_final_result.annotation\n";
}
+ print STEP4RE "echo step4 work is completed! > $OutDir/step4.log\n";
+ print STEP4RE "echo step4 work is completed!\n";
+ print STEP4RE "echo File '$OutDir/RES_final_result.annotation' is the final result! >> $OutDir/step4.log\n";
+ print STEP4RE "echo File '$OutDir/RES_final_result.annotation' is the final result!\n";
}else{
if($method eq "Bayesian"){
print STEP4RE "perl $bigTable --config $OutDir/bigTable.config --genome $genome --phred $phred --qual_cutoff $q --method $method --HomoPrior $HomoPrior --rate $rate --ploidy $ploidy --DNAdepth $DNAdepth --RNAdepth $RNAdepth --Bayesian_P $Bayesian_Posterior_Probability --paralogous_D $paralogous_D --homopolymer $homopolymer > $OutDir/RES_final_result.txt\n";
@@ -401,10 +406,12 @@
print STEP4RE "perl $filter_sites_in_paralogous_regions $OutDir/RES_final_result.txt.bilateral_sequence.fa.psl $OutDir/RES_final_result.txt > $OutDir/RES_final_result.txt.filter\n";
print STEP4RE "mv -f $OutDir/RES_final_result.txt.filter $OutDir/RES_final_result.txt\n";
}
+ print STEP4RE "echo step4 work is completed! > $OutDir/step4.log\n";
+ print STEP4RE "echo step4 work is completed!\n";
+ print STEP4RE "echo File '$OutDir/RES_final_result.txt' is the final result! >> $OutDir/step4.log\n";
+ print STEP4RE "echo File '$OutDir/RES_final_result.txt' is the final result!\n";
}
-print STEP4RE "echo step4 work is completed! > $OutDir/step4.log\n";
-print STEP4RE "echo step4 work is completed!\n";
close STEP4RE;
print STDERR "####################################################################################################\n";
@@ -49,12 +49,6 @@
$CODE{$codon} = "*" unless (exists $CODE{$codon});
my $amino = $CODE{$codon};
my $C_a = join "$info[7]",$info[6],$amino;
-# my $key;
-# $key = 0 if ($info[6] eq $amino);
-# $key = 1 if ($info[6] ne $amino);
-# $key = -1 if ($amino eq "U" && $info[6] ne "U");
-# $key = 2 if ($amino ne "U" && $info[6] eq "U");
-# $result{$info[0]}{$info[1]}{$info[2]} = [$C_c,$C_a,$key];
$result{$info[0]}{$info[1]}{$info[2]} = [$C_c,$C_a];
}
close IN;
@@ -7,7 +7,6 @@
my ($RNA_singleBase,$DNA_singleBase,$DNAdepth,$phred,$qual_cutoff,$RNAdepth,$editLevel,$editDepth,$strand,$RNA_bam,$ploidy,$samtools,$genome,$method);
my $HomoPrior ||= 0.99;
-my $HetePrior ||= 0.01;
my $rate ||= 4; #the rate of transition over transversion
#$DNAdepth ||= 7;
#$RNAdepth ||= 3;
@@ -33,7 +32,6 @@
"ploidy:s"=>\$ploidy,
"samtools:s"=>\$samtools,
"HomoPrior:s"=>\$HomoPrior,
- "HetePrior:s"=>\$HetePrior,
"rate:s"=>\$rate,
"method:s"=>\$method,
);
@@ -51,9 +49,8 @@
--strand The strand of data, '+' or '-' for strand specific data, 'unknown' for non-strand specific data.
--ploidy The ploidy level, 1 for monoploid , 2 for diploid, 3 for triploid, 4 for tetraploid, and so on. [2].
--samtools The absolute path of pre-installed SAMtools package;
- --method Method for detecting SNPs.'Bayesian' or 'Binomial' or 'Stringent'. [Bayesian]
+ --method Method for detecting SNPs.'Bayesian' or 'Binomial' or 'Frequency'. [Bayesian]
--HomoPrior The prior probability of homozygous genomic positions. (force --method Bayesian) [0.99]
- --HetePrior The prior probability of heterozygous genomic positions. (force --method Bayesian) [0.01]
--rate The rate of transitions over transversions of the genome. (force --method Bayesian) [4]
@@ -64,6 +61,7 @@
exit;
}
+my $HetePrior = 1-$HomoPrior;
die "$RNA_singleBase does not exist!\n" unless -e $RNA_singleBase;
die "$DNA_singleBase doesn't exist!\n" unless -e $DNA_singleBase;
die "$samtools doesn't exist!\n" unless -e $samtools;
@@ -73,8 +71,8 @@
die "Input Error: --strand should be '+' or '-' or 'unknown'!\n";
}
-if($method ne "Bayesian" && $method ne "Binomial" && $method ne "Stringent"){
- die "Error: unknown options value '$method' for --Method [Bayesian|Binomial|Stringent]\n";
+if($method ne "Bayesian" && $method ne "Binomial" && $method ne "Frequency"){
+ die "Error: unknown options value '$method' for --Method [Bayesian|Binomial|Frequency]\n";
}elsif($method eq "Bayesian" && !$genome){
die "Error: options --genome is undefined in Bayesian mode\n";
}
@@ -194,8 +192,8 @@
}elsif($method eq "Binomial"){
@result = &SNPvalue_Binomial($dna_info,$ref_base,$ploidy);
$homo{$info[0]}{$info[1]} = [$coverage,$ref_base,$dna_info,$result[0]];
- }elsif($method eq "Stringent"){
- @result = &SNPvalue_Stringent($seq,$ref_base);
+ }elsif($method eq "Frequency"){
+ @result = &SNPvalue_Frequency($seq,$ref_base);
$homo{$info[0]}{$info[1]} = [$coverage,$ref_base,$dna_info,$result[0],$result[1]];
}else{
die "Error: unknown --Method $method";
@@ -452,7 +450,7 @@
($title1,$title2)=("Bayesian_Genotype(DNA)","Bayesian_Posterior_Probability(DNA)");
}elsif($method eq "Binomial"){
($title1,$title2)=("P_value(DNA_Heterozygosis)","FDR(DNA_Heterozygosis)");
-}elsif($method eq "Stringent"){
+}elsif($method eq "Frequency"){
($title1,$title2)=("Non_Ref_BaseCount(DNA)","Non_Ref_BaseRatio(DNA)");
}else{
die "Error: unknown --Method $method\n";
@@ -467,7 +465,7 @@
}
$time= &getTime();
-print STDERR "$time\tRaw candidate RNA editing sites is generated.!\n";
+print STDERR "$time\tRaw candidate RNA editing sites is generated!\n";
##
@@ -715,7 +713,7 @@ sub SNPvalue_Binomial{
}
-sub SNPvalue_Stringent{
+sub SNPvalue_Frequency{
my ($s,$r)=@_;
my @S=split //,$s;
my $totalDep=@S;
@@ -5,7 +5,6 @@
use Getopt::Long;
my ($config,$phred,$qual_cutoff,$genome,$intron,$homopolymer,$paralogous_D);
my $HomoPrior ||= 0.99;
-my $HetePrior = 1-$HomoPrior;
my $rate ||= 2; #the rate of transition over transversion
my $method ||= "Bayesian";
my $ploidy ||= 2;
@@ -74,6 +73,7 @@
}elsif($method eq "Bayesian" && !$genome){
die "Error: options --genome is undefined in Bayesian mode\n";
}
+my $HetePrior = 1-$HomoPrior;
my %hash;
my @order;
open IN,"$config" or die $!;
@@ -57,7 +57,7 @@
system "mv $outdir/temp.$n.$filename $outBamfile";
}else{
# print STDERR "Log: There is no ambiguous mapping between BWA and BLAT!\n";
- system "ln -s $inBamfile $outBamfile";
+ system "ln -sf $inBamfile $outBamfile";
}
@@ -29,8 +29,8 @@
"Bayesian_P:s"=>\$Bayesian_Posterior_Probability,
"Binomial_P:s"=>\$P_value_DNA_Heterozygosis,
"Binomial_FDR:s"=>\$FDR_DNA_Heterozygosis,
- "Stringent_N:s"=>\$Non_Ref_BaseCount,
- "Stringent_R:s"=>\$Non_Ref_BaseRatio,
+ "Frequency_N:s"=>\$Non_Ref_BaseCount,
+ "Frequency_R:s"=>\$Non_Ref_BaseRatio,
"editPvalue:s"=>\$p,
"help"=>\$help,
);
@@ -42,18 +42,18 @@
--DNAdepth The homozygous DNA depth [optional(default:7)]
--RNAdepth The minimum cutoff for RNA reads coverage. [3]
--editLevel The minimum cutoff for editing level. [0.05]
- --method Method for detecting SNPs.'Bayesian' or 'Binomial' or 'Stringent'. [Bayesian]
+ --method Method for detecting SNPs.'Bayesian' or 'Binomial' or 'Frequency'. [Bayesian]
--Bayesian_P The minimun Bayesian Posterior Probability cutoff for corresponding genotype.(force --method Bayesian) [0.95]
--Binomial_P The maximun P value cutoff of Binomial test for DNA Heterozygosis. (force --method Binomial) [0.05]
--Binomial_FDR The maximun FDR cutoff of Binomial test for DNA Heterozygosis. (force --method Binomial) [0.05]
- --Stringent_N The maximun non-refference base count cutoff. (force --method Stringent) [0]
- --Stringent_R The maximun non-refference base ratio cutoff. (force --method Stringent) [0]
+ --Frequency_N The maximun non-refference base count cutoff. (force --method Frequency) [0]
+ --Frequency_R The maximun non-refference base ratio cutoff. (force --method Frequency) [0]
--readType The least number of RNA reads that were mapped to overlapping but not identical positions in the reference genome
that supporting an RNA-editing site in a given sample. [3]
--goodRead The least number of RNA reads which in the middle of their length supporting an editing site
(that is, from positions 26~75 of the 100-bp read), to avoid potential false positives resulting
from mis-mapping of reads at splice junctions. [1]
- --editPvalue Cutoff of FDR RNA editing. [0.05]
+ --editPvalue Cutoff of FDR RNA editing. [0.05]
--help Show the help information.
@@ -85,7 +85,7 @@
}elsif($method eq "Binomial"){
next unless $label1<$P_value_DNA_Heterozygosis;
next unless $label2<$FDR_DNA_Heterozygosis;
- }elsif($method eq "Stringent"){
+ }elsif($method eq "Frequency"){
next if $label1>$Non_Ref_BaseCount;
next if $label2>$Non_Ref_BaseRatio;
}else{
@@ -20,11 +20,6 @@
Column 8: the first subject block ame.Group4.17 overlapped with ame.Group1.64, + is the strand of ame.Group4.17, 326 is its own size, 209 is the overlapped size;
Column 9: the second subject block ame.Group16.8 overlapped with ame.Group1.64, - is the strand of ame.Group16.8, numbers has the same meaning as last column;
-Version:
-
- Author: jinlijun, jinlijun\@genomics.org.cn
- Version: 1.0, Date: 2011-05-18
-
Usage
perl findOverlap.pl <ref> [pre]
@@ -20,11 +20,6 @@
Column 8: the first subject block ame.Group4.17 overlapped with ame.Group1.64, + is the strand of ame.Group4.17, 326 is its own size, 209 is the overlapped size;
Column 9: the second subject block ame.Group16.8 overlapped with ame.Group1.64, + is the strand of ame.Group16.8, numbers has the same meaning as last column;
-Version:
-
- Author: jinlijun, jinlijun\@genomics.org.cn
- Version: 1.0, Date: 2011-05-18
-
Usage
perl findOverlap.pl <ref> [pre]
@@ -1,33 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-die "Usage: <gtf file>\n" unless @ARGV == 1;
-
-my %cdsPos;
-if ($ARGV[0] =~ /\.gz$/) {
- open IN, "gunzip -c $ARGV[0] | ";
-} else {
- open IN, $ARGV[0];
-}
-while (<IN>) {
- chomp;
- next if /^#/;
- my @info = split /\t/;
- if ($info[1] !~ "pseudogene" && $info[2] eq "CDS") {
- die "Warning: Unrecognized gff format, 'transcript_id' is missing." unless $info[-1] =~ /transcript_id\s+"(\S+)\s?";/;
- my $transcript_id = $1;
- my ($geneID)= $info[-1] =~ /gene_id\s+"(\S+)\s?";/;
- ($info[3], $info[4]) = sort {$a <=> $b}($info[3], $info[4]);
- push @{$cdsPos{$transcript_id}}, [$info[3], $info[4], $info[0], $info[6], $info[7], $geneID];
- }
-}
-close IN;
-
-foreach my $transcript (keys %cdsPos) {
- @{$cdsPos{$transcript}} = sort {$a->[0] <=> $b->[0]} @{$cdsPos{$transcript}};
- my ($transcript_bg, $transcript_ed, $chr, $strand, $geneID) = ($cdsPos{$transcript}->[0]->[0], $cdsPos{$transcript}->[-1]->[1], $cdsPos{$transcript}->[0]->[2], $cdsPos{$transcript}->[0]->[3],$cdsPos{$transcript}->[0]->[5]);
- print "$chr\tensembl\tmRNA\t$transcript_bg\t$transcript_ed\t.\t$strand\t.\tID=$transcript;GeneID=$geneID\n";
- foreach my $p (@{$cdsPos{$transcript}}) {
- my ($cds_bg, $cds_ed, $chr, $strand, $phase) = @$p;
- print "$chr\tensembl\tCDS\t$cds_bg\t$cds_ed\t.\t$strand\t$phase\tParent=$transcript;GeneID=$geneID\n";
- }
-}

0 comments on commit 226774c

Please sign in to comment.