Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

commit

  • Loading branch information...
commit 6a6eb836093a944e0746835fb4ceb56951d2b215 1 parent d204cc2
@albertwcheng authored
View
49 BatchKmerFinder.sh
@@ -48,13 +48,19 @@ for((i=0;i<${#concs[@]};i++)); do
done
+rmrie.sh $outDir/tmp/union_kmers.txt
+
jfiles=""
for((i=0;i<${#concs[@]};i++)); do
conc=${concs[$i]}
jfiles="$jfiles $outDir/$conc/kmers.txt"
-
+ cuta.py -f.kmer $outDir/$conc/kmers.txt >> $outDir/tmp/union_kmers.txt
done
+uniqa.py $outDir/tmp/union_kmers.txt > $outDir/tmp/uniq_union_kmers.txt
+jfiles="$outDir/tmp/uniq_union_kmers.txt $jfiles"
+
+
#kmer enrichment control_count experim_count
echo $jfiles
@@ -87,4 +93,45 @@ paste $outDir/tmp/concheaderColumnar.txt $outDir/tmp/t1.txt.t.f2 > $outDir/tmp/
matrixTranspose.py $outDir/tmp/t1.txt.t.swapped > $outDir/kmers.merged.$columnSelector.txt
rm $outDir/kmers.merged.txt
+
+jfiles=""
+for((i=0;i<${#concs[@]};i++)); do
+ conc=${concs[$i]}
+ jfiles="$jfiles $outDir/$conc/kmers_unsub.txt"
+
+done
+
+#kmer enrichment control_count experim_count
+
+echo $jfiles
+
+echo "kmer" > $outDir/tmp/concheaderColumnar.txt
+echo ${concs[@]} | tr " " "\n" >> $outDir/tmp/concheaderColumnar.txt
+
+rmrie.sh $outDir/kmers_unsub.merged.txt
+eval "multijoinu.sh -fNA $outDir/kmers_unsub.merged.txt $jfiles" #joint
+
+columnSelector="enrichment"
+cuta.py -f.kmer,@$columnSelector $outDir/kmers_unsub.merged.txt > $outDir/tmp/t1.txt
+matrixTranspose.py $outDir/tmp/t1.txt > $outDir/tmp/t1.txt.t
+cuta.py -f2-_1 $outDir/tmp/t1.txt.t > $outDir/tmp/t1.txt.t.f2
+paste $outDir/tmp/concheaderColumnar.txt $outDir/tmp/t1.txt.t.f2 > $outDir/tmp/t1.txt.t.swapped
+matrixTranspose.py $outDir/tmp/t1.txt.t.swapped > $outDir/kmers_unsub.merged.$columnSelector.txt
+
+columnSelector="control_count"
+cuta.py -f.kmer,@$columnSelector $outDir/kmers_unsub.merged.txt > $outDir/tmp/t1.txt
+matrixTranspose.py $outDir/tmp/t1.txt > $outDir/tmp/t1.txt.t
+cuta.py -f2-_1 $outDir/tmp/t1.txt.t > $outDir/tmp/t1.txt.t.f2
+paste $outDir/tmp/concheaderColumnar.txt $outDir/tmp/t1.txt.t.f2 > $outDir/tmp/t1.txt.t.swapped
+matrixTranspose.py $outDir/tmp/t1.txt.t.swapped > $outDir/kmers_unsub.merged.$columnSelector.txt
+
+columnSelector="experim_count"
+cuta.py -f.kmer,@$columnSelector $outDir/kmers_unsub.merged.txt > $outDir/tmp/t1.txt
+matrixTranspose.py $outDir/tmp/t1.txt > $outDir/tmp/t1.txt.t
+cuta.py -f2-_1 $outDir/tmp/t1.txt.t > $outDir/tmp/t1.txt.t.f2
+paste $outDir/tmp/concheaderColumnar.txt $outDir/tmp/t1.txt.t.f2 > $outDir/tmp/t1.txt.t.swapped
+matrixTranspose.py $outDir/tmp/t1.txt.t.swapped > $outDir/kmers_unsub.merged.$columnSelector.txt
+
+rm $outDir/kmers_unsub.merged.txt
+
rm -R $outDir/tmp
View
4 DKmerFinder.sh
@@ -67,8 +67,8 @@ idx=0
for i in ${outDir}/jobscripts/*.sh; do
chmod 777 $i;
i=`abspath.py $i`
- echo "submit job command $jobsubcommand $i | extractNumbers.py --numOfNumsPerLine 1 | head -n 1"
- jobnum[$idx]=`eval "$jobsubcommand $i | extractNumbers.py --numOfNumsPerLine 1 | head -n 1"`
+ echo "submit job command $jobsubcommand -o ${i}.stdout -e ${i}.stderr $i | extractNumbers.py --numOfNumsPerLine 1 | head -n 1"
+ jobnum[$idx]=`eval "$jobsubcommand -o ${i}.stdout -e ${i}.stderr $i | extractNumbers.py --numOfNumsPerLine 1 | head -n 1"`
idx=`expr $idx + 1`
done
View
2  DKmerFinderCounter.cpp
@@ -192,6 +192,8 @@ class DKmerCounter:public SimpleFastqReader,public FileIPMLoop
}
ofstream fil((outDir+DS+KMERUPDATEPATH+DS+name+".txt").c_str());
+
+
for(map<string,SmartPtr<kmerRecord> >::iterator i=kmers.begin();i!=kmers.end();i++){
i->second->printUpdateAndUpdateCountHistory(fil);
}
View
34 DKmerFinderMaster.cpp
@@ -43,6 +43,7 @@ using namespace std;
#endif
#define int64 int64_t
+#define uint64 uint64_t
#define VERSION "D/1.0"
@@ -63,16 +64,16 @@ class kmerRecord
}
- inline float enrichment() const{
+ inline double enrichment(double normalizationFactor) const{
if(bgInstances==0){
return -1; //unknown
}
- return float(fgInstances)/bgInstances;
+ return double(fgInstances)/bgInstances*normalizationFactor;
}
//do we need sorting?
inline bool operator < (const kmerRecord& right) const{
- return this->enrichment()<right.enrichment();
+ return this->enrichment(1.0)<right.enrichment(1.0);
}
@@ -92,8 +93,9 @@ class DKmerFinder:public FileIPMLoop
uint64 tick;
- int64 totalKmerInstances;
- int64 totalNumSeqs;
+ uint64 fgTotalKmerPositions;
+ uint64 bgTotalKmerPositions;
+ //uint64 totalNumSeqs;
string outDir;
@@ -112,7 +114,7 @@ class DKmerFinder:public FileIPMLoop
DKmerFinder(string _outDir,string _name,int _delayTimeInSeconds,int _k,string _fgSlaves,string _bgSlaves,int _topN):
FileIPMLoop(_outDir+IPMSubFolder,_name,_delayTimeInSeconds),outDir(_outDir),
- k(_k),cycle(1),totalKmerInstances(0),totalNumSeqs(0),respondents(0),topN(_topN),hirespondents(0),tick(0)
+ k(_k),cycle(1),fgTotalKmerPositions(0),bgTotalKmerPositions(0),/*totalNumSeqs(0),*/respondents(0),topN(_topN),hirespondents(0),tick(0)
{
StringUtil::split(_fgSlaves,",",fgSlaves);
StringUtil::split(_bgSlaves,",",bgSlaves);
@@ -144,8 +146,10 @@ class DKmerFinder:public FileIPMLoop
if(mode==UPDATE_FG){
thisKmer->fgInstances+=diff;
+ fgTotalKmerPositions+=diff;
}else{
- thisKmer->bgInstances+=diff;
+ thisKmer->bgInstances+=diff;
+ bgTotalKmerPositions+=diff;
}
}
@@ -259,6 +263,11 @@ class DKmerFinder:public FileIPMLoop
if(respondents==totalSlaves){
cerr<<"all slaves updated kmer counts at cycle "<<cycle<<endl;
cerr<<"now find top enriched kmer at cycle "<<cycle<<endl;
+
+ double normalizationFactor=double(this->bgTotalKmerPositions)/this->fgTotalKmerPositions;
+
+ cerr<<"fgTotalKmerPositions="<<this->fgTotalKmerPositions<<" bgTotalKmerPositions="<<this->bgTotalKmerPositions<<" normalization factor (*bt/ft)="<<normalizationFactor<<endl;
+
//cerr<<"a"<<endl;
kmerRecord* topKmer=sortAndFindTopKmer();
if (!topKmer)
@@ -281,17 +290,17 @@ class DKmerFinder:public FileIPMLoop
ofstream foutKmerUnsub((outDir+DS+"kmers_unsub.txt").c_str());
//top kmer first (because it has been poped from the list
foutKmerUnsub<<"kmer\tenrichment\tcontrol_count\texperim_count"<<endl;
- foutKmerUnsub<<kmerSeq<<"\t"<<topKmer->enrichment()<<"\t"<<topKmer->bgInstances<<"\t"<<topKmer->fgInstances<<endl;
+ foutKmerUnsub<<kmerSeq<<"\t"<<topKmer->enrichment(normalizationFactor)<<"\t"<<topKmer->bgInstances<<"\t"<<topKmer->fgInstances<<endl;
for(list< SmartPtr<kmerRecord> >::reverse_iterator ri=kmers.rbegin();ri!=kmers.rend();ri++){
kmerRecord* curKmer=(*ri);
- foutKmerUnsub<<curKmer->kmerSeq<<"\t"<<curKmer->enrichment()<<"\t"<<curKmer->bgInstances<<"\t"<<curKmer->fgInstances<<endl;
+ foutKmerUnsub<<curKmer->kmerSeq<<"\t"<<curKmer->enrichment(normalizationFactor)<<"\t"<<curKmer->bgInstances<<"\t"<<curKmer->fgInstances<<endl;
}
foutKmerUnsub.close();
}
- (*fout)<<kmerSeq<<"\t"<<topKmer->enrichment()<<"\t"<<topKmer->bgInstances<<"\t"<<topKmer->fgInstances<<endl;
+ (*fout)<<kmerSeq<<"\t"<<topKmer->enrichment(normalizationFactor)<<"\t"<<topKmer->bgInstances<<"\t"<<topKmer->fgInstances<<endl;
delete topKmer; //free it from memory
@@ -305,6 +314,11 @@ class DKmerFinder:public FileIPMLoop
//advance
cerr<<"top Kmer at cycle "<<cycle<<" is "<<kmerSeq<<endl;
+
+ //now subtract the total kmer counts from fg and bg
+ this->fgTotalKmerPositions-=topKmer->fgInstances;
+ this->bgTotalKmerPositions-=topKmer->bgInstances;
+
updateCycleAndResetRespondents();
askSlavesToRemoveAndUpdate(kmerSeq);
}
Please sign in to comment.
Something went wrong with that request. Please try again.