Skip to content

Commit

Permalink
mqf in use case
Browse files (browse the repository at this point in the history)
  • Loading branch information
shokrof committed Oct 1, 2023
1 parent ba62744 commit 9457666
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 11 deletions.
7 changes: 5 additions & 2 deletions src/algorithms.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -170,8 +170,6 @@ namespace kProcessor {
bool exists=false;
for (auto i : kmersToKeep ) {
if (input[i] != nullptr) {
if(input[i]->getKmer()== "GGGGGGGGGGCGGGGGGGGGG")
cout<<"Here"<<endl;
return kmerRow(input[i]->getHashedKmer(),1,0,nullptr);
}
}
Expand Down Expand Up @@ -346,6 +344,11 @@ namespace kProcessor {
columns[newColumnName]=col.second->getTwin();
columns[newColumnName]->resize(res->size());
}
string newColumnName="count"+to_string(i);
if(columns.find(newColumnName)==columns.end())
{
columns[newColumnName]=new vectorColumn<uint32_t>(res->size());
}
iterators[i] = input[i]->begin();
if (iterators[i] != input[i]->end()) {
// cout<<i<<" "<<(*iterators[i]).hashedKmer<<endl;
Expand Down
24 changes: 22 additions & 2 deletions src/kDataFrames/kDataFrameMQF.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -165,6 +165,8 @@ kDataFrameMQF::kDataFrameMQF() : kDataFrame() {
kDataFrameMQFIterator *it = new kDataFrameMQFIterator(mqf, kSize, KD);
it->endIterator();
endIterator=new kDataFrameIterator(it,(kDataFrame*)this);
countColumn=new vectorColumn<uint32_t>(1);
columns["count"]=countColumn;
}

kDataFrame *kDataFrameMQF::clone() {
Expand Down Expand Up @@ -216,7 +218,8 @@ kDataFrameMQF::kDataFrameMQF(uint64_t ksize, uint8_t q, hashingModes hash_mode)
kDataFrameMQFIterator *it = new kDataFrameMQFIterator(mqf, kSize, KD);
it->endIterator();
endIterator=new kDataFrameIterator(it,(kDataFrame*)this);

countColumn=new vectorColumn<uint32_t>(1);
columns["count"]=countColumn;
}

kDataFrameMQF::kDataFrameMQF(uint64_t ksize, uint8_t q, uint8_t fixedCounterSize, uint8_t tagSize,
Expand All @@ -238,6 +241,8 @@ kDataFrameMQF::kDataFrameMQF(uint64_t ksize, uint8_t q, uint8_t fixedCounterSize
kDataFrameMQFIterator *it = new kDataFrameMQFIterator(mqf, kSize, KD);
it->endIterator();
endIterator=new kDataFrameIterator(it,(kDataFrame*)this);
countColumn=new vectorColumn<uint32_t>(1);
columns["count"]=countColumn;
}

kDataFrameMQF::kDataFrameMQF(uint64_t ksize, hashingModes hash_mode) :
Expand All @@ -250,6 +255,8 @@ kDataFrameMQF::kDataFrameMQF(uint64_t ksize, hashingModes hash_mode) :
mqf = NULL;
endIterator=NULL;
reserve(1000000);
countColumn=new vectorColumn<uint32_t>(1);
columns["count"]=countColumn;

}

Expand All @@ -263,6 +270,8 @@ kDataFrameMQF::kDataFrameMQF(uint64_t ksize) :
mqf = NULL;
endIterator=NULL;
reserve(1000000);
countColumn=new vectorColumn<uint32_t>(1);
columns["count"]=countColumn;
}

kDataFrameMQF::kDataFrameMQF(QF *mqf, uint64_t ksize, double falsePositiveRate) :
Expand All @@ -281,6 +290,8 @@ kDataFrameMQF::kDataFrameMQF(QF *mqf, uint64_t ksize, double falsePositiveRate)
kDataFrameMQFIterator *it = new kDataFrameMQFIterator(mqf, kSize, KD);
it->endIterator();
endIterator=new kDataFrameIterator(it,(kDataFrame*)this);
countColumn=new vectorColumn<uint32_t>(1);
columns["count"]=countColumn;
}
kDataFrameMQF::kDataFrameMQF(uint64_t ksize, vector<uint64_t> countHistogram, uint8_t tagSize, double falsePositiveRate)
:
Expand All @@ -293,6 +304,9 @@ kDataFrameMQF::kDataFrameMQF(uint64_t ksize, vector<uint64_t> countHistogram, ui
kDataFrameMQF::estimateParameters(countHistogram, 2 * ksize, tagSize,
&nSlots, &fixedCounterSize, &memory);
qf_init(mqf, nSlots, 2 * ksize, tagSize, fixedCounterSize, 32, true, "", 2038074761);

countColumn=new vectorColumn<uint32_t>(1);
columns["count"]=countColumn;
}


Expand Down Expand Up @@ -326,6 +340,9 @@ kDataFrame(frame->ksize()){
c.second->setValueFromColumn(frame->columns[c.first],order,k.getOrder());
}
}

countColumn=new vectorColumn<uint32_t>(1);
columns["count"]=countColumn;
}

kDataFrameMQF::kDataFrameMQF(uint64_t ksize, vector<uint64_t> countHistogram)
Expand All @@ -342,6 +359,9 @@ kDataFrameMQF::kDataFrameMQF(uint64_t ksize,uint64_t nKmers) :
mqf = NULL;
endIterator=NULL;
reserve(nKmers);

countColumn=new vectorColumn<uint32_t>(1);
columns["count"]=countColumn;
}

kDataFrame *kDataFrameMQF::getTwin() {
Expand Down Expand Up @@ -729,7 +749,7 @@ namespace kProcessor {


kmqf->setkSize(_kmer_length);
kmqf->reserve(_total_kmers);
kmqf->reserve(_total_kmers*2);
while (kmer_data_base.ReadNextKmer(kmer_object, counter)) {
kmer_object.to_string(str);
kmqf->_insert(str, counter);
Expand Down
10 changes: 6 additions & 4 deletions usecases/kDifferentialExpression/Snakefile
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,7 +9,9 @@ def getFastq(name):
return list(filter(lambda x:x.sample_name==name,pep.samples))[0].file


kFrameType="map"
kFrameType="mqf"
kFrameTypeC="MQF"

samplesKDataframe=list(filter(lambda x:x.library=="sample",pep.samples))
samplesKDataframe=["kDataframe."+x.sample_name+"."+kFrameType for x in samplesKDataframe]
controlKDataframe=list(filter(lambda x:x.library=="control",pep.samples))
Expand All @@ -30,8 +32,8 @@ rule kDifferntialExpression:
"""
echo {input.sample} | sed -e 's/.{kFrameType}//g'|tr -s ' ' $'\n' > sample
echo {input.control} | sed -e 's/.{kFrameType}//g'|tr -s ' ' $'\n' > control
./kDifferntialExpression -g {input.transpictome} -s sample -c control -o {output} &> {log}
"""
./kDifferntialExpression -g {input.transpictome} -s sample -c control -o {output}
"""

rule kmc:
input: lambda wildcards: getFastq(f"{wildcards.experiment}"),
Expand All @@ -46,6 +48,6 @@ rule kmc:
"""
echo {input} |tr -s ' ' $'\n' > {wildcards.experiment}.lst
/usr/bin/time -v -o {output.time} kmc -ci1 -t{threads} -k{kSize} -m2 @{wildcards.experiment}.lst {wildcards.experiment} ./ &> {log}
../../apps/loadFromKMC {wildcards.experiment} MAP kDataframe.{wildcards.experiment}
../../apps/loadFromKMC {wildcards.experiment} {kFrameTypeC} kDataframe.{wildcards.experiment}
"""

7 changes: 4 additions & 3 deletions usecases/kDifferentialExpression/main.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -73,7 +73,6 @@ void differntialExpression(string genes_file,
return (any)(any_cast<uint64_t>(v) + (uint64_t)count0);
});
uint64_t totalCount= any_cast<uint64_t>(totalCountAny);

kProcessor::transformInPlace(currentFrame, [=](kDataFrameIterator& it) -> void {
uint32_t count0=it.getCount();
double normalized = (double)count0*(100000000.0) / totalCount;
Expand All @@ -90,10 +89,10 @@ void differntialExpression(string genes_file,


int chunkSize = 1000;
kDataFrame * genesFrame = kDataFrameFactory::createBtree(kSize);
kDataFrame * genesFrame = kDataFrameFactory::createMAP(kSize,kFrames[0]->getkmerDecoder()->hash_mode);
kmerDecoder * KMERS = new Kmers(genes_file, chunkSize, kSize,genesFrame->KD->hash_mode);
kProcessor::index(KMERS, genes_file+".names", genesFrame);

cout<<"Hash "<<genesFrame->getkmerDecoder()->hash_mode<<endl;
// kProcessor::createColorColumn(genesFrame);
kFrames.push_back(genesFrame);
requiredIndices={kFrames.size()-1};
Expand All @@ -110,6 +109,7 @@ void differntialExpression(string genes_file,
for(unsigned i=0; i < allDatasets ;i++ ){
uint32_t count;
r.getColumnValue<vectorColumn<uint32_t> >("count"+ to_string(i),count);
// cout<<count<<"\n";
if(count>0)
return true;
}
Expand Down Expand Up @@ -174,6 +174,7 @@ void differntialExpression(string genes_file,
sort(k.second.begin(), k.second.end());
float median = k.second[k.second.size() / 2];
output<<k.first<<"\t"<<median<<"\n";
cout<<k.first<<"\t"<<median<<"\n";
}

}
Expand Down

0 comments on commit 9457666

Please sign in to comment.