Skip to content

Commit

Permalink
Debugging of module routines
Browse files Browse the repository at this point in the history
  • Loading branch information
Falk Hildebrand committed Jun 18, 2016
1 parent 1f4cc12 commit dacdf36
Show file tree
Hide file tree
Showing 11 changed files with 123 additions and 41 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Expand Up @@ -21,9 +21,10 @@
bld/
[Bb]in/
[Oo]bj/
[Mm]odCAG/

#moduleCall (why is it not in here anyways??)
ModuleCall/
[mM]oduleCall/

#rand.cpp
rand.*
Expand Down
Binary file modified ModuleCall/Rare.exe
Binary file not shown.
8 changes: 7 additions & 1 deletion Rare.sln
@@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.23107.0
VisualStudioVersion = 14.0.25123.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Rare", "Rare\Rare.vcxproj", "{22D81CAB-88EA-4794-A8CA-3C7919882358}"
EndProject
Expand All @@ -17,6 +17,8 @@ Global
HigherLvl|x64 = HigherLvl|x64
mat2ko|Win32 = mat2ko|Win32
mat2ko|x64 = mat2ko|x64
ModCAG|Win32 = ModCAG|Win32
ModCAG|x64 = ModCAG|x64
ModuleCall|Win32 = ModuleCall|Win32
ModuleCall|x64 = ModuleCall|x64
normalize|Win32 = normalize|Win32
Expand Down Expand Up @@ -51,6 +53,10 @@ Global
{22D81CAB-88EA-4794-A8CA-3C7919882358}.mat2ko|Win32.Build.0 = mat2ko|Win32
{22D81CAB-88EA-4794-A8CA-3C7919882358}.mat2ko|x64.ActiveCfg = mat2ko|x64
{22D81CAB-88EA-4794-A8CA-3C7919882358}.mat2ko|x64.Build.0 = mat2ko|x64
{22D81CAB-88EA-4794-A8CA-3C7919882358}.ModCAG|Win32.ActiveCfg = ModCAG|Win32
{22D81CAB-88EA-4794-A8CA-3C7919882358}.ModCAG|Win32.Build.0 = ModCAG|Win32
{22D81CAB-88EA-4794-A8CA-3C7919882358}.ModCAG|x64.ActiveCfg = ModCAG|x64
{22D81CAB-88EA-4794-A8CA-3C7919882358}.ModCAG|x64.Build.0 = ModCAG|x64
{22D81CAB-88EA-4794-A8CA-3C7919882358}.ModuleCall|Win32.ActiveCfg = ModuleCall|Win32
{22D81CAB-88EA-4794-A8CA-3C7919882358}.ModuleCall|Win32.Build.0 = ModuleCall|Win32
{22D81CAB-88EA-4794-A8CA-3C7919882358}.ModuleCall|x64.ActiveCfg = ModuleCall|x64
Expand Down
1 change: 1 addition & 0 deletions Rare/IO.cpp
Expand Up @@ -424,6 +424,7 @@ std::istream& safeGetline(std::istream& is, std::string& t)
std::istream::sentry se(is, true);
std::streambuf* sb = is.rdbuf();


for (;;) {
int c = sb->sbumpc();
switch (c) {
Expand Down
1 change: 1 addition & 0 deletions Rare/IO.h
Expand Up @@ -7,6 +7,7 @@
#include <iostream>
//#include <iterator>
#include <string>
#include <cstring>
#include <map>
#include <list>
#include <stdlib.h>
Expand Down
90 changes: 58 additions & 32 deletions Rare/Matrix.cpp
Expand Up @@ -70,7 +70,7 @@ Matrix::Matrix(const string inF):rowIDs(0),colIDs(0),sampleNameSep("")
}
*/

inline mat_fl median(std::vector<mat_fl>& vec, bool ignoreZeros)
inline mat_fl median(std::vector<mat_fl> vec, bool ignoreZeros)
{
if (vec.size() == 0) { return (mat_fl) 0; }
//std::nth_element(vec.begin(), vec.begin() + vec.size() / 2, vec.end());
Expand Down Expand Up @@ -140,13 +140,13 @@ void ModStep::getAllKOs(list<string>& ret) {
}
}
}
vector<bool> ModStep::abundParts(const vector<mat_fl>& v, const unordered_map<string, int>& IDX, vector<mat_fl>& r,
float hitComplRatio, int redund) {
void ModStep::abundParts(const vector<mat_fl>& v, const unordered_map<string, int>& IDX, vector<mat_fl>& abund,
vector<bool>& active, float hitComplRatio, int redund) {
//some params, should be fine tuned if possible
//float hitComplRatio(0.8f);

vector<bool> active(alternates.size(), false);
r.resize(alternates.size(), (mat_fl)0);
active.resize(alternates.size(), false);
abund.resize(alternates.size(), (mat_fl)0);
//actual deep routine to determine if KOs in this step satisfy presence conditions
for (size_t i = 0; i < alternates.size(); i++) {
size_t altS = alternates[i].size(); float hits(0);
Expand All @@ -171,16 +171,16 @@ vector<bool> ModStep::abundParts(const vector<mat_fl>& v, const unordered_map<st
//r[i] = (mat_fl)-1;//signal that removed due to redundancy
continue;
}
if (hits > 0) {
/* if (hits > 0) {
int x = 0;
}
}*/
if ( hits / (float)altS >= hitComplRatio) {
r[i] = median(tmpAB);
abund[i] = median(tmpAB);
active[i] = true;
}
}

return (active);
//return (active);
}

//*********************************************************
Expand All @@ -197,7 +197,7 @@ Module::Module(vector<string>& n) :name(""), description(""), steps(0){
}
}
mat_fl Module::pathAbundance(const vector<mat_fl>& v, const unordered_map<string, int>& IDX,
const int redund, const float PathwCompl, const float enzymCompl, string & savedNmsKO) {
const int redund, const float PathwCompl, const float enzymCompl, string & savedNmsKO, float& modScoreOut) {
//initial parameters
//float PathwCompl(0.6f); //corresponds to -c
//float enzymCompl(0.8f);
Expand All @@ -208,13 +208,14 @@ mat_fl Module::pathAbundance(const vector<mat_fl>& v, const unordered_map<strin
vector<mat_fl> preMed(steps.size(), (mat_fl)0), postMed(steps.size(), (mat_fl)0);
//auto t = IDX.find("xx");
for (size_t i = 0; i < steps.size(); i++) {
active[i] = steps[i].abundParts(v, IDX,abunds[i], enzymCompl, redund);
steps[i].abundParts(v, IDX,abunds[i], active[i], enzymCompl, redund);
//determine median overall value
preMed[i] = median(abunds[i]);
}
mat_fl pm = median(preMed);
mat_fl retval(0);


if (0) {
//VAR 1
//select one median value per step only, for the final pathway median abundance
Expand All @@ -235,13 +236,13 @@ mat_fl Module::pathAbundance(const vector<mat_fl>& v, const unordered_map<strin
vector<int> decIdx(steps.size(), 0);
//ini decIdx
for (size_t i = 0; i < steps.size(); i++) {
size_t dI = -1; double maxAB = 0;
while (dI < (active[i].size() - 1)) {
dI++;
size_t dI = 0; double maxAB = 0;
while (dI < int (active[i].size() )) {
if (abunds[i][dI] > maxAB && active[i][dI]) {
decIdx[i] = dI;
maxAB = abunds[i][decIdx[i]];
}
dI++;
}
}
bool saveKOnames(true);// save names of KOs used in extra file?
Expand All @@ -257,14 +258,16 @@ mat_fl Module::pathAbundance(const vector<mat_fl>& v, const unordered_map<strin
shldAct--;
} */
}
if ( act / shldAct >= PathwCompl) { //shldAct > 0 &&
if ( (act / shldAct) >= PathwCompl) { //shldAct > 0 &&
//this part parses out the KOs that are actually active
mat_fl curM = median(curP,true);
retval += curM;
vecPurge(abunds,curM);
modScoreOut = act / shldAct;
}
else {
savedNmsKO = "";
modScoreOut = 0.f;
}
break;
}
Expand All @@ -281,7 +284,7 @@ Modules::Modules(const string& inF):
ifstream is(inF.c_str());
string line(""); vector<string> buffer(0);
string ModToken = "M";
while (getline(is, line, '\n')) {
while (safeGetline(is, line)) {
//comment
if (line[0] == '#') { continue; }
//new module opens, create old module
Expand All @@ -291,7 +294,9 @@ Modules::Modules(const string& inF):
}
buffer.resize(0);
}
buffer.push_back(line);
if (line.size() > 3) {
buffer.push_back(line);
}
}
//create last module
mods.push_back(Module(buffer));
Expand Down Expand Up @@ -349,13 +354,13 @@ void Modules::calc_redund() {
}

vector<mat_fl> Modules::calcModAbund(const vector<mat_fl>& v, const unordered_map<string,
int>& IDX, vector<string> &retStr) {
int>& IDX, vector<string> &retStr, vector<float> &retScore) {
vector<mat_fl> ret(mods.size(),(mat_fl)0);
retStr.resize(mods.size(), "");
retStr.resize(mods.size(), ""); retScore.resize(mods.size(), 0.f);
mat_fl unass_cnt(0.f);//TOGO
//vector<bool> usedKOs(v.size()); //initial idea to save KOs used to estimated unassigned fraction - better to scale by seq depth external
for (size_t i = 0; i < mods.size(); i++) {
ret[i] = mods[i].pathAbundance(v,IDX, redund, PathwCompl, enzymCompl, retStr[i]);
ret[i] = mods[i].pathAbundance(v,IDX, redund, PathwCompl, enzymCompl, retStr[i], retScore[i]);
}
//add unkowns
ret.push_back(unass_cnt);
Expand Down Expand Up @@ -714,7 +719,7 @@ Matrix::Matrix(const string inF, const string xtra, bool highLvl)
}
}

void Matrix::estimateModuleAbund(char ** argv) {
void Matrix::estimateModuleAbund(char ** argv, int argc) {
string moduleFile = argv[4];
string outFile = argv[3];
string doModKOest = argv[4];
Expand All @@ -725,40 +730,61 @@ void Matrix::estimateModuleAbund(char ** argv) {
int redundancy = atoi(argv[5]);
float pathCompl = (float)atof(argv[6]);
float enzyCompl = (float)atof(argv[7]);
bool writeKOused = false;
//cout << argv[8] << endl;
if (argc > 8 && strcmp(argv[8],"1")==0 ) { writeKOused = true; }

modDB->setEnzymCompl(enzyCompl);
modDB->setPathwCompl(pathCompl);
modDB->setRedund(redundancy);
//matrix vector of module abudnance
//vector<vector<mat_fl>> modMat(maxCols);
Matrix modMat = Matrix(modDB->modNms(), colIDs);
vector<vector<string>> modStr(maxCols);
vector<vector<float>> modScore(maxCols);
for (int i = 0; i < maxCols; i++) {
// cerr << i << " ";
//TODO: add unknown counts
modMat.addTtlSmpl(
modDB->calcModAbund(mat[i], rowID_hash, modStr[i])
modDB->calcModAbund(mat[i], rowID_hash, modStr[i], modScore[i])
,i );
}
//write description
ofstream of; vector<string> moD = modDB->modDescr(); vector<string> moN = modDB->modNms();
ofstream of; ofstream of2; vector<string> moD = modDB->modDescr(); vector<string> moN = modDB->modNms();
string nos = outFile+".descr";
of.open(nos.c_str());
for (size_t i = 0; i < moD.size(); i++) {
of << moN[i] << "\t" << moD[i] << endl;
}
of.close();
//write KOs used
nos = outFile + ".KOused";
of.open(nos.c_str());
for (size_t i = 0; i < moD.size(); i++) {
of << moN[i] << "\t";
for (size_t k = 0; k < modStr.size(); k++) {
of << modStr[k][i] << "\t";
if (writeKOused) {
nos = outFile + ".KOused";
of.open(nos.c_str());
nos = outFile + ".MODscore";
of2.open(nos.c_str());

//write SmplIDs
for (size_t i = 0; i < colIDs.size(); i++) {
of << "\t" << colIDs[i]; of2 << "\t" << colIDs[i];
}
of << endl;
}
of.close();
of << endl; of2 << endl;

for (size_t i = 0; i < moD.size(); i++) {
bool hasKOUse = false;
for (size_t k = 0; k < modStr.size(); k++) {
if (modStr[k][i] != "") { hasKOUse=true; break; }
}
if (!hasKOUse) { continue; }
of << moN[i]; of2 << moN[i];
for (size_t k = 0; k < modStr.size(); k++) {
of << "\t" << modStr[k][i] ;
of2 << "\t" << modScore[k][i] ;
}
of << endl; of2 << endl;
}
of.close(); of2.close();
}

//write module matrix
cerr << "Write Matrix\n";
Expand Down
11 changes: 6 additions & 5 deletions Rare/Matrix.h
Expand Up @@ -14,7 +14,7 @@ typedef std::unordered_map<string, int> SmplOccur;
typedef std::unordered_map<string, int> ModOccur;


mat_fl median(std::vector<mat_fl>& vec,bool ignoreZeros=false);
mat_fl median(std::vector<mat_fl> vec,bool ignoreZeros=false);
void vecPurge(vector<vector<mat_fl>>& vec, mat_fl val);//removes val from each entry in vec<vec<>>
string join(const vector<string>& in, const string &X);

Expand Down Expand Up @@ -76,7 +76,7 @@ struct ModStep
ModStep(const string&);
void getAllKOs(list<string>&);
void setRedund(ModOccur& m);
vector<bool> abundParts(const vector<mat_fl>& v, const unordered_map<string, int>& IDX, vector<mat_fl>&,
void abundParts(const vector<mat_fl>& v, const unordered_map<string, int>& IDX, vector<mat_fl>&, vector<bool>&,
float hitComplRatio =0.8, int redund=0);


Expand All @@ -94,7 +94,7 @@ class Module
void getAllKOs(list<string>& r) { for (size_t i = 0; i < steps.size(); i++) { steps[i].getAllKOs(r); } }
void setReddundancy(ModOccur& m) { for (size_t i = 0; i < steps.size(); i++) { steps[i].setRedund(m); } }
mat_fl pathAbundance(const vector<mat_fl>& v, const unordered_map<string, int>& IDX,
const int redun, const float PathwCompl, const float enzymCompl, string & savedNmsKO);
const int redun, const float PathwCompl, const float enzymCompl, string & savedNmsKO,float & modScoreOut);
string name;
string description;
vector<ModStep> steps;
Expand All @@ -110,7 +110,8 @@ class Modules
void setPathwCompl(float x) { PathwCompl = x; }
void setEnzymCompl(float x) { enzymCompl = x; }

vector<mat_fl> calcModAbund(const vector<mat_fl>&, const unordered_map<string, int>&, vector<string>& );
vector<mat_fl> calcModAbund(const vector<mat_fl>&, const unordered_map<string, int>&,
vector<string>&, vector<float>& );
vector<string> & modNms() { return moduleNames; }
vector<string> & modDescr() { return moduleDescriptions; }

Expand Down Expand Up @@ -154,7 +155,7 @@ class Matrix
if (mat.size() >= 1) { return (int)mat[0].size(); }
else { return 0; }
}
void estimateModuleAbund(char ** args);
void estimateModuleAbund(char ** args, int argc);
//for the R module, all used for rarefactions only
void addRow(vector<mat_fl>);//idea is that a single row is added on to the matrix
void setSampleNames(vector<string> in) { colIDs = in; }
Expand Down
Binary file modified Rare/ModuleCall/Rare.tlog/CL.write.1.tlog
Binary file not shown.
Binary file modified Rare/ModuleCall/vc140.idb
Binary file not shown.
4 changes: 2 additions & 2 deletions Rare/Rare.cpp
Expand Up @@ -6,7 +6,7 @@
//#include "Matrix.h"
#include "ClStr2Mat.h"

const char* rar_ver="0.63 alpha";
const char* rar_ver="0.65 alpha";

DivEsts* calcDivRar(int i, Matrix* Mo, DivEsts* div, long rareDep, string outF, int repeats, int writeFiles){
cout << i << " ";
Expand Down Expand Up @@ -68,7 +68,7 @@ int main(int argc, char* argv[])
}
Matrix* Mo = new Matrix(inF, ""); //needs to be KO file
cerr << "Estimate mod AB\n";
Mo->estimateModuleAbund(argv);// arg4, outF); //arg4 needs to be module file, outF makes the (several) matrices
Mo->estimateModuleAbund(argv,argc);// arg4, outF); //arg4 needs to be module file, outF makes the (several) matrices
delete Mo;
std::exit(0);
}
Expand Down

0 comments on commit dacdf36

Please sign in to comment.