Skip to content
This repository has been archived by the owner on Jan 6, 2021. It is now read-only.

Commit

Permalink
add function for processing tumor only data
Browse files Browse the repository at this point in the history
  • Loading branch information
Beifang committed Jun 25, 2018
1 parent 47893ad commit 38bc654
Show file tree
Hide file tree
Showing 16 changed files with 329 additions and 47 deletions.
2 changes: 1 addition & 1 deletion bampairs.cpp
Expand Up @@ -29,7 +29,7 @@
#include <bitset>
#include <omp.h>

#include "Bampairs.h"
#include "bampairs.h"

extern Param paramd;

Expand Down
2 changes: 1 addition & 1 deletion cmds.cpp
Expand Up @@ -33,7 +33,7 @@
#include "cmds.h"

#ifndef VERSION
#define VERSION "v0.2"
#define VERSION "v0.3"
#endif

int usage(void) {
Expand Down
30 changes: 23 additions & 7 deletions distribution.cpp
Expand Up @@ -61,6 +61,7 @@ std::ifstream finH;
std::ifstream finM;
std::ifstream finB;
std::ofstream foutD;
std::ofstream foutO;

std::string one_region;

Expand All @@ -73,14 +74,15 @@ void DisUsage(void) {

<<" -e <string> bed file, optional\n"
<<" -f <double> FDR threshold for somatic sites detection, default="<<paramd.fdrThreshold<<"\n"
<<" -i <double> minimal comentropy threshold for somatic sites detection (just for tumor only data), default="<<paramd.comentropyThreshold<<"\n"
<<" -c <int> coverage threshold for msi analysis, WXS: 20; WGS: 15, default="<<paramd.covCutoff<<"\n"
<<" -r <string> choose one region, format: 1:10000000-20000000\n"
<<" -l <int> mininal homopolymer size, default="<<paramd.MininalHomoSize<<"\n"
<<" -p <int> mininal homopolymer size for distribution analysis, default="<<paramd.MininalHomoForDis<<"\n"
<<" -l <int> minimal homopolymer size, default="<<paramd.MininalHomoSize<<"\n"
<<" -p <int> minimal homopolymer size for distribution analysis, default="<<paramd.MininalHomoForDis<<"\n"
<<" -m <int> maximal homopolymer size for distribution analysis, default="<<paramd.MaxHomoSize<<"\n"

<<" -q <int> mininal microsates size, default="<<paramd.MinMicrosate<<"\n"
<<" -s <int> mininal microsates size for distribution analysis, default="<<paramd.MinMicrosateForDis<<"\n"
<<" -q <int> minimal microsates size, default="<<paramd.MinMicrosate<<"\n"
<<" -s <int> minimal microsates size for distribution analysis, default="<<paramd.MinMicrosateForDis<<"\n"
<<" -w <int> maximal microstaes size for distribution analysis, default="<<paramd.MaxMicrosateForDis<<"\n"

<<" -u <int> span size around window for extracting reads, default="<<paramd.DisSpan<<"\n"
Expand All @@ -105,6 +107,7 @@ int dGetOptions(int rgc, char *rgv[]) {
case 'e': bedFile = rgv[++i]; break;
case 'r': one_region = rgv[++i]; break;
case 'f': paramd.fdrThreshold = atof(rgv[++i]); break;
case 'i': paramd.comentropyThreshold = atof(rgv[++i]); break;
case 'c': paramd.covCutoff = atoi(rgv[++i]); break;
case 'l': paramd.MininalHomoSize = atoi(rgv[++i]); break;
case 'p': paramd.MininalHomoForDis = atoi(rgv[++i]); break;
Expand Down Expand Up @@ -152,8 +155,14 @@ int HomoAndMicrosateDisMsi(int argc, char *argv[]) {
}

// load bam files
polyscan.LoadBams( normalBam, tumorBam );

//polyscan.LoadBams( normalBam, tumorBam );
if (!normalBam.empty() && !tumorBam.empty()) {
polyscan.LoadBams( normalBam, tumorBam );
}
// just for tumor only data
if (normalBam.empty() && !tumorBam.empty()) {
polyscan.LoadBam(tumorBam);
}
// check homo/microsate file
finH.open(homoFile.c_str());
if (!finH) {
Expand All @@ -170,7 +179,14 @@ int HomoAndMicrosateDisMsi(int argc, char *argv[]) {
std::cout << "\nTotal loading homopolymer and microsatellites: " << polyscan.totalHomosites << " \n\n";

// change code to one sample
polyscan.GetHomoDistribution(sample, disFile);
//polyscan.GetHomoDistribution(sample, disFile);
// control distribution for tumor only input
if (!normalBam.empty() && !tumorBam.empty()) {
polyscan.GetHomoDistribution(sample, disFile);
}
if (normalBam.empty() && !tumorBam.empty()) {
polyscan.GetHomoTumorDistribution(sample, disFile);
}

std::cout << "\nTotal time consumed: " << Cal_AllTime() << " secs\n\n";

Expand Down
149 changes: 126 additions & 23 deletions homo.cpp
Expand Up @@ -62,6 +62,7 @@ HomoSite::HomoSite()
, normalWithSufCov( false )
, dif( 0.0 )
, pValue( 0.0 )
, comentropy( 0.0 )
, somatic( false )
, withGenotype( false )
{
Expand Down Expand Up @@ -131,6 +132,17 @@ void HomoSite::InitialDis() {
}
}

//initial tumor only distribution
void HomoSite::InitialTumorDis() {
tumorDis = new unsigned short *[polyscan.totalBamTumorsNum];
for (unsigned int j=0; j<polyscan.totalBamTumorsNum; j++) {
tumorDis[j] = new unsigned short [paramd.s_dispots];
for (unsigned int k=0; k<paramd.s_dispots; k++) {
tumorDis[j][k] = 0;
}
}
}

// Out distribution
void HomoSite::OutputDis() {
for (unsigned int j=0; j<polyscan.totalBamPairsNum; j++) {
Expand All @@ -143,35 +155,54 @@ void HomoSite::OutputDis() {
}
}

//Out tumor only distribution
void HomoSite::OutputTumorDis() {
for (unsigned int j=0; j<polyscan.totalBamTumorsNum; j++) {
for (unsigned int k=0; k<paramd.s_dispots; k++) {
std::cout<<tumorDis[j][k];
}
}
}

// Pourout distribution
void HomoSite::PouroutDis(Sample &sample) {
if (normalCov >= paramd.covCutoff && tumorCov >= paramd.covCutoff) {
sample.outputDistribution << chr << " "
<< location << " "
<< fbases << " "
<< length << "["
<< bases << "] "
<< ebases <<"\n";

for (unsigned int j=0; j<polyscan.totalBamPairsNum; j++) {
sample.outputDistribution << chr << "\t"
<< location << "\t"
<< fbases << "\t"
<< length << "\t"
<< bases << "\t"
<< ebases <<"\t"
<< "N";
for (unsigned int k=0; k<paramd.s_dispots; k++) {
sample.outputDistribution << "\t" << normalDis[j][k];
}
sample.outputDistribution << "\n";
sample.outputDistribution << chr << "\t"
<< location << "\t"
<< fbases << "\t"
<< length << "\t"
<< bases << "\t"
<< ebases <<"\t"
<< "T";
for (unsigned int k=0; k<paramd.s_dispots; k++) {
sample.outputDistribution << "\t" << tumorDis[j][k];
}
sample.outputDistribution << "N: ";
for (unsigned int k=0; k<paramd.s_dispots; k++) {
sample.outputDistribution << normalDis[j][k] << " ";
}
sample.outputDistribution << "\nT: ";
for (unsigned int k=0; k<paramd.s_dispots; k++) {
sample.outputDistribution << tumorDis[j][k] << " ";
}

}
sample.outputDistribution << "\n";
}

//Pour tumor only distribution
void HomoSite::PourTumoroutDis(Sample &sample) {
sample.outputDistribution << chr << " "
<< location << " "
<< fbases << " "
<< length << "["
<< bases << "] "
<< ebases <<"\n";

for (unsigned int j=0; j<polyscan.totalBamTumorsNum; j++) {
sample.outputDistribution << "T: ";
for (unsigned int k=0; k<paramd.s_dispots; k++) {
sample.outputDistribution << tumorDis[j][k] << " ";
}
}
sample.outputDistribution << "\n";
}
}

// initial bools
Expand Down Expand Up @@ -266,6 +297,51 @@ void HomoSite::DisGenotyping(Sample &sample) {

}

// tumor somatic analyis
void HomoSite::DisTumorSomatic(Sample &sample) {
sample.numberOftotalSites++;
bool reportSomatic;
reportSomatic = false;
tumorCov = 0;
for (unsigned int j=0; j<polyscan.totalBamTumorsNum; j++) {
for (unsigned int k=0; k<paramd.s_dispots; k++) {
tumorCov += tumorDis[j][k];
}
}

if (tumorCov >= paramd.covCutoff){
withSufCov = true;
comentropy = Comentropy( tumorDis[0], paramd.s_dispots);
} else {
withSufCov = false;
comentropy = 0;
}
if (comentropy >= paramd.comentropyThreshold) {
reportSomatic = true;
sample.numberOfMsiDataPoints ++;
}
if (withSufCov) sample.numberOfDataPoints++;
if (reportSomatic) {
sample.outputSomatic << chr << "\t"
<< location << "\t"
<< fbases << "\t"
<< length << "\t"
<< bases << "\t"
<< ebases;
sample.outputSomatic << "\t" << std::fixed << comentropy;
sample.outputSomatic << std::endl;
SomaticSite onessite;
onessite.chr = chr;
onessite.location = location;
onessite.length = length;
onessite.fbases = fbases;
onessite.ebases = ebases;
onessite.bases = bases;

sample.totalSomaticSites.push_back( onessite );
}
}

// distance
double HomoSite::DistanceBetweenTwo( unsigned short * FirstOriginal, unsigned short * SecondOriginal ) {
double SmallDouble = 0.0000000001;
Expand Down Expand Up @@ -336,6 +412,26 @@ double HomoSite::DistanceBetweenTwo( unsigned short * FirstOriginal, unsigned sh
return (AreaMax - AreaMin)/AreaMax;
}

//comentropy
double HomoSite::Comentropy( unsigned short * tumorDis, unsigned int dispots ) {
double sum = 0;
double comentropy = 0.0;
for (int i = 0; i < dispots; i++){
if (tumorDis[i] <3){
tumorDis[i] = 0;
}
sum += tumorDis[i];
}
if ( sum != 0 ) {
for( int j = 0; j < dispots; j++){
if( tumorDis[j] != 0 ){
comentropy -= (tumorDis[j]/sum)*log(tumorDis[j]/sum);
}
}
}
return comentropy;
}

void HomoSite::ComputeGenotype( unsigned short * NormalReadCount ) {
unsigned int Offset, CoverageCutoff, first, second, Sum;
Offset = 1; CoverageCutoff = paramd.covCutoff;
Expand Down Expand Up @@ -389,3 +485,10 @@ void HomoSite::ReleaseMemory() {
delete [] tumorDis;
}

//release tumor memory
void HomoSite::ReleaseTumorMemory() {
for (unsigned int k=0; k<polyscan.totalBamTumorsNum; k++) {
delete [] tumorDis[k];
}
delete [] tumorDis;
}
9 changes: 8 additions & 1 deletion homo.h
Expand Up @@ -83,21 +83,28 @@ class HomoSite {
bool withGenotype;
double dif;
double pValue;
double comentropy;
int genotype[2];
////////////////////////

inline void InitType(){ genotype[0] = genotype[1] = -2; };

void TransferString();
void InitialDis();
void InitialTumorDis();
void OutputDis();
void OutputTumorDis();
void ReleaseMemory();
void ReleaseTumorMemory();
//void PouroutDis(std::ofstream &fout);
void PouroutDis(Sample &sample);
void PourTumoroutDis(Sample &sample);
void DisGenotyping(Sample &sample);
void DisTumorSomatic(Sample &sample);
//// genotyping ///
void BoolsInitial();
double DistanceBetweenTwo(unsigned short * FirstOriginal, unsigned short * SecondOriginal);
double DistanceBetweenTwo( unsigned short * FirstOriginal, unsigned short * SecondOriginal );
double Comentropy( unsigned short * tumorDis, unsigned int dispots );
void ComputeGenotype( unsigned short * NormalReadCount );

protected:
Expand Down
1 change: 1 addition & 0 deletions param.cpp
Expand Up @@ -102,6 +102,7 @@ Param::Param()
, windowSize(500000) // window size (default 50k)
, covCutoff( 20 )
, fdrThreshold( 0.05 )
, comentropyThreshold( 0.5 ) // for tumor only
{
inital_homo_phabet();
initalphabet();
Expand Down
1 change: 1 addition & 0 deletions param.h
Expand Up @@ -88,6 +88,7 @@ class Param {
// genotyping
unsigned int covCutoff;
double fdrThreshold;
double comentropyThreshold;

};

Expand Down

0 comments on commit 38bc654

Please sign in to comment.