Skip to content

Commit

Permalink
Updates related to keeping the original chromosome names throughout t…
Browse files Browse the repository at this point in the history
…he calculations.
  • Loading branch information
abyzov committed Feb 9, 2015
1 parent 0d77ae3 commit cf13804
Show file tree
Hide file tree
Showing 8 changed files with 250 additions and 192 deletions.
119 changes: 63 additions & 56 deletions Genome.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
#include "Genome.hh"

Genome Genome::genomes[NGS] = {Genome("NCBI36"),Genome("GRCh37")};
const string Genome::CHRX = Genome::makeCanonical("X");
const string Genome::CHRY = Genome::makeCanonical("Y");
const string Genome::CHRALL = "ALL";
const string Genome::CHRSEX = "SEX";

Genome::Genome(string name) : n_chr_(0)
{
Expand All @@ -11,58 +15,58 @@ Genome::Genome(string name) : n_chr_(0)
gname_ = "NCBI36";
other_gname_ = "hg18";
n_chr_ = 24;
cnames_[0] = "chr1"; clens_[0] = 247249719;
cnames_[1] = "chr2"; clens_[1] = 242951149;
cnames_[2] = "chr3"; clens_[2] = 199501827;
cnames_[3] = "chr4"; clens_[3] = 191273063;
cnames_[4] = "chr5"; clens_[4] = 180857866;
cnames_[5] = "chr6"; clens_[5] = 170899992;
cnames_[6] = "chr7"; clens_[6] = 158821424;
cnames_[7] = "chr8"; clens_[7] = 146274826;
cnames_[8] = "chr9"; clens_[8] = 140273252;
cnames_[9] = "chr10"; clens_[9] = 135374737;
cnames_[10] = "chr11"; clens_[10] = 134452384;
cnames_[11] = "chr12"; clens_[11] = 132349534;
cnames_[12] = "chr13"; clens_[12] = 114142980;
cnames_[13] = "chr14"; clens_[13] = 106368585;
cnames_[14] = "chr15"; clens_[14] = 100338915;
cnames_[15] = "chr16"; clens_[15] = 88827254;
cnames_[16] = "chr17"; clens_[16] = 78774742;
cnames_[17] = "chr18"; clens_[17] = 76117153;
cnames_[18] = "chr19"; clens_[18] = 63811651;
cnames_[19] = "chr20"; clens_[19] = 62435964;
cnames_[20] = "chr21"; clens_[20] = 46944323;
cnames_[21] = "chr22"; clens_[21] = 49691432;
cnames_[22] = "chrX"; clens_[22] = 154913754;
cnames_[23] = "chrY"; clens_[23] = 57772954;
cnames_[0] = Genome::makeCanonical("1"); clens_[0] = 247249719;
cnames_[1] = Genome::makeCanonical("2"); clens_[1] = 242951149;
cnames_[2] = Genome::makeCanonical("3"); clens_[2] = 199501827;
cnames_[3] = Genome::makeCanonical("4"); clens_[3] = 191273063;
cnames_[4] = Genome::makeCanonical("5"); clens_[4] = 180857866;
cnames_[5] = Genome::makeCanonical("6"); clens_[5] = 170899992;
cnames_[6] = Genome::makeCanonical("7"); clens_[6] = 158821424;
cnames_[7] = Genome::makeCanonical("8"); clens_[7] = 146274826;
cnames_[8] = Genome::makeCanonical("9"); clens_[8] = 140273252;
cnames_[9] = Genome::makeCanonical("10"); clens_[9] = 135374737;
cnames_[10] = Genome::makeCanonical("11"); clens_[10] = 134452384;
cnames_[11] = Genome::makeCanonical("12"); clens_[11] = 132349534;
cnames_[12] = Genome::makeCanonical("13"); clens_[12] = 114142980;
cnames_[13] = Genome::makeCanonical("14"); clens_[13] = 106368585;
cnames_[14] = Genome::makeCanonical("15"); clens_[14] = 100338915;
cnames_[15] = Genome::makeCanonical("16"); clens_[15] = 88827254;
cnames_[16] = Genome::makeCanonical("17"); clens_[16] = 78774742;
cnames_[17] = Genome::makeCanonical("18"); clens_[17] = 76117153;
cnames_[18] = Genome::makeCanonical("19"); clens_[18] = 63811651;
cnames_[19] = Genome::makeCanonical("20"); clens_[19] = 62435964;
cnames_[20] = Genome::makeCanonical("21"); clens_[20] = 46944323;
cnames_[21] = Genome::makeCanonical("22"); clens_[21] = 49691432;
cnames_[22] = Genome::makeCanonical("X"); clens_[22] = 154913754;
cnames_[23] = Genome::makeCanonical("Y"); clens_[23] = 57772954;
} else if (name == "hg19" || name == "grch37") {
gname_ = "GRCh37";
other_gname_ = "hg19";
n_chr_ = 24;
cnames_[0] = "chr1"; clens_[0] = 249250621;
cnames_[1] = "chr2"; clens_[1] = 243199373;
cnames_[2] = "chr3"; clens_[2] = 198022430;
cnames_[3] = "chr4"; clens_[3] = 191154276;
cnames_[4] = "chr5"; clens_[4] = 180915260;
cnames_[5] = "chr6"; clens_[5] = 171115067;
cnames_[6] = "chr7"; clens_[6] = 159138663;
cnames_[7] = "chr8"; clens_[7] = 146364022;
cnames_[8] = "chr9"; clens_[8] = 141213431;
cnames_[9] = "chr10"; clens_[9] = 135534747;
cnames_[10] = "chr11"; clens_[10] = 135006516;
cnames_[11] = "chr12"; clens_[11] = 133851895;
cnames_[12] = "chr13"; clens_[12] = 115169878;
cnames_[13] = "chr14"; clens_[13] = 107349540;
cnames_[14] = "chr15"; clens_[14] = 102531392;
cnames_[15] = "chr16"; clens_[15] = 90354753;
cnames_[16] = "chr17"; clens_[16] = 81195210;
cnames_[17] = "chr18"; clens_[17] = 78077248;
cnames_[18] = "chr19"; clens_[18] = 59128983;
cnames_[19] = "chr20"; clens_[19] = 63025520;
cnames_[20] = "chr21"; clens_[20] = 48129895;
cnames_[21] = "chr22"; clens_[21] = 51304566;
cnames_[22] = "chrX"; clens_[22] = 155270560;
cnames_[23] = "chrY"; clens_[23] = 59373566;
cnames_[0] = Genome::makeCanonical("1"); clens_[0] = 249250621;
cnames_[1] = Genome::makeCanonical("2"); clens_[1] = 243199373;
cnames_[2] = Genome::makeCanonical("3"); clens_[2] = 198022430;
cnames_[3] = Genome::makeCanonical("4"); clens_[3] = 191154276;
cnames_[4] = Genome::makeCanonical("5"); clens_[4] = 180915260;
cnames_[5] = Genome::makeCanonical("6"); clens_[5] = 171115067;
cnames_[6] = Genome::makeCanonical("7"); clens_[6] = 159138663;
cnames_[7] = Genome::makeCanonical("8"); clens_[7] = 146364022;
cnames_[8] = Genome::makeCanonical("9"); clens_[8] = 141213431;
cnames_[9] = Genome::makeCanonical("10"); clens_[9] = 135534747;
cnames_[10] = Genome::makeCanonical("11"); clens_[10] = 135006516;
cnames_[11] = Genome::makeCanonical("12"); clens_[11] = 133851895;
cnames_[12] = Genome::makeCanonical("13"); clens_[12] = 115169878;
cnames_[13] = Genome::makeCanonical("14"); clens_[13] = 107349540;
cnames_[14] = Genome::makeCanonical("15"); clens_[14] = 102531392;
cnames_[15] = Genome::makeCanonical("16"); clens_[15] = 90354753;
cnames_[16] = Genome::makeCanonical("17"); clens_[16] = 81195210;
cnames_[17] = Genome::makeCanonical("18"); clens_[17] = 78077248;
cnames_[18] = Genome::makeCanonical("19"); clens_[18] = 59128983;
cnames_[19] = Genome::makeCanonical("20"); clens_[19] = 63025520;
cnames_[20] = Genome::makeCanonical("21"); clens_[20] = 48129895;
cnames_[21] = Genome::makeCanonical("22"); clens_[21] = 51304566;
cnames_[22] = Genome::makeCanonical("X"); clens_[22] = 155270560;
cnames_[23] = Genome::makeCanonical("Y"); clens_[23] = 59373566;
} else {
cerr<<"Unknown genome '"<<org_name<<"'."<<endl;
}
Expand Down Expand Up @@ -95,15 +99,18 @@ string Genome::makeCanonical(string name)
{
string tmp = "";
for (int i = 0;i < name.length();i++) tmp += toupper(name[i]);
string ret = "chr";
if (tmp.substr(0,3) == "CHR") ret += name.substr(3);
else if (tmp.substr(0,5) == "CHROM") ret += name.substr(5);
else if (tmp.substr(0,10) == "CHROMOSOME") ret += name.substr(10);
else if (name.length() < 3) ret += name;
else ret = name;
if (ret == "chrx") ret = "chrX";
if (ret == "chry") ret = "chrY";
string ret = tmp;
if (tmp.substr(0,10) == "CHROMOSOME") ret = tmp.substr(10);
else if (tmp.substr(0,5) == "CHROM") ret = tmp.substr(5);
else if (tmp.substr(0,3) == "CHR") ret = tmp.substr(3);

return ret;
}

bool Genome::isSexChrom(string name)
{
string tmp = makeCanonical(name);
if (name == CHRSEX || name == CHRX || name == CHRY) return true;
return false;
}

7 changes: 5 additions & 2 deletions Genome.hh
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,23 @@ private:
static const int NGS = 2;
static Genome genomes[NGS];
string gname_,other_gname_;
static const int MAX_N_CHROMS_ = 1000;
static const int MAX_N_CHROMS_ = 100;
static const string CHRX, CHRY;
string cnames_[MAX_N_CHROMS_];
int clens_[MAX_N_CHROMS_];
int n_chr_;

public:
static const string CHRALL, CHRSEX;
static Genome *get(string name);
static string makeCanonical(string name);
static bool isSexChrom(string name);

public:
string name() { return gname_; }
int numChrom() { return n_chr_; }
string chromName(int i) { return (i >= 0 && i < n_chr_) ? cnames_[i] : ""; }
int chromLen(int i) { return (i >= 0 && i < n_chr_) ? clens_[i] : 0; }
int chromLen(int i) { return (i >= 0 && i < n_chr_) ? clens_[i] : 0; }
int getChromosomeIndex(string chr);
};

Expand Down
41 changes: 25 additions & 16 deletions Genotyper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Genotyper::Genotyper(HisMaker *maker,

{}

void Genotyper::printGenotype(TString chr,int start,int end,
void Genotyper::printGenotype(string chr,int start,int end,
bool useATcorr,bool useGCcorr)
{
if (!_maker) return;
Expand All @@ -23,49 +23,58 @@ void Genotyper::printGenotype(TString chr,int start,int end,
return;
}

TString nameSignal = _maker->getSignalName(chr, _bin,
string canon = Genome::makeCanonical(chr);
TString nameSignal = _maker->getSignalName(chr, _bin,
useATcorr,useGCcorr);
TString alt_nameSignal = _maker->getSignalName(canon,_bin,
useATcorr,useGCcorr);
TString nameDistr = _maker->getDistrName(chr, _bin,
useATcorr,useGCcorr);
TString alt_nameDistr = _maker->getDistrName(canon, _bin,
useATcorr,useGCcorr);
TString nameDistrAll = _maker->getDistrName(Genome::CHRALL,_bin,
useATcorr,useGCcorr);
TString nameDistr1000 = _maker->getDistrName(chr, 1000,
useATcorr,useGCcorr);
TString alt_nameDistr1000 = _maker->getDistrName(canon, 1000,
useATcorr,useGCcorr);
TString nameDistr1000All = _maker->getDistrName(Genome::CHRALL,1000,
useATcorr,useGCcorr);
TString nameDistr = _maker->getDistrName(chr, _bin,
useATcorr,useGCcorr);
TString nameDistrAll = _maker->getDistrName(chrAll,_bin,
useATcorr,useGCcorr);
TString nameDistr1000 = _maker->getDistrName(chr, 1000,
useATcorr,useGCcorr);
TString nameDistr1000All = _maker->getDistrName(chrAll,1000,
useATcorr,useGCcorr);

TString dirName = _maker->getDirName(_bin);
TString dirName1000 = _maker->getDirName(1000);

if (!_hisSignal || nameSignal != _hisSignal->GetName())
_hisSignal = _maker->getHistogram(nameSignal,_file,dirName);
_hisSignal = _maker->getHistogram(_file,dirName,nameSignal,alt_nameSignal);

if (!_hisDistr || nameDistr != _hisDistr->GetName()) {
_hisDistr = _maker->getHistogram(nameDistr,_file,dirName);
_hisDistr = _maker->getHistogram(_file,dirName,nameDistr,alt_nameDistr);
if (_hisDistr)
_maker->getMeanSigma(_hisDistr,_mean,_sigma);
}

if (!_hisDistrAll || nameDistrAll != _hisDistrAll->GetName()) {
_hisDistrAll = _maker->getHistogram(nameDistrAll,_file,dirName);
_hisDistrAll = _maker->getHistogram(_file,dirName,nameDistrAll);
if (_hisDistrAll)
_maker->getMeanSigma(_hisDistrAll,_meanAll,_sigmaAll);
}

if (!_hisDistr1000 || nameDistr1000 != _hisDistr1000->GetName()) {
_hisDistr1000 = _maker->getHistogram(nameDistr1000,_file,dirName1000);
_hisDistr1000 = _maker->getHistogram(_file,dirName1000,
nameDistr1000,alt_nameDistr1000);
if (_hisDistr1000)
_maker->getMeanSigma(_hisDistr1000,_mean1000,_sigma1000);
}

if (!_hisDistr1000All || nameDistr1000All != _hisDistr1000All->GetName()) {
_hisDistr1000All = _maker->getHistogram(nameDistr1000All,_file,dirName1000);
_hisDistr1000All = _maker->getHistogram(_file,dirName1000,
nameDistr1000All);
if (_hisDistr1000All)
_maker->getMeanSigma(_hisDistr1000All,_mean1000All,_sigma1000All);
}

double scale = 2;
if (chr == chrX || chr == chrY)
if (Genome::isSexChrom(chr))
if (_meanAll > 0 && _mean/_meanAll < 0.66) {
cout<<"Assuming male individual!"<<endl;
scale = 1;
Expand Down
2 changes: 1 addition & 1 deletion Genotyper.hh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ private:

public:
Genotyper(HisMaker *maker,TString file,int bin);
void printGenotype(TString chrom,int start,int end,
void printGenotype(string chrom,int start,int end,
bool useATcorr,bool useGCcorr);

private:
Expand Down
Loading

0 comments on commit cf13804

Please sign in to comment.