time | calls | line |
---|
| 1 | 5 | for chr = num_chrs
|
| 1 | 6 | if (chr_in_use(chr) == 1)
|
| 1 | 7 | for segment = 1:length(chrCopyNum{chr})
|
| 1 | 8 | chrSegment_peaks{ chr}{segment} = [];
|
| 1 | 9 | chrSegment_mostLikelyGaussians{chr}{segment} = [];
|
| 1 | 10 | chrSegment_actual_cutoffs{ chr}{segment} = [];
|
| 1 | 11 | chrSegment_smoothed{ chr}{segment} = [];
|
| 1 | 12 | end;
|
| 1 | 13 | end;
|
| 1 | 14 | end;
|
| 1 | 15 | for chr = 1:num_chrs
|
| 9 | 16 | if (chr_in_use(chr) == 1)
|
| 8 | 17 | chr_length = chr_size(chr);
|
| 8 | 18 | for segment = 1:length(chrCopyNum{chr})
|
| 12 | 19 | histAll_a = [];
|
| 12 | 20 | histAll_b = [];
|
| 12 | 21 | histAll2 = [];
|
| | 22 | % Look through all SNP data in every chr_bin, to determine if any are within the segment boundries.
|
| | 23 | % Speed up by only checking possible chr_bins has not been implmented.
|
| | 24 |
|
| 12 | 25 | fprintf( '^^^\n');
|
0.01 | 12 | 26 | fprintf(['^^^ chrID = ' num2str(chr) '\n']);
|
| 12 | 27 | fprintf(['^^^ segmentID = ' num2str(segment) '\n']);
|
| 12 | 28 | fprintf(['^^^ segment start = ' num2str(chr_breaks{chr}(segment )*chr_length) '\n']);
|
| 12 | 29 | fprintf(['^^^ segment end = ' num2str(chr_breaks{chr}(segment+1)*chr_length) '\n']);
|
| | 30 |
|
| | 31 | %% Construct and smooth a histogram of alleleic fraction data in the segment of interest.
|
| | 32 | % phased data is stored into arrays 'histAll_a' and 'histAll_b', since proper phasing is known.
|
| | 33 | % unphased data is stored inverted into the second array, since proper phasing is not known.
|
| 12 | 34 | for chr_bin = 1:length(CNVplot2{chr})
|
| | 35 | % 1 : phased SNP ratio data.
|
| | 36 | % 2 : unphased SNP ratio data.
|
| | 37 | % 3 : phased SNP position data.
|
| | 38 | % 4 : unphased SNP position data.
|
0.01 | 4576 | 39 | ratioData_phased = chr_SNPdata{chr,1}{chr_bin};
|
0.01 | 4576 | 40 | ratioData_unphased = chr_SNPdata{chr,2}{chr_bin};
|
0.03 | 4576 | 41 | coordinateData_phased = chr_SNPdata{chr,3}{chr_bin};
|
| 4576 | 42 | coordinateData_unphased = chr_SNPdata{chr,4}{chr_bin};
|
0.01 | 4576 | 43 | if (useHapmap)
|
| | 44 | if (length(ratioData_phased) > 0)
|
| | 45 | for SNP_in_bin = 1:length(ratioData_phased)
|
| | 46 | if ((coordinateData_phased(SNP_in_bin) > chr_breaks{chr}(segment)*chr_length) && (coordinateData_phased(SNP_in_bin) <= chr_breaks{chr}(segment+1)*chr_length))
|
| | 47 | % Ratio data is phased, so it is added twice in its proper orientation (to match density of unphased data below).
|
| | 48 | allelic_ratio = ratioData_phased(SNP_in_bin);
|
| | 49 | histAll_a = [histAll_a allelic_ratio ];
|
| | 50 | histAll_b = [histAll_b allelic_ratio ];
|
| | 51 | end;
|
| | 52 | end;
|
| | 53 | end;
|
| | 54 | end;
|
0.03 | 4576 | 55 | if (length(ratioData_unphased) > 0)
|
| 3817 | 56 | for SNP_in_bin = 1:length(ratioData_unphased)
|
1.10 | 106731 | 57 | if ((coordinateData_unphased(SNP_in_bin) > chr_breaks{chr}(segment)*chr_length) && (coordinateData_unphased(SNP_in_bin) <= chr_breaks{chr}(segment+1)*chr_length))
|
| | 58 | % Ratio data is unphased, so it is added evenly in both orientations.
|
0.10 | 63957 | 59 | allelic_ratio = ratioData_unphased(SNP_in_bin);
|
0.43 | 63957 | 60 | histAll_a = [histAll_a allelic_ratio ];
|
0.49 | 63957 | 61 | histAll_b = [histAll_b 1-allelic_ratio];
|
0.10 | 63957 | 62 | end;
|
0.16 | 106731 | 63 | end;
|
0.01 | 3817 | 64 | end;
|
| 4576 | 65 | end;
|
| | 66 |
|
| | 67 | % make a histogram of SNP allelic fractions in segment, then smooth for display.
|
| 12 | 68 | histAll = [histAll_a histAll_b];
|
| 12 | 69 | histAll(histAll == -1) = [];
|
| | 70 |
|
| | 71 | % Invert histogram values;
|
| 12 | 72 | histAll = 1-histAll;
|
| | 73 |
|
| | 74 | % add bounds to the histogram values.
|
| 12 | 75 | histAll = [histAll 0 1];
|
| | 76 |
|
| | 77 | % generate the histogram.
|
0.01 | 12 | 78 | data_hist = hist(histAll,200);
|
| 12 | 79 | endPoints_hist = hist([0 1],200);
|
| | 80 |
|
| | 81 | % remove the endpoints.
|
| 12 | 82 | data_hist = data_hist-endPoints_hist;
|
| | 83 |
|
| | 84 | % log-scale the histogram to minimize difference between hom & het peak heights.
|
| | 85 | % average this with the raw histogram so the large peaks still appear visibily larger than the small peaks.
|
| | 86 |
|
| 12 | 87 | data_hist = (data_hist + log(data_hist+1))/2;
|
| | 88 | % data_hist = log(data_hist+1);
|
| | 89 |
|
| | 90 | % smooth the histogram.
|
| 12 | 91 | smoothed = smooth_gaussian(data_hist,10,30);
|
| | 92 |
|
| | 93 | % flip the smoothed histogram left-right to make display consistent with values.
|
| 12 | 94 | smoothed = fliplr(smoothed);
|
| | 95 |
|
| | 96 | % scale the smoothed histogram to a max of 1.
|
| 12 | 97 | if (max(smoothed) > 0)
|
| 12 | 98 | smoothed = smoothed/max(smoothed);
|
| 12 | 99 | end;
|
| | 100 |
|
| | 101 | %% Calculate Gaussian fitting details for segment.
|
| 12 | 102 | segment_copyNum = round(chrCopyNum{chr}(segment)); % copy number estimate of this segment.
|
| 12 | 103 | segment_chrBreaks = chr_breaks{chr}(segment); % break points of this segment.
|
| 12 | 104 | segment_smoothedHistogram = smoothed; % whole chromosome allelic ratio histogram smoothed.
|
| | 105 |
|
| | 106 | % Define cutoffs between Gaussian fits.
|
| 12 | 107 | saveName = ['allelic_ratios.chr_' num2str(chr) '.seg_' num2str(segment)];
|
4.44 | 12 | 108 | [peaks,actual_cutoffs,mostLikelyGaussians] = FindGaussianCutoffs_3(workingDir,saveName, chr,segment, segment_copyNum,segment_smoothedHistogram, false);
|
| | 109 |
|
0.01 | 12 | 110 | fprintf(['^^^ copyNum = ' num2str(segment_copyNum ) '\n']);
|
0.02 | 12 | 111 | fprintf(['^^^ peaks = ' num2str(peaks ) '\n']);
|
| 12 | 112 | fprintf(['^^^ mostLikelyGaussians = ' num2str(mostLikelyGaussians) '\n']);
|
0.01 | 12 | 113 | fprintf(['^^^ actual_cutoffs = ' num2str(actual_cutoffs ) '\n']);
|
| | 114 |
|
| 12 | 115 | chrSegment_peaks{ chr}{segment} = peaks;
|
| 12 | 116 | chrSegment_mostLikelyGaussians{chr}{segment} = mostLikelyGaussians;
|
| 12 | 117 | chrSegment_actual_cutoffs{ chr}{segment} = actual_cutoffs;
|
| 12 | 118 | chrSegment_smoothed{ chr}{segment} = smoothed;
|
| 12 | 119 | end;
|
| 8 | 120 | end;
|
| 9 | 121 | end;
|