time | calls | line |
---|
| | 1 | function [imageX, imageY, imageC, imageD] = smoothhist2D_4_Xcorrected(x,y,lambda,nbins,rangeMax, XcorrectionVector, mean_val, scaler)
|
| | 2 | %
|
| | 3 | % This version allows user control of the colormap in use. Only standard MATLAB colormaps are valid.
|
| | 4 | % This version also allows the user to apply an x-correction to the data points before smoothing.
|
| | 5 | %
|
0.01 | 16 | 6 | colormap_ = 'hot';
|
| | 7 |
|
| | 8 | % SMOOTHHIST2D Plot a smoothed histogram of bivariate data.
|
| | 9 | % SMOOTHHIST2D(x,y,LAMBDA,NBINS) plots a smoothed histogram of the bivariate
|
| | 10 | % data in the vectors x and y. Rows correspond to observations. LAMBDA
|
| | 11 | % is a positive scalar smoothing parameter; higher values lead to more
|
| | 12 | % smoothing, values close to zero lead to a plot that is essentially just
|
| | 13 | % the raw data. NBINS is a two-element vector that determines the number
|
| | 14 | % of histogram bins in the horizontal and vertical directions.
|
| | 15 | %
|
| | 16 | % SMOOTHHIST2D(X,LAMBDA,NBINS,CUTOFF) plots outliers in the data as points
|
| | 17 | % overlaid on the smoothed histogram. Outliers are defined as points in
|
| | 18 | % regions where the smoothed density is less than (100*CUTOFF)% of the
|
| | 19 | % maximum density.
|
| | 20 | %
|
| | 21 | % SMOOTHHIST2D(X,LAMBDA,NBINS,[],'surf') plots a smoothed histogram as a
|
| | 22 | % surface plot. SMOOTHHIST2D ignores the CUTOFF input in this case, and
|
| | 23 | % the surface plot does not include outliers.
|
| | 24 | %
|
| | 25 | % SMOOTHHIST2D(X,LAMBDA,NBINS,CUTOFF,'image') plots the histogram as an
|
| | 26 | % image plot, the default.
|
| | 27 | %
|
| | 28 | % Example:
|
| | 29 | % X = [mvnrnd([0 5], [3 0; 0 3], 2000);
|
| | 30 | % mvnrnd([0 8], [1 0; 0 5], 2000);
|
| | 31 | % mvnrnd([3 5], [5 0; 0 1], 2000)];
|
| | 32 | % smoothhist2D(X,5,[100, 100],.05);
|
| | 33 | % smoothhist2D(X,5,[100, 100],[],'surf');
|
| | 34 | %
|
| | 35 | % Reference:
|
| | 36 | % Eilers, P.H.C. and Goeman, J.J. (2004) "Enhancing scatterplots with
|
| | 37 | % smoothed densities", Bioinformatics 20(5):623-628.
|
| | 38 | % Copyright 2009 The MathWorks, Inc.
|
| | 39 | % Revision: 1.0 Date: 2006/12/12
|
| | 40 | %
|
| | 41 | % Requires MATLAB® R14.
|
| | 42 |
|
| 16 | 43 | outliercutoff = 0;
|
| 16 | 44 | if (nargin < 6) || (isempty(outliercutoff))
|
| | 45 | outliercutoff = 0; %.02;
|
| | 46 | end;
|
| | 47 |
|
| | 48 | % data limited to 500,000 points in original version.
|
| 16 | 49 | data_limit = 1000000;
|
| 16 | 50 | if (length(x) > data_limit)
|
| | 51 | % randomize the order of data in the input vectors.
|
2.04 | 10 | 52 | randIndex = randperm(length(x));
|
0.14 | 10 | 53 | x_temp = x(randIndex);
|
0.16 | 10 | 54 | y_temp = y(randIndex);
|
| | 55 | % Limit the randomized data to the data_limit.
|
0.03 | 10 | 56 | x2 = x_temp(1:data_limit);
|
0.03 | 10 | 57 | y2 = y_temp(1:data_limit);
|
| 6 | 58 | else
|
| 6 | 59 | x2 = x;
|
| 6 | 60 | y2 = y;
|
| 6 | 61 | end;
|
| | 62 |
|
| | 63 |
|
0.12 | 16 | 64 | X = [x2;y2]';
|
| | 65 | %X = [x;y]';
|
| | 66 |
|
| | 67 |
|
0.05 | 16 | 68 | minx = min(X,[],1);
|
0.05 | 16 | 69 | maxx = max(X,[],1);
|
| 16 | 70 | if (rangeMax(1) > 0)
|
| 16 | 71 | if (maxx(1) > rangeMax(1));
|
| | 72 | maxx(1) = rangeMax(1);
|
| | 73 | end;
|
| 16 | 74 | end;
|
| 16 | 75 | if (rangeMax(2) > 0)
|
| 16 | 76 | if (maxx(2) > rangeMax(2));
|
| | 77 | maxx(2) = rangeMax(2);
|
| | 78 | end;
|
| 16 | 79 | end;
|
| 16 | 80 | edges1 = linspace(minx(1), maxx(1), nbins(1)+1);
|
| 16 | 81 | ctrs1 = edges1(1:end-1) + .5*diff(edges1);
|
| 16 | 82 | edges1 = [-Inf edges1(2:end-1) Inf];
|
| 16 | 83 | edges2 = linspace(minx(2), maxx(2), nbins(2)+1);
|
| 16 | 84 | ctrs2 = edges2(1:end-1) + .5*diff(edges2);
|
| 16 | 85 | edges2 = [-Inf edges2(2:end-1) Inf];
|
| | 86 |
|
| 16 | 87 | [n,p] = size(X);
|
0.01 | 16 | 88 | bin = zeros(n,2);
|
| | 89 | % Reverse the columns of H to put the first column of X along the
|
| | 90 | % horizontal axis, the second along the vertical.
|
0.48 | 16 | 91 | [dum,bin(:,2)] = histc(X(:,1),edges1);
|
0.29 | 16 | 92 | [dum,bin(:,1)] = histc(X(:,2),edges2);
|
| | 93 |
|
| | 94 | %% Accumulate points into 2D array.
|
0.07 | 16 | 95 | H = accumarray(bin,1,nbins([2 1]))./n;
|
| | 96 |
|
| | 97 |
|
| | 98 | %% Apply the X correction factor: this is intended for use in normalizing SNP data by CNV estimate.
|
| | 99 | % H : Accumulated data.
|
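| | 100 | % XcorrectionVector : Input correction per X bin (one factor per column of H). The original note says squaring the correction factor was determined empirically, but the loop below currently applies it with exponent 1.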
|
| 16 | 101 | H_original = H;
|
| 16 | 102 | for i = 1:size(H,2)
|
0.01 | 6280 | 103 | H(:,i) = H(:,i)*(XcorrectionVector(i).^1); %2?
|
| 6280 | 104 | end;
|
| | 105 |
|
| | 106 |
|
| | 107 | %% Cap 2D histogram values at 100 times mean_val (an earlier note described this as cropping to three times the median).
|
| 16 | 108 | temp_H = H;
|
| 16 | 109 | temp_H(temp_H == 0) = [];
|
| 16 | 110 | max_val = mean_val*100;
|
| 16 | 111 | H(H > max_val) = max_val;
|
| | 112 |
|
| | 113 |
|
| | 114 | %% Trouble-shooting outputs.
|
| | 115 | % testCorrection_length = length(XcorrectionVector)
|
| | 116 | % testDataset_length = length(H)
|
| | 117 | % testCorrection = XcorrectionVector
|
| | 118 | % testDataset = H
|
| | 119 | % H_median = median(H(:))
|
| | 120 | % H_mode = mode(H(:))
|
| | 121 | % H_mean = mean(H(:))
|
| | 122 |
|
| | 123 |
|
| | 124 | %% Perform smoothing of 2D histogram: Eilers' 1D smooth, applied twice (once to H, then to the transpose of the result)
|
0.04 | 16 | 125 | G = smooth1D(H,lambda)*10000;
|
0.49 | 16 | 126 | F = smooth1D(G',lambda)';
|
| | 127 |
|
| | 128 | % relF = F./max(F(:));
|
0.02 | 16 | 129 | relF = F/max(max(F));
|
| 16 | 130 | if outliercutoff > 0
|
| | 131 | outliers = (relF(nbins(2)*(bin(:,2)-1)+bin(:,1)) < outliercutoff);
|
| | 132 | end
|
| | 133 |
|
| | 134 | %% Scale data by input variable "scaler", which is used to correct for per-chromosome differences in data levels.
|
| 16 | 135 | relF = relF*scaler;
|
| | 136 |
|
| 16 | 137 | nc = 256;
|
| | 138 |
|
| 16 | 139 | imageX = ctrs1;
|
| 16 | 140 | imageY = ctrs2;
|
0.04 | 16 | 141 | imageC = floor(nc*2*log(relF+1));
|
0.01 | 16 | 142 | imageD = H_original;
|
| | 143 |
|
| 16 | 144 | switch lower(colormap_)
|
| 16 | 145 | case 'jet'
|
| | 146 | colormap(jet(nc));
|
| 16 | 147 | case 'hsv'
|
| | 148 | colormap(hsv(nc));
|
| 16 | 149 | case 'hot'
|
0.03 | 16 | 150 | colormap(hot(nc));
|
| | 151 | case 'cool'
|
| | 152 | colormap(cool(nc));
|
| | 153 | case 'spring'
|
| | 154 | colormap(spring(nc));
|
| | 155 | case 'summer'
|
| | 156 | colormap(summer(nc));
|
| | 157 | case 'autumn'
|
| | 158 | colormap(autumn(nc));
|
| | 159 | case 'winter'
|
| | 160 | colormap(winter(nc));
|
| | 161 | case 'gray'
|
| | 162 | colormap(gray(nc));
|
| | 163 | case 'bone'
|
| | 164 | colormap(bone(nc));
|
| | 165 | case 'copper'
|
| | 166 | colormap(copper(nc));
|
| | 167 | case 'pink'
|
| | 168 | colormap(pink(nc));
|
| | 169 | case 'lines'
|
| | 170 | colormap(lines(nc));
|
| | 171 | end;
|
Other subfunctions in this file are not included in this listing.