time | calls | line |
---|
| | 1 | function [] = allelic_ratios_WGseq(main_dir,user,genomeUser,project,parent,hapmap,genome,ploidyEstimateString,ploidyBaseString,SNP_verString,LOH_verString,CNV_verString,displayBREAKS);
|
0.07 | 1 | 2 | addpath('../');
|
| | 3 |
|
| 1 | 4 | fprintf(['main_dir = "' main_dir '"\n']);
|
| 1 | 5 | fprintf(['user = "' user '"\n']);
|
| 1 | 6 | fprintf(['genomeUser = "' genomeUser '"\n']);
|
| 1 | 7 | fprintf(['project = "' project '"\n']);
|
| 1 | 8 | fprintf(['parent = "' parent '"\n']);
|
| 1 | 9 | fprintf(['hapmap = "' hapmap '"\n']);
|
| 1 | 10 | fprintf(['genome = "' genome '"\n']);
|
| 1 | 11 | fprintf(['ploidyEstimateString = "' ploidyEstimateString '"\n']);
|
| 1 | 12 | fprintf(['ploidyBaseString = "' ploidyBaseString '"\n']);
|
| | 13 |
|
| 1 | 14 | fprintf('\n\n\t*===============================================================*\n');
|
| 1 | 15 | fprintf( '\t| Fireplot generation in script "allelic_ratios_WGseq.m". |\n');
|
| 1 | 16 | fprintf( '\t*---------------------------------------------------------------*\n');
|
| 1 | 17 | tic;
|
| 1 | 18 | fprintf('\t|\tGenerating FirePlot of SNP allelic ratio data across genome.\n');
|
| | 19 | %% ========================================================================
|
| | 20 | % Centromere_format : Controls how centromeres are depicted. [0..2] '2' is pinched cartoon default.
|
| | 21 | % bases_per_bin : Controls bin sizes for SNP/CGH fractions of plot.
|
| | 22 | % scale_type : 'Ratio' or 'Log2Ratio' y-axis scaling of copy number.
|
| | 23 | % 'Log2Ratio' does not properly scale CGH data by ploidy.
|
| | 24 | % Chr_max_width : max width of chrs as fraction of figure width.
|
| 1 | 25 | Centromere_format = 0;
|
| 1 | 26 | Chr_max_width = 0.8;
|
| 1 | 27 | colorBars = true;
|
| 1 | 28 | blendColorBars = false;
|
| 1 | 29 | show_annotations = true;
|
| 1 | 30 | Yscale_nearest_even_ploidy = true;
|
| 1 | 31 | Linear_display = true;
|
| 1 | 32 | Linear_displayBREAKS = false;
|
| | 33 |
|
| 1 | 34 | projectDir = [main_dir 'users/' user '/projects/' project '/'];
|
| 1 | 35 | genomeDir = [main_dir 'users/' genomeUser '/genomes/' genome '/'];
|
| | 36 |
|
| | 37 |
|
| 1 | 38 | fprintf('\t|\tDetermine if hapmap is in use.\n');
|
| | 39 | %
|
| | 40 | % For right now, ('parent' == 'hapmap') always because of earlier mixed use of variables.
|
| | 41 | % Determine if 'hapmap' is in use by checking user and system hapmap directories.
|
| | 42 | %
|
| | 43 | % Possible error case where 'parent' and 'hapmap' have same name string.
|
| | 44 | % Will be resolved with later disambiguation of parent/hapmap variable earlier in module.
|
| | 45 | %
|
0.01 | 1 | 46 | if (exist([main_dir 'users/default/hapmaps/' hapmap '/'], 'dir') == 7)
|
| | 47 | hapmapDir = [main_dir 'users/default/hapmaps/' hapmap '/']; % system hapmap.
|
| | 48 | useHapmap = true;
|
| 1 | 49 | elseif (exist([main_dir 'users/' user '/hapmaps/' hapmap '/'], 'dir') == 7)
|
| | 50 | hapmapDir = [main_dir 'users/' user '/hapmaps/' hapmap '/']; % user hapmap.
|
| | 51 | useHapmap = true;
|
| 1 | 52 | else
|
| 1 | 53 | useHapmap = false;
|
| 1 | 54 | end;
|
| | 55 |
|
| | 56 |
|
| 1 | 57 | fprintf('\t|\tDetermine if parent project is in use.\n');
|
| | 58 | %
|
| | 59 | % The 'parent' will == the 'project' when no 'parent' is selected in setup.
|
| | 60 | %
|
| 1 | 61 | if (strcmp(project,parent) == 0)
|
| 1 | 62 | useParent = true;
|
| 1 | 63 | if (exist([main_dir 'users/default/projects/' parent '/'], 'dir') == 7)
|
| | 64 | parentDir = [main_dir 'users/default/projects/' parent '/']; % system parent.
|
| 1 | 65 | else
|
| 1 | 66 | parentDir = [main_dir 'users/' user '/projects/' parent '/']; % user parent.
|
| 1 | 67 | end;
|
| | 68 | else
|
| | 69 | useParent = false;
|
| | 70 | parentDir = projectDir;
|
| | 71 | end;
|
| | 72 |
|
| | 73 |
|
| 1 | 74 | fprintf('\t|\tLoading dataset information.\n');
|
0.07 | 1 | 75 | [centromeres, chr_sizes, figure_details, annotations, ploidy_default] = Load_genome_information(genomeDir);
|
| 1 | 76 | [Aneuploidy] = Load_dataset_information(projectDir);
|
| | 77 |
|
| 1 | 78 | num_chrs = length(chr_sizes);
|
| 1 | 79 | for chrID = 1:length(chr_sizes)
|
| 9 | 80 | chr_size( chrID) = 0;
|
| 9 | 81 | cen_start(chrID) = 0;
|
| 9 | 82 | cen_end( chrID) = 0;
|
| 9 | 83 | end;
|
| 1 | 84 | for chrID = 1:length(chr_sizes)
|
| 9 | 85 | chr_size(chr_sizes( chrID).chr) = chr_sizes( chrID).size;
|
| 9 | 86 | cen_start(centromeres(chrID).chr) = centromeres(chrID).start;
|
| 9 | 87 | cen_end(centromeres( chrID).chr) = centromeres(chrID).end;
|
| 9 | 88 | end
|
| 1 | 89 | if (length(annotations) > 0)
|
| | 90 | fprintf(['\nAnnotations for ' genome '.\n']);
|
| | 91 | for annoteID = 1:length(annotations)
|
| | 92 | annotation_chr( annoteID) = annotations(annoteID).chr;
|
| | 93 | annotation_type{ annoteID} = annotations(annoteID).type;
|
| | 94 | annotation_start( annoteID) = annotations(annoteID).start;
|
| | 95 | annotation_end( annoteID) = annotations(annoteID).end;
|
| | 96 | annotation_fillcolor{annoteID} = annotations(annoteID).fillcolor;
|
| | 97 | annotation_edgecolor{annoteID} = annotations(annoteID).edgecolor;
|
| | 98 | annotation_size( annoteID) = annotations(annoteID).size;
|
| | 99 | fprintf(['\t[' num2str(annotations(annoteID).chr) ':' annotations(annoteID).type ':' num2str(annotations(annoteID).start) ':' ...
|
| | 100 | num2str(annotations(annoteID).end) ':' annotations(annoteID).fillcolor ':' annotations(annoteID).edgecolor ':' num2str(annotations(annoteID).size) ']\n']);
|
| | 101 | end;
|
| | 102 | end;
|
| 1 | 103 | for figureDetailID = 1:length(figure_details)
|
0.01 | 8 | 104 | if (figure_details(figureDetailID).chr == 0)
|
| | 105 | if (strcmp(figure_details(figureDetailID).label,'Key') == 1)
|
| | 106 | key_posX = figure_details(figureDetailID).posX;
|
| | 107 | key_posY = figure_details(figureDetailID).posY;
|
| | 108 | key_width = figure_details(figureDetailID).width;
|
| | 109 | key_height = figure_details(figureDetailID).height;
|
| | 110 | end;
|
| 8 | 111 | else
|
| 8 | 112 | chr_id (figure_details(figureDetailID).chr) = figure_details(figureDetailID).chr;
|
| 8 | 113 | chr_label {figure_details(figureDetailID).chr} = figure_details(figureDetailID).label;
|
| 8 | 114 | chr_name {figure_details(figureDetailID).chr} = figure_details(figureDetailID).name;
|
| 8 | 115 | chr_posX (figure_details(figureDetailID).chr) = figure_details(figureDetailID).posX;
|
| 8 | 116 | chr_posY (figure_details(figureDetailID).chr) = figure_details(figureDetailID).posY;
|
| 8 | 117 | chr_width (figure_details(figureDetailID).chr) = figure_details(figureDetailID).width;
|
| 8 | 118 | chr_height(figure_details(figureDetailID).chr) = figure_details(figureDetailID).height;
|
| 8 | 119 | chr_in_use(figure_details(figureDetailID).chr) = str2num(figure_details(figureDetailID).useChr);
|
| 8 | 120 | end;
|
| 8 | 121 | end;
|
| | 122 |
|
| | 123 | %% This block is normally calculated in FindChrSizes_2 in CNV analysis.
|
| 1 | 124 | for usedChr = 1:num_chrs
|
| 9 | 125 | if (chr_in_use(usedChr) == 1)
|
| | 126 | % determine where the endpoints of ploidy segments are.
|
| 8 | 127 | chr_breaks{usedChr}(1) = 0.0;
|
| 8 | 128 | break_count = 1;
|
| 8 | 129 | if (length(Aneuploidy) > 0)
|
| 8 | 130 | for i = 1:length(Aneuploidy)
|
| 48 | 131 | if (Aneuploidy(i).chr == usedChr)
|
| 6 | 132 | break_count = break_count+1;
|
| 6 | 133 | chr_broken = true;
|
| 6 | 134 | chr_breaks{usedChr}(break_count) = Aneuploidy(i).break;
|
| 6 | 135 | end;
|
| 48 | 136 | end;
|
| 8 | 137 | end;
|
| 8 | 138 | chr_breaks{usedChr}(length(chr_breaks{usedChr})+1) = 1;
|
| 8 | 139 | end;
|
| 9 | 140 | end;
|
| | 141 |
|
| | 142 |
|
| | 143 | %%================================================================================================
|
| | 144 | % Load FASTA file name from 'reference.txt' file for project.
|
| | 145 | %-------------------------------------------------------------------------------------------------
|
| 1 | 146 | fprintf('\t|\tLoad FASTA file name for project.\n');
|
| 1 | 147 | userReference = [main_dir 'users/' user '/genomes/' genome '/reference.txt'];
|
| 1 | 148 | defaultReference = [main_dir 'users/default/genomes/' genome '/reference.txt'];
|
| 1 | 149 | if (exist(userReference,'file') == 0)
|
| | 150 | FASTA_string = strtrim(fileread(defaultReference));
|
| 1 | 151 | else
|
| 1 | 152 | FASTA_string = strtrim(fileread(userReference));
|
| 1 | 153 | end;
|
| 1 | 154 | [FastaPath,FastaName,FastaExt] = fileparts(FASTA_string);
|
| | 155 |
|
| | 156 |
|
| | 157 | %%================================================================================================
|
| | 158 | % Preallocate data vectors the length of each chromosome.
|
| | 159 | %-------------------------------------------------------------------------------------------------
|
| 1 | 160 | fprintf('\t|\tPreallocating data vectors the length of each chromosome.\n');
|
| 1 | 161 | chr_SNP_data_positions = cell(length(chr_size),1);
|
| 1 | 162 | chr_SNP_data_ratios = cell(length(chr_size),1);
|
| 1 | 163 | chr_count = cell(length(chr_size),1);
|
| 1 | 164 | for chrID = 1:length(chr_size)
|
| 9 | 165 | if (chr_in_use(chrID) == 1)
|
0.02 | 8 | 166 | chr_SNP_data_positions{chrID} = zeros(chr_size(chrID),1);
|
0.02 | 8 | 167 | chr_SNP_data_ratios{ chrID} = zeros(chr_size(chrID),1);
|
0.01 | 8 | 168 | chr_count{ chrID} = zeros(chr_size(chrID),1);
|
| 8 | 169 | chr_lines_analyzed( chrID) = 0;
|
| 8 | 170 | end;
|
| 9 | 171 | end;
|
| | 172 |
|
| | 173 |
|
| | 174 | %%================================================================================================
|
| | 175 | % Process project 1 dataset.
|
| | 176 | %-------------------------------------------------------------------------------------------------
|
| 1 | 177 | if (useHapmap)
|
| | 178 | % Load only putative SNP data corresponding to hapmap loci.
|
| | 179 | fprintf('\t|\tLoad SNP information from "trimmed_SNPs_v5.txt" file for project.\n');
|
| | 180 | fprintf('\t|\t\t');
|
| | 181 | datafile = [projectDir 'trimmed_SNPs_v5.txt'];
|
| 1 | 182 | else
|
| | 183 | % Load all putative SNP data.
|
| 1 | 184 | fprintf('\t|\tLoad SNP information from "putative_SNPs_v4.txt" file for project.\n');
|
| 1 | 185 | fprintf('\t|\t\t');
|
| 1 | 186 | datafile = [projectDir 'putative_SNPs_v4.txt'];
|
| 1 | 187 | end;
|
| | 188 |
|
| 1 | 189 | data = fopen(datafile, 'r');
|
| 1 | 190 | count = 0;
|
| 1 | 191 | old_chr = 0;
|
| 1 | 192 | gap_string = '';
|
| | 193 | % reading the line before checking for end of file to avoid reading empty
|
| | 194 | % file
|
| 1 | 195 | dataLine = fgetl(data);
|
| 1 | 196 | while not (feof(data))
|
14.49 | 4716127 | 197 | if (length(dataLine) > 0)
|
| | 198 | % process the loaded line into data channels.
|
| | 199 | % if using hapmap no SNP reference is in the file so avoid reading
|
| | 200 | % it
|
8.48 | 4716127 | 201 | if (useHapmap)
|
| | 202 | lineVariables = textscan(dataLine, '%s %d %d %d %d %d');
|
| | 203 | SNP_chr_name = lineVariables{1}{1};
|
| | 204 | SNP_coordinate = lineVariables{2};
|
| | 205 | SNP_countA = lineVariables{3};
|
| | 206 | SNP_countT = lineVariables{4};
|
| | 207 | SNP_countG = lineVariables{5};
|
| | 208 | SNP_countC = lineVariables{6};
|
6.68 | 4716127 | 209 | else
|
138.22 | 4716127 | 210 | lineVariables = textscan(dataLine, '%s %d %s %d %d %d %d');
|
30.24 | 4716127 | 211 | SNP_chr_name = lineVariables{1}{1};
|
13.28 | 4716127 | 212 | SNP_coordinate = lineVariables{2};
|
18.83 | 4716127 | 213 | SNP_reference = lineVariables{3}{1};
|
12.68 | 4716127 | 214 | SNP_countA = lineVariables{4};
|
12.20 | 4716127 | 215 | SNP_countT = lineVariables{5};
|
12.38 | 4716127 | 216 | SNP_countG = lineVariables{6};
|
12.25 | 4716127 | 217 | SNP_countC = lineVariables{7};
|
12.55 | 4716127 | 218 | end;
|
21.61 | 4716127 | 219 | chr_num = strcmp(SNP_chr_name, chr_name);
|
| | 220 | % run only if the chromosome was found, meaning at least 1 cell is non-zero (inherently ignores ###
|
| | 221 | % lines that are used for comments)
|
32.19 | 4716127 | 222 | if (any(chr_num) > 0)
|
17.51 | 4676452 | 223 | count = count+1;
|
14.16 | 4676452 | 224 | if (~isequal(old_chr,chr_num))
|
| 8 | 225 | fprintf(['\n\t|\t' SNP_chr_name '\n\t|\t' gap_string]);
|
| 8 | 226 | end;
|
6.76 | 4676452 | 227 | if (mod(count,300) == 0)
|
1.39 | 15588 | 228 | fprintf('.');
|
0.79 | 15588 | 229 | gap_string = [gap_string ' '];
|
0.25 | 15588 | 230 | end;
|
5.80 | 4676452 | 231 | if (count == 24000)
|
0.02 | 194 | 232 | fprintf('\n\t|\t');
|
| 194 | 233 | count = 0;
|
| 194 | 234 | gap_string = '';
|
| 194 | 235 | end;
|
10.22 | 4676452 | 236 | count_vector = [SNP_countA SNP_countT SNP_countG SNP_countC];
|
29.49 | 4676452 | 237 | chr_lines_analyzed(chr_num) = chr_lines_analyzed(chr_num)+1;
|
26.72 | 4676452 | 238 | chr_SNP_data_positions{chr_num}(chr_lines_analyzed(chr_num)) = SNP_coordinate;
|
57.95 | 4676452 | 239 | chr_SNP_data_ratios {chr_num}(chr_lines_analyzed(chr_num)) = max(count_vector)/sum(count_vector);
|
29.29 | 4676452 | 240 | chr_count {chr_num}(chr_lines_analyzed(chr_num)) = sum(count_vector);
|
12.59 | 4676452 | 241 | old_chr = chr_num;
|
7.01 | 4676452 | 242 | end;
|
8.60 | 4716127 | 243 | end;
|
| | 244 | % read next line
|
187.08 | 4716127 | 245 | dataLine = fgetl(data);
|
21.38 | 4716127 | 246 | end;
|
| 1 | 247 | fclose(data);
|
| | 248 |
|
| | 249 | %%================================================================================================
|
| | 250 | % Clean up data vectors.
|
| | 251 | %-------------------------------------------------------------------------------------------------
|
| 1 | 252 | fprintf('\n\t|\tClean up data vectors.\n');
|
| 1 | 253 | for chrID = 1:length(chr_size)
|
| 9 | 254 | if (chr_in_use(chrID) == 1)
|
0.10 | 8 | 255 | chr_SNP_data_ratios{ chrID}(chr_SNP_data_positions{chrID} == 0) = [];
|
0.09 | 8 | 256 | chr_count{ chrID}(chr_SNP_data_positions{chrID} == 0) = [];
|
0.08 | 8 | 257 | chr_SNP_data_positions{chrID}(chr_SNP_data_positions{chrID} == 0) = [];
|
0.05 | 8 | 258 | chr_SNP_data_ratios{ chrID}(chr_count{chrID} <= 20) = [];
|
0.02 | 8 | 259 | chr_SNP_data_positions{chrID}(chr_count{chrID} <= 20) = [];
|
0.02 | 8 | 260 | chr_count{ chrID}(chr_count{chrID} <= 20) = [];
|
| 8 | 261 | end;
|
| 9 | 262 | end;
|
| | 263 |
|
| | 264 |
|
| | 265 | %%================================================================================================
|
| | 266 | % Save processed SNP/LOH data file.
|
| | 267 | %-------------------------------------------------------------------------------------------------
|
| | 268 | % chr_SNP_data_ratios : allelic ratios of SNP data.
|
| | 269 | % chr_SNP_data_positions : coordinates of SNP data.
|
| | 270 | % chr_count : total read count (A+T+G+C) at each SNP coordinate.
|
| | 271 | %
|
| 1 | 272 | fprintf('\t|\tSave processed SNP/LOH data to file "SNP_v4.all1.mat" for project.\n');
|
1.52 | 1 | 273 | save([projectDir 'SNP_' SNP_verString '.all1.mat'],'chr_SNP_data_ratios','chr_SNP_data_positions','chr_count');
|
| | 274 |
|
| | 275 |
|
| | 276 | %%================================================================================================
|
| | 277 | % Setup basic figure parameters.
|
| | 278 | %-------------------------------------------------------------------------------------------------
|
| 1 | 279 | fprintf('\t|\tDefine basic figure parameters, not specific to genome.\n');
|
| | 280 | % basic plot parameters not defined per genome.
|
| 1 | 281 | TickSize = 0; % -0.005; %negative for outside, percentage of longest chr figure.
|
| 1 | 282 | bases_per_bin = max(chr_size)/700;
|
| 1 | 283 | maxY = 50; % number of Y-bins in 2D smoothed histogram.
|
| 1 | 284 | cen_tel_Xindent = 5;
|
| 1 | 285 | cen_tel_Yindent = maxY/5;
|
| 1 | 286 | largestChr = find(chr_width == max(chr_width));
|
| 1 | 287 | largestChr = largestChr(1);
|
| | 288 |
|
| | 289 |
|
| | 290 | %%================================================================================================
|
| | 291 | % Setup for main-view figure generation.
|
| | 292 | %-------------------------------------------------------------------------------------------------
|
| | 293 | % load size definitions
|
| 1 | 294 | [linear_fig_height,linear_fig_width,Linear_left_start,Linear_chr_gap,Linear_Chr_max_width,Linear_height...
|
| | 295 | ,Linear_base,rotate,linear_chr_font_size,linear_axis_font_size,linear_gca_font_size,stacked_fig_height,...
|
| | 296 | stacked_fig_width,stacked_chr_font_size,stacked_title_size,stacked_axis_font_size,...
|
| | 297 | gca_stacked_font_size,stacked_copy_font_size,max_chrom_label_size] = Load_size_info(chr_in_use,num_chrs,chr_label,chr_size);
|
| | 298 |
|
| 1 | 299 | fprintf('\t|\tInitialize main figure.\n');
|
0.04 | 1 | 300 | fig = figure(1);
|
| | 301 |
|
| | 302 | %%================================================================================================
|
| | 303 | % Setup for linear-view figure generation.
|
| | 304 | %-------------------------------------------------------------------------------------------------
|
| 1 | 305 | if (Linear_display == true)
|
| 1 | 306 | fprintf('\t|\tInitialize linear figure.\n');
|
0.04 | 1 | 307 | Linear_fig = figure(2);
|
| 1 | 308 | Linear_genome_size = sum(chr_size);
|
| 1 | 309 | Linear_TickSize = -0.01; % negative for outside, percentage of longest chr figure.
|
| 1 | 310 | maxY = 50; % number of Y-bins in 2D smoothed histogram.
|
| 1 | 311 | Linear_left = Linear_left_start;
|
| 1 | 312 | axisLabelPosition_horiz = 0.01125;
|
| 1 | 313 | end;
|
| 1 | 314 | axisLabelPosition_vert = 0.01125;
|
| | 315 |
|
| | 316 |
|
| | 317 | %%================================================================================================
|
| | 318 | % Make figures
|
| | 319 | %-------------------------------------------------------------------------------------------------
|
| 1 | 320 | first_chr = true;
|
| | 321 |
|
| | 322 | %% Determine statistics of data density across entire genome.
|
| 1 | 323 | fprintf('\t|\tDetermine statistics of data density for chromosomes.\n');
|
| 1 | 324 | all_data = [];
|
| 1 | 325 | chr_mean = zeros(1,num_chrs);
|
| 1 | 326 | chr_mean_scaler = zeros(1,num_chrs);
|
| 1 | 327 | for chr = 1:num_chrs
|
| 9 | 328 | if (chr_in_use(chr) == 1)
|
| 8 | 329 | chr_length = ceil(chr_size(chr)/bases_per_bin);
|
0.10 | 8 | 330 | dataX = ceil(chr_SNP_data_positions{chr}/bases_per_bin)';
|
| 8 | 331 | dataY1 = chr_SNP_data_ratios{chr};
|
| 8 | 332 | dataY2 = (dataY1*maxY)';
|
| 8 | 333 | dataX_CNVcorrection = ones(1,chr_length);
|
| 8 | 334 | if (length(dataX) > 0)
|
| | 335 | % 2D smoothed histogram with correction term.
|
2.19 | 8 | 336 | [imageX{chr},imageY{chr},imageC{chr}, imageD{chr}] = smoothhist2D_4_Xcorrected([dataX dataX 0 chr_length], [dataY2 (maxY-dataY2) 0 0], 0.5,[chr_length maxY],[chr_length maxY], dataX_CNVcorrection, 1,1);
|
| 8 | 337 | all_data = [all_data imageD{chr}];
|
0.01 | 8 | 338 | end;
|
| 8 | 339 | if (length(dataX) > 0)
|
| 8 | 340 | chr_mean(chr) = mean(imageD{chr}(:));
|
| | 341 | else
|
| | 342 | chr_mean(chr) = 0;
|
| | 343 | end;
|
| 8 | 344 | fprintf(['\t|\t\tChr' num2str(chr) ' smoothhist2D average value = ' num2str(chr_mean) '\n']);
|
| 8 | 345 | end;
|
0.01 | 9 | 346 | end;
|
| 1 | 347 | max_mean = max(chr_mean);
|
| 1 | 348 | for chr = 1:num_chrs
|
| 9 | 349 | if (chr_in_use(chr) == 1 && chr_mean(chr) ~= 0)
|
| 8 | 350 | chr_mean_scaler(chr) = max_mean/chr_mean(chr);
|
| 1 | 351 | else
|
| 1 | 352 | chr_mean_scaler(chr) = 0;
|
| 1 | 353 | end;
|
| 9 | 354 | end;
|
0.01 | 1 | 355 | median_val = median(all_data(:));
|
| 1 | 356 | mean_val = mean(all_data(:));
|
0.01 | 1 | 357 | mode_val = mode(all_data(:));
|
| 1 | 358 | min_val = min(all_data(:));
|
| 1 | 359 | max_val = max(all_data(:));
|
| | 360 |
|
| | 361 |
|
| | 362 | %% Generate chromosome figures.
|
| 1 | 363 | fprintf('\t|\tGenerate final chromosome figures.\n');
|
| 1 | 364 | for chr = 1:num_chrs
|
| 9 | 365 | if (chr_in_use(chr) == 1)
|
| 8 | 366 | figure(fig);
|
| | 367 |
|
| | 368 | % make standard chr cartoons.
|
| 8 | 369 | left = chr_posX(chr);
|
| 8 | 370 | bottom = chr_posY(chr);
|
| 8 | 371 | width = chr_width(chr);
|
| 8 | 372 | height = chr_height(chr);
|
0.04 | 8 | 373 | subplot('Position',[left bottom width height]);
|
| 8 | 374 | hold on;
|
| | 375 |
|
| | 376 | % standard : axes labels etc.
|
| 8 | 377 | xlim([0,chr_size(chr)/bases_per_bin]);
|
| | 378 |
|
| | 379 | %% standard : modify y axis limits to show annotation locations if any are provided.
|
| 8 | 380 | if (length(annotations) > 0)
|
| | 381 | ylim([-maxY/10*1.5,maxY]);
|
| 8 | 382 | else
|
| 8 | 383 | ylim([0,maxY]);
|
| 8 | 384 | end;
|
| 8 | 385 | set(gca,'YTick',[]);
|
| 8 | 386 | set(gca,'YTickLabel',[]);
|
| 8 | 387 | set(gca,'TickLength',[(TickSize*chr_size(largestChr)/chr_size(chr)) 0]); %ensures same tick size on all subfigs.
|
| 8 | 388 | text(-50000/5000/2*3, maxY/2, chr_label{chr}, 'Rotation',90, 'HorizontalAlignment','center', 'VerticalAlign','bottom', 'Fontsize',stacked_chr_font_size);
|
| 8 | 389 | set(gca,'XTick',0:(40*(5000/bases_per_bin)):(650*(5000/bases_per_bin)));
|
| 8 | 390 | set(gca,'XTickLabel',{'0.0','0.2','0.4','0.6','0.8','1.0','1.2','1.4','1.6','1.8','2.0','2.2','2.4','2.6','2.8','3.0','3.2'});
|
| 8 | 391 | text(axisLabelPosition_vert, maxY/4*0, '0' ,'HorizontalAlignment','right','Fontsize',stacked_axis_font_size);
|
| 8 | 392 | text(axisLabelPosition_vert, maxY/4*1, '1/4','HorizontalAlignment','right','Fontsize',stacked_axis_font_size);
|
0.01 | 8 | 393 | text(axisLabelPosition_vert, maxY/4*2, '1/2','HorizontalAlignment','right','Fontsize',stacked_axis_font_size);
|
| 8 | 394 | text(axisLabelPosition_vert, maxY/4*3, '3/4','HorizontalAlignment','right','Fontsize',stacked_axis_font_size);
|
0.01 | 8 | 395 | text(axisLabelPosition_vert, maxY/4*4, '1' ,'HorizontalAlignment','right','Fontsize',stacked_axis_font_size);
|
| | 396 |
|
| 8 | 397 | set(gca,'FontSize',gca_stacked_font_size);
|
| 8 | 398 | if (chr == find(chr_posY == max(chr_posY)))
|
0.01 | 1 | 399 | title([ project ' allelic fraction map'],'Interpreter','none','FontSize',stacked_title_size);
|
| 1 | 400 | end;
|
| | 401 | % standard : end axes labels etc.
|
| | 402 |
|
| | 403 | % standard : show allelic ratio data.
|
| 8 | 404 | chr_length = ceil(chr_size(chr)/bases_per_bin);
|
0.04 | 8 | 405 | dataX = ceil(chr_SNP_data_positions{chr}/bases_per_bin)';
|
| 8 | 406 | dataY1 = chr_SNP_data_ratios{chr};
|
0.04 | 8 | 407 | dataY2 = (dataY1*maxY)';
|
0.01 | 8 | 408 | dataX_CNVcorrection = ones(1,chr_length);;
|
| | 409 |
|
| 8 | 410 | if (length(dataX) > 0)
|
| | 411 | % 2D smoothed histogram with correction term.
|
| 8 | 412 | fprintf(['\t|\t\tGenerating chr' num2str(chr) ' final smoothed 2D histogram.\n']);
|
2.11 | 8 | 413 | [imageX{chr},imageY{chr},imageC{chr}, discard] = smoothhist2D_4_Xcorrected([dataX dataX 0 chr_length], [dataY2 (maxY-dataY2) 0 0], 0.5,[chr_length maxY],[chr_length maxY], dataX_CNVcorrection, mean_val, chr_mean_scaler(chr));
|
| | 414 |
|
| 8 | 415 | fprintf('\t|\t\t\tDe-emphasizing near-homozygous data.\n');
|
| | 416 | % Image correction method to de-emphasize the near homozygous data points.
|
| | 417 | % The square factor correction was determined empirically, from the relative amounts of data near homozygous and heterozygous.
|
| | 418 | % Improvements in sequencing technology that reduce sequencing error and reduce near-homozygous data will require adjusting this.
|
| 8 | 419 | imageC_correction = imageC{chr}*0;
|
| 8 | 420 | for y = 1:maxY
|
0.01 | 400 | 421 | imageC_correction(y,:) = 1-abs(y-maxY/2)/(maxY/2);
|
| 400 | 422 | end;
|
| 8 | 423 | imageC{chr} = imageC{chr}.*(1+imageC_correction.^2*16);
|
| | 424 |
|
0.01 | 8 | 425 | fprintf('\t|\t\t\tDrawing histogram to figure.\n');
|
0.03 | 8 | 426 | image(imageX{chr}, imageY{chr}, imageC{chr});
|
| 8 | 427 | end;
|
| | 428 | % standard : end show allelic ratio data.
|
| | 429 |
|
| | 430 | % standard : show ChARM breakpoints.
|
| 8 | 431 | if (displayBREAKS == true) && (show_annotations == true)
|
| 8 | 432 | fprintf('\t|\t\t\tShow ChARM breakpoints.\n');
|
| 8 | 433 | chr_length = ceil(chr_size(chr)/bases_per_bin);
|
| 8 | 434 | for segment = 2:length(chr_breaks{chr})-1
|
| 6 | 435 | bP = chr_breaks{chr}(segment)*chr_length;
|
0.03 | 6 | 436 | plot([bP bP], [(-maxY/10*2.5) 0], 'Color',[1 0 0],'LineWidth',2);
|
| 6 | 437 | end;
|
| 8 | 438 | end;
|
| | 439 |
|
| | 440 | % standard : show centromere outlines and horizontal marks.
|
| 8 | 441 | fprintf('\t|\t\t\tDraw centromere and horizontal lines.\n');
|
| 8 | 442 | x1 = cen_start(chr)/bases_per_bin;
|
| 8 | 443 | x2 = cen_end(chr)/bases_per_bin;
|
| 8 | 444 | leftEnd = 0.5*5000/bases_per_bin;
|
| 8 | 445 | rightEnd = (chr_size(chr) - 0.5*5000)/bases_per_bin;
|
| 8 | 446 | if (Centromere_format == 0)
|
| | 447 | % standard chromosome cartoons in a way which will not cause segfaults when running via commandline.
|
| 8 | 448 | dx = cen_tel_Xindent; %5*5000/bases_per_bin;
|
| 8 | 449 | dy = cen_tel_Yindent; %maxY/10;
|
| | 450 | % draw white triangles at corners and centromere locations.
|
| | 451 | % top left corner.
|
| 8 | 452 | c_ = [1.0 1.0 1.0];
|
| 8 | 453 | x_ = [leftEnd leftEnd leftEnd+dx];
|
| 8 | 454 | y_ = [maxY-dy maxY maxY ];
|
0.03 | 8 | 455 | f = fill(x_,y_,c_);
|
| 8 | 456 | set(f,'linestyle','none');
|
| | 457 | % bottom left corner.
|
| 8 | 458 | x_ = [leftEnd leftEnd leftEnd+dx];
|
| 8 | 459 | y_ = [dy 0 0 ];
|
| 8 | 460 | f = fill(x_,y_,c_);
|
| 8 | 461 | set(f,'linestyle','none');
|
| | 462 | % top right corner.
|
| 8 | 463 | x_ = [rightEnd rightEnd rightEnd-dx];
|
| 8 | 464 | y_ = [maxY-dy maxY maxY ];
|
0.05 | 8 | 465 | f = fill(x_,y_,c_);
|
0.01 | 8 | 466 | set(f,'linestyle','none');
|
| | 467 | % bottom right corner.
|
| 8 | 468 | x_ = [rightEnd rightEnd rightEnd-dx];
|
| 8 | 469 | y_ = [dy 0 0 ];
|
| 8 | 470 | f = fill(x_,y_,c_);
|
| 8 | 471 | set(f,'linestyle','none');
|
| | 472 | % top centromere.
|
| 8 | 473 | x_ = [x1-dx x1 x2 x2+dx];
|
| 8 | 474 | y_ = [maxY maxY-dy maxY-dy maxY];
|
| 8 | 475 | f = fill(x_,y_,c_);
|
| 8 | 476 | set(f,'linestyle','none');
|
| | 477 | % bottom centromere.
|
| 8 | 478 | x_ = [x1-dx x1 x2 x2+dx];
|
| 8 | 479 | y_ = [0 dy dy 0 ];
|
| 8 | 480 | f = fill(x_,y_,c_);
|
| 8 | 481 | set(f,'linestyle','none');
|
| | 482 | % draw outlines of chromosome cartoon (drawn after horizontal lines so that cartoon edges are not interrupted by horiz lines).
|
| 8 | 483 | plot([leftEnd leftEnd leftEnd+dx x1-dx x1 x2 x2+dx rightEnd-dx rightEnd rightEnd rightEnd-dx x2+dx x2 x1 x1-dx leftEnd+dx leftEnd],...
|
| | 484 | [dy maxY-dy maxY maxY maxY-dy maxY-dy maxY maxY maxY-dy dy 0 0 dy dy 0 0 dy ],...
|
| | 485 | 'Color',[0 0 0]);
|
| 8 | 486 | end;
|
| | 487 | % standard : end show centromere.
|
| | 488 |
|
| | 489 | %% standard : show annotation locations
|
| 8 | 490 | fprintf('\t|\t\t\tShow annotation locations.\n');
|
| 8 | 491 | if (show_annotations) && (length(annotations) > 0)
|
| | 492 | plot([leftEnd rightEnd], [-maxY/10*1.5 -maxY/10*1.5],'color',[0 0 0]);
|
| | 493 | annotation_location = (annotation_start+annotation_end)./2;
|
| | 494 | for i = 1:length(annotation_location)
|
| | 495 | if (annotation_chr(i) == chr)
|
| | 496 | annotationloc = annotation_location(i)/bases_per_bin-0.5*(5000/bases_per_bin);
|
| | 497 | annotationStart = annotation_start(i)/bases_per_bin-0.5*(5000/bases_per_bin);
|
| | 498 | annotationEnd = annotation_end(i)/bases_per_bin-0.5*(5000/bases_per_bin);
|
| | 499 | if (strcmp(annotation_type{i},'dot') == 1)
|
| | 500 | plot(annotationloc,-maxY/10*1.5,'k:o','MarkerEdgeColor',annotation_edgecolor{i}, ...
|
| | 501 | 'MarkerFaceColor',annotation_fillcolor{i}, ...
|
| | 502 | 'MarkerSize', annotation_size(i));
|
| | 503 | elseif (strcmp(annotation_type{i},'block') == 1)
|
| | 504 | fill([annotationStart annotationStart annotationEnd annotationEnd], ...
|
| | 505 | [-maxY/10*(1.5+0.75) -maxY/10*(1.5-0.75) -maxY/10*(1.5-0.75) -maxY/10*(1.5+0.75)], ...
|
| | 506 | annotation_fillcolor{i},'EdgeColor',annotation_edgecolor{i});
|
| | 507 | end;
|
| | 508 | end;
|
| | 509 | end;
|
| | 510 | end;
|
| | 511 | % standard : end show annotation locations.
|
0.02 | 8 | 512 | hold off;
|
| | 513 |
|
| | 514 | %% Linear figure draw section
|
| 8 | 515 | if (Linear_display == true)
|
| 8 | 516 | figure(Linear_fig);
|
| 8 | 517 | Linear_width = Linear_Chr_max_width*chr_size(chr)/Linear_genome_size;
|
0.06 | 8 | 518 | subplot('Position',[Linear_left Linear_base Linear_width Linear_height]);
|
| 8 | 519 | hold on;
|
| 8 | 520 | Linear_left = Linear_left + Linear_width + Linear_chr_gap;
|
| | 521 |
|
| | 522 | % linear : show segmental aneuploidy breakpoints.
|
| 8 | 523 | if (Linear_displayBREAKS == true) && (show_annotations == true)
|
| | 524 | fprintf('\t|\t\t\tShow ChARM breakpoints on linear figure.\n');
|
| | 525 | chr_length = ceil(chr_size(chr)/bases_per_bin);
|
| | 526 | for segment = 2:length(chr_breaks{chr})-1
|
| | 527 | bP = chr_breaks{chr}(segment)*chr_length;
|
| | 528 | plot([bP bP], [(-maxY/10*2.5) 0], 'Color',[1 0 0],'LineWidth',2);
|
| | 529 | end;
|
| | 530 | end;
|
| | 531 |
|
| | 532 | % linear : show allelic ratio data as 2D-smoothed scatter-plot.
|
| 8 | 533 | fprintf('\t|\t\t\tDraw 2D smoothed histogram of allelic ratio data in linear figure.\n');
|
| | 534 | % display only if processing succeeded (variables will not be
|
| | 535 | % present if the data is zero)
|
| 8 | 536 | if (exist('imageX') && exist('imageY') && exist('imageC'))
|
0.02 | 8 | 537 | image(imageX{chr}, imageY{chr}, imageC{chr});
|
| 8 | 538 | end;
|
| | 539 | % linear : end show allelic ratio data.
|
| | 540 |
|
| | 541 | % linear : show centromere.
|
| 8 | 542 | fprintf('\t|\t\t\tDraw centromere in linear figure.\n');
|
| 8 | 543 | x1 = cen_start(chr)/bases_per_bin;
|
| 8 | 544 | x2 = cen_end(chr)/bases_per_bin;
|
| 8 | 545 | leftEnd = 0.5*5000/bases_per_bin;
|
| 8 | 546 | rightEnd = (chr_size(chr) - 0.5*5000)/bases_per_bin;
|
| 8 | 547 | if (Centromere_format == 0)
|
| | 548 | % standard chromosome cartoons in a way which will not cause segfaults when running via commandline.
|
| 8 | 549 | dx = cen_tel_Xindent; %5*5000/bases_per_bin;
|
| 8 | 550 | dy = cen_tel_Yindent; %maxY/10;
|
| | 551 | % draw white triangles at corners and centromere locations.
|
| 8 | 552 | c_ = [1.0 1.0 1.0];
|
| | 553 | % top left corner.
|
0.02 | 8 | 554 | x_ = [leftEnd leftEnd leftEnd+dx]; y_ = [maxY-dy maxY maxY ]; f = fill(x_,y_,c_); set(f,'linestyle','none');
|
| | 555 | % bottom left corner.
|
0.03 | 8 | 556 | x_ = [leftEnd leftEnd leftEnd+dx]; y_ = [dy 0 0 ]; f = fill(x_,y_,c_); set(f,'linestyle','none');
|
| | 557 | % top right corner.
|
0.03 | 8 | 558 | x_ = [rightEnd rightEnd rightEnd-dx]; y_ = [maxY-dy maxY maxY ]; f = fill(x_,y_,c_); set(f,'linestyle','none');
|
| | 559 | % bottom right corner.
|
0.01 | 8 | 560 | x_ = [rightEnd rightEnd rightEnd-dx]; y_ = [dy 0 0 ]; f = fill(x_,y_,c_); set(f,'linestyle','none');
|
| | 561 | % top centromere.
|
0.03 | 8 | 562 | x_ = [x1-dx x1 x2 x2+dx]; y_ = [maxY maxY-dy maxY-dy maxY]; f = fill(x_,y_,c_); set(f,'linestyle','none');
|
| | 563 | % bottom centromere.
|
| 8 | 564 | x_ = [x1-dx x1 x2 x2+dx]; y_ = [0 dy dy 0 ]; f = fill(x_,y_,c_); set(f,'linestyle','none');
|
| | 565 | % draw outlines of chromosome cartoon (drawn after horizontal lines so that cartoon edges are not interrupted by horiz lines).
|
0.01 | 8 | 566 | plot([leftEnd leftEnd leftEnd+dx x1-dx x1 x2 x2+dx rightEnd-dx rightEnd rightEnd rightEnd-dx x2+dx x2 x1 x1-dx leftEnd+dx leftEnd],...
|
| | 567 | [dy maxY-dy maxY maxY maxY-dy maxY-dy maxY maxY maxY-dy dy 0 0 dy dy 0 0 dy],...
|
| | 568 | 'Color',[0 0 0]);
|
| 8 | 569 | end;
|
| | 570 | % linear : end show centromere.
|
| | 571 |
|
| | 572 | % linear : show annotation locations.
|
| 8 | 573 | if (show_annotations) && (length(annotations) > 0)
|
| | 574 | fprintf('\t|\t\t\tShow annotation locations in linear figure.\n');
|
| | 575 | plot([leftEnd rightEnd], [-maxY/10*1.5 -maxY/10*1.5],'color',[0 0 0]);
|
| | 576 | annotation_location = (annotation_start+annotation_end)./2;
|
| | 577 | for i = 1:length(annotation_location)
|
| | 578 | if (annotation_chr(i) == chr)
|
| | 579 | annotationloc = annotation_location(i)/bases_per_bin-0.5*(5000/bases_per_bin);
|
| | 580 | annotationStart = annotation_start(i)/bases_per_bin-0.5*(5000/bases_per_bin);
|
| | 581 | annotationEnd = annotation_end(i)/bases_per_bin-0.5*(5000/bases_per_bin);
|
| | 582 | if (strcmp(annotation_type{i},'dot') == 1)
|
| | 583 | plot(annotationloc,-maxY/10*1.5,'k:o','MarkerEdgeColor',annotation_edgecolor{i}, ...
|
| | 584 | 'MarkerFaceColor',annotation_fillcolor{i}, ...
|
| | 585 | 'MarkerSize', annotation_size(i));
|
| | 586 | elseif (strcmp(annotation_type{i},'block') == 1)
|
| | 587 | fill([annotationStart annotationStart annotationEnd annotationEnd], ...
|
| | 588 | [-maxY/10*(1.5+0.75) -maxY/10*(1.5-0.75) -maxY/10*(1.5-0.75) -maxY/10*(1.5+0.75)], ...
|
| | 589 | annotation_fillcolor{i},'EdgeColor',annotation_edgecolor{i});
|
| | 590 | end;
|
| | 591 | end;
|
| | 592 | end;
|
| | 593 | end;
|
| | 594 | % linear : end show annotation locations.
|
| | 595 |
|
| | 596 | % linear : Final formatting stuff.
|
| 8 | 597 | xlim([0,chr_size(chr)/bases_per_bin]);
|
| | 598 | % modify y axis limits to show annotation locations if any are provided.
|
| 8 | 599 | if (length(annotations) > 0)
|
| | 600 | ylim([-maxY/10*1.5,maxY]);
|
| 8 | 601 | else
|
| 8 | 602 | ylim([0,maxY]);
|
| 8 | 603 | end;
|
| 8 | 604 | set(gca,'YTick',[]);
|
| 8 | 605 | set(gca,'YTickLabel',[]);
|
| 8 | 606 | set(gca,'TickLength',[(Linear_TickSize*chr_size(largestChr)/chr_size(chr)) 0]); %ensures same tick size on all subfigs.
|
| 8 | 607 | set(gca,'XTick',0:(40*(5000/bases_per_bin)):(650*(5000/bases_per_bin)));
|
| 8 | 608 | set(gca,'XTickLabel',[]);
|
| 8 | 609 | if (first_chr == true)
|
| | 610 | % This section sets the Y-axis labelling.
|
| 1 | 611 | text(axisLabelPosition_horiz, maxY/4*0, '0' ,'HorizontalAlignment','right','Fontsize',linear_axis_font_size);
|
| 1 | 612 | text(axisLabelPosition_horiz, maxY/4*1, '1/4','HorizontalAlignment','right','Fontsize',linear_axis_font_size);
|
| 1 | 613 | text(axisLabelPosition_horiz, maxY/4*2, '1/2','HorizontalAlignment','right','Fontsize',linear_axis_font_size);
|
| 1 | 614 | text(axisLabelPosition_horiz, maxY/4*3, '3/4','HorizontalAlignment','right','Fontsize',linear_axis_font_size);
|
0.01 | 1 | 615 | text(axisLabelPosition_horiz, maxY/4*4, '1' ,'HorizontalAlignment','right','Fontsize',linear_axis_font_size);
|
| 1 | 616 | end;
|
| 8 | 617 | set(gca,'FontSize',linear_gca_font_size);
|
| | 618 | % linear : end final reformatting.
|
| | 619 | % adding title in the middle of the cartoon
|
| | 620 | % note: the title is added at the end because, if it is placed earlier
|
| | 621 | % in the code, the plot function somehow changes the title position
|
| 8 | 622 | if (rotate == 0 && chr_size(chr) ~= 0 )
|
0.04 | 8 | 623 | title(chr_label{chr},'Interpreter','none','FontSize',linear_chr_font_size,'Rotation',rotate);
|
| | 624 | else
|
| | 625 | text((chr_size(chr)/bases_per_bin)/2,maxY+0.5,chr_label{chr},'Interpreter','none','FontSize',linear_chr_font_size,'Rotation',rotate);
|
| | 626 | end;
|
| | 627 |
|
0.01 | 8 | 628 | hold off;
|
| | 629 |
|
| | 630 | % shift back to main figure generation.
|
| 8 | 631 | figure(fig);
|
| 8 | 632 | first_chr = false;
|
| 8 | 633 | end;
|
| 8 | 634 | end;
|
| 9 | 635 | end;
|
| | 636 |
|
| | 637 | %% Save figures.
|
| | 638 | % commenting out stacked figure since it's not displayed, left for debugging
|
| | 639 | %{
|
| | 640 | set(fig,'PaperPosition',[0 0 stacked_fig_width stacked_fig_height]);
|
| | 641 | fprintf('\t|\tSaving standard figure in EPS format.\n');
|
| | 642 | saveas(fig, [projectDir 'fig.allelic_ratio-map.b1.eps'], 'epsc');
|
| | 643 | fprintf('\t|\tSaving standard figure in PNG format.\n');
|
| | 644 | saveas(fig, [projectDir 'fig.allelic_ratio-map.b1.png'], 'png');
|
| | 645 | %}
|
| 1 | 646 | set(Linear_fig,'PaperPosition',[0 0 linear_fig_width linear_fig_height]);
|
| 1 | 647 | fprintf('\t|\tSaving linear figure in EPS format.\n');
|
1.28 | 1 | 648 | saveas(Linear_fig, [projectDir 'fig.allelic_ratio-map.b2.eps'], 'epsc');
|
| 1 | 649 | fprintf('\t|\tSaving linear figure in PNG format.\n');
|
0.66 | 1 | 650 | saveas(Linear_fig, [projectDir 'fig.allelic_ratio-map.b2.png'], 'png');
|
| | 651 |
|
| | 652 | %% Delete figures from memory.
|
0.02 | 1 | 653 | delete(fig);
|
0.04 | 1 | 654 | delete(Linear_fig);
|
| | 655 |
|
| 1 | 656 | time_end = toc;
|
| 1 | 657 | fprintf('\t|\t%d min, %f sec.\n',floor(time_end/60),rem(time_end,60));
|
| 1 | 658 | fprintf('\t*---------------------------------------------------------------*\n');
|
| 1 | 659 | fprintf('\t| Fireplot generation in "allelic_ratios_WGseq.m" completed. |\n');
|
| 1 | 660 | fprintf('\t*===============================================================*\n');
|
| 1 | 661 | end
|