diff --git a/.DS_Store b/.DS_Store index 3b7d108..9ea4f74 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.README.md.swp b/.README.md.swp new file mode 100644 index 0000000..813deef Binary files /dev/null and b/.README.md.swp differ diff --git a/README.md b/README.md index 0e05a06..6be6395 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,17 @@ [![Downloads](https://img.shields.io/github/downloads/bcgsc/xmatchview/total?logo=github)](https://github.com/bcgsc/xmatchview/releases/download/v1.2.0/xmatchview_1-2.tar) [![Conda](https://img.shields.io/conda/dn/bioconda/xmatchview?label=Conda)](https://anaconda.org/bioconda/xmatchview) [![Issues](https://img.shields.io/github/issues/bcgsc/xmatchview.svg)](https://github.com/bcgsc/xmatchview/issues) -Thank you for your [![Stars](https://img.shields.io/github/stars/bcgsc/xmatchview.svg)](https://github.com/bcgsc/xmatchview/stargazers) ![Logo](https://github.com/warrenlr/xmatchview/blob/master/xmv-logo.png) # xmatchview ## Genome alignment visualization -## xmatchview v1.2.3 Rene L. Warren, 2005-2020 +## xmatchview v1.2.4 Rene L. Warren, 2005-2020 ## email: rwarren [at] bcgsc [dot] ca ### NAME
-   xmatchview.py, xmatchview-hive.py, xmatchview-conifer.py v1.2.3  April 2020
+   xmatchview.py, xmatchview-hive.py, xmatchview-conifer.py v1.2.4  April/October 2020
    xmatchview-hive.pl v1.2.2 February 2020
    xmatchview.py v1.2.0	October 2019
    xmatchview.py v1.1.1   December 2018
@@ -25,7 +24,7 @@ Thank you for your [![Stars](https://img.shields.io/github/stars/bcgsc/xmatchvie
    
### SYNOPSIS - xmatchview, xmatchview-conifer and xmatchview-hive are imaging tools for visualizing DNA sequence synteny. It allows users to align 2 (or 3) DNA sequences in FASTA format using cross_match, minimap2 or any aligners with .paf output capabilities, and displays the alignments in a variety of image formats (png, tiff). xmatchview-hive outputs xml-scalable vector graphics (svg) + xmatchview, xmatchview-conifer and xmatchview-hive are imaging tools for visualizing DNA/RNA sequence synteny. It allows users to align 2 (or 3) sequences in FASTA format using cross_match, minimap2 or any aligners with .paf output capabilities, and displays the alignments in a variety of image formats (png, tiff). xmatchview-hive outputs xml-scalable vector graphics (svg) ## xmatchview ![Logo](https://github.com/warrenlr/xmatchview/blob/master/xmv.png) @@ -74,7 +73,7 @@ I encourage the community to contribute to the development of this software, by Download the tar file and extract the files on your system using:
-tar -xvf xmatchview_1-2-3.tar 
+tar -xvf xmatchview_1-2-4.tar 
 
### DEPENDENCIES @@ -109,7 +108,7 @@ You will need to do the following before you can proceed: ### USAGE ---------------
-Usage: ['./xmatchview.py'] v1.2.3
+Usage: ['./xmatchview.py'] v1.2.4
 -x alignment file (cross_match .rep or Pairwise mApping Format .paf) 
 -s reference genome fasta file
 -q query contig/genome fasta file
@@ -124,7 +123,7 @@ Usage: ['./xmatchview.py'] v1.2.3
 -p full path to the directory with fonts on your system (please refer to the documentation for fonts used)
 * Files for the -s and -q options must correspond to fasta files used to run cross_match
 
-Usage: ['./xmatchview-conifer.py'] v1.2.3
+Usage: ['./xmatchview-conifer.py'] v1.2.4
 -x alignment file (cross_match .rep or Pairwise mApping Format .paf) 
 -s reference genome fasta file
 -q query contig/genome fasta file
@@ -139,7 +138,7 @@ Usage: ['./xmatchview-conifer.py'] v1.2.3
 -p full path to the directory with fonts on your system (please refer to the documentation for fonts used)
 * Files for the -s and -q options must correspond to fasta files used to run cross_match
 
-Usage: ['./xmatchview-hive.py'] v1.2.3
+Usage: ['./xmatchview-hive.py'] v1.2.4
 -x alignment file [1 vs. 2] (cross_match .rep or Pairwise mApping Format .paf)
 -y alignment file [1 vs. 3] (cross_match .rep or Pairwise mApping Format .paf)
 -z alignment file [3 vs. 2] (cross_match .rep or Pairwise mApping Format .paf)
@@ -310,6 +309,12 @@ Krzywinski M, Birol I, Jones S, Marra M (2011). Hive Plots — Rational Approach
 
+### WHAT'S NEW in v1.2.4 +------------------ +
+-Included error handling when scaling isn't sufficient to keep genomic features within plot bounds
+
+ ### WHAT'S NEW in v1.2.3 ------------------
diff --git a/tarballs/xmatchview_1-2-4.tar b/tarballs/xmatchview_1-2-4.tar
new file mode 100644
index 0000000..61b67c8
Binary files /dev/null and b/tarballs/xmatchview_1-2-4.tar differ
diff --git a/v1.2.4/README.md b/v1.2.4/README.md
new file mode 100755
index 0000000..6be6395
--- /dev/null
+++ b/v1.2.4/README.md
@@ -0,0 +1,382 @@
+[![Release](https://img.shields.io/github/release/bcgsc/xmatchview.svg)](https://github.com/bcgsc/xmatchview/releases)
+[![Downloads](https://img.shields.io/github/downloads/bcgsc/xmatchview/total?logo=github)](https://github.com/bcgsc/xmatchview/releases/download/v1.2.0/xmatchview_1-2.tar)
+[![Conda](https://img.shields.io/conda/dn/bioconda/xmatchview?label=Conda)](https://anaconda.org/bioconda/xmatchview)
+[![Issues](https://img.shields.io/github/issues/bcgsc/xmatchview.svg)](https://github.com/bcgsc/xmatchview/issues)
+
+![Logo](https://github.com/warrenlr/xmatchview/blob/master/xmv-logo.png)
+
+# xmatchview
+## Genome alignment visualization
+## xmatchview v1.2.4 Rene L. Warren, 2005-2020
+## email: rwarren [at] bcgsc [dot] ca
+
+### NAME
+   
+   xmatchview.py, xmatchview-hive.py, xmatchview-conifer.py v1.2.4  April/October 2020
+   xmatchview-hive.pl v1.2.2 February 2020
+   xmatchview.py v1.2.0	October 2019
+   xmatchview.py v1.1.1   December 2018
+   xmatchview.py v1.1   October 2018
+   xmatchview.py v1.0   January 2018 - Post JOSS review
+   xmatchview.py v0.3.3 January 2018
+   xmatchview.py v0.3   November 2017
+   XMatchView.py v0.2   March 2005/May 2005/January 2006
+   
+ +### SYNOPSIS + xmatchview, xmatchview-conifer and xmatchview-hive are imaging tools for visualizing DNA/RNA sequence synteny. They allow users to align 2 (or 3) sequences in FASTA format using cross_match, minimap2 or any aligners with .paf output capabilities, and display the alignments in a variety of image formats (png, tiff). xmatchview-hive outputs xml-scalable vector graphics (svg). + +## xmatchview +![Logo](https://github.com/warrenlr/xmatchview/blob/master/xmv.png) + +## xmatchview-conifer +![Logo](https://github.com/warrenlr/xmatchview/blob/master/xmv-c.png) + +## xmatchview-hive +![Logo](https://github.com/warrenlr/xmatchview/blob/master/xmv-h.png) + + xmatchview, xmatchview-conifer and xmatchview-hive are written in python and run on linux and windows. They serve as visual tools for analyzing cross_match and minimap2 alignments. Cross_match (Green, P. (1994) http://www.phrap.org) uses a sensitive implementation of the Smith-Waterman algorithm for comparing DNA sequences. + + Additional hive plot information available at: http://www.hiveplot.com/ + +### LICENSE PREAMBLE + Copyright (c) 2005-2020 Rene Warren, Canada's Michael Smith Genome Science Centre. All rights reserved. + xmatchview is a utility for comparing, visually, two DNA/RNA sequences + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +### IMPLEMENTATION +------------- + +xmatchview, xmatchview-conifer and xmatchview-hive are implemented in PYTHON and run on any OS where PYTHON is installed. + + +### COMMUNITY GUIDELINES +------------- + +I encourage the community to contribute to the development of this software, by providing suggestions for improving the code and/or directly contributing to the open source code for these tools. Users and developers may report software issues, bug fix requests, comments, etc., at <https://github.com/bcgsc/xmatchview/issues> + + +### INSTALL + +Download the tar file and extract the files on your system using: + +
+tar -xvf xmatchview_1-2-4.tar 
+
+ +### DEPENDENCIES +------------- + +
+xmatchview and xmatchview-conifer:
+
+You will need to do the following before you can proceed:
+      1) Download python from: http://www.python.org/  (The code was developed and tested on python2.3 or 2.4 but may work with newer versions of python (not tested))
+      2) Download the Python Imaging Library (PIL) from: http://www.pythonware.com/products/pil/
+      3) Either use PIL or true type fonts (ttf) and specify the full path to the font directory in xmatchview.py and xmatchview-conifer.py with the -p option. The fonts used by xmatchview and xmatchview-conifer include:
+
+        arial.ttf
+        arialbd.ttf
+        arialbi.ttf
+        -or-
+        helvR14.pil
+        helvR18.pil
+        helvB18.pil
+        helvBO18.pil
+        helvB24.pil
+        helvR24.pil
+        helvB24.pil
+
+        If font paths are not provided (not recommended), default fonts will be used to preserve code functionality. However, these system fonts are very small and of limited utility. To download the above fonts, simply search the internet for "arial.ttf", "arialbd.ttf" and "arialbi.ttf". For convenience, they are included in the "fonts.tar" file in this directory. The "helv*.pil" fonts are distributed with PIL.
+
+      4) Download cross_match for academic use, see http://www.phrap.org and http://www.phrap.org/consed/consed.html#howToGet
+      5) Make sure cross_match is in your $PATH
+
+ +### USAGE +--------------- +
+Usage: ['./xmatchview.py'] v1.2.4
+-x alignment file (cross_match .rep or Pairwise mApping Format .paf) 
+-s reference genome fasta file
+-q query contig/genome fasta file
+-e reference features (eg. exons) coordinates, GFF tsv file - optional
+-y query features (eg. exons) coordinates, GFF tsv file - optional
+-m mismatch threshold (e.g. -m 10 allows representation of repeats having up to 10% mismatch)
+-b length (bp) of similarity block to display
+-c scale (pixel to basepair scale, for displaying the image)
+-r leap (bp) to evaluate repeat frequency (smaller numbers will increase the resolution, but will affect drastically the run time.  recommended -r=50)
+-a alpha value, from 0 (transparent) to 255 (solid, default)
+-f output image file format (png, tiff, jpeg, or gif) NOTE: the png and tiff are better.
+-p full path to the directory with fonts on your system (please refer to the documentation for fonts used)
+* Files for the -s and -q options must correspond to fasta files used to run cross_match
+
+Usage: ['./xmatchview-conifer.py'] v1.2.4
+-x alignment file (cross_match .rep or Pairwise mApping Format .paf) 
+-s reference genome fasta file
+-q query contig/genome fasta file
+-e reference features (eg. exons) coordinates, GFF tsv file - optional
+-y query features (eg. exons) coordinates, GFF tsv file - optional
+-m maximum mismatch threshold (e.g. -m 10 allows representation of repeats having up to 10% mismatch)
+-b minimum length (bp) of similarity block to display
+-c scale (pixel to basepair scale, for displaying the image)
+-l label for the tree trunk (6 characters or less for best result)
+-a alpha value, from 0 (transparent) to 255 (solid, default)
+-f output image file format (png, tiff, jpeg, or gif) NOTE: the png and tiff are better.
+-p full path to the directory with fonts on your system (please refer to the documentation for fonts used)
+* Files for the -s and -q options must correspond to fasta files used to run cross_match
+
+Usage: ['./xmatchview-hive.py'] v1.2.4
+-x alignment file [1 vs. 2] (cross_match .rep or Pairwise mApping Format .paf)
+-y alignment file [1 vs. 3] (cross_match .rep or Pairwise mApping Format .paf)
+-z alignment file [3 vs. 2] (cross_match .rep or Pairwise mApping Format .paf)
+-q genome text file 1 (format NAME:LENGTH)
+-r genome text file 2 (format NAME:LENGTH)
+-s genome text file 3 (format NAME:LENGTH)
+-d features (eg. exons) coordinates GFF tsv file 1 (start end) - optional
+-e features (eg. exons) coordinates GFF tsv file 2 (start end) - optional
+-f features (eg. exons) coordinates GFF tsv file 3 (start end) - optional
+-i sequence identity threshold (e.g. -i 90 will show colinear blocks >= 90% sequence identity)
+-b minimum length (bp) of similarity block to display
+-c scale (pixel to basepair scale, for displaying the image)
+-a alpha value, from 0 (transparent) to 1 (solid, default)
+* Files for the -q, -r and -s options must include header_names:base_length, with names that correspond to those in fasta files used to run cross_match or minimap2
+
+! Ensure the config.txt file exists in your run directory
+
+Notes:
+
+xmatchview-hive; you must create a config.txt file with the following information:
+axis_number:name:length(bp)
+
+1:name1
+2:name2
+3:name3
+
+Keep names short (< 10 characters) + +An example config.txt is provided for your convenience in the "test-hive" folder (See TEST section below) +Axis labels are shown in the diagram below. Also, when running cross_match/minimap2, align in this order: +
+1 vs. 2
+1 vs. 3
+3 vs. 2
+
+  1
+  |
+ / \
+3   2
+
+
+The text files supplied in -q -r -s are simply:
+sequence name:base length
+
+The sequence name must correspond to those in the fasta files used to run cross_match or minimap2
+
+example perl-oneliner to convert:
+
+cat genome1.fa | perl -ne 'chomp;if(/^\>(\S+)/){ $head = $1;if($prev ne $head && $prev ne ""){print "$prev:$seq\n";$seq="";}  $prev=$head;}else{ $seq += length($_); }if(eof()){ print "$prev:$seq\n";  } ' > genome1.txt
+
+run as : -q genome1.txt
+
+
+ + +xmatchview and xmatchview-conifer; + +Files for the -s and -q options must correspond to fasta files used to run cross_match or minimap2. Those files should each contain a single, non-justified, sequence (no line breaks). + +fasta file format: + +>yourSequence +AATAGCAGCTACGACGACGCAGCGCGACGTTTCATCAA...AATACAGACGCGACGACGCAGCATCATCGAGAC + + +* A 10th column may be added to the GFF files supplied via -e/-y to specify the color of a feature (default feature color is yellow or black, for xmatchview and xmatchview-conifer, respectively). Users may specify any of these color names: yellow, blue, cyan, green, lime, red, sarin, forest, dirtyred, dirtyyellow, grey, lightgrey, orange, beige, black, white. Examples of .gff files are provided in the accompanied ./test folder + +
+Users can control whether to show the position of sequence features on the reference and query (*-e* and *-y* options), show co-linear blocks of a certain length (*-b* option) when their mismatch rates are below a threshold (*-m* option). The histogram is generated by moving a sliding window with a step length (*-r* recommended between 10-50). The color space in xmatchview is RGBA and the alpha channel is used for visualizing the relationship between co-linear blocks (*-a* option, transparent to solid, 0 to 255). + + +### RUNNING THE cross_match/xmatchview/xmatchview-conifer PIPELINES +------------- +Demo shell scripts that pipeline cross_match and xmatchview/xmatchview-conifer are included with the distribution and provided for guidance (runCompareTwoGenomesColinear.sh and runSpruceView.sh, respectively). + +Refer to: +
+./runCompareTwoGenomesColinear.sh 
+
+Usage: runCompareTwoGenomesColinear.sh
+ QUERY FASTA
+ REFERENCE/SUBJECT/TARGET FASTA
+ ALPHA TRANSPARENCY 0-255
+ MISMATCH THRESHOLD
+ BLOCK LENGTH (bp)
+ LEAP LENGTH (bp)
+ SCALE (1:n)
+ QUERY features GFF .tsv
+ REFERENCE features GFF .tsv
+ cross_match/minimap2
+ PATH-TO-FONTS
+
+and:
+
+./runSpruceView.sh 
+
+Usage: runSpruceView.sh
+ QUERY FASTA
+ REFERENCE/SUBJECT/TARGET FASTA
+ ALPHA TRANSPARENCY 0-255
+ MISMATCH THRESHOLD
+ BLOCK LENGTH (bp)
+ LABEL
+ SCALE (1:n)
+ QUERY features GFF .tsv
+ REFERENCE features GFF .tsv
+ cross_match/minimap2
+ PATH-TO-FONTS
+
+
+Examples on how to run:
+./runCompareTwoGenomesColinear.sh FTL1_pa.fa FTL1_ss.fa 200 99 100 1 2 FTL1_pa.gff FTL1_ss.gff cross_match ../../tarballs/fonts
+./runCompareTwoGenomesColinear.sh FTL1_pa.fa FTL1_ss.fa 200 99 100 1 2 FTL1_pa.gff FTL1_ss.gff minimap2 ../../tarballs/fonts
+
+./runSpruceView.sh FTL1_pa.fa FTL1_ss.fa 200 99 100 FTL1-test 2 FTL1_pa.gff FTL1_ss.gff cross_match ../../tarballs/fonts
+./runSpruceView.sh FTL1_pa.fa FTL1_ss.fa 200 99 100 FTL1-test 2 FTL1_pa.gff FTL1_ss.gff minimap2 ../../tarballs/fonts
+
+
+ + +### TEST xmatchview.py / xmatchview-conifer.py / xmatchview-hive.py +------------- +To test your xmatchview install on your system, we provide a test folder where you can run xmatchview and xmatchview-conifer, using the shell script commands below. If all goes well, and you used the arial fonts provided, the image you generate should be identical to: + +./test/xmv-FTL1_pa.fa_vs_FTL1_ss.fa.rep_m10_b10_r1_c2_success.png +./test/xmvconifer-FTL1_pa.fa_vs_FTL1_ss.fa.rep_m10_b10_c2_success.png + +Once you confirmed that it works as expected, you may explore the full range of parameters to test functionality (see USAGE above) + + +At the unix prompt, once the package is installed, change directory to: +
+cd ./test
+
+Once you have downloaded python and PIL and changed the paths to fonts in the xmatchview.py and xmatchview-conifer.py +Execute: +
+./runXMV.sh FTL1_pa.fa_vs_FTL1_ss.fa.rep FTL1_pa.fa FTL1_ss.fa 200 10 2 FTL1_pa.gff FTL1_ss.gff
+and
+./runXMV-conifer.sh FTL1_pa.fa_vs_FTL1_ss.fa.rep FTL1_pa.fa FTL1_ss.fa 200 10 2 FTL1_pa.gff FTL1_ss.gff
+
+ +To test xmatchview-hive: +
+cd ./test-hive
+# cross_match (.rep output)
+../xmatchview-hive.py -q 2019-nCoV.txt -r SARS-CoV.txt -s MERS-CoV.txt -x 2019-nCoV.fa_vs_SARS-CoV.fa.rep -y 2019-nCoV.fa_vs_MERS-CoV.fa.rep -z MERS-CoV.fa_vs_SARS-CoV.fa.rep -e SARScds.gff -i 0 -b 1 -c 30 -a 0.75
+
+ +If all went well, images such as those provided in the test folder should be generated + + +### CITING xmatchview/xmatchview-conifer/xmatchview-hive +------------- + +Thank you for your [![Stars](https://img.shields.io/github/stars/bcgsc/xmatchview.svg)](https://github.com/bcgsc/xmatchview/stargazers) and for using, developing and promoting this free software! + +If you use xmatchview/xmatchview-conifer/xmatchview-hive for your research, please cite: + +
+Warren, RL (2018). Visualizing genome synteny with xmatchview. Journal of Open Source Software, 3(21):497.
+
+[![link](https://img.shields.io/badge/xmatchview-manuscript-brightgreen)](https://doi.org/10.21105/joss.00497) + +Hive plots +
+Krzywinski M, Birol I, Jones S, Marra M (2011). Hive Plots — Rational Approach to Visualizing Networks. Briefings in Bioinformatics (early access 9 December 2011, doi: 10.1093/bib/bbr069)
+
+ + +### WHAT'S NEW in v1.2.4 +------------------ +
+-Included error handling when scaling isn't sufficient to keep genomic features within plot bounds
+
+ +### WHAT'S NEW in v1.2.3 +------------------ +
+-Code refactored for python3
+
+ +### WHAT'S NEW in v1.2.2 +------------------ +
+-Initial support for 3-way comparisons (hive plot) 
+
+ +### WHAT'S NEW in v1.2.0 +------------------ +
+-Bug fixes 
+-Aesthetic improvements to both xmatchview and xmatchview-conifer
+-Support for GFF files
+-Support for Multi-FASTA files
+
+ +### WHAT'S NEW in v1.1.1 +------------------ +
+-Bug fixes (will now return an error when the alignment files are empty [instead of plotting empty graphs])
+
+ +### WHAT'S NEW in v1.1 +------------------ +
+-Bug fixes (the forward synteny blocks were always printed, regardless of specified filters. This is fixed in both xmatchview and xmatchview-conifer v1.1)
+
+ +### WHAT'S NEW in v1.0 +------------------ +
+-Published (JOSS peer-review) version
+
+ +### WHAT'S NEW in v0.3.3 +------------------ +
+-Initial support for .paf (Pairwise mApping Format) alignment files.
+
+ +### WHAT'S NEW in v0.3.2 +------------------ +
+-Made options consistent between xmatchview.py and xmatchview-conifer.py
+-Included new option to specify font path (-p)
+-Added option to specify feature colors in the tsv files provided with the -e and -y options. A third column may be used to specify the color of a feature (default feature color is yellow or black, for xmatchview and xmatchview-conifer, respectively). Users may specify any of these color names: yellow, blue, cyan, green, lime, red, sarin, forest, dirtyred, dirtyyellow, grey, lightgrey, orange, beige, black, white.
+
+ +### WHAT'S NEW in v0.3 +------------------ +
+-Plot colinear blocks and sequence relationships with transparent color (alpha, supplied with -a)
+-Plot the position of exons on the reference and query DNA segments (-e and -y arguments, optional)
+-Plot the position of Ns in query and reference sequences
+-Bug fixes
+
+--- +Please find the v0.2 release in the corresponding subdirectory + +Questions/Comments: Rene Warren : rwarren at bcgsc dot ca diff --git a/v1.2.4/runCompareTwoGenomesColinear.sh b/v1.2.4/runCompareTwoGenomesColinear.sh new file mode 100755 index 0000000..ddd036b --- /dev/null +++ b/v1.2.4/runCompareTwoGenomesColinear.sh @@ -0,0 +1,43 @@ +#!/bin/bash +#RLW 2017,2019 +if [ $# -ne 11 ]; then + echo "Usage: $(basename $0)" + echo " QUERY FASTA" + echo " REFERENCE/SUBJECT/TARGET FASTA" + echo " ALPHA TRANSPARENCY 0-255" + echo " MISMATCH THRESHOLD" + echo " BLOCK LENGTH (bp)" + echo " LEAP LENGTH (bp)" + echo " SCALE (1:n)" + echo " QUERY features GFF .tsv" + echo " REFERENCE features GFF .tsv" + echo " cross_match/minimap2" + echo " PATH-TO-FONTS" + exit 1 +fi + +echo Running: $(basename $0) $1 $2 $3 $4 $5 $6 $7 $8 $9 ${10} ${11} + +# source PATH-TO-SOURCE (IF NEEDED) + +echo "Make sure xmatchview.py, cross_match and minimap2 are in your PATH" + +if [ ${10} == 'cross_match' ]; then + + # cross_match pipeline + cross_match $1 $2 -minmatch 29 -minscore 59 -masklevel 101 > $1_vs_$2.rep + xmatchview.py -x $1_vs_$2.rep -q $1 -s $2 -a $3 -m $4 -b $5 -r $6 -c $7 -f png -y $8 -e $9 -p ${11} + +elif [ ${10} == 'minimap2' ]; then + + # minimap pipeline + minimap2 $2 $1 -N200 -p0.0001 > $1_vs_$2.paf + xmatchview.py -x $1_vs_$2.paf -q $1 -s $2 -a $3 -m $4 -b $5 -r $6 -c $7 -f png -y $8 -e $9 -p ${11} + +else + + echo Unrecognizable option ${10} + echo Make sure you specify: cross_match OR minimap2 + +fi + diff --git a/v1.2.4/runSpruceView.sh b/v1.2.4/runSpruceView.sh new file mode 100755 index 0000000..6d61af8 --- /dev/null +++ b/v1.2.4/runSpruceView.sh @@ -0,0 +1,43 @@ +#!/bin/bash +#RLW 2017,2019 +if [ $# -ne 11 ]; then + echo "Usage: $(basename $0)" + echo " QUERY FASTA" + echo " REFERENCE/SUBJECT/TARGET FASTA" + echo " ALPHA TRANSPARENCY 0-255" + echo " MISMATCH THRESHOLD" + echo " BLOCK LENGTH (bp)" + echo " LABEL" + echo " SCALE (1:n)" + echo " QUERY features GFF .tsv" + 
echo " REFERENCE features GFF .tsv" + echo " cross_match/minimap2" + echo " PATH-TO-FONTS" + exit 1 +fi + +echo Running: $(basename $0) $1 $2 $3 $4 $5 $6 $7 $8 $9 ${10} ${11} + + +# source PATH-TO-SOURCE (IF NEEDED) + +echo "Make sure xmatchview-conifer.py, cross_match and minimap2 are in your PATH" + +if [ ${10} == 'cross_match' ]; then + + # cross_match pipeline + cross_match $1 $2 -minmatch 5 -minscore 10 -masklevel 101 > $1_vs_$2.rep + xmatchview-conifer.py -x $1_vs_$2.rep -q $1 -s $2 -a $3 -m $4 -b $5 -l $6 -c $7 -f png -y $8 -e $9 -p ${11} + +elif [ ${10} == 'minimap2' ]; then + + # minimap pipeline + minimap2 $2 $1 -N200 -p0.0001 > $1_vs_$2.paf + xmatchview-conifer.py -x $1_vs_$2.paf -q $1 -s $2 -a $3 -m $4 -b $5 -l $6 -c $7 -f png -y $8 -e $9 -p ${11} + +else + + echo Unrecognizable option ${10} + echo Make sure you specify: cross_match OR minimap2 + +fi diff --git a/v1.2.4/test-hive/2019-nCoV.fa_vs_MERS-CoV.fa.paf b/v1.2.4/test-hive/2019-nCoV.fa_vs_MERS-CoV.fa.paf new file mode 100644 index 0000000..b9f1450 --- /dev/null +++ b/v1.2.4/test-hive/2019-nCoV.fa_vs_MERS-CoV.fa.paf @@ -0,0 +1 @@ +2019-nCoV 30473 13996 20902 + MERS-CoV 30033 13926 20805 60 6906 16 tp:A:P cm:i:4 s1:i:54 s2:i:0 dv:f:0.3855 rl:i:0 diff --git a/v1.2.4/test-hive/2019-nCoV.fa_vs_MERS-CoV.fa.rep b/v1.2.4/test-hive/2019-nCoV.fa_vs_MERS-CoV.fa.rep new file mode 100644 index 0000000..191956a --- /dev/null +++ b/v1.2.4/test-hive/2019-nCoV.fa_vs_MERS-CoV.fa.rep @@ -0,0 +1,149 @@ +cross_match 2019-nCoV.fa MERS-CoV.fa -minmatch 5 -minscore 10 -masklevel 101 +cross_match version 1.080721 + +Run date:time 200126:080632 +Query file(s): 2019-nCoV.fa +Subject file(s): MERS-CoV.fa +Presumed sequence type (from score matrix): DNA + +Pairwise comparison algorithm: banded Smith-Waterman + +Score matrix (set by value of penalty: -2) + A C G T N X +A 1 -2 -2 -2 0 -3 +C -2 1 -2 -2 0 -3 +G -2 -2 1 -2 0 -3 +T -2 -2 -2 1 0 -3 +N 0 0 0 0 0 0 +X -3 -3 -3 -3 0 -3 + +Gap penalties: gap_init: -4, gap_ext: -3, 
ins_gap_ext: -3, del_gap_ext: -3, +Using complexity-adjusted scores. Assumed background frequencies: + Aa: 0.250 Cc: 0.250 Gg: 0.250 Tt: 0.250 Nn: 0.000 : 0.000 + +minmatch: 5, maxmatch: 20, max_group_size: 0 (turned off), minscore: 10, near_minscore: 10, bandwidth: 14, indexwordsize: 5, indexwordsize2: 4 +word_raw: 0 +vector_bound: 0 +gap1_minscore: 17, gap1_dropoff: -12 +masklevel: 101 (minmargin irrelevant) +splice_edge_length: 0, allocation: 1 bytes +min_intron_length: 30, max_intron_length: 10000, max_overlap: 20, min_exon_length: 6 + +Sequence file: 2019-nCoV.fa 1 entries +Residue counts: + A 9115 + C 5605 + G 5951 + T 9802 +Total 30473 residues + +4 distinct alphabetic chars have freq > 1% -- + +Allocated space: 60949 seqs, 11 ids, 2 descrips + +NO QUALITY FILE 2019-nCoV.fa.qual WAS FOUND. REMAINING INPUT QUALITIES SET TO 15. +Base_llr -5.55, intron_coeff: -0.00288828 +Num. pairs: 62 +Maximal single base matches (low complexity regions): + + 11 7.14 0.00 0.00 2019-nCoV 1958 1971 (28502) MERS-CoV 2031 2044 (27989) + 10 15.79 0.00 0.00 2019-nCoV 3627 3645 (26828) MERS-CoV 3854 3872 (26161) + 12 8.70 0.00 0.00 2019-nCoV 7626 7648 (22825) MERS-CoV 7505 7527 (22506) + 10 0.00 0.00 0.00 2019-nCoV 8012 8022 (22451) MERS-CoV 8104 8114 (21919) + 11 15.38 0.00 0.00 2019-nCoV 8045 8070 (22403) MERS-CoV 7936 7961 (22072) + 10 12.50 0.00 0.00 2019-nCoV 8264 8279 (22194) MERS-CoV 8158 8173 (21860) + 11 10.00 0.00 0.00 2019-nCoV 9618 9637 (20836) MERS-CoV 9542 9561 (20472) + 26 23.33 1.33 1.33 2019-nCoV 10019 10168 (20305) MERS-CoV 9943 10092 (19941) + 10 0.00 0.00 0.00 2019-nCoV 10224 10234 (20239) MERS-CoV 10148 10158 (19875) + 10 0.00 0.00 0.00 2019-nCoV 10329 10338 (20135) MERS-CoV 11615 11624 (18409) + 11 14.29 0.00 0.00 2019-nCoV 10410 10430 (20043) MERS-CoV 10343 10363 (19670) + 23 14.89 0.00 0.00 2019-nCoV 10472 10518 (19955) MERS-CoV 10405 10451 (19582) + 15 12.00 0.00 0.00 2019-nCoV 10544 10568 (19905) MERS-CoV 10477 10501 (19532) + 12 15.38 0.00 0.00 2019-nCoV 
10674 10699 (19774) MERS-CoV 10607 10632 (19401) + 15 5.56 0.00 0.00 2019-nCoV 10932 10949 (19524) MERS-CoV 10856 10873 (19160) + 10 12.50 0.00 0.00 2019-nCoV 11730 11745 (18728) MERS-CoV 11660 11675 (18358) + 17 25.35 0.00 0.00 2019-nCoV 11817 11887 (18586) MERS-CoV 11747 11817 (18216) + 27 23.53 0.00 0.00 2019-nCoV 12276 12377 (18096) MERS-CoV 12209 12310 (17723) + 10 0.00 0.00 6.67 2019-nCoV 12491 12505 (17968) MERS-CoV 12374 12387 (17646) + 12 11.11 0.00 0.00 2019-nCoV 12642 12659 (17814) MERS-CoV 12572 12589 (17444) + 13 18.75 0.00 0.00 2019-nCoV 12679 12710 (17763) MERS-CoV 12612 12643 (17390) + 12 0.00 0.00 0.00 2019-nCoV 13158 13172 (17301) MERS-CoV 13082 13096 (16937) + 14 20.97 1.61 1.61 2019-nCoV 13230 13291 (17182) MERS-CoV 13154 13215 (16818) + 11 14.29 0.00 0.00 2019-nCoV 13320 13340 (17133) MERS-CoV 13244 13264 (16769) + 11 6.25 0.00 0.00 2019-nCoV 13355 13370 (17103) MERS-CoV 13279 13294 (16739) + 20 14.29 0.00 0.00 2019-nCoV 13538 13572 (16901) MERS-CoV 13465 13499 (16534) + 12 14.29 0.00 0.00 2019-nCoV 13664 13684 (16789) MERS-CoV 13597 13617 (16416) + 12 26.23 0.00 0.00 2019-nCoV 13801 13861 (16612) MERS-CoV 13734 13794 (16239) + 13 11.54 3.85 0.00 2019-nCoV 13939 13964 (16509) MERS-CoV 13869 13895 (16138) + 10 0.00 0.00 0.00 2019-nCoV 13940 13952 (16521) MERS-CoV 14032 14044 (15989) + 40 21.74 0.00 0.72 2019-nCoV 13985 14122 (16351) MERS-CoV 13916 14052 (15981) + 173 28.11 0.96 0.96 2019-nCoV 14298 17943 (12530) MERS-CoV 14228 17873 (12160) + 14 12.50 0.00 0.00 2019-nCoV 18064 18087 (12386) MERS-CoV 17985 18008 (12025) + 11 17.39 0.00 0.00 2019-nCoV 18122 18144 (12329) MERS-CoV 18043 18065 (11968) + 22 28.39 0.00 0.00 2019-nCoV 18256 18410 (12063) MERS-CoV 18177 18331 (11702) + 10 16.67 0.00 0.00 2019-nCoV 18491 18514 (11959) MERS-CoV 18412 18435 (11598) + 15 14.81 0.00 0.00 2019-nCoV 18611 18637 (11836) MERS-CoV 18532 18558 (11475) + 17 24.36 0.00 0.00 2019-nCoV 18723 18800 (11673) MERS-CoV 18644 18721 (11312) + 16 14.29 0.00 0.00 2019-nCoV 
18871 18898 (11575) MERS-CoV 18792 18819 (11214) + 10 12.50 0.00 0.00 2019-nCoV 19049 19064 (11409) MERS-CoV 18970 18985 (11048) + 12 0.00 0.00 0.00 2019-nCoV 19108 19119 (11354) MERS-CoV 19029 19040 (10993) + 53 22.95 2.32 1.47 2019-nCoV 19199 19673 (10800) MERS-CoV 19108 19586 (10447) + 10 7.69 0.00 0.00 2019-nCoV 19692 19704 (10769) MERS-CoV 19604 19616 (10417) + 20 20.69 0.00 0.00 2019-nCoV 19761 19818 (10655) MERS-CoV 19673 19730 (10303) + 12 5.88 0.00 0.00 2019-nCoV 19926 19942 (10531) MERS-CoV 19838 19854 (10179) + 18 27.22 0.00 0.00 2019-nCoV 20507 20664 (9809) MERS-CoV 20410 20567 (9466) + 59 27.90 0.20 0.20 2019-nCoV 20798 21288 (9185) MERS-CoV 20701 21191 (8842) + 15 20.00 0.00 0.00 2019-nCoV 21329 21373 (9100) MERS-CoV 21232 21276 (8757) + 20 7.14 0.00 0.00 2019-nCoV 21543 21570 (8903) MERS-CoV 21446 21473 (8560) + 16 5.00 0.00 0.00 2019-nCoV 23922 23941 (6532) C MERS-CoV (18876) 11157 11138 + 18 23.68 0.00 0.00 2019-nCoV 24287 24362 (6111) MERS-CoV 24361 24436 (5597) + 11 11.11 0.00 0.00 2019-nCoV 24341 24358 (6115) C MERS-CoV (19291) 10742 10725 + 11 21.88 0.00 0.00 2019-nCoV 24421 24452 (6021) MERS-CoV 24495 24526 (5507) + 10 6.67 0.00 0.00 2019-nCoV 24485 24499 (5974) MERS-CoV 24559 24573 (5460) + 27 9.62 1.92 1.92 2019-nCoV 25209 25260 (5213) MERS-CoV 25310 25361 (4672) + 18 8.33 0.00 0.00 2019-nCoV 26865 26888 (3585) MERS-CoV 28151 28174 (1859) + 17 20.00 0.00 0.00 2019-nCoV 28424 28468 (2005) MERS-CoV 28648 28692 (1341) + 18 19.05 0.00 0.00 2019-nCoV 28604 28645 (1828) MERS-CoV 28825 28866 (1167) + 10 0.00 0.00 0.00 2019-nCoV 28638 28647 (1826) C MERS-CoV (23604) 6429 6420 + 15 9.52 0.00 0.00 2019-nCoV 29026 29046 (1427) MERS-CoV 29253 29273 (760) + 12 6.67 0.00 0.00 2019-nCoV 29189 29203 (1270) MERS-CoV 29416 29430 (603) + 11 15.00 0.00 0.00 2019-nCoV 29617 29636 (837) MERS-CoV 29848 29867 (166) + +1 matching entries (first file). 
+ +Discrepancy summary: +Qual algn cum rcum (%) unalgn X N sub del ins total (%) cum rcum (%) + + +Score histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: + 173 1 1 0.016 0 0 0.000 0 0 0.000 0 0 0.000 + 59 1 2 0.032 0 0 0.000 0 0 0.000 0 0 0.000 + 53 1 3 0.048 0 0 0.000 0 0 0.000 0 0 0.000 + 40 1 4 0.065 0 0 0.000 0 0 0.000 0 0 0.000 + 27 2 6 0.097 0 0 0.000 0 0 0.000 0 0 0.000 + 26 1 7 0.113 0 0 0.000 0 0 0.000 0 0 0.000 + 23 1 8 0.129 0 0 0.000 0 0 0.000 0 0 0.000 + 22 1 9 0.145 0 0 0.000 0 0 0.000 0 0 0.000 + 20 3 12 0.194 0 0 0.000 0 0 0.000 0 0 0.000 + 18 4 16 0.258 0 0 0.000 0 0 0.000 0 0 0.000 + 17 3 19 0.306 0 0 0.000 0 0 0.000 0 0 0.000 + 16 2 21 0.339 0 0 0.000 0 0 0.000 0 0 0.000 + 15 5 26 0.419 0 0 0.000 0 0 0.000 0 0 0.000 + 14 2 28 0.452 0 0 0.000 0 0 0.000 0 0 0.000 + 13 2 30 0.484 0 0 0.000 0 0 0.000 0 0 0.000 + 12 9 39 0.629 0 0 0.000 0 0 0.000 0 0 0.000 + 11 10 49 0.790 0 0 0.000 0 0 0.000 0 0 0.000 + 10 13 62 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + +Start histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: +> 1000 62 62 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + + +Splice histogram : displayed 5' splice, other 5' splice: displayed 3' splice, other 3' splice: + + +Times in secs (cum) +read queries 0 ( 0) +sort queries 0 ( 0) +find matches 0 ( 0) + end 0 ( 0) diff --git a/v1.2.4/test-hive/2019-nCoV.fa_vs_SARS-CoV.fa.paf b/v1.2.4/test-hive/2019-nCoV.fa_vs_SARS-CoV.fa.paf new file mode 100644 index 0000000..8a19aa5 --- /dev/null +++ b/v1.2.4/test-hive/2019-nCoV.fa_vs_SARS-CoV.fa.paf @@ -0,0 +1 @@ +2019-nCoV 30473 62 29870 + SARS-CoV 29751 43 29711 3907 29837 60 tp:A:P cm:i:428 s1:i:3872 s2:i:0 dv:f:0.1712 rl:i:0 diff --git a/v1.2.4/test-hive/2019-nCoV.fa_vs_SARS-CoV.fa.rep b/v1.2.4/test-hive/2019-nCoV.fa_vs_SARS-CoV.fa.rep new file mode 100644 index 0000000..d7b31ac --- /dev/null +++ b/v1.2.4/test-hive/2019-nCoV.fa_vs_SARS-CoV.fa.rep @@ -0,0 +1,98 @@ 
+cross_match 2019-nCoV.fa SARS-CoV.fa -minmatch 5 -minscore 10 -masklevel 101 +cross_match version 1.080721 + +Run date:time 200126:080543 +Query file(s): 2019-nCoV.fa +Subject file(s): SARS-CoV.fa +Presumed sequence type (from score matrix): DNA + +Pairwise comparison algorithm: banded Smith-Waterman + +Score matrix (set by value of penalty: -2) + A C G T N X +A 1 -2 -2 -2 0 -3 +C -2 1 -2 -2 0 -3 +G -2 -2 1 -2 0 -3 +T -2 -2 -2 1 0 -3 +N 0 0 0 0 0 0 +X -3 -3 -3 -3 0 -3 + +Gap penalties: gap_init: -4, gap_ext: -3, ins_gap_ext: -3, del_gap_ext: -3, +Using complexity-adjusted scores. Assumed background frequencies: + Aa: 0.250 Cc: 0.250 Gg: 0.250 Tt: 0.250 Nn: 0.000 : 0.000 + +minmatch: 5, maxmatch: 20, max_group_size: 0 (turned off), minscore: 10, near_minscore: 10, bandwidth: 14, indexwordsize: 5, indexwordsize2: 4 +word_raw: 0 +vector_bound: 0 +gap1_minscore: 17, gap1_dropoff: -12 +masklevel: 101 (minmargin irrelevant) +splice_edge_length: 0, allocation: 1 bytes +min_intron_length: 30, max_intron_length: 10000, max_overlap: 20, min_exon_length: 6 + +Sequence file: 2019-nCoV.fa 1 entries +Residue counts: + A 9115 + C 5605 + G 5951 + T 9802 +Total 30473 residues + +4 distinct alphabetic chars have freq > 1% -- + +Allocated space: 60949 seqs, 11 ids, 2 descrips + +NO QUALITY FILE 2019-nCoV.fa.qual WAS FOUND. REMAINING INPUT QUALITIES SET TO 15. +Base_llr -5.55, intron_coeff: -0.00288828 +Num. 
pairs: 18 +Maximal single base matches (low complexity regions): + + 23 0.00 0.00 0.00 2019-nCoV 4 28 (30445) SARS-CoV 29193 29217 (534) + 660 22.96 1.11 0.95 2019-nCoV 29 3173 (27300) SARS-CoV 13 3162 (26589) + 57 24.55 1.03 1.03 2019-nCoV 3363 3749 (26724) SARS-CoV 3280 3666 (26085) + 23 16.98 0.00 0.00 2019-nCoV 3972 4024 (26449) SARS-CoV 3883 3935 (25816) +7577 17.13 0.29 0.28 2019-nCoV 4120 21589 (8884) SARS-CoV 4031 21502 (8249) + 10 7.69 0.00 0.00 2019-nCoV 12326 12338 (18135) SARS-CoV 12153 12165 (17586) + 10 20.69 0.00 0.00 2019-nCoV 21684 21712 (8761) SARS-CoV 21609 21637 (8114) + 12 24.14 0.00 0.00 2019-nCoV 21724 21781 (8692) SARS-CoV 21649 21706 (8045) + 30 25.33 0.00 0.00 2019-nCoV 21812 21961 (8512) SARS-CoV 21716 21865 (7886) + 10 17.14 2.86 0.00 2019-nCoV 22070 22104 (8369) SARS-CoV 21962 21997 (7754) + 12 14.89 2.13 2.13 2019-nCoV 22128 22174 (8299) SARS-CoV 22020 22066 (7685) + 11 18.52 0.00 0.00 2019-nCoV 22203 22229 (8244) SARS-CoV 22095 22121 (7630) + 10 0.00 0.00 0.00 2019-nCoV 22272 22282 (8191) SARS-CoV 22164 22174 (7577) + 11 16.67 0.00 0.00 2019-nCoV 22362 22385 (8088) SARS-CoV 22236 22259 (7492) + 19 15.38 0.00 0.00 2019-nCoV 22413 22451 (8022) SARS-CoV 22287 22325 (7426) +2776 17.22 0.81 1.26 2019-nCoV 22555 29875 (598) SARS-CoV 22429 29716 (35) + 10 0.00 0.00 0.00 2019-nCoV 27233 27242 (3231) SARS-CoV 27133 27142 (2609) + 10 7.69 0.00 0.00 2019-nCoV 27884 27896 (2577) SARS-CoV 27779 27791 (1960) + +1 matching entries (first file). 
+ +Discrepancy summary: +Qual algn cum rcum (%) unalgn X N sub del ins total (%) cum rcum (%) + + +Score histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: +> 1000 2 2 0.111 0 0 0.000 0 0 0.000 0 0 0.000 + 660 1 3 0.167 0 0 0.000 0 0 0.000 0 0 0.000 + 57 1 4 0.222 0 0 0.000 0 0 0.000 0 0 0.000 + 30 1 5 0.278 0 0 0.000 0 0 0.000 0 0 0.000 + 23 2 7 0.389 0 0 0.000 0 0 0.000 0 0 0.000 + 19 1 8 0.444 0 0 0.000 0 0 0.000 0 0 0.000 + 12 2 10 0.556 0 0 0.000 0 0 0.000 0 0 0.000 + 11 2 12 0.667 0 0 0.000 0 0 0.000 0 0 0.000 + 10 6 18 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + +Start histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: +> 1000 16 16 0.889 0 0 0.000 0 0 0.000 0 0 0.000 + 28 1 17 0.944 0 0 0.000 0 0 0.000 0 0 0.000 + 3 1 18 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + + +Splice histogram : displayed 5' splice, other 5' splice: displayed 3' splice, other 3' splice: + + +Times in secs (cum) +read queries 0 ( 0) +sort queries 0 ( 0) +find matches 0 ( 0) + end 0 ( 0) diff --git a/v1.2.4/test-hive/2019-nCoV.txt b/v1.2.4/test-hive/2019-nCoV.txt new file mode 100644 index 0000000..89ccf19 --- /dev/null +++ b/v1.2.4/test-hive/2019-nCoV.txt @@ -0,0 +1 @@ +2019-nCoV:30473 diff --git a/v1.2.4/test-hive/MERS-CoV.fa_vs_SARS-CoV.fa.paf b/v1.2.4/test-hive/MERS-CoV.fa_vs_SARS-CoV.fa.paf new file mode 100644 index 0000000..e69de29 diff --git a/v1.2.4/test-hive/MERS-CoV.fa_vs_SARS-CoV.fa.rep b/v1.2.4/test-hive/MERS-CoV.fa_vs_SARS-CoV.fa.rep new file mode 100644 index 0000000..6d0fc5c --- /dev/null +++ b/v1.2.4/test-hive/MERS-CoV.fa_vs_SARS-CoV.fa.rep @@ -0,0 +1,185 @@ +cross_match MERS-CoV.fa SARS-CoV.fa -minmatch 5 -minscore 10 -masklevel 101 +cross_match version 1.080721 + +Run date:time 200126:080603 +Query file(s): MERS-CoV.fa +Subject file(s): SARS-CoV.fa +Presumed sequence type (from score matrix): DNA + +Pairwise comparison algorithm: banded Smith-Waterman + +Score matrix (set by value 
of penalty: -2) + A C G T N X +A 1 -2 -2 -2 0 -3 +C -2 1 -2 -2 0 -3 +G -2 -2 1 -2 0 -3 +T -2 -2 -2 1 0 -3 +N 0 0 0 0 0 0 +X -3 -3 -3 -3 0 -3 + +Gap penalties: gap_init: -4, gap_ext: -3, ins_gap_ext: -3, del_gap_ext: -3, +Using complexity-adjusted scores. Assumed background frequencies: + Aa: 0.250 Cc: 0.250 Gg: 0.250 Tt: 0.250 Nn: 0.000 : 0.000 + +minmatch: 5, maxmatch: 20, max_group_size: 0 (turned off), minscore: 10, near_minscore: 10, bandwidth: 14, indexwordsize: 5, indexwordsize2: 4 +word_raw: 0 +vector_bound: 0 +gap1_minscore: 17, gap1_dropoff: -12 +masklevel: 101 (minmargin irrelevant) +splice_edge_length: 0, allocation: 1 bytes +min_intron_length: 30, max_intron_length: 10000, max_overlap: 20, min_exon_length: 6 + +Sequence file: MERS-CoV.fa 1 entries +Residue counts: + A 7862 + C 6074 + G 6280 + T 9817 +Total 30033 residues + +4 distinct alphabetic chars have freq > 1% -- + +Allocated space: 60069 seqs, 10 ids, 2 descrips + +NO QUALITY FILE MERS-CoV.fa.qual WAS FOUND. REMAINING INPUT QUALITIES SET TO 15. +Base_llr -5.55, intron_coeff: -0.00288828 +Num. 
pairs: 92 +Maximal single base matches (low complexity regions): + + 12 21.05 0.00 0.00 MERS-CoV 21 58 (29975) SARS-CoV 37 74 (29677) + 13 0.00 0.00 0.00 MERS-CoV 217 229 (29804) SARS-CoV 195 207 (29544) + 23 17.02 0.00 0.00 MERS-CoV 3683 3729 (26304) SARS-CoV 3373 3419 (26332) + 22 6.25 3.12 0.00 MERS-CoV 3951 3982 (26051) SARS-CoV 3640 3672 (26079) + 10 0.00 0.00 0.00 MERS-CoV 4466 4475 (25558) SARS-CoV 4261 4270 (25481) + 10 0.00 0.00 0.00 MERS-CoV 4565 4574 (25459) SARS-CoV 4601 4610 (25141) + 10 0.00 0.00 0.00 MERS-CoV 6258 6267 (23766) SARS-CoV 6005 6014 (23737) + 10 7.69 0.00 0.00 MERS-CoV 7138 7150 (22883) SARS-CoV 7179 7191 (22560) + 15 11.54 0.00 0.00 MERS-CoV 7505 7530 (22503) SARS-CoV 7540 7565 (22186) + 11 0.00 0.00 0.00 MERS-CoV 7998 8009 (22024) SARS-CoV 8012 8023 (21728) + 10 7.14 0.00 0.00 MERS-CoV 8249 8262 (21771) SARS-CoV 8269 8282 (21469) + 10 7.14 0.00 0.00 MERS-CoV 8886 8899 (21134) SARS-CoV 8879 8892 (20859) + 14 20.51 0.00 0.00 MERS-CoV 8938 8976 (21057) SARS-CoV 8931 8969 (20782) + 10 0.00 0.00 0.00 MERS-CoV 9448 9460 (20573) SARS-CoV 9423 9435 (20316) + 11 10.00 0.00 0.00 MERS-CoV 9542 9561 (20472) SARS-CoV 9532 9551 (20200) + 12 10.53 0.00 0.00 MERS-CoV 9838 9856 (20177) SARS-CoV 9828 9846 (19905) + 11 0.00 0.00 0.00 MERS-CoV 9881 9891 (20142) SARS-CoV 9871 9881 (19870) + 13 9.52 0.00 0.00 MERS-CoV 9943 9963 (20070) SARS-CoV 9933 9953 (19798) + 12 26.32 0.00 0.00 MERS-CoV 10070 10126 (19907) SARS-CoV 10060 10116 (19635) + 13 13.33 0.00 3.33 MERS-CoV 10223 10252 (19781) SARS-CoV 10205 10233 (19518) + 14 5.88 0.00 0.00 MERS-CoV 10347 10363 (19670) SARS-CoV 10328 10344 (19407) + 12 18.60 0.00 0.00 MERS-CoV 10409 10451 (19582) SARS-CoV 10390 10432 (19319) + 15 14.29 0.00 0.00 MERS-CoV 10477 10504 (19529) SARS-CoV 10458 10485 (19266) + 20 15.79 0.00 0.00 MERS-CoV 10835 10872 (19161) SARS-CoV 10825 10862 (18889) + 14 18.75 0.00 0.00 MERS-CoV 11636 11667 (18366) SARS-CoV 11620 11651 (18100) + 43 20.35 0.00 0.00 MERS-CoV 11732 11844 (18189) 
SARS-CoV 11716 11828 (17923) + 14 17.24 0.00 0.00 MERS-CoV 11912 11940 (18093) SARS-CoV 11896 11924 (17827) + 29 25.00 0.00 0.00 MERS-CoV 12224 12351 (17682) SARS-CoV 12205 12332 (17419) + 10 7.69 0.00 0.00 MERS-CoV 12780 12792 (17241) SARS-CoV 12764 12776 (16975) + 11 0.00 0.00 0.00 MERS-CoV 13109 13119 (16914) SARS-CoV 13099 13109 (16642) + 11 0.00 0.00 0.00 MERS-CoV 13111 13121 (16912) SARS-CoV 12795 12805 (16946) + 13 21.05 0.00 0.00 MERS-CoV 13157 13194 (16839) SARS-CoV 13147 13184 (16567) + 12 14.29 0.00 0.00 MERS-CoV 13244 13264 (16769) SARS-CoV 13234 13254 (16497) + 10 12.50 0.00 0.00 MERS-CoV 13279 13294 (16739) SARS-CoV 13269 13284 (16467) + 10 7.69 0.00 0.00 MERS-CoV 13422 13434 (16599) SARS-CoV 13180 13192 (16559) + 18 16.67 0.00 0.00 MERS-CoV 13465 13500 (16533) SARS-CoV 13452 13487 (16264) + 10 11.76 0.00 0.00 MERS-CoV 13513 13529 (16504) SARS-CoV 13494 13510 (16241) + 26 8.33 0.00 0.00 MERS-CoV 13585 13620 (16413) SARS-CoV 13566 13601 (16150) + 16 14.29 0.00 0.00 MERS-CoV 13734 13761 (16272) SARS-CoV 13715 13742 (16009) + 10 0.00 0.00 0.00 MERS-CoV 13836 13846 (16187) SARS-CoV 13820 13830 (15921) + 15 17.50 2.50 0.00 MERS-CoV 13916 13955 (16078) SARS-CoV 13899 13939 (15812) + 27 16.67 0.00 0.00 MERS-CoV 14002 14055 (15978) SARS-CoV 13986 14039 (15712) + 14 14.63 2.44 2.44 MERS-CoV 14173 14213 (15820) SARS-CoV 14157 14197 (15554) + 17 25.81 0.00 0.00 MERS-CoV 14245 14337 (15696) SARS-CoV 14229 14321 (15430) + 15 22.45 0.00 0.00 MERS-CoV 14360 14408 (15625) SARS-CoV 14344 14392 (15359) + 16 12.00 0.00 0.00 MERS-CoV 14504 14528 (15505) SARS-CoV 14488 14512 (15239) + 10 7.69 0.00 0.00 MERS-CoV 14581 14593 (15440) SARS-CoV 14565 14577 (15174) + 128 26.83 0.00 0.00 MERS-CoV 14603 15325 (14708) SARS-CoV 14587 15309 (14442) + 29 11.36 0.00 0.00 MERS-CoV 15432 15475 (14558) SARS-CoV 15416 15459 (14292) + 61 27.25 0.00 0.00 MERS-CoV 15636 16002 (14031) SARS-CoV 15620 15986 (13765) + 11 23.08 0.00 0.00 MERS-CoV 16069 16107 (13926) SARS-CoV 16053 16091 (13660) + 
43 26.05 0.00 0.00 MERS-CoV 16177 16391 (13642) SARS-CoV 16161 16375 (13376) + 19 17.78 0.00 0.00 MERS-CoV 16419 16463 (13570) SARS-CoV 16403 16447 (13304) + 17 28.80 0.00 0.00 MERS-CoV 16516 16640 (13393) SARS-CoV 16500 16624 (13127) + 36 20.79 0.00 0.00 MERS-CoV 16681 16781 (13252) SARS-CoV 16665 16765 (12986) + 10 11.11 0.00 0.00 MERS-CoV 16791 16808 (13225) SARS-CoV 16775 16792 (12959) + 17 13.79 0.00 0.00 MERS-CoV 16918 16946 (13087) SARS-CoV 16902 16930 (12821) + 39 28.55 0.88 0.88 MERS-CoV 17026 17820 (12213) SARS-CoV 17010 17804 (11947) + 14 5.56 0.00 0.00 MERS-CoV 17985 18002 (12031) SARS-CoV 17978 17995 (11756) + 12 13.04 0.00 0.00 MERS-CoV 18232 18254 (11779) SARS-CoV 18225 18247 (11504) + 16 17.14 0.00 0.00 MERS-CoV 18292 18326 (11707) SARS-CoV 18285 18319 (11432) + 20 4.35 0.00 0.00 MERS-CoV 18394 18416 (11617) SARS-CoV 18387 18409 (11342) + 14 5.56 0.00 0.00 MERS-CoV 18531 18548 (11485) SARS-CoV 18524 18541 (11210) + 29 19.74 0.00 0.00 MERS-CoV 18644 18719 (11314) SARS-CoV 18637 18712 (11039) + 16 19.51 0.00 0.00 MERS-CoV 18778 18818 (11215) SARS-CoV 18771 18811 (10940) + 11 9.09 0.00 4.55 MERS-CoV 18965 18986 (11047) SARS-CoV 18958 18978 (10773) + 12 19.35 0.00 0.00 MERS-CoV 19097 19127 (10906) SARS-CoV 19102 19132 (10619) + 50 23.90 0.63 1.57 MERS-CoV 19147 19464 (10569) SARS-CoV 19152 19466 (10285) + 14 25.29 0.00 0.00 MERS-CoV 19507 19593 (10440) SARS-CoV 19509 19595 (10156) + 10 26.32 0.00 0.00 MERS-CoV 19675 19731 (10302) SARS-CoV 19677 19733 (10018) + 13 20.59 0.00 0.00 MERS-CoV 20226 20259 (9774) SARS-CoV 20237 20270 (9481) + 11 0.00 0.00 0.00 MERS-CoV 20275 20285 (9748) SARS-CoV 20286 20296 (9455) + 11 21.05 0.00 0.00 MERS-CoV 20410 20447 (9586) SARS-CoV 20421 20458 (9293) + 13 10.53 0.00 0.00 MERS-CoV 20491 20509 (9524) SARS-CoV 20502 20520 (9231) + 29 5.71 0.00 0.00 MERS-CoV 20533 20567 (9466) SARS-CoV 20544 20578 (9173) + 22 28.11 1.02 1.02 MERS-CoV 20701 21191 (8842) SARS-CoV 20712 21202 (8549) + 11 19.23 0.00 0.00 MERS-CoV 21247 21272 
(8761) SARS-CoV 21258 21283 (8468) + 15 13.79 0.00 0.00 MERS-CoV 21445 21473 (8560) SARS-CoV 21456 21484 (8267) + 10 0.00 0.00 0.00 MERS-CoV 22110 22119 (7914) SARS-CoV 21736 21745 (8006) + 10 10.53 0.00 0.00 MERS-CoV 24097 24115 (5918) SARS-CoV 23888 23906 (5845) + 10 7.14 0.00 0.00 MERS-CoV 24194 24207 (5826) SARS-CoV 23979 23992 (5759) + 16 22.64 0.00 0.00 MERS-CoV 24362 24414 (5619) SARS-CoV 24147 24199 (5552) + 10 0.00 0.00 0.00 MERS-CoV 24449 24458 (5575) SARS-CoV 24132 24141 (5610) + 10 21.21 0.00 0.00 MERS-CoV 25121 25153 (4880) SARS-CoV 24879 24911 (4840) + 22 16.67 0.00 0.00 MERS-CoV 25310 25357 (4676) SARS-CoV 25068 25115 (4636) + 16 0.00 0.00 0.00 MERS-CoV 27772 27788 (2245) C SARS-CoV (8130) 21621 21605 + 11 18.52 0.00 0.00 MERS-CoV 28648 28674 (1359) SARS-CoV 28257 28283 (1468) + 23 24.10 0.00 0.00 MERS-CoV 28826 28908 (1125) SARS-CoV 28438 28520 (1231) + 15 9.52 0.00 0.00 MERS-CoV 29253 29273 (760) SARS-CoV 28859 28879 (872) + 12 6.67 0.00 0.00 MERS-CoV 29416 29430 (603) SARS-CoV 29022 29036 (715) + 10 0.00 0.00 0.00 MERS-CoV 29583 29592 (441) SARS-CoV 29358 29367 (384) + 11 15.00 0.00 0.00 MERS-CoV 29848 29867 (166) SARS-CoV 29458 29477 (274) + +1 matching entries (first file). 
+ +Discrepancy summary: +Qual algn cum rcum (%) unalgn X N sub del ins total (%) cum rcum (%) + + +Score histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: + 128 1 1 0.011 0 0 0.000 0 0 0.000 0 0 0.000 + 61 1 2 0.022 0 0 0.000 0 0 0.000 0 0 0.000 + 50 1 3 0.033 0 0 0.000 0 0 0.000 0 0 0.000 + 43 2 5 0.054 0 0 0.000 0 0 0.000 0 0 0.000 + 39 1 6 0.065 0 0 0.000 0 0 0.000 0 0 0.000 + 36 1 7 0.076 0 0 0.000 0 0 0.000 0 0 0.000 + 29 4 11 0.120 0 0 0.000 0 0 0.000 0 0 0.000 + 27 1 12 0.130 0 0 0.000 0 0 0.000 0 0 0.000 + 26 1 13 0.141 0 0 0.000 0 0 0.000 0 0 0.000 + 23 2 15 0.163 0 0 0.000 0 0 0.000 0 0 0.000 + 22 3 18 0.196 0 0 0.000 0 0 0.000 0 0 0.000 + 20 2 20 0.217 0 0 0.000 0 0 0.000 0 0 0.000 + 19 1 21 0.228 0 0 0.000 0 0 0.000 0 0 0.000 + 18 1 22 0.239 0 0 0.000 0 0 0.000 0 0 0.000 + 17 3 25 0.272 0 0 0.000 0 0 0.000 0 0 0.000 + 16 6 31 0.337 0 0 0.000 0 0 0.000 0 0 0.000 + 15 6 37 0.402 0 0 0.000 0 0 0.000 0 0 0.000 + 14 8 45 0.489 0 0 0.000 0 0 0.000 0 0 0.000 + 13 6 51 0.554 0 0 0.000 0 0 0.000 0 0 0.000 + 12 8 59 0.641 0 0 0.000 0 0 0.000 0 0 0.000 + 11 12 71 0.772 0 0 0.000 0 0 0.000 0 0 0.000 + 10 21 92 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + +Start histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: +> 1000 90 90 0.978 0 0 0.000 0 0 0.000 0 0 0.000 + 216 1 91 0.989 0 0 0.000 0 0 0.000 0 0 0.000 + 20 1 92 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + + +Splice histogram : displayed 5' splice, other 5' splice: displayed 3' splice, other 3' splice: + + +Times in secs (cum) +read queries 0 ( 0) +sort queries 0 ( 0) +find matches 0 ( 0) + end 0 ( 0) diff --git a/v1.2.4/test-hive/MERS-CoV.txt b/v1.2.4/test-hive/MERS-CoV.txt new file mode 100644 index 0000000..5790031 --- /dev/null +++ b/v1.2.4/test-hive/MERS-CoV.txt @@ -0,0 +1 @@ +MERS-CoV:30033 diff --git a/v1.2.4/test-hive/SARS-CoV.txt b/v1.2.4/test-hive/SARS-CoV.txt new file mode 100644 index 0000000..fd6e781 --- /dev/null 
+++ b/v1.2.4/test-hive/SARS-CoV.txt @@ -0,0 +1 @@ +SARS-CoV:29751 diff --git a/v1.2.4/test-hive/SARScds.gff b/v1.2.4/test-hive/SARScds.gff new file mode 100644 index 0000000..a9ef093 --- /dev/null +++ b/v1.2.4/test-hive/SARScds.gff @@ -0,0 +1,14 @@ +SARS-CoV RefSeq CDS 265 13398 . + 0 ID=cds-NP_828849.2;Parent=gene-sars1;Dbxref=Genbank:NP_828849.2,GeneID:1489680;Name=NP_828849.2;Note=-1 frameshift;exception=ribosomal slippage;gbkey=CDS;gene=orf1ab;locus_tag=sars1;product=orf1ab polyprotein (pp1ab);protein_id=NP_828849.2 cyan +SARS-CoV RefSeq CDS 13398 21485 . + 0 ID=cds-NP_828849.2;Parent=gene-sars1;Dbxref=Genbank:NP_828849.2,GeneID:1489680;Name=NP_828849.2;Note=-1 frameshift;exception=ribosomal slippage;gbkey=CDS;gene=orf1ab;locus_tag=sars1;product=orf1ab polyprotein (pp1ab);protein_id=NP_828849.2 red +SARS-CoV RefSeq CDS 21492 25259 . + 0 ID=cds-NP_828851.1;Parent=gene-sars2;Dbxref=Genbank:NP_828851.1,GeneID:1489668;Name=NP_828851.1;Note=As established by Krokhin et al. (2003)%2C the glycosylated spike protein (as well as the nucleocapsid protein) can be detected in infected cell culture supernatants with antisera from SARS patients.;gbkey=CDS;gene=S;locus_tag=sars2;product=E2 glycoprotein precursor;protein_id=NP_828851.1 red +SARS-CoV RefSeq CDS 25268 26092 . + 0 ID=cds-NP_828852.2;Parent=gene-sars3a;Dbxref=Genbank:NP_828852.2,GeneID:1489669;Name=NP_828852.2;gbkey=CDS;locus_tag=sars3a;product=hypothetical protein sars3a;protein_id=NP_828852.2 cyan +SARS-CoV RefSeq CDS 25689 26153 . + 0 ID=cds-NP_828853.1;Parent=gene-sars3b;Dbxref=Genbank:NP_828853.1,GeneID:1489670;Name=NP_828853.1;gbkey=CDS;locus_tag=sars3b;product=hypothetical protein sars3b;protein_id=NP_828853.1 red +SARS-CoV RefSeq CDS 26117 26347 . + 0 ID=cds-NP_828854.1;Parent=gene-sars4;Dbxref=Genbank:NP_828854.1,GeneID:1489671;Name=NP_828854.1;Note=E. coli expression reported by Shen et al. 
(2003);gbkey=CDS;gene=E;locus_tag=sars4;product=protein E;protein_id=NP_828854.1 cyan +SARS-CoV RefSeq CDS 26398 27063 . + 0 ID=cds-NP_828855.1;Parent=gene-sars5;Dbxref=Genbank:NP_828855.1,GeneID:1489672;Name=NP_828855.1;Note=E. coli expression reported by Zhang et al. (2003);gbkey=CDS;gene=M;locus_tag=sars5;product=matrix protein;protein_id=NP_828855.1 red +SARS-CoV RefSeq CDS 27074 27265 . + 0 ID=cds-NP_828856.1;Parent=gene-sars6;Dbxref=Genbank:NP_828856.1,GeneID:1489673;Name=NP_828856.1;gbkey=CDS;locus_tag=sars6;product=hypothetical protein sars6;protein_id=NP_828856.1 cyan +SARS-CoV RefSeq CDS 27273 27641 . + 0 ID=cds-NP_828857.1;Parent=gene-sars7a;Dbxref=Genbank:NP_828857.1,GeneID:1489674;Name=NP_828857.1;gbkey=CDS;locus_tag=sars7a;product=hypothetical protein sars7a;protein_id=NP_828857.1 red +SARS-CoV RefSeq CDS 27638 27772 . + 0 ID=cds-NP_849175.1;Parent=gene-sars7b;Dbxref=Genbank:NP_849175.1,GeneID:1489675;Name=NP_849175.1;gbkey=CDS;locus_tag=sars7b;product=hypothetical protein sars7b;protein_id=NP_849175.1 cyan +SARS-CoV RefSeq CDS 27779 27898 . + 0 ID=cds-NP_849176.1;Parent=gene-sars8a;Dbxref=Genbank:NP_849176.1,GeneID:1489676;Name=NP_849176.1;gbkey=CDS;locus_tag=sars8a;product=hypothetical protein sars8a;protein_id=NP_849176.1 red +SARS-CoV RefSeq CDS 27864 28118 . + 0 ID=cds-NP_849177.1;Parent=gene-sars8b;Dbxref=Genbank:NP_849177.1,GeneID:1489677;Name=NP_849177.1;gbkey=CDS;locus_tag=sars8b;product=hypothetical protein sars8b;protein_id=NP_849177.1 cyan +SARS-CoV RefSeq CDS 28120 29388 . + 0 ID=cds-NP_828858.1;Parent=gene-sars9a;Dbxref=Genbank:NP_828858.1,GeneID:1489678;Name=NP_828858.1;Note=As established by Krokhin et al. 
(2003)%2C the N-terminal methionine is removed%2C all other methionines are oxidized%2C and the resulting N-terminal serine is acetylated;experiment=inhibits the activity of cyclin-CDK complex and blocks S phase progression in mammalian cells;gbkey=CDS;gene=N;locus_tag=sars9a;product=nucleocapsid protein;protein_id=NP_828858.1 red +SARS-CoV RefSeq CDS 28130 28426 . + 0 ID=cds-NP_828859.1;Parent=gene-sars9b;Dbxref=Genbank:NP_828859.1,GeneID:1489679;Name=NP_828859.1;gbkey=CDS;locus_tag=sars9b;product=hypothetical protein sars9b;protein_id=NP_828859.1 cyan diff --git a/v1.2.4/test-hive/all.paf b/v1.2.4/test-hive/all.paf new file mode 100644 index 0000000..c97e409 --- /dev/null +++ b/v1.2.4/test-hive/all.paf @@ -0,0 +1,2 @@ +2019-nCoV 30473 13996 20902 + MERS-CoV 30033 13926 20805 60 6906 16 tp:A:P cm:i:4 s1:i:54 s2:i:0 dv:f:0.3855 rl:i:0 +2019-nCoV 30473 62 29870 + SARS-CoV 29751 43 29711 3907 29837 60 tp:A:P cm:i:428 s1:i:3872 s2:i:0 dv:f:0.1712 rl:i:0 diff --git a/v1.2.4/test-hive/all.paf.success.svg b/v1.2.4/test-hive/all.paf.success.svg new file mode 100644 index 0000000..606a0cb --- /dev/null +++ b/v1.2.4/test-hive/all.paf.success.svg @@ -0,0 +1,35 @@ + + + + +2019-nCoV + +SARS-CoV + +MERS-CoV + + +Sequence identity (%) +Forward | Reverse + +100 + + +90+ + + +80+ + + +70+ + + +60+ + + +50+ + + +0-49 + + \ No newline at end of file diff --git a/v1.2.4/test-hive/all.rep b/v1.2.4/test-hive/all.rep new file mode 100644 index 0000000..6979f8a --- /dev/null +++ b/v1.2.4/test-hive/all.rep @@ -0,0 +1,432 @@ +cross_match 2019-nCoV.fa MERS-CoV.fa -minmatch 5 -minscore 10 -masklevel 101 +cross_match version 1.080721 + +Run date:time 200126:080632 +Query file(s): 2019-nCoV.fa +Subject file(s): MERS-CoV.fa +Presumed sequence type (from score matrix): DNA + +Pairwise comparison algorithm: banded Smith-Waterman + +Score matrix (set by value of penalty: -2) + A C G T N X +A 1 -2 -2 -2 0 -3 +C -2 1 -2 -2 0 -3 +G -2 -2 1 -2 0 -3 +T -2 -2 -2 1 0 -3 +N 0 0 0 0 0 0 +X -3 -3 -3 -3 0 
-3 + +Gap penalties: gap_init: -4, gap_ext: -3, ins_gap_ext: -3, del_gap_ext: -3, +Using complexity-adjusted scores. Assumed background frequencies: + Aa: 0.250 Cc: 0.250 Gg: 0.250 Tt: 0.250 Nn: 0.000 : 0.000 + +minmatch: 5, maxmatch: 20, max_group_size: 0 (turned off), minscore: 10, near_minscore: 10, bandwidth: 14, indexwordsize: 5, indexwordsize2: 4 +word_raw: 0 +vector_bound: 0 +gap1_minscore: 17, gap1_dropoff: -12 +masklevel: 101 (minmargin irrelevant) +splice_edge_length: 0, allocation: 1 bytes +min_intron_length: 30, max_intron_length: 10000, max_overlap: 20, min_exon_length: 6 + +Sequence file: 2019-nCoV.fa 1 entries +Residue counts: + A 9115 + C 5605 + G 5951 + T 9802 +Total 30473 residues + +4 distinct alphabetic chars have freq > 1% -- + +Allocated space: 60949 seqs, 11 ids, 2 descrips + +NO QUALITY FILE 2019-nCoV.fa.qual WAS FOUND. REMAINING INPUT QUALITIES SET TO 15. +Base_llr -5.55, intron_coeff: -0.00288828 +Num. pairs: 62 +Maximal single base matches (low complexity regions): + + 11 7.14 0.00 0.00 2019-nCoV 1958 1971 (28502) MERS-CoV 2031 2044 (27989) + 10 15.79 0.00 0.00 2019-nCoV 3627 3645 (26828) MERS-CoV 3854 3872 (26161) + 12 8.70 0.00 0.00 2019-nCoV 7626 7648 (22825) MERS-CoV 7505 7527 (22506) + 10 0.00 0.00 0.00 2019-nCoV 8012 8022 (22451) MERS-CoV 8104 8114 (21919) + 11 15.38 0.00 0.00 2019-nCoV 8045 8070 (22403) MERS-CoV 7936 7961 (22072) + 10 12.50 0.00 0.00 2019-nCoV 8264 8279 (22194) MERS-CoV 8158 8173 (21860) + 11 10.00 0.00 0.00 2019-nCoV 9618 9637 (20836) MERS-CoV 9542 9561 (20472) + 26 23.33 1.33 1.33 2019-nCoV 10019 10168 (20305) MERS-CoV 9943 10092 (19941) + 10 0.00 0.00 0.00 2019-nCoV 10224 10234 (20239) MERS-CoV 10148 10158 (19875) + 10 0.00 0.00 0.00 2019-nCoV 10329 10338 (20135) MERS-CoV 11615 11624 (18409) + 11 14.29 0.00 0.00 2019-nCoV 10410 10430 (20043) MERS-CoV 10343 10363 (19670) + 23 14.89 0.00 0.00 2019-nCoV 10472 10518 (19955) MERS-CoV 10405 10451 (19582) + 15 12.00 0.00 0.00 2019-nCoV 10544 10568 (19905) MERS-CoV 
10477 10501 (19532) + 12 15.38 0.00 0.00 2019-nCoV 10674 10699 (19774) MERS-CoV 10607 10632 (19401) + 15 5.56 0.00 0.00 2019-nCoV 10932 10949 (19524) MERS-CoV 10856 10873 (19160) + 10 12.50 0.00 0.00 2019-nCoV 11730 11745 (18728) MERS-CoV 11660 11675 (18358) + 17 25.35 0.00 0.00 2019-nCoV 11817 11887 (18586) MERS-CoV 11747 11817 (18216) + 27 23.53 0.00 0.00 2019-nCoV 12276 12377 (18096) MERS-CoV 12209 12310 (17723) + 10 0.00 0.00 6.67 2019-nCoV 12491 12505 (17968) MERS-CoV 12374 12387 (17646) + 12 11.11 0.00 0.00 2019-nCoV 12642 12659 (17814) MERS-CoV 12572 12589 (17444) + 13 18.75 0.00 0.00 2019-nCoV 12679 12710 (17763) MERS-CoV 12612 12643 (17390) + 12 0.00 0.00 0.00 2019-nCoV 13158 13172 (17301) MERS-CoV 13082 13096 (16937) + 14 20.97 1.61 1.61 2019-nCoV 13230 13291 (17182) MERS-CoV 13154 13215 (16818) + 11 14.29 0.00 0.00 2019-nCoV 13320 13340 (17133) MERS-CoV 13244 13264 (16769) + 11 6.25 0.00 0.00 2019-nCoV 13355 13370 (17103) MERS-CoV 13279 13294 (16739) + 20 14.29 0.00 0.00 2019-nCoV 13538 13572 (16901) MERS-CoV 13465 13499 (16534) + 12 14.29 0.00 0.00 2019-nCoV 13664 13684 (16789) MERS-CoV 13597 13617 (16416) + 12 26.23 0.00 0.00 2019-nCoV 13801 13861 (16612) MERS-CoV 13734 13794 (16239) + 13 11.54 3.85 0.00 2019-nCoV 13939 13964 (16509) MERS-CoV 13869 13895 (16138) + 10 0.00 0.00 0.00 2019-nCoV 13940 13952 (16521) MERS-CoV 14032 14044 (15989) + 40 21.74 0.00 0.72 2019-nCoV 13985 14122 (16351) MERS-CoV 13916 14052 (15981) + 173 28.11 0.96 0.96 2019-nCoV 14298 17943 (12530) MERS-CoV 14228 17873 (12160) + 14 12.50 0.00 0.00 2019-nCoV 18064 18087 (12386) MERS-CoV 17985 18008 (12025) + 11 17.39 0.00 0.00 2019-nCoV 18122 18144 (12329) MERS-CoV 18043 18065 (11968) + 22 28.39 0.00 0.00 2019-nCoV 18256 18410 (12063) MERS-CoV 18177 18331 (11702) + 10 16.67 0.00 0.00 2019-nCoV 18491 18514 (11959) MERS-CoV 18412 18435 (11598) + 15 14.81 0.00 0.00 2019-nCoV 18611 18637 (11836) MERS-CoV 18532 18558 (11475) + 17 24.36 0.00 0.00 2019-nCoV 18723 18800 (11673) MERS-CoV 
18644 18721 (11312) + 16 14.29 0.00 0.00 2019-nCoV 18871 18898 (11575) MERS-CoV 18792 18819 (11214) + 10 12.50 0.00 0.00 2019-nCoV 19049 19064 (11409) MERS-CoV 18970 18985 (11048) + 12 0.00 0.00 0.00 2019-nCoV 19108 19119 (11354) MERS-CoV 19029 19040 (10993) + 53 22.95 2.32 1.47 2019-nCoV 19199 19673 (10800) MERS-CoV 19108 19586 (10447) + 10 7.69 0.00 0.00 2019-nCoV 19692 19704 (10769) MERS-CoV 19604 19616 (10417) + 20 20.69 0.00 0.00 2019-nCoV 19761 19818 (10655) MERS-CoV 19673 19730 (10303) + 12 5.88 0.00 0.00 2019-nCoV 19926 19942 (10531) MERS-CoV 19838 19854 (10179) + 18 27.22 0.00 0.00 2019-nCoV 20507 20664 (9809) MERS-CoV 20410 20567 (9466) + 59 27.90 0.20 0.20 2019-nCoV 20798 21288 (9185) MERS-CoV 20701 21191 (8842) + 15 20.00 0.00 0.00 2019-nCoV 21329 21373 (9100) MERS-CoV 21232 21276 (8757) + 20 7.14 0.00 0.00 2019-nCoV 21543 21570 (8903) MERS-CoV 21446 21473 (8560) + 16 5.00 0.00 0.00 2019-nCoV 23922 23941 (6532) C MERS-CoV (18876) 11157 11138 + 18 23.68 0.00 0.00 2019-nCoV 24287 24362 (6111) MERS-CoV 24361 24436 (5597) + 11 11.11 0.00 0.00 2019-nCoV 24341 24358 (6115) C MERS-CoV (19291) 10742 10725 + 11 21.88 0.00 0.00 2019-nCoV 24421 24452 (6021) MERS-CoV 24495 24526 (5507) + 10 6.67 0.00 0.00 2019-nCoV 24485 24499 (5974) MERS-CoV 24559 24573 (5460) + 27 9.62 1.92 1.92 2019-nCoV 25209 25260 (5213) MERS-CoV 25310 25361 (4672) + 18 8.33 0.00 0.00 2019-nCoV 26865 26888 (3585) MERS-CoV 28151 28174 (1859) + 17 20.00 0.00 0.00 2019-nCoV 28424 28468 (2005) MERS-CoV 28648 28692 (1341) + 18 19.05 0.00 0.00 2019-nCoV 28604 28645 (1828) MERS-CoV 28825 28866 (1167) + 10 0.00 0.00 0.00 2019-nCoV 28638 28647 (1826) C MERS-CoV (23604) 6429 6420 + 15 9.52 0.00 0.00 2019-nCoV 29026 29046 (1427) MERS-CoV 29253 29273 (760) + 12 6.67 0.00 0.00 2019-nCoV 29189 29203 (1270) MERS-CoV 29416 29430 (603) + 11 15.00 0.00 0.00 2019-nCoV 29617 29636 (837) MERS-CoV 29848 29867 (166) + +1 matching entries (first file). 
+ +Discrepancy summary: +Qual algn cum rcum (%) unalgn X N sub del ins total (%) cum rcum (%) + + +Score histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: + 173 1 1 0.016 0 0 0.000 0 0 0.000 0 0 0.000 + 59 1 2 0.032 0 0 0.000 0 0 0.000 0 0 0.000 + 53 1 3 0.048 0 0 0.000 0 0 0.000 0 0 0.000 + 40 1 4 0.065 0 0 0.000 0 0 0.000 0 0 0.000 + 27 2 6 0.097 0 0 0.000 0 0 0.000 0 0 0.000 + 26 1 7 0.113 0 0 0.000 0 0 0.000 0 0 0.000 + 23 1 8 0.129 0 0 0.000 0 0 0.000 0 0 0.000 + 22 1 9 0.145 0 0 0.000 0 0 0.000 0 0 0.000 + 20 3 12 0.194 0 0 0.000 0 0 0.000 0 0 0.000 + 18 4 16 0.258 0 0 0.000 0 0 0.000 0 0 0.000 + 17 3 19 0.306 0 0 0.000 0 0 0.000 0 0 0.000 + 16 2 21 0.339 0 0 0.000 0 0 0.000 0 0 0.000 + 15 5 26 0.419 0 0 0.000 0 0 0.000 0 0 0.000 + 14 2 28 0.452 0 0 0.000 0 0 0.000 0 0 0.000 + 13 2 30 0.484 0 0 0.000 0 0 0.000 0 0 0.000 + 12 9 39 0.629 0 0 0.000 0 0 0.000 0 0 0.000 + 11 10 49 0.790 0 0 0.000 0 0 0.000 0 0 0.000 + 10 13 62 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + +Start histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: +> 1000 62 62 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + + +Splice histogram : displayed 5' splice, other 5' splice: displayed 3' splice, other 3' splice: + + +Times in secs (cum) +read queries 0 ( 0) +sort queries 0 ( 0) +find matches 0 ( 0) + end 0 ( 0) +cross_match 2019-nCoV.fa SARS-CoV.fa -minmatch 5 -minscore 10 -masklevel 101 +cross_match version 1.080721 + +Run date:time 200126:080543 +Query file(s): 2019-nCoV.fa +Subject file(s): SARS-CoV.fa +Presumed sequence type (from score matrix): DNA + +Pairwise comparison algorithm: banded Smith-Waterman + +Score matrix (set by value of penalty: -2) + A C G T N X +A 1 -2 -2 -2 0 -3 +C -2 1 -2 -2 0 -3 +G -2 -2 1 -2 0 -3 +T -2 -2 -2 1 0 -3 +N 0 0 0 0 0 0 +X -3 -3 -3 -3 0 -3 + +Gap penalties: gap_init: -4, gap_ext: -3, ins_gap_ext: -3, del_gap_ext: -3, +Using complexity-adjusted scores. 
Assumed background frequencies: + Aa: 0.250 Cc: 0.250 Gg: 0.250 Tt: 0.250 Nn: 0.000 : 0.000 + +minmatch: 5, maxmatch: 20, max_group_size: 0 (turned off), minscore: 10, near_minscore: 10, bandwidth: 14, indexwordsize: 5, indexwordsize2: 4 +word_raw: 0 +vector_bound: 0 +gap1_minscore: 17, gap1_dropoff: -12 +masklevel: 101 (minmargin irrelevant) +splice_edge_length: 0, allocation: 1 bytes +min_intron_length: 30, max_intron_length: 10000, max_overlap: 20, min_exon_length: 6 + +Sequence file: 2019-nCoV.fa 1 entries +Residue counts: + A 9115 + C 5605 + G 5951 + T 9802 +Total 30473 residues + +4 distinct alphabetic chars have freq > 1% -- + +Allocated space: 60949 seqs, 11 ids, 2 descrips + +NO QUALITY FILE 2019-nCoV.fa.qual WAS FOUND. REMAINING INPUT QUALITIES SET TO 15. +Base_llr -5.55, intron_coeff: -0.00288828 +Num. pairs: 18 +Maximal single base matches (low complexity regions): + + 23 0.00 0.00 0.00 2019-nCoV 4 28 (30445) SARS-CoV 29193 29217 (534) + 660 22.96 1.11 0.95 2019-nCoV 29 3173 (27300) SARS-CoV 13 3162 (26589) + 57 24.55 1.03 1.03 2019-nCoV 3363 3749 (26724) SARS-CoV 3280 3666 (26085) + 23 16.98 0.00 0.00 2019-nCoV 3972 4024 (26449) SARS-CoV 3883 3935 (25816) +7577 17.13 0.29 0.28 2019-nCoV 4120 21589 (8884) SARS-CoV 4031 21502 (8249) + 10 7.69 0.00 0.00 2019-nCoV 12326 12338 (18135) SARS-CoV 12153 12165 (17586) + 10 20.69 0.00 0.00 2019-nCoV 21684 21712 (8761) SARS-CoV 21609 21637 (8114) + 12 24.14 0.00 0.00 2019-nCoV 21724 21781 (8692) SARS-CoV 21649 21706 (8045) + 30 25.33 0.00 0.00 2019-nCoV 21812 21961 (8512) SARS-CoV 21716 21865 (7886) + 10 17.14 2.86 0.00 2019-nCoV 22070 22104 (8369) SARS-CoV 21962 21997 (7754) + 12 14.89 2.13 2.13 2019-nCoV 22128 22174 (8299) SARS-CoV 22020 22066 (7685) + 11 18.52 0.00 0.00 2019-nCoV 22203 22229 (8244) SARS-CoV 22095 22121 (7630) + 10 0.00 0.00 0.00 2019-nCoV 22272 22282 (8191) SARS-CoV 22164 22174 (7577) + 11 16.67 0.00 0.00 2019-nCoV 22362 22385 (8088) SARS-CoV 22236 22259 (7492) + 19 15.38 0.00 0.00 2019-nCoV 
22413 22451 (8022) SARS-CoV 22287 22325 (7426) +2776 17.22 0.81 1.26 2019-nCoV 22555 29875 (598) SARS-CoV 22429 29716 (35) + 10 0.00 0.00 0.00 2019-nCoV 27233 27242 (3231) SARS-CoV 27133 27142 (2609) + 10 7.69 0.00 0.00 2019-nCoV 27884 27896 (2577) SARS-CoV 27779 27791 (1960) + +1 matching entries (first file). + +Discrepancy summary: +Qual algn cum rcum (%) unalgn X N sub del ins total (%) cum rcum (%) + + +Score histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: +> 1000 2 2 0.111 0 0 0.000 0 0 0.000 0 0 0.000 + 660 1 3 0.167 0 0 0.000 0 0 0.000 0 0 0.000 + 57 1 4 0.222 0 0 0.000 0 0 0.000 0 0 0.000 + 30 1 5 0.278 0 0 0.000 0 0 0.000 0 0 0.000 + 23 2 7 0.389 0 0 0.000 0 0 0.000 0 0 0.000 + 19 1 8 0.444 0 0 0.000 0 0 0.000 0 0 0.000 + 12 2 10 0.556 0 0 0.000 0 0 0.000 0 0 0.000 + 11 2 12 0.667 0 0 0.000 0 0 0.000 0 0 0.000 + 10 6 18 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + +Start histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: +> 1000 16 16 0.889 0 0 0.000 0 0 0.000 0 0 0.000 + 28 1 17 0.944 0 0 0.000 0 0 0.000 0 0 0.000 + 3 1 18 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + + +Splice histogram : displayed 5' splice, other 5' splice: displayed 3' splice, other 3' splice: + + +Times in secs (cum) +read queries 0 ( 0) +sort queries 0 ( 0) +find matches 0 ( 0) + end 0 ( 0) +cross_match MERS-CoV.fa SARS-CoV.fa -minmatch 5 -minscore 10 -masklevel 101 +cross_match version 1.080721 + +Run date:time 200126:080603 +Query file(s): MERS-CoV.fa +Subject file(s): SARS-CoV.fa +Presumed sequence type (from score matrix): DNA + +Pairwise comparison algorithm: banded Smith-Waterman + +Score matrix (set by value of penalty: -2) + A C G T N X +A 1 -2 -2 -2 0 -3 +C -2 1 -2 -2 0 -3 +G -2 -2 1 -2 0 -3 +T -2 -2 -2 1 0 -3 +N 0 0 0 0 0 0 +X -3 -3 -3 -3 0 -3 + +Gap penalties: gap_init: -4, gap_ext: -3, ins_gap_ext: -3, del_gap_ext: -3, +Using complexity-adjusted scores. 
Assumed background frequencies: + Aa: 0.250 Cc: 0.250 Gg: 0.250 Tt: 0.250 Nn: 0.000 : 0.000 + +minmatch: 5, maxmatch: 20, max_group_size: 0 (turned off), minscore: 10, near_minscore: 10, bandwidth: 14, indexwordsize: 5, indexwordsize2: 4 +word_raw: 0 +vector_bound: 0 +gap1_minscore: 17, gap1_dropoff: -12 +masklevel: 101 (minmargin irrelevant) +splice_edge_length: 0, allocation: 1 bytes +min_intron_length: 30, max_intron_length: 10000, max_overlap: 20, min_exon_length: 6 + +Sequence file: MERS-CoV.fa 1 entries +Residue counts: + A 7862 + C 6074 + G 6280 + T 9817 +Total 30033 residues + +4 distinct alphabetic chars have freq > 1% -- + +Allocated space: 60069 seqs, 10 ids, 2 descrips + +NO QUALITY FILE MERS-CoV.fa.qual WAS FOUND. REMAINING INPUT QUALITIES SET TO 15. +Base_llr -5.55, intron_coeff: -0.00288828 +Num. pairs: 92 +Maximal single base matches (low complexity regions): + + 12 21.05 0.00 0.00 MERS-CoV 21 58 (29975) SARS-CoV 37 74 (29677) + 13 0.00 0.00 0.00 MERS-CoV 217 229 (29804) SARS-CoV 195 207 (29544) + 23 17.02 0.00 0.00 MERS-CoV 3683 3729 (26304) SARS-CoV 3373 3419 (26332) + 22 6.25 3.12 0.00 MERS-CoV 3951 3982 (26051) SARS-CoV 3640 3672 (26079) + 10 0.00 0.00 0.00 MERS-CoV 4466 4475 (25558) SARS-CoV 4261 4270 (25481) + 10 0.00 0.00 0.00 MERS-CoV 4565 4574 (25459) SARS-CoV 4601 4610 (25141) + 10 0.00 0.00 0.00 MERS-CoV 6258 6267 (23766) SARS-CoV 6005 6014 (23737) + 10 7.69 0.00 0.00 MERS-CoV 7138 7150 (22883) SARS-CoV 7179 7191 (22560) + 15 11.54 0.00 0.00 MERS-CoV 7505 7530 (22503) SARS-CoV 7540 7565 (22186) + 11 0.00 0.00 0.00 MERS-CoV 7998 8009 (22024) SARS-CoV 8012 8023 (21728) + 10 7.14 0.00 0.00 MERS-CoV 8249 8262 (21771) SARS-CoV 8269 8282 (21469) + 10 7.14 0.00 0.00 MERS-CoV 8886 8899 (21134) SARS-CoV 8879 8892 (20859) + 14 20.51 0.00 0.00 MERS-CoV 8938 8976 (21057) SARS-CoV 8931 8969 (20782) + 10 0.00 0.00 0.00 MERS-CoV 9448 9460 (20573) SARS-CoV 9423 9435 (20316) + 11 10.00 0.00 0.00 MERS-CoV 9542 9561 (20472) SARS-CoV 9532 9551 (20200) + 12 
10.53 0.00 0.00 MERS-CoV 9838 9856 (20177) SARS-CoV 9828 9846 (19905) + 11 0.00 0.00 0.00 MERS-CoV 9881 9891 (20142) SARS-CoV 9871 9881 (19870) + 13 9.52 0.00 0.00 MERS-CoV 9943 9963 (20070) SARS-CoV 9933 9953 (19798) + 12 26.32 0.00 0.00 MERS-CoV 10070 10126 (19907) SARS-CoV 10060 10116 (19635) + 13 13.33 0.00 3.33 MERS-CoV 10223 10252 (19781) SARS-CoV 10205 10233 (19518) + 14 5.88 0.00 0.00 MERS-CoV 10347 10363 (19670) SARS-CoV 10328 10344 (19407) + 12 18.60 0.00 0.00 MERS-CoV 10409 10451 (19582) SARS-CoV 10390 10432 (19319) + 15 14.29 0.00 0.00 MERS-CoV 10477 10504 (19529) SARS-CoV 10458 10485 (19266) + 20 15.79 0.00 0.00 MERS-CoV 10835 10872 (19161) SARS-CoV 10825 10862 (18889) + 14 18.75 0.00 0.00 MERS-CoV 11636 11667 (18366) SARS-CoV 11620 11651 (18100) + 43 20.35 0.00 0.00 MERS-CoV 11732 11844 (18189) SARS-CoV 11716 11828 (17923) + 14 17.24 0.00 0.00 MERS-CoV 11912 11940 (18093) SARS-CoV 11896 11924 (17827) + 29 25.00 0.00 0.00 MERS-CoV 12224 12351 (17682) SARS-CoV 12205 12332 (17419) + 10 7.69 0.00 0.00 MERS-CoV 12780 12792 (17241) SARS-CoV 12764 12776 (16975) + 11 0.00 0.00 0.00 MERS-CoV 13109 13119 (16914) SARS-CoV 13099 13109 (16642) + 11 0.00 0.00 0.00 MERS-CoV 13111 13121 (16912) SARS-CoV 12795 12805 (16946) + 13 21.05 0.00 0.00 MERS-CoV 13157 13194 (16839) SARS-CoV 13147 13184 (16567) + 12 14.29 0.00 0.00 MERS-CoV 13244 13264 (16769) SARS-CoV 13234 13254 (16497) + 10 12.50 0.00 0.00 MERS-CoV 13279 13294 (16739) SARS-CoV 13269 13284 (16467) + 10 7.69 0.00 0.00 MERS-CoV 13422 13434 (16599) SARS-CoV 13180 13192 (16559) + 18 16.67 0.00 0.00 MERS-CoV 13465 13500 (16533) SARS-CoV 13452 13487 (16264) + 10 11.76 0.00 0.00 MERS-CoV 13513 13529 (16504) SARS-CoV 13494 13510 (16241) + 26 8.33 0.00 0.00 MERS-CoV 13585 13620 (16413) SARS-CoV 13566 13601 (16150) + 16 14.29 0.00 0.00 MERS-CoV 13734 13761 (16272) SARS-CoV 13715 13742 (16009) + 10 0.00 0.00 0.00 MERS-CoV 13836 13846 (16187) SARS-CoV 13820 13830 (15921) + 15 17.50 2.50 0.00 MERS-CoV 13916 13955 (16078) 
SARS-CoV 13899 13939 (15812) + 27 16.67 0.00 0.00 MERS-CoV 14002 14055 (15978) SARS-CoV 13986 14039 (15712) + 14 14.63 2.44 2.44 MERS-CoV 14173 14213 (15820) SARS-CoV 14157 14197 (15554) + 17 25.81 0.00 0.00 MERS-CoV 14245 14337 (15696) SARS-CoV 14229 14321 (15430) + 15 22.45 0.00 0.00 MERS-CoV 14360 14408 (15625) SARS-CoV 14344 14392 (15359) + 16 12.00 0.00 0.00 MERS-CoV 14504 14528 (15505) SARS-CoV 14488 14512 (15239) + 10 7.69 0.00 0.00 MERS-CoV 14581 14593 (15440) SARS-CoV 14565 14577 (15174) + 128 26.83 0.00 0.00 MERS-CoV 14603 15325 (14708) SARS-CoV 14587 15309 (14442) + 29 11.36 0.00 0.00 MERS-CoV 15432 15475 (14558) SARS-CoV 15416 15459 (14292) + 61 27.25 0.00 0.00 MERS-CoV 15636 16002 (14031) SARS-CoV 15620 15986 (13765) + 11 23.08 0.00 0.00 MERS-CoV 16069 16107 (13926) SARS-CoV 16053 16091 (13660) + 43 26.05 0.00 0.00 MERS-CoV 16177 16391 (13642) SARS-CoV 16161 16375 (13376) + 19 17.78 0.00 0.00 MERS-CoV 16419 16463 (13570) SARS-CoV 16403 16447 (13304) + 17 28.80 0.00 0.00 MERS-CoV 16516 16640 (13393) SARS-CoV 16500 16624 (13127) + 36 20.79 0.00 0.00 MERS-CoV 16681 16781 (13252) SARS-CoV 16665 16765 (12986) + 10 11.11 0.00 0.00 MERS-CoV 16791 16808 (13225) SARS-CoV 16775 16792 (12959) + 17 13.79 0.00 0.00 MERS-CoV 16918 16946 (13087) SARS-CoV 16902 16930 (12821) + 39 28.55 0.88 0.88 MERS-CoV 17026 17820 (12213) SARS-CoV 17010 17804 (11947) + 14 5.56 0.00 0.00 MERS-CoV 17985 18002 (12031) SARS-CoV 17978 17995 (11756) + 12 13.04 0.00 0.00 MERS-CoV 18232 18254 (11779) SARS-CoV 18225 18247 (11504) + 16 17.14 0.00 0.00 MERS-CoV 18292 18326 (11707) SARS-CoV 18285 18319 (11432) + 20 4.35 0.00 0.00 MERS-CoV 18394 18416 (11617) SARS-CoV 18387 18409 (11342) + 14 5.56 0.00 0.00 MERS-CoV 18531 18548 (11485) SARS-CoV 18524 18541 (11210) + 29 19.74 0.00 0.00 MERS-CoV 18644 18719 (11314) SARS-CoV 18637 18712 (11039) + 16 19.51 0.00 0.00 MERS-CoV 18778 18818 (11215) SARS-CoV 18771 18811 (10940) + 11 9.09 0.00 4.55 MERS-CoV 18965 18986 (11047) SARS-CoV 18958 18978 (10773) 
+ 12 19.35 0.00 0.00 MERS-CoV 19097 19127 (10906) SARS-CoV 19102 19132 (10619) + 50 23.90 0.63 1.57 MERS-CoV 19147 19464 (10569) SARS-CoV 19152 19466 (10285) + 14 25.29 0.00 0.00 MERS-CoV 19507 19593 (10440) SARS-CoV 19509 19595 (10156) + 10 26.32 0.00 0.00 MERS-CoV 19675 19731 (10302) SARS-CoV 19677 19733 (10018) + 13 20.59 0.00 0.00 MERS-CoV 20226 20259 (9774) SARS-CoV 20237 20270 (9481) + 11 0.00 0.00 0.00 MERS-CoV 20275 20285 (9748) SARS-CoV 20286 20296 (9455) + 11 21.05 0.00 0.00 MERS-CoV 20410 20447 (9586) SARS-CoV 20421 20458 (9293) + 13 10.53 0.00 0.00 MERS-CoV 20491 20509 (9524) SARS-CoV 20502 20520 (9231) + 29 5.71 0.00 0.00 MERS-CoV 20533 20567 (9466) SARS-CoV 20544 20578 (9173) + 22 28.11 1.02 1.02 MERS-CoV 20701 21191 (8842) SARS-CoV 20712 21202 (8549) + 11 19.23 0.00 0.00 MERS-CoV 21247 21272 (8761) SARS-CoV 21258 21283 (8468) + 15 13.79 0.00 0.00 MERS-CoV 21445 21473 (8560) SARS-CoV 21456 21484 (8267) + 10 0.00 0.00 0.00 MERS-CoV 22110 22119 (7914) SARS-CoV 21736 21745 (8006) + 10 10.53 0.00 0.00 MERS-CoV 24097 24115 (5918) SARS-CoV 23888 23906 (5845) + 10 7.14 0.00 0.00 MERS-CoV 24194 24207 (5826) SARS-CoV 23979 23992 (5759) + 16 22.64 0.00 0.00 MERS-CoV 24362 24414 (5619) SARS-CoV 24147 24199 (5552) + 10 0.00 0.00 0.00 MERS-CoV 24449 24458 (5575) SARS-CoV 24132 24141 (5610) + 10 21.21 0.00 0.00 MERS-CoV 25121 25153 (4880) SARS-CoV 24879 24911 (4840) + 22 16.67 0.00 0.00 MERS-CoV 25310 25357 (4676) SARS-CoV 25068 25115 (4636) + 16 0.00 0.00 0.00 MERS-CoV 27772 27788 (2245) C SARS-CoV (8130) 21621 21605 + 11 18.52 0.00 0.00 MERS-CoV 28648 28674 (1359) SARS-CoV 28257 28283 (1468) + 23 24.10 0.00 0.00 MERS-CoV 28826 28908 (1125) SARS-CoV 28438 28520 (1231) + 15 9.52 0.00 0.00 MERS-CoV 29253 29273 (760) SARS-CoV 28859 28879 (872) + 12 6.67 0.00 0.00 MERS-CoV 29416 29430 (603) SARS-CoV 29022 29036 (715) + 10 0.00 0.00 0.00 MERS-CoV 29583 29592 (441) SARS-CoV 29358 29367 (384) + 11 15.00 0.00 0.00 MERS-CoV 29848 29867 (166) SARS-CoV 29458 29477 (274) + +1 
matching entries (first file). + +Discrepancy summary: +Qual algn cum rcum (%) unalgn X N sub del ins total (%) cum rcum (%) + + +Score histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: + 128 1 1 0.011 0 0 0.000 0 0 0.000 0 0 0.000 + 61 1 2 0.022 0 0 0.000 0 0 0.000 0 0 0.000 + 50 1 3 0.033 0 0 0.000 0 0 0.000 0 0 0.000 + 43 2 5 0.054 0 0 0.000 0 0 0.000 0 0 0.000 + 39 1 6 0.065 0 0 0.000 0 0 0.000 0 0 0.000 + 36 1 7 0.076 0 0 0.000 0 0 0.000 0 0 0.000 + 29 4 11 0.120 0 0 0.000 0 0 0.000 0 0 0.000 + 27 1 12 0.130 0 0 0.000 0 0 0.000 0 0 0.000 + 26 1 13 0.141 0 0 0.000 0 0 0.000 0 0 0.000 + 23 2 15 0.163 0 0 0.000 0 0 0.000 0 0 0.000 + 22 3 18 0.196 0 0 0.000 0 0 0.000 0 0 0.000 + 20 2 20 0.217 0 0 0.000 0 0 0.000 0 0 0.000 + 19 1 21 0.228 0 0 0.000 0 0 0.000 0 0 0.000 + 18 1 22 0.239 0 0 0.000 0 0 0.000 0 0 0.000 + 17 3 25 0.272 0 0 0.000 0 0 0.000 0 0 0.000 + 16 6 31 0.337 0 0 0.000 0 0 0.000 0 0 0.000 + 15 6 37 0.402 0 0 0.000 0 0 0.000 0 0 0.000 + 14 8 45 0.489 0 0 0.000 0 0 0.000 0 0 0.000 + 13 6 51 0.554 0 0 0.000 0 0 0.000 0 0 0.000 + 12 8 59 0.641 0 0 0.000 0 0 0.000 0 0 0.000 + 11 12 71 0.772 0 0 0.000 0 0 0.000 0 0 0.000 + 10 21 92 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + +Start histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: +> 1000 90 90 0.978 0 0 0.000 0 0 0.000 0 0 0.000 + 216 1 91 0.989 0 0 0.000 0 0 0.000 0 0 0.000 + 20 1 92 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + + +Splice histogram : displayed 5' splice, other 5' splice: displayed 3' splice, other 3' splice: + + +Times in secs (cum) +read queries 0 ( 0) +sort queries 0 ( 0) +find matches 0 ( 0) + end 0 ( 0) diff --git a/v1.2.4/test-hive/all.rep.success.svg b/v1.2.4/test-hive/all.rep.success.svg new file mode 100644 index 0000000..e442a5f --- /dev/null +++ b/v1.2.4/test-hive/all.rep.success.svg @@ -0,0 +1,218 @@ + + + + +2019-nCoV + + + + + + + + + + + + + + +SARS-CoV + +MERS-CoV + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Sequence identity (%) +Forward | Reverse + +100 + + +90+ + + +80+ + + +70+ + + +60+ + + +50+ + + +0-49 + + \ No newline at end of file diff --git a/v1.2.4/test-hive/config.txt b/v1.2.4/test-hive/config.txt new file mode 100644 index 0000000..9969e13 --- /dev/null +++ b/v1.2.4/test-hive/config.txt @@ -0,0 +1,3 @@ +1:2019-nCoV +2:SARS-CoV +3:MERS-CoV diff --git a/v1.2.4/test-hive/readme b/v1.2.4/test-hive/readme new file mode 100644 index 0000000..8a334d0 --- /dev/null +++ b/v1.2.4/test-hive/readme @@ -0,0 +1,17 @@ +# xmatchview-hive +# Test run instructions +# RLW2020 +# +# 1. Run minimap2 or cross_match between each pair of sequences. +# +# 2. Make a 3-way SVG hive synteny plot from +# minimap2 (.paf output) +../xmatchview-hive.py -q 2019-nCoV.txt -r SARS-CoV.txt -s MERS-CoV.txt -x 2019-nCoV.fa_vs_SARS-CoV.fa.paf -y 2019-nCoV.fa_vs_MERS-CoV.fa.paf -z MERS-CoV.fa_vs_SARS-CoV.fa.paf -i 0 -b 1 -c 50 -a 0.75 +# +# cross_match (.rep output) +../xmatchview-hive.py -q 2019-nCoV.txt -r SARS-CoV.txt -s MERS-CoV.txt -x 2019-nCoV.fa_vs_SARS-CoV.fa.rep -y 2019-nCoV.fa_vs_MERS-CoV.fa.rep -z MERS-CoV.fa_vs_SARS-CoV.fa.rep -e SARScds.gff -i 0 -b 1 -c 30 -a 0.75 +# +# 3. 
Compare your output to +# +# all.paf.success.svg +# all.rep.success.svg diff --git a/v1.2.4/test/FTL1_pa.fa b/v1.2.4/test/FTL1_pa.fa new file mode 100644 index 0000000..1b1f358 --- /dev/null +++ b/v1.2.4/test/FTL1_pa.fa @@ -0,0 +1,39 @@ +>JN039333.1_Picea_abies +ATGTCTCGCTTCGTGGAGCCATTAGTGGTTGGGAGAGTGATCGGAGATGTTCTGGATATGTTCGTGCCAA +GCGTCGATCTCGCTGTTACCTACGCCTCCAGGCAGGTCAACAATGGCTGCGAGCTCAAGCCCTCGGCAAT +TACATTGCTTCCTCGCGTCGACATCGGGGGTGAGGACCTTCGCAACTTTTACACTCTGGTAAGTAATTCT +TCAAATGCCGACAATAAATGCCCTAAATGTTCCCGATTTCTTTAAATGTTTATATAATTCTTCAAATGCC +GACAATAAATGCCCTAAATGTTCTCGATTTCTTTAAAATTTTATATAATCTGCAGAAAACGAATTCAAGA +TTTGAAAATTTTGAATAGGGTTTTAAAACTTTTTTTAAATATAATTGCATGTTCTATTTTGTTTTGTTGT +CAAATTTAACAAGCGTAGCTATTTAAATTTAAATATTGTAGAATTTAACCAGATAAGCTACAAAGCTCTT +TAAATTTAAATATTGTTTAAATTTAAATATTGTAAAATCTAACAAGATAAGTTACAATATAAGAAGAAAG +CTCTTTCAAAACCTATTACACAGTATTATTTAGAATTTTTTTCTAAAATACTTAAATTTAAATAAATGTT +TTGTATTATTTCTAAGGTTTTAAAACTTATTACATAGTATTTATTCAGAAAAATTAATTAATAATATTTA +AATTTAAATAAATGTTTCTGTTTTTTTGGTTTCAAAACCTATTATACAGTATTTATCCAGAAAAATTAAA +TAAGAATATTTAAATTTAAAGAAATGTTTCTGTTTTTAAGTTTCAAAACTTATTACACAGTATTTATCCA +GAAAAATTAACTAACAATACTTAAATTTAAATAAATATTTCTGTATTATTTCTTCTAAAGAATATATCAT +GCTCTTCATCTTTAAGTTTTTAAGATGAAAAATTCATATAAATGTTTCTGTTTTTAAGTTTCAAAACTTA +TTACACAGTATTTATCCAGAAAAATTAACTAACAATACTTAAATTTAAATAAATATTTCTGTATTATTTC +TTCTAAAGAATATATCATGCTCTTCATCTTTAAGTTTTTCTTCTTCTCATTAAGTTTTCCATATTTGCAA +AATAAACATAAATACAAAGTACATATAAATAGATGTTTCATTATTATTAAACTATGTTTCATTTACTACT +TTCATTATTAAACTATGATGAGAAGCTAAACTACTTAAATTTAAAGAAAGTGTTTCTGTATTATTTCTTT +TAAATAATATATCATTACATGCTCTTCATCTTTAAGTTTTTCTTCTTCTCATTAAGTTTTCCATATTTGC +AAAGTAAACATAGATAGATGTTCTTTTATTATATTTTTTCATTTCCTATTTCTATTATTAAGTGATATAG +GTGATGAGAAATTTATTTGAATTATCAGAATAAATGTTTTTATATAAAGATATTTTCTTAGTACTGTTAT +TTTATGTAAGTGTATGTAAGTGATTATCTAATTGATTTCAAATTGATAAATTTTCATTTTTTTTTAACAG +TAAGGTTTCTCTATTACCCTCCTTTATACTTAATTTTTGGAATGAAGAGTATTTTGAATTAAAAAAAAAA +AACATTATGGTATTTAAATAACAAATTCAACAAACTTCATTTTCTCACTTATAAAGGCTCGTAATTCCAA 
+ATATATCCAAAGTTTCATTTTCTTAACAGTTTCTTACAAGGGTTTCTTTATTACGTTCCTTTATACCTAA +TTTTTTTTAACAGTTTCTTACCATGGTTTCTCTATTACCTTCCTTTATACCTAATTTTTGGAATGAAGAA +TATTTTGAATTTTTTTAAAATAATTTTATGGTATTTAAATAACAAATTCAACCTTCTTCATTTTCTCACG +CTGGCTTGTAACTCTAAATCATATCCATATTCAGGTTATGACGGATCCAGATGCCCCAAGTCCCAGCGAT +CCAACACTGCGAGAATACCTACAATGGTTAGGATTGCCAACCAATTCTAAATCCTCGTAATATTAAACGC +AATTTCCATGGATCTTGTCGATAATGAATCTCTTTGTCTTGTATGCAGGATTGTAACTGATATTCCCGCT +ACAACTTCAGCCTCTTTCGGTAAGTCCAATCATGCATATGCATATATGCTAAATAATAATCATAATCCTT +GCTGGGATTTTTAGTTTAGGTAAAGGGCCTATGATAATAGTCAACGTTTGGGAATGAATGGATTCAGGCA +GAGAGCTGGTAAGTTACGAGTCACCGAGGCCTACCATCGGTATCCACAGGTTCATTTTTGTGTTGTTCAA +GCAGATGGGGCGGCAGACAGTTTACCCTCCAGGTTCGCGCCTCAATTTCAATACCAGAAATTTCGCACTC +TCCAACAGCCTTGGTCTGCCAGTGGCGGCCGTTTATTTCAATGCTCAGAAAGAGGCAGCAGGTAGAAGAC +GCTGAAGAGTATATGTTTTCGTTTATATGGCTGGCTATCTCACTATCTCGAACAGAAGCCATGAATGAAT +CAGACACTCATATTTATCTGCTGAACTGCCATATTGTACGCTCTATGTTCTGTTTTTTTCACAATTCCGT +AGAAACAAGCGTCTGAATCCG diff --git a/v1.2.4/test/FTL1_pa.fa_vs_FTL1_ss.fa.rep b/v1.2.4/test/FTL1_pa.fa_vs_FTL1_ss.fa.rep new file mode 100644 index 0000000..69718ab --- /dev/null +++ b/v1.2.4/test/FTL1_pa.fa_vs_FTL1_ss.fa.rep @@ -0,0 +1,169 @@ +/home/pubseq/BioSw/phrap/current/cross_match FTL1_pa.fa FTL1_ss.fa -minmatch 5 -minscore 10 -masklevel 101 +cross_match version 1.080721 + +Run date:time 171211:081724 +Query file(s): FTL1_pa.fa +Subject file(s): FTL1_ss.fa +Presumed sequence type (from score matrix): DNA + +Pairwise comparison algorithm: banded Smith-Waterman + +Score matrix (set by value of penalty: -2) + A C G T N X +A 1 -2 -2 -2 0 -3 +C -2 1 -2 -2 0 -3 +G -2 -2 1 -2 0 -3 +T -2 -2 -2 1 0 -3 +N 0 0 0 0 0 0 +X -3 -3 -3 -3 0 -3 + +Gap penalties: gap_init: -4, gap_ext: -3, ins_gap_ext: -3, del_gap_ext: -3, +Using complexity-adjusted scores. 
Assumed background frequencies: + Aa: 0.250 Cc: 0.250 Gg: 0.250 Tt: 0.250 Nn: 0.000 : 0.000 + +minmatch: 5, maxmatch: 20, max_group_size: 0 (turned off), minscore: 10, near_minscore: 10, bandwidth: 14, indexwordsize: 5, indexwordsize2: 4 +word_raw: 0 +vector_bound: 0 +gap1_minscore: 17, gap1_dropoff: -12 +masklevel: 101 (minmargin irrelevant) +splice_edge_length: 0, allocation: 1 bytes +min_intron_length: 30, max_intron_length: 10000, max_overlap: 20, min_exon_length: 6 + +Sequence file: FTL1_pa.fa 1 entries +Residue counts: + A 843 + C 415 + G 355 + T 998 +Total 2611 residues + +4 distinct alphabetic chars have freq > 1% -- + +Allocated space: 5225 seqs, 24 ids, 2 descrips + +NO QUALITY FILE FTL1_pa.fa.qual WAS FOUND. REMAINING INPUT QUALITIES SET TO 15. +Base_llr -5.55, intron_coeff: -0.00288828 +Num. pairs: 46 +Maximal single base matches (low complexity regions): +JN039333.1_Picea_abies Aa Score: 12 Residues: 1601 - 1612 + + 609 3.59 0.72 0.84 JN039333.1_Picea_abies 97 931 (1680) KT263970.1_Picea_sitchensis 1 834 (1541) + 46 6.35 0.00 0.00 JN039333.1_Picea_abies 204 266 (2345) KT263970.1_Picea_sitchensis 169 231 (2144) + 52 3.08 0.00 0.00 JN039333.1_Picea_abies 265 329 (2282) KT263970.1_Picea_sitchensis 108 172 (2203) + 10 0.00 0.00 0.00 JN039333.1_Picea_abies 369 379 (2232) KT263970.1_Picea_sitchensis 546 556 (1819) + 13 5.88 0.00 0.00 JN039333.1_Picea_abies 417 433 (2178) KT263970.1_Picea_sitchensis 407 423 (1952) + 26 10.64 0.00 0.00 JN039333.1_Picea_abies 437 483 (2128) KT263970.1_Picea_sitchensis 388 434 (1941) + 10 8.70 0.00 0.00 JN039333.1_Picea_abies 440 462 (2149) C KT263970.1_Picea_sitchensis (1851) 524 502 + 16 4.55 0.00 0.00 JN039333.1_Picea_abies 484 505 (2106) KT263970.1_Picea_sitchensis 341 362 (2013) + 21 9.52 0.00 0.00 JN039333.1_Picea_abies 506 547 (2064) KT263970.1_Picea_sitchensis 346 387 (1988) + 10 0.00 0.00 0.00 JN039333.1_Picea_abies 557 566 (2045) KT263970.1_Picea_sitchensis 386 395 (1980) + 71 11.25 0.83 2.92 JN039333.1_Picea_abies 564 
803 (1808) KT263970.1_Picea_sitchensis 548 782 (1593) + 24 13.92 1.27 3.16 JN039333.1_Picea_abies 564 721 (1890) KT263970.1_Picea_sitchensis 628 782 (1593) + 21 12.50 3.75 0.00 JN039333.1_Picea_abies 564 643 (1968) KT263970.1_Picea_sitchensis 709 791 (1584) + 10 8.33 0.00 0.00 JN039333.1_Picea_abies 601 624 (1987) C KT263970.1_Picea_sitchensis (2008) 367 344 + 17 4.88 4.88 0.00 JN039333.1_Picea_abies 603 643 (1968) KT263970.1_Picea_sitchensis 973 1015 (1360) + 11 0.00 0.00 0.00 JN039333.1_Picea_abies 645 656 (1955) C KT263970.1_Picea_sitchensis (1817) 558 547 + 95 8.86 2.11 1.27 JN039333.1_Picea_abies 648 884 (1727) KT263970.1_Picea_sitchensis 463 701 (1674) + 12 4.17 4.17 0.00 JN039333.1_Picea_abies 698 721 (1890) KT263970.1_Picea_sitchensis 982 1006 (1369) + 12 4.17 4.17 0.00 JN039333.1_Picea_abies 698 721 (1890) KT263970.1_Picea_sitchensis 1178 1202 (1173) + 201 9.24 5.35 1.46 JN039333.1_Picea_abies 730 1346 (1265) KT263970.1_Picea_sitchensis 463 1103 (1272) + 11 0.00 0.00 0.00 JN039333.1_Picea_abies 775 792 (1819) C KT263970.1_Picea_sitchensis (1967) 408 391 + 16 0.00 4.17 0.00 JN039333.1_Picea_abies 780 803 (1808) KT263970.1_Picea_sitchensis 982 1006 (1369) + 16 0.00 4.17 0.00 JN039333.1_Picea_abies 780 803 (1808) KT263970.1_Picea_sitchensis 1178 1202 (1173) + 31 14.66 4.74 1.72 JN039333.1_Picea_abies 811 1042 (1569) KT263970.1_Picea_sitchensis 463 701 (1674) + 11 5.00 0.00 0.00 JN039333.1_Picea_abies 854 873 (1738) C KT263970.1_Picea_sitchensis (2012) 363 344 + 28 5.41 8.11 0.00 JN039333.1_Picea_abies 858 931 (1680) KT263970.1_Picea_sitchensis 979 1058 (1317) + 24 6.94 8.33 0.00 JN039333.1_Picea_abies 860 931 (1680) KT263970.1_Picea_sitchensis 1177 1254 (1121) + 10 13.64 0.00 0.00 JN039333.1_Picea_abies 940 961 (1650) KT263970.1_Picea_sitchensis 761 782 (1593) + 38 10.67 0.00 0.00 JN039333.1_Picea_abies 968 1042 (1569) KT263970.1_Picea_sitchensis 547 621 (1754) + 38 7.23 0.00 2.41 JN039333.1_Picea_abies 969 1051 (1560) KT263970.1_Picea_sitchensis 463 543 
(1832) + 11 5.00 0.00 0.00 JN039333.1_Picea_abies 1012 1031 (1580) C KT263970.1_Picea_sitchensis (2012) 363 344 +1034 2.75 0.70 2.26 JN039333.1_Picea_abies 1016 2434 (177) KT263970.1_Picea_sitchensis 979 2375 (0) + 48 6.84 5.13 0.85 JN039333.1_Picea_abies 1018 1134 (1477) KT263970.1_Picea_sitchensis 1177 1298 (1077) + 17 6.98 0.00 4.65 JN039333.1_Picea_abies 1216 1258 (1353) KT263970.1_Picea_sitchensis 503 543 (1832) + 73 5.26 0.00 0.88 JN039333.1_Picea_abies 1222 1335 (1276) KT263970.1_Picea_sitchensis 756 868 (1507) + 13 4.00 0.00 4.00 JN039333.1_Picea_abies 1225 1249 (1362) KT263970.1_Picea_sitchensis 678 701 (1674) + 20 5.77 5.77 0.00 JN039333.1_Picea_abies 1340 1391 (1220) KT263970.1_Picea_sitchensis 907 961 (1414) + 11 10.26 2.56 0.00 JN039333.1_Picea_abies 1354 1392 (1219) KT263970.1_Picea_sitchensis 1130 1169 (1206) + 10 0.00 0.00 0.00 JN039333.1_Picea_abies 1528 1541 (1070) KT263970.1_Picea_sitchensis 1692 1705 (670) + 63 11.30 0.00 0.00 JN039333.1_Picea_abies 1544 1658 (953) KT263970.1_Picea_sitchensis 1716 1830 (545) + 14 8.33 4.17 0.00 JN039333.1_Picea_abies 1666 1689 (922) KT263970.1_Picea_sitchensis 1834 1858 (517) + 19 11.76 0.00 0.00 JN039333.1_Picea_abies 1719 1752 (859) KT263970.1_Picea_sitchensis 1487 1520 (855) + 29 10.34 0.00 0.00 JN039333.1_Picea_abies 1752 1809 (802) KT263970.1_Picea_sitchensis 1639 1696 (679) + 10 0.00 0.00 0.00 JN039333.1_Picea_abies 1753 1764 (847) KT263970.1_Picea_sitchensis 1475 1486 (889) + 53 8.70 0.00 3.48 JN039333.1_Picea_abies 1775 1889 (722) KT263970.1_Picea_sitchensis 1489 1599 (776) + 10 12.00 0.00 4.00 JN039333.1_Picea_abies 1893 1917 (694) KT263970.1_Picea_sitchensis 1607 1630 (745) + +1 matching entries (first file). 
+ +Discrepancy summary: +Qual algn cum rcum (%) unalgn X N sub del ins total (%) cum rcum (%) + + +Score histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: +> 1000 1 1 0.022 0 0 0.000 0 0 0.000 0 0 0.000 + 609 1 2 0.043 0 0 0.000 0 0 0.000 0 0 0.000 + 201 1 3 0.065 0 0 0.000 0 0 0.000 0 0 0.000 + 95 1 4 0.087 0 0 0.000 0 0 0.000 0 0 0.000 + 73 1 5 0.109 0 0 0.000 0 0 0.000 0 0 0.000 + 71 1 6 0.130 0 0 0.000 0 0 0.000 0 0 0.000 + 63 1 7 0.152 0 0 0.000 0 0 0.000 0 0 0.000 + 53 1 8 0.174 0 0 0.000 0 0 0.000 0 0 0.000 + 52 1 9 0.196 0 0 0.000 0 0 0.000 0 0 0.000 + 48 1 10 0.217 0 0 0.000 0 0 0.000 0 0 0.000 + 46 1 11 0.239 0 0 0.000 0 0 0.000 0 0 0.000 + 38 2 13 0.283 0 0 0.000 0 0 0.000 0 0 0.000 + 31 1 14 0.304 0 0 0.000 0 0 0.000 0 0 0.000 + 29 1 15 0.326 0 0 0.000 0 0 0.000 0 0 0.000 + 28 1 16 0.348 0 0 0.000 0 0 0.000 0 0 0.000 + 26 1 17 0.370 0 0 0.000 0 0 0.000 0 0 0.000 + 24 2 19 0.413 0 0 0.000 0 0 0.000 0 0 0.000 + 21 2 21 0.457 0 0 0.000 0 0 0.000 0 0 0.000 + 20 1 22 0.478 0 0 0.000 0 0 0.000 0 0 0.000 + 19 1 23 0.500 0 0 0.000 0 0 0.000 0 0 0.000 + 17 2 25 0.543 0 0 0.000 0 0 0.000 0 0 0.000 + 16 3 28 0.609 0 0 0.000 0 0 0.000 0 0 0.000 + 14 1 29 0.630 0 0 0.000 0 0 0.000 0 0 0.000 + 13 2 31 0.674 0 0 0.000 0 0 0.000 0 0 0.000 + 12 2 33 0.717 0 0 0.000 0 0 0.000 0 0 0.000 + 11 5 38 0.826 0 0 0.000 0 0 0.000 0 0 0.000 + 10 8 46 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + +Start histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: +> 1000 16 16 0.348 0 0 0.000 0 0 0.000 0 0 0.000 + 968 1 17 0.370 0 0 0.000 0 0 0.000 0 0 0.000 + 967 1 18 0.391 0 0 0.000 0 0 0.000 0 0 0.000 + 939 1 19 0.413 0 0 0.000 0 0 0.000 0 0 0.000 + 859 1 20 0.435 0 0 0.000 0 0 0.000 0 0 0.000 + 857 1 21 0.457 0 0 0.000 0 0 0.000 0 0 0.000 + 853 1 22 0.478 0 0 0.000 0 0 0.000 0 0 0.000 + 810 1 23 0.500 0 0 0.000 0 0 0.000 0 0 0.000 + 779 2 25 0.543 0 0 0.000 0 0 0.000 0 0 0.000 + 774 1 26 0.565 0 0 
0.000 0 0 0.000 0 0 0.000 + 729 1 27 0.587 0 0 0.000 0 0 0.000 0 0 0.000 + 697 2 29 0.630 0 0 0.000 0 0 0.000 0 0 0.000 + 647 1 30 0.652 0 0 0.000 0 0 0.000 0 0 0.000 + 644 1 31 0.674 0 0 0.000 0 0 0.000 0 0 0.000 + 602 1 32 0.696 0 0 0.000 0 0 0.000 0 0 0.000 + 600 1 33 0.717 0 0 0.000 0 0 0.000 0 0 0.000 + 563 3 36 0.783 0 0 0.000 0 0 0.000 0 0 0.000 + 556 1 37 0.804 0 0 0.000 0 0 0.000 0 0 0.000 + 505 1 38 0.826 0 0 0.000 0 0 0.000 0 0 0.000 + 483 1 39 0.848 0 0 0.000 0 0 0.000 0 0 0.000 + 439 1 40 0.870 0 0 0.000 0 0 0.000 0 0 0.000 + 436 1 41 0.891 0 0 0.000 0 0 0.000 0 0 0.000 + 416 1 42 0.913 0 0 0.000 0 0 0.000 0 0 0.000 + 368 1 43 0.935 0 0 0.000 0 0 0.000 0 0 0.000 + 264 1 44 0.957 0 0 0.000 0 0 0.000 0 0 0.000 + 203 1 45 0.978 0 0 0.000 0 0 0.000 0 0 0.000 + 96 1 46 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + + +Splice histogram : displayed 5' splice, other 5' splice: displayed 3' splice, other 3' splice: + + +Times in secs (cum) +read queries 0 ( 0) +sort queries 0 ( 0) +find matches 0 ( 0) + end 0 ( 0) diff --git a/v1.2.4/test/FTL1_pa.gff b/v1.2.4/test/FTL1_pa.gff new file mode 100644 index 0000000..d5ad7a1 --- /dev/null +++ b/v1.2.4/test/FTL1_pa.gff @@ -0,0 +1,4 @@ +JN039333.1_Picea_abies . . 1 198 . . . . blue +JN039333.1_Picea_abies . . 1925 1986 . . . . yellow +JN039333.1_Picea_abies . . 2079 2119 . . . . green +JN039333.1_Picea_abies . . 2238 2455 . . . . 
purple diff --git a/v1.2.4/test/FTL1_paMF.fa b/v1.2.4/test/FTL1_paMF.fa new file mode 100644 index 0000000..6a4a911 --- /dev/null +++ b/v1.2.4/test/FTL1_paMF.fa @@ -0,0 +1,4 @@ +>JN039333.1_Picea_abies +ATGTCTCGCTTCGTGGAGCCATnnnTAGTGGTTGGGAGAGTGATCGGAGATGTTCTGGATATGTTCGTGCCAAGCGTCGATCTCGCTGTTACCTACGCCTCCAGGCAGGTCAACAATGGCTGCGAGCTCAAGCCCTCGGCAATTACATTGCTTCCTCGCGTCGACATCGGGGGTGAGGACCTTCGCAACTTTTACACTCTGGTAAGTAATTCTTCAAATGCCGACAATAAATGCCCTAAATGTTCCCGATTTCTTTAAATGTTTATATAATTCTTCAAATGCCGACAATAAATGCCCTAAATGTTCTCGATTTCTTTAAAATTTTATATAATCTGCAGAAAACGAATTCAAGATTTGAAAATTTTGAATAGGGTTTTAAAACnnnnTTTTTTTAAATATAATTGCATGTTCTATTTTGTTTTGTTGTCAAATTTAACAAGCGTAGCTATTTAAATTTAAATATTGTAGAATTTAACCAGATAAGCTACAAAGCTCTTTAAATTTAAATATTGTTTAAATTTAAATATTGTAAAATCTAACAAGATAAGTTACAATATAAGAAGAAAGCTCTTTCAAAACCTATTACACAGTATTATTTAGAATTTTTTTCTAAAATACTTAAATTTAAATAAATGTTTTGTATTATTTCTAAGGTTTTAAAACTTATTACATAGTATTTATTCAGAAAAATTAATTAATAATATTTAAATTTAAATAAATGTTTCTGTTTTTTTGGTTTCAAAACCTATTATACAGTATTTATCCAGAAAAATTAAA +>JN039333.2_Picea_abies 
+TAAGAATATTTAAATTTAAAGAAATGTTTCTGTTTTTAAGTTTCAAAACTTATTACACAGTATTTATCCAGAAAAATTAACTAACAATACTTAAATTTAAATAAATATTTCTGTATTATTTCTTCTAAAGAATATATCATGCTCTTCATCTTTAAGTTTTTAAGATGAAAAATTCATATAAATGTTTCTGTTTTTAAGTTTCAAAACTTATTACACAGTATTTATCCAGAAAAATTAACTAACAATACTTAAATTTAAATAAATATTTCTGTATTATTTCTTCTAAAGAATATATCATGCTCTTCATCTTTAAGTTTTTCTTCTTCTCATTAAGTTTTCCATATTTGCAAAATAAACATAAATACAAAGTACATATAAATAGATGTnnnTTCATTATTATTAAACTATGTTTCATTTACTACTTTCATTATTAAACTATGATGAGAAGCTAAACTACTTAAATTTAAAGAAAGTGTTTCTGTATTATTTCTTTTAAATAATATATCATTACATGCTCTTCATCTTTAAGTTTTTCTTCTTCTCATTAAGTTTTCCATATTTGCAAAGTAAACATAGATAGATGTTCTTTTATTATATTTTTTCATTTCCTATTTCTATTATTAAGTGATATAGGTGATGAGAAATTTATTTGAATTATCAGAATAAATGTTTTTATATAAAGATATTTTCTTAGTACTGTTATTTTATGTAAGTGTATGTAAGTGATTATCTAATTGATTTCAAATTGATAAATTTTCATTTTTTTTTAACAGTAAGGTTTCTCTATTACCCTCCTTTATACTTAATTTTTGGAATGAAGAGTATTTTGAATTAAAAAAAAAAAACATTATGGTATTTAAATAACAAATTCAACAAACTTCATTTTCTCACTTATAAAGGCTCGTAATTCCAAATATATCCAAAGTTTCATTTTCTTAACAGTTTCTTACAAGGGTTTCTTTATTACGTTCCTTTATACCTAATTTTTTTTAACAGTTTCTTACCATGGTTTCTCTATTACCTTCCTTTATACCTAATTTTTGGAATGAAGAATATTTTGAATTTTTTTAAAATAATTTTATGGTATTTAAATnnnnnnAACAAATTCAACCTTCTTCATTTTCTCACGCTGGCTTGTAACTCTAAATCATATCCATATTCAGGTTATGACGGATCCAGATGCCCCAAGTCCCAGCGATCCAACACTGCGAGAATACCTACAATGGTTAGGATTGCCAACCAATTCTAAATCCTCGTAATATTAAACGCAATTTCCATGGATCTTGTCGATAATGAATCTCTTTGTCTTGTATGCAGGATTGTAACTGATATTCCCGCTACAACTTCAGCCTCTTTCGGTAAGTCCAATCATGCATATGCATATATGCTAAATAATAATCATAATCCTTGCTGGGATTTTTAGTTTAGGTAAAGGGCCTATGATAATAGTCAACGTTTGGGAATGAATGGATTCAGGCAGAGAGCTGGTAAGTTACGAGTCACCGAGGCCTACCATCGGTATCCACAGGTTCATTTTTGTGTTGTTCAAGCAGATGGGGCGGCAGACAGTTTACCCTCCAGGTTCGCGCCTCAATTTCAATACCAGAAATTTCGCACTCTCCAACAGCCTTGGTCTGCCAGTGGCGGCCGTTTATTTCAATGCTCAGAAAGAGGCAGCAGGTAGAAGACGCTGAAGAGTATATGTTTTCGTTTATATGGCTGGCTATCTCACTATCTCGAACAGAAGCCATGAATGAATCAGACACTCATATTTATCTGCTGAACTGCCATATTGTACGCTCTATGTTCTGTTTTTTTCACAATTCCGTAGAAACAAGCGTCTGAATCCG diff --git a/v1.2.4/test/FTL1_paMF.fa_vs_FTL1_ssMF.fa.paf b/v1.2.4/test/FTL1_paMF.fa_vs_FTL1_ssMF.fa.paf new file mode 100644 index 
0000000..f6be8e4 --- /dev/null +++ b/v1.2.4/test/FTL1_paMF.fa_vs_FTL1_ssMF.fa.paf @@ -0,0 +1,5 @@ +JN039333.1_Picea_abies 770 101 768 + KT263970.1_Picea_sitchensis 1125 5 750 473 751 60 tp:A:P cm:i:66 s1:i:457 s2:i:0 dv:f:0.0420 rl:i:0 +JN039333.2_Picea_abies 1841 462 1657 + KT263970.2_Picea_sitchensis 1263 65 1256 931 1207 60 tp:A:P cm:i:148 s1:i:927 s2:i:0 dv:f:0.0279 rl:i:0 +JN039333.2_Picea_abies 1841 91 572 + KT263970.1_Picea_sitchensis 1125 601 1104 279 509 60 tp:A:P cm:i:34 s1:i:272 s2:i:125 dv:f:0.0656 rl:i:0 +JN039333.2_Picea_abies 1841 2 350 + KT263970.1_Picea_sitchensis 1125 673 1094 139 421 0 tp:A:S cm:i:16 s1:i:125 dv:f:0.0935 rl:i:0 +JN039333.2_Picea_abies 1841 50 227 + KT263970.1_Picea_sitchensis 1125 473 659 51 187 0 tp:A:S cm:i:5 s1:i:49 dv:f:0.1278 rl:i:0 diff --git a/v1.2.4/test/FTL1_paMF.fa_vs_FTL1_ssMF.fa.rep b/v1.2.4/test/FTL1_paMF.fa_vs_FTL1_ssMF.fa.rep new file mode 100644 index 0000000..6f37224 --- /dev/null +++ b/v1.2.4/test/FTL1_paMF.fa_vs_FTL1_ssMF.fa.rep @@ -0,0 +1,197 @@ +/home/pubseq/BioSw/phrap/current/cross_match FTL1_paMF.fa FTL1_ssMF.fa -minmatch 5 -minscore 10 -masklevel 101 +cross_match version 1.080721 + +Run date:time 191008:145022 +Query file(s): FTL1_paMF.fa +Subject file(s): FTL1_ssMF.fa +Presumed sequence type (from score matrix): DNA + +Pairwise comparison algorithm: banded Smith-Waterman + +Score matrix (set by value of penalty: -2) + A C G T N X +A 1 -2 -2 -2 0 -3 +C -2 1 -2 -2 0 -3 +G -2 -2 1 -2 0 -3 +T -2 -2 -2 1 0 -3 +N 0 0 0 0 0 0 +X -3 -3 -3 -3 0 -3 + +Gap penalties: gap_init: -4, gap_ext: -3, ins_gap_ext: -3, del_gap_ext: -3, +Using complexity-adjusted scores. 
Assumed background frequencies: + Aa: 0.250 Cc: 0.250 Gg: 0.250 Tt: 0.250 Nn: 0.000 : 0.000 + +minmatch: 5, maxmatch: 20, max_group_size: 0 (turned off), minscore: 10, near_minscore: 10, bandwidth: 14, indexwordsize: 5, indexwordsize2: 4 +word_raw: 0 +vector_bound: 0 +gap1_minscore: 17, gap1_dropoff: -12 +masklevel: 101 (minmargin irrelevant) +splice_edge_length: 0, allocation: 1 bytes +min_intron_length: 30, max_intron_length: 10000, max_overlap: 20, min_exon_length: 6 + +Sequence file: FTL1_paMF.fa 2 entries +Residue counts: + A 843 + C 415 + G 355 + T 998 + n 16 +Total 2627 residues + +4 distinct alphabetic chars have freq > 1% -- + +Allocated space: 5259 seqs, 47 ids, 3 descrips + +NO QUALITY FILE FTL1_paMF.fa.qual WAS FOUND. REMAINING INPUT QUALITIES SET TO 15. +Base_llr -5.55, intron_coeff: -0.00288828 +Num. pairs: 53 +Maximal single base matches (low complexity regions): +JN039333.2_Picea_abies Aa Score: 12 Residues: 834 - 845 + + 473 3.98 0.59 1.62 JN039333.1_Picea_abies 100 777 (0) KT263970.1_Picea_sitchensis 1 671 (454) + 46 1.92 0.00 0.00 JN039333.1_Picea_abies 207 258 (519) KT263970.1_Picea_sitchensis 169 220 (905) + 52 3.08 0.00 0.00 JN039333.1_Picea_abies 268 332 (445) KT263970.1_Picea_sitchensis 108 172 (953) + 13 5.88 0.00 0.00 JN039333.1_Picea_abies 424 440 (337) KT263970.1_Picea_sitchensis 408 424 (701) + 26 10.64 0.00 0.00 JN039333.1_Picea_abies 444 490 (287) KT263970.1_Picea_sitchensis 389 435 (690) + 10 8.70 0.00 0.00 JN039333.1_Picea_abies 447 469 (308) C KT263970.1_Picea_sitchensis (600) 525 503 + 16 4.55 0.00 0.00 JN039333.1_Picea_abies 491 512 (265) KT263970.1_Picea_sitchensis 342 363 (762) + 21 9.52 0.00 0.00 JN039333.1_Picea_abies 513 554 (223) KT263970.1_Picea_sitchensis 347 388 (737) + 10 0.00 0.00 0.00 JN039333.1_Picea_abies 564 573 (204) KT263970.1_Picea_sitchensis 387 396 (729) + 55 10.19 1.94 3.40 JN039333.1_Picea_abies 571 776 (1) KT263970.1_Picea_sitchensis 549 751 (374) + 24 13.92 1.27 3.16 JN039333.1_Picea_abies 571 728 (49) 
KT263970.1_Picea_sitchensis 631 785 (340) + 17 13.33 4.00 0.00 JN039333.1_Picea_abies 571 645 (132) KT263970.1_Picea_sitchensis 712 789 (336) + 10 8.33 0.00 0.00 JN039333.1_Picea_abies 608 631 (146) C KT263970.1_Picea_sitchensis (757) 368 345 + 17 4.88 4.88 0.00 JN039333.1_Picea_abies 610 650 (127) KT263970.1_Picea_sitchensis 978 1020 (105) + 33 9.17 5.00 1.67 JN039333.1_Picea_abies 655 774 (3) KT263970.1_Picea_sitchensis 464 587 (538) + 12 4.17 4.17 0.00 JN039333.1_Picea_abies 705 728 (49) KT263970.2_Picea_sitchensis 58 82 (1181) + 12 4.17 4.17 0.00 JN039333.1_Picea_abies 705 728 (49) KT263970.1_Picea_sitchensis 987 1011 (114) + 27 5.26 0.00 0.00 JN039333.1_Picea_abies 737 774 (3) KT263970.1_Picea_sitchensis 464 501 (624) + + 59 8.77 0.00 0.88 JN039333.2_Picea_abies 1 114 (1736) KT263970.1_Picea_sitchensis 592 704 (421) + 102 1.86 4.35 0.00 JN039333.2_Picea_abies 1 161 (1689) KT263970.1_Picea_sitchensis 672 839 (286) + 164 9.04 6.43 1.91 JN039333.2_Picea_abies 5 579 (1271) KT263970.1_Picea_sitchensis 508 1108 (17) + 14 10.34 0.00 0.00 JN039333.2_Picea_abies 5 33 (1817) KT263970.1_Picea_sitchensis 757 785 (340) + 11 0.00 0.00 0.00 JN039333.2_Picea_abies 5 22 (1828) C KT263970.1_Picea_sitchensis (716) 409 392 + 16 0.00 4.17 0.00 JN039333.2_Picea_abies 10 33 (1817) KT263970.2_Picea_sitchensis 58 82 (1181) + 16 0.00 4.17 0.00 JN039333.2_Picea_abies 10 33 (1817) KT263970.1_Picea_sitchensis 987 1011 (114) + 38 7.23 0.00 2.41 JN039333.2_Picea_abies 41 123 (1727) KT263970.1_Picea_sitchensis 464 544 (581) + 11 5.00 0.00 0.00 JN039333.2_Picea_abies 84 103 (1747) C KT263970.1_Picea_sitchensis (761) 364 345 + 28 5.41 8.11 0.00 JN039333.2_Picea_abies 88 161 (1689) KT263970.1_Picea_sitchensis 984 1063 (62) + 24 6.94 8.33 0.00 JN039333.2_Picea_abies 90 161 (1689) KT263970.2_Picea_sitchensis 57 134 (1129) + 50 9.71 0.00 0.97 JN039333.2_Picea_abies 170 272 (1578) KT263970.1_Picea_sitchensis 603 704 (421) + 10 13.64 0.00 0.00 JN039333.2_Picea_abies 170 191 (1659) 
KT263970.1_Picea_sitchensis 764 785 (340) + 30 10.67 2.67 0.00 JN039333.2_Picea_abies 198 272 (1578) KT263970.1_Picea_sitchensis 548 624 (501) + 38 7.23 0.00 2.41 JN039333.2_Picea_abies 199 281 (1569) KT263970.1_Picea_sitchensis 464 544 (581) + 11 5.00 0.00 0.00 JN039333.2_Picea_abies 242 261 (1589) C KT263970.1_Picea_sitchensis (761) 364 345 + 66 6.62 4.41 0.00 JN039333.2_Picea_abies 246 381 (1469) KT263970.1_Picea_sitchensis 984 1125 (0) + 46 6.19 6.19 0.88 JN039333.2_Picea_abies 248 360 (1490) KT263970.2_Picea_sitchensis 57 175 (1088) + 12 10.34 0.00 0.00 JN039333.2_Picea_abies 410 438 (1412) KT263970.2_Picea_sitchensis 24 52 (1211) + 17 6.98 0.00 4.65 JN039333.2_Picea_abies 449 491 (1359) KT263970.1_Picea_sitchensis 504 544 (581) + 66 5.26 1.75 0.88 JN039333.2_Picea_abies 455 568 (1282) KT263970.1_Picea_sitchensis 759 873 (252) + 954 1.64 0.99 1.81 JN039333.2_Picea_abies 457 1673 (177) KT263970.2_Picea_sitchensis 57 1263 (0) + 13 4.00 0.00 4.00 JN039333.2_Picea_abies 458 482 (1368) KT263970.1_Picea_sitchensis 681 704 (421) + 20 5.77 5.77 0.00 JN039333.2_Picea_abies 573 624 (1226) KT263970.1_Picea_sitchensis 912 966 (159) + 11 10.26 2.56 0.00 JN039333.2_Picea_abies 587 625 (1225) KT263970.2_Picea_sitchensis 10 49 (1214) + 10 0.00 0.00 0.00 JN039333.2_Picea_abies 761 774 (1076) KT263970.2_Picea_sitchensis 575 588 (675) + 59 11.30 0.87 0.00 JN039333.2_Picea_abies 777 891 (959) KT263970.2_Picea_sitchensis 599 714 (549) + 12 14.71 0.00 0.00 JN039333.2_Picea_abies 778 811 (1039) KT263970.2_Picea_sitchensis 546 579 (684) + 14 8.33 4.17 0.00 JN039333.2_Picea_abies 899 922 (928) KT263970.2_Picea_sitchensis 718 742 (521) + 34 8.62 0.00 0.00 JN039333.2_Picea_abies 931 988 (862) KT263970.2_Picea_sitchensis 576 633 (630) + 19 11.76 0.00 0.00 JN039333.2_Picea_abies 952 985 (865) KT263970.2_Picea_sitchensis 370 403 (860) + 10 0.00 0.00 0.00 JN039333.2_Picea_abies 984 997 (853) KT263970.2_Picea_sitchensis 356 369 (894) + 29 10.34 0.00 0.00 JN039333.2_Picea_abies 985 1042 (808) 
KT263970.2_Picea_sitchensis 522 579 (684) + 34 8.26 0.00 8.26 JN039333.2_Picea_abies 1008 1128 (722) KT263970.2_Picea_sitchensis 372 482 (781) + 10 12.00 0.00 4.00 JN039333.2_Picea_abies 1132 1156 (694) KT263970.2_Picea_sitchensis 490 513 (750) + +2 matching entries (first file). + +Discrepancy summary: +Qual algn cum rcum (%) unalgn X N sub del ins total (%) cum rcum (%) + + +Score histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: + 954 1 1 0.019 0 0 0.000 0 0 0.000 0 0 0.000 + 473 1 2 0.038 0 0 0.000 0 0 0.000 0 0 0.000 + 164 1 3 0.057 0 0 0.000 0 0 0.000 0 0 0.000 + 102 1 4 0.075 0 0 0.000 0 0 0.000 0 0 0.000 + 66 2 6 0.113 0 0 0.000 0 0 0.000 0 0 0.000 + 59 2 8 0.151 0 0 0.000 0 0 0.000 0 0 0.000 + 55 1 9 0.170 0 0 0.000 0 0 0.000 0 0 0.000 + 52 1 10 0.189 0 0 0.000 0 0 0.000 0 0 0.000 + 50 1 11 0.208 0 0 0.000 0 0 0.000 0 0 0.000 + 46 2 13 0.245 0 0 0.000 0 0 0.000 0 0 0.000 + 38 2 15 0.283 0 0 0.000 0 0 0.000 0 0 0.000 + 34 2 17 0.321 0 0 0.000 0 0 0.000 0 0 0.000 + 33 1 18 0.340 0 0 0.000 0 0 0.000 0 0 0.000 + 30 1 19 0.358 0 0 0.000 0 0 0.000 0 0 0.000 + 29 1 20 0.377 0 0 0.000 0 0 0.000 0 0 0.000 + 28 1 21 0.396 0 0 0.000 0 0 0.000 0 0 0.000 + 27 1 22 0.415 0 0 0.000 0 0 0.000 0 0 0.000 + 26 1 23 0.434 0 0 0.000 0 0 0.000 0 0 0.000 + 24 2 25 0.472 0 0 0.000 0 0 0.000 0 0 0.000 + 21 1 26 0.491 0 0 0.000 0 0 0.000 0 0 0.000 + 20 1 27 0.509 0 0 0.000 0 0 0.000 0 0 0.000 + 19 1 28 0.528 0 0 0.000 0 0 0.000 0 0 0.000 + 17 3 31 0.585 0 0 0.000 0 0 0.000 0 0 0.000 + 16 3 34 0.642 0 0 0.000 0 0 0.000 0 0 0.000 + 14 2 36 0.679 0 0 0.000 0 0 0.000 0 0 0.000 + 13 2 38 0.717 0 0 0.000 0 0 0.000 0 0 0.000 + 12 4 42 0.792 0 0 0.000 0 0 0.000 0 0 0.000 + 11 4 46 0.868 0 0 0.000 0 0 0.000 0 0 0.000 + 10 7 53 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + +Start histogram: displayed unspliced matches/cum/frac, other unspliced, displayed spliced, other spliced: +> 1000 2 2 0.038 0 0 0.000 0 0 0.000 0 0 0.000 + 984 1 3 0.057 0 0 0.000 
0 0 0.000 0 0 0.000 + 983 1 4 0.075 0 0 0.000 0 0 0.000 0 0 0.000 + 951 1 5 0.094 0 0 0.000 0 0 0.000 0 0 0.000 + 930 1 6 0.113 0 0 0.000 0 0 0.000 0 0 0.000 + 898 1 7 0.132 0 0 0.000 0 0 0.000 0 0 0.000 + 777 1 8 0.151 0 0 0.000 0 0 0.000 0 0 0.000 + 776 1 9 0.170 0 0 0.000 0 0 0.000 0 0 0.000 + 760 1 10 0.189 0 0 0.000 0 0 0.000 0 0 0.000 + 736 1 11 0.208 0 0 0.000 0 0 0.000 0 0 0.000 + 704 2 13 0.245 0 0 0.000 0 0 0.000 0 0 0.000 + 654 1 14 0.264 0 0 0.000 0 0 0.000 0 0 0.000 + 609 1 15 0.283 0 0 0.000 0 0 0.000 0 0 0.000 + 607 1 16 0.302 0 0 0.000 0 0 0.000 0 0 0.000 + 586 1 17 0.321 0 0 0.000 0 0 0.000 0 0 0.000 + 572 1 18 0.340 0 0 0.000 0 0 0.000 0 0 0.000 + 570 3 21 0.396 0 0 0.000 0 0 0.000 0 0 0.000 + 563 1 22 0.415 0 0 0.000 0 0 0.000 0 0 0.000 + 512 1 23 0.434 0 0 0.000 0 0 0.000 0 0 0.000 + 490 1 24 0.453 0 0 0.000 0 0 0.000 0 0 0.000 + 457 1 25 0.472 0 0 0.000 0 0 0.000 0 0 0.000 + 456 1 26 0.491 0 0 0.000 0 0 0.000 0 0 0.000 + 454 1 27 0.509 0 0 0.000 0 0 0.000 0 0 0.000 + 448 1 28 0.528 0 0 0.000 0 0 0.000 0 0 0.000 + 446 1 29 0.547 0 0 0.000 0 0 0.000 0 0 0.000 + 443 1 30 0.566 0 0 0.000 0 0 0.000 0 0 0.000 + 423 1 31 0.585 0 0 0.000 0 0 0.000 0 0 0.000 + 409 1 32 0.604 0 0 0.000 0 0 0.000 0 0 0.000 + 267 1 33 0.623 0 0 0.000 0 0 0.000 0 0 0.000 + 247 1 34 0.642 0 0 0.000 0 0 0.000 0 0 0.000 + 245 1 35 0.660 0 0 0.000 0 0 0.000 0 0 0.000 + 241 1 36 0.679 0 0 0.000 0 0 0.000 0 0 0.000 + 206 1 37 0.698 0 0 0.000 0 0 0.000 0 0 0.000 + 198 1 38 0.717 0 0 0.000 0 0 0.000 0 0 0.000 + 197 1 39 0.736 0 0 0.000 0 0 0.000 0 0 0.000 + 169 2 41 0.774 0 0 0.000 0 0 0.000 0 0 0.000 + 99 1 42 0.792 0 0 0.000 0 0 0.000 0 0 0.000 + 89 1 43 0.811 0 0 0.000 0 0 0.000 0 0 0.000 + 87 1 44 0.830 0 0 0.000 0 0 0.000 0 0 0.000 + 83 1 45 0.849 0 0 0.000 0 0 0.000 0 0 0.000 + 40 1 46 0.868 0 0 0.000 0 0 0.000 0 0 0.000 + 9 2 48 0.906 0 0 0.000 0 0 0.000 0 0 0.000 + 4 3 51 0.962 0 0 0.000 0 0 0.000 0 0 0.000 + 0 2 53 1.000 0 0 0.000 0 0 0.000 0 0 0.000 + + +Splice histogram 
: displayed 5' splice, other 5' splice: displayed 3' splice, other 3' splice: + + +Times in secs (cum) +read queries 0 ( 0) +sort queries 0 ( 0) +find matches 0 ( 0) + end 0 ( 0) diff --git a/v1.2.4/test/FTL1_ss.fa b/v1.2.4/test/FTL1_ss.fa new file mode 100644 index 0000000..77e3ca4 --- /dev/null +++ b/v1.2.4/test/FTL1_ss.fa @@ -0,0 +1,35 @@ +>KT263970.1_Picea_sitchensis +TCCAGGCAGGTCAACAATGGCTGCGAGCTCAAGCCCTCGGCACTTGCATTGCTTCCTCGCGTCGACATCG +GGGGTGAGGACCTTCGCAACTTTTACACTCTGGTAAGTAATTCTTCAAATGCCGACAATAAATGCCCTAA +ATGTTCTCGATTTCTTTAAATGTTTATATAATTCTTCAAATGCCGACAATAAATGCCCTAAATGTTCTCG +ATTTCTTTAAAATTTAATATATTCTGCAGAAAACGAATTCAAGATTTGAAAATTTTGAATAGGGTTTTAA +AACTTTTTTTAAATATAATTGCATGTTCTATTTTGTTTTGTTGTCAAATTTAACAAGCTTAGCTATTTAA +ATTTAAATATTGTAGAATTTAACCAGATAAGCTACAAAGCTCTTTAAATTTAAATATTGTCAAATCTAAC +AAGATAAGCTACAAGATAAGTTACAATATGAGAAGAAAGCTCTTTCAAAACCTATTACACAGTATTTATT +CAGAAAAATTGTCTAAAATACTTAAATTTAAATAAATGTTTTGCATTATTTCTAAGGTTTTAAAACCTAT +TATACAGTATTTATTCAGAAAAATTAACTAATAATATTTAAATTTAAATAAATGTTTTTGTTTTTGGTTT +CAAAACTTATTACACAGTATTTATCCAGAAAAAATAAATAAGAATATTTAAATTTAAAAAAATGTTTCTG +TTTTTAAGTTTCAAAACTTATTACACAGTATTTATCCAGAAAAATTAACTACCAATACTTAAATTTAAAT +AAATATTTCTGTATTATTTCTTCTGAAGAATATATCATTATATGCTCTTCATCTTTAAGTTTTTCTTCTT +CTCATTAAGTTTTCCATATTTGCAAAGTTCTTCTTATTTGCAAAGTAAACATAAATACAAAGTAATATAA +ATAGATGTTCTTTTTTATTTATATTTTTTCATTTACTATTTTTATTATTAAACTATGAGAAGCTAAACTA +CTTAAATTTAAAGAAAATGTTTCTGTATTATTTCTTTTAAATAATATATCATTACATGCTCTTCATCTTT +AAGTTTTTCTTCTTCTCATTAAATTTTCCATATTTGCAAAGTAAACATAAATACAAAATAAACATAAATA +AATGTTCTTTTTTATTTATATTTTTTCATTTACTATTTTCATTATAAAGCTACTAGCTTAAATTTAAAGA +AAGTGTTTCTGTATTATTTCTTTTAAATAATATATCATTACATTCTCTTCATCTTTAAGTTTTTTTTTTC +TCATTAAGTTTTCCATATTTGCAAAGTAAACATAGATAAATGTTCTTTTATCATATTTTTTCATTTCATA +TTTCTATTATTAAGTGATATAGGTGATGAGAAATTTATTTGAATTATCAGAATAAATGTTTTTATATATA +TAAAGATATTTTCTTAGTATTGTTATTTTATGTAAGAGATTATCTAATTGATTTCAAATTGATAAATTTT +CTTTTTTTTTAACAGTAGGGTTTCTCTATTACCCTCCTTTATACTTAATTATTGGAATGAAGAGTATTTT 
+GAATTTTAAAAAAAACTATGGTATTTAAATAACAAATTCAACAAACTTCATTTTCTCACTTATAAAGGCT +CGTAATTCCAAATATATCCAAAGTTTCATTTTCTTAACAGTTTCTTACCAGAGTTTTTCTAATACGTTCC +TTTATACCTAATTTTTTTTAACAGTTTCTTACCAGGGTTTCTCTATTACCCTCCTTTATACCTAATTTTT +GGAATGAAGAATATTTTGAATTTTTTTAAAATAATTTTATGGTATTTAAATAACAAATTCAACCTTCTTC +ATTTTCTCACGCTGGCTTGTAACTCCAAATCATATCCATATTCAGGTTATGACGGATCCAGATGCCCCAA +GTCCCAGCGATCCAACACTGCGAGAATACCTACAATGGTTAGGATTGCCAACCAATTCTAAATCCTCGTA +ATATTAAACGCAATTTCCATGGATCTTGTCGATAATGAATCTCTTTGTCTTGTATGCAGGATTGTAACTG +ATATTCCTGCTACAACTTCAGCCTCTTTCGGTAAGTCCAATCATGCATATGCATATATGCTAAATAATAA +TCATAATCCTTGCTGGGATTTTTAGTTTAGGTAAAGGGCCTATGATAATAGTCAACGTTTGGGAATGAAT +GGATTCAGGCAGAGAGCTGGTAAGTTACGAGTCACCGAGGCCTACCATCGGTATCCACAGGTTCATTTTT +GTGTTGTTCAAGCAGATGGGGCGGCAGACAGTTTACCCTCCAGGTTCACGCCTCAATTTCAATACCAGAA +ATTTCGCACTCTCCAACAGCCTTGGTCTGCCAGTGGCGGCCGTTTATTTCAATGCTCAGAAAGAG diff --git a/v1.2.4/test/FTL1_ss.gff b/v1.2.4/test/FTL1_ss.gff new file mode 100644 index 0000000..4bea49c --- /dev/null +++ b/v1.2.4/test/FTL1_ss.gff @@ -0,0 +1,4 @@ +KT263970.1_Picea_sitchensis . . 1 102 . . . . blue +KT263970.1_Picea_sitchensis . . 1866 1927 . . . . yellow +KT263970.1_Picea_sitchensis . . 2020 2060 . . . . green +KT263970.1_Picea_sitchensis . . 2179 2375 . . . . 
purple diff --git a/v1.2.4/test/FTL1_ssMF.fa b/v1.2.4/test/FTL1_ssMF.fa new file mode 100644 index 0000000..723164a --- /dev/null +++ b/v1.2.4/test/FTL1_ssMF.fa @@ -0,0 +1,4 @@ +>KT263970.1_Picea_sitchensis +TCCAGGCAGGTCAACAATGGCTGCGAGCTCAAGCCCTCGGCACTTGCATTGCTTCCTCGCGTCGACATCGGGGGTGAGGACCTTCGCAACTTTTACACTCTGGTAAGTAATTCTTCAAATGCCGACAATAAATGCCCTAAATGTTCTCGATTTCTTTAAATGTTTATATAATTCTTCAAATGCCGACAATAAATGCCCTAAATGTTCTCGATTTCTTTAANAATTTAATATATTCTGCAGAAAACGAATTCAAGATTTGAAAATTTTGAATAGGGTTTTAAAACTTTTTTTAAATATAATTGCATGTTCTATTTTGTTTTGTTGTCAAATTTAACAAGCTTAGCTATTTAAATTTAAATATTGTAGAATTTAACCAGATAAGCTACAAAGCTCTTTAAATTTAAATATTGTCAAATCTAACAAGATAAGCTACAAGATAAGTTACAATATGAGAAGAAAGCTCTTTCAAAACCTATTACACAGTATTTATTCAGAAAAATTGTCTAAAATACTTAAATTTAAATAAATGTTTTGCATTATTTCTAAGGTTTTAAAACCTATTATACAGTATTTATTZCAGAAAAANTTAACTAATAATATTTAAATTTAAATAAATGTTTTTGTTTTTGGTTTCAAAACTTATTACACAGTATTTATCCAGAAAAAATAAATAAGAATATTTAAATTTAAAAAAATGTTTCTGTTTTTAAGTTTCAAAACTTATTACACAGTATTTATCCAGAAAAATTAACTACCAATACTTAAATTTAAATAAATATTTCTGTATTAZZTTTCTTCTGAAGAATATATCATTATATGCTCTTCATCTTTAAGTTTTTCTTCTTCTCATTAAGTTTTCCATATTTGCAAAGTTCTTCTTATTTGCAAAGTAAACATAAATACAAAGTAATATAAATAGATGTTCTTTTTTATTTATATTTTTTCATTTACTATTTTTATTATTAAACTATGAGAAGCTAAACTACTTAAATTTAAAGAAAATGTTTCTGTATTATTTCTTTTAAATAATATATCATTACATGCTCTTCATCTTTAAGTTTTTCTTCTTCTCATTAAATTTTCCATATTTGCAAAGTAAACATAAATACAAAATAAACATAAATA +>KT263970.2_Picea_sitchensis 
+AATGTTCTTTTTTATTTATATTTTTTCATTTACTATTTTCATTATAAAGCTACTAGCTTAAATTTAAAGAAAGTGTTTCTGTATTATTTCTTTTAAATAATATATCATTACATTCTCTTCATCTTTAAGTTTTTTTTTTCTCATTAAGTTTTCCATATTTGCAAANGTAAACATAGNATAANATGTTCTTTTATCATATTTTTTCATTTCATATTTCTATTATTAAGTGATATAGGTGATGAGAAATTTATTTGAATTATCAGAATAAATGTTTTTATATATATAAAGATATTTTCTTAGTATTGTTATTTTATGTAAGAGATTATCTAATTGATTTCAAATTGATAAATTTTCTTTTTTTTTAACAGTAGGGTTTCTCTATTACCCTCCTTTATACTTAATTATTGGAATGAAGAGTATTTTGAATTTTAAAAAAAACTATGGTATTTAAATAACAAATTCAACAAACTTCATTTTCTCACTTATAAAGGCTCGTAATTCCAAATATATCCAAAGTTTCATTTTCTTAACAGTTTCTTACCAGAGTTTTTCTAATACGTTCCTTTATACCTAATTTTTTTTAACAGTTTCTTACCAGGGTTTCTCTATTACCCTCCTTTATACCTAATTTTTGGAATGAAGAATATTTTGAATTTTTTTAAAATAATTTTATGGZTATTTAAATAACAAATTCAACCTTCTTCATTTTCTCACGCTGGCTTGTAACTCCAAATCATATCCATATTCAGGTTATGACGGATCCAGATGCCCCAAGTCCCAGCGATCCAACACTGCGAGAATACCTACAATGGTTAGGATTGCCAACCAATTCTAAATCCTCGTAATATTAAACGCAATTTCCATGGATCTTGTCGATAATGAATCTCZTTTGTCTTGTATGCAGGATTGTAACTGATATTCCTGCTACAACTTCAGCCTCTTTCGGTAAGTCCAATCATGCATATGCATATATGCTAAATAATAATCATAATCCTTGCTGGGATTTTTAGTTTAGGTAAAGGGCCTATGZZZATAATAGTCAACGTTTGGGAATGAATGGATTCAGGCAGAGAGCTGGTAAGTTACGAGTCACCGAGGCCTACCATCGGTATCCACAGGTTCATTTTTGTGTTGTTCAAGCAGATGGGGCGGCAGACAGTTTACCCTCCAGGTTCACGCCTCAATTTCAATACCAGAAATTTCGCACTCTCCAACAGCCTTGGTCTGCCAGTGGCGGCCGTTTATTTCAATGCTCAGAAAGAG diff --git a/v1.2.4/test/runXMV-conifer.sh b/v1.2.4/test/runXMV-conifer.sh new file mode 100755 index 0000000..b339f32 --- /dev/null +++ b/v1.2.4/test/runXMV-conifer.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Rene Warren 2017, 2019 +# COMMAND TO RUN: +# ./runXMV-conifer.sh FTL1_pa.fa_vs_FTL1_ss.fa.rep FTL1_pa.fa FTL1_ss.fa 200 10 2 FTL1_pa.gff FTL1_ss.gff +# ./runXMV-conifer.sh FTL1_paMF.fa_vs_FTL1_ssMF.fa.rep FTL1_paMF.fa FTL1_ssMF.fa 200 10 2 FTL1_pa.gff FTL1_ss.gff +# ./runXMV-conifer.sh FTL1_paMF.fa_vs_FTL1_ssMF.fa.paf FTL1_paMF.fa FTL1_ssMF.fa 200 99 2 FTL1_pa.gff FTL1_ss.gff + +if [ $# -ne 8 ]; then + echo "Usage: $(basename $0) " + exit 1 +fi + +# source PATH-TO-SOURCE (IF NEEDED) +python3 ../xmatchview-conifer.py -x $1 -s 
$3 -q $2 -a $4 -m $5 -b 10 -l FTL1 -c $6 -f png -y $7 -e $8 -p ../../tarballs/fonts diff --git a/v1.2.4/test/runXMV.sh b/v1.2.4/test/runXMV.sh new file mode 100755 index 0000000..cc5de4c --- /dev/null +++ b/v1.2.4/test/runXMV.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Rene Warren 2017, 2019 +# COMMAND TO RUN: +# ./runXMV.sh FTL1_pa.fa_vs_FTL1_ss.fa.rep FTL1_pa.fa FTL1_ss.fa 200 10 2 FTL1_pa.gff FTL1_ss.gff + +if [ $# -ne 8 ]; then + echo "Usage: $(basename $0) " + exit 1 +fi + +# source PATH-TO-SOURCE (IF NEEDED) +python3 ../xmatchview.py -x $1 -s $3 -q $2 -a $4 -m $5 -b 10 -r 1 -c $6 -f png -y $7 -e $8 -p ../../tarballs/fonts + diff --git a/v1.2.4/test/xmv-FTL1_pa.fa_vs_FTL1_ss.fa.rep_m10_b10_r1_c2_success.png b/v1.2.4/test/xmv-FTL1_pa.fa_vs_FTL1_ss.fa.rep_m10_b10_r1_c2_success.png new file mode 100644 index 0000000..bf62cdb Binary files /dev/null and b/v1.2.4/test/xmv-FTL1_pa.fa_vs_FTL1_ss.fa.rep_m10_b10_r1_c2_success.png differ diff --git a/v1.2.4/test/xmv-FTL1_paMF.fa_vs_FTL1_ssMF.fa.rep_m10_b10_r1_c2_success.png b/v1.2.4/test/xmv-FTL1_paMF.fa_vs_FTL1_ssMF.fa.rep_m10_b10_r1_c2_success.png new file mode 100644 index 0000000..d36e126 Binary files /dev/null and b/v1.2.4/test/xmv-FTL1_paMF.fa_vs_FTL1_ssMF.fa.rep_m10_b10_r1_c2_success.png differ diff --git a/v1.2.4/test/xmvconifer-FTL1_pa.fa_vs_FTL1_ss.fa.rep_m10_b10_c2_success.png b/v1.2.4/test/xmvconifer-FTL1_pa.fa_vs_FTL1_ss.fa.rep_m10_b10_c2_success.png new file mode 100644 index 0000000..817622f Binary files /dev/null and b/v1.2.4/test/xmvconifer-FTL1_pa.fa_vs_FTL1_ss.fa.rep_m10_b10_c2_success.png differ diff --git a/v1.2.4/test/xmvconifer-FTL1_paMF.fa_vs_FTL1_ssMF.fa.paf_m99_b10_c2_success.png b/v1.2.4/test/xmvconifer-FTL1_paMF.fa_vs_FTL1_ssMF.fa.paf_m99_b10_c2_success.png new file mode 100644 index 0000000..24b5a07 Binary files /dev/null and b/v1.2.4/test/xmvconifer-FTL1_paMF.fa_vs_FTL1_ssMF.fa.paf_m99_b10_c2_success.png differ diff --git a/v1.2.4/v0.2/XMatchView-cmd.py b/v1.2.4/v0.2/XMatchView-cmd.py new file mode 
100755 index 0000000..6001c39 --- /dev/null +++ b/v1.2.4/v0.2/XMatchView-cmd.py @@ -0,0 +1,631 @@ +#!/usr/local/python/2.3.3/bin/python2.3 +#Rene Warren 2005 + +import sys +import os +sys.path.insert(0,'/home/rwarren/python/Development/SeqDev/lib') +sys.path.insert(0,'/home/rwarren/python/standard/Imaging-1.1.4/PIL') + +import getopt +import re +import Image +import ImageDraw +import ImageFont +import ImageEnhance +import PSDraw +import commands + +#--------------------------------------------- +def checkFile(file): + + print "Checking input %s" % file + if not os.path.exists(file): + print "File %s" % file + " is not valid" + sys.exit(1) + else: + print "exists." + +#--------------------------------------------- +def readCrossMatch(crossmatch_file,mismatch,block_length,reference,scale): + + (nocdt,match,query_hit)=({},{},{}) + + xmatch_obj=open(crossmatch_file, 'r') + + for line in xmatch_obj: + ###reverse matches + rev_regex = re.compile("(\s+)?\d+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+C\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)") + rm = rev_regex.match(line) + ###forward matches + + + fwd_regex = re.compile("(\s+)?\d+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+") + fm = fwd_regex.match(line) + + if rm != None: + #print "GR: %s" % line + #print "REVERSE: %s %s %s %s %s %s %s" % (fm.group(1), fm.group(2), fm.group(3), fm.group(4), fm.group(5), fm.group(6), fm.group(7)) + + #(percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(rm.group(1)), rm.group(2), float(rm.group(3)), float(rm.group(4)), rm.group(5), float(rm.group(6)), float(rm.group(7))) + (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(rm.group(2)), rm.group(3), float(rm.group(4)), float(rm.group(5)), rm.group(6), float(rm.group(7)), float(rm.group(8))) + + + ####no autovivification in python + if not nocdt.has_key(primary_match): + 
nocdt[primary_match]={} + if not nocdt[primary_match].has_key(secondary_match): + nocdt[primary_match][secondary_match]={} + if not nocdt[primary_match][secondary_match].has_key(startFirstMatch): + nocdt[primary_match][secondary_match][startFirstMatch]={} + if not nocdt[primary_match][secondary_match][startFirstMatch].has_key(endFirstMatch): + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if not nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch].has_key(startSecondMatch): + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if not nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch].has_key(endSecondMatch): + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + if percentMis > mismatch: + continue #will not display alignment lines below threshold + elif (primary_match == secondary_match) and (startSecondMatch == startFirstMatch): + break + elif (endFirstMatch - startFirstMatch) < block_length: + continue #will skip smaller alignment + else: + if reference.has_key(primary_match): + startFirstMatch=startFirstMatch/scale + endFirstMatch=endFirstMatch/scale + startSecondMatch=startSecondMatch/scale + endSecondMatch=endSecondMatch/scale + + print "%i-%i :: %i-%i" % (startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch) + + if not match.has_key(primary_match): + match[primary_match]={} + if not match[primary_match].has_key(secondary_match): + match[primary_match][secondary_match]={} + if not match[primary_match][secondary_match].has_key(startFirstMatch): + match[primary_match][secondary_match][startFirstMatch]={} + if not match[primary_match][secondary_match][startFirstMatch].has_key(endFirstMatch): + 
match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if not match[primary_match][secondary_match][startFirstMatch][endFirstMatch].has_key(startSecondMatch): + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if not match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch].has_key(endSecondMatch): + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + if not query_hit.has_key(primary_match): + query_hit[secondary_match]=int(0) + + query_hit[secondary_match]=query_hit[secondary_match]+1 + + + ###forward matches + elif fm != None: + #print "GF: %s" % line + #print "FORWARD: %s %s %s %s %s %s %s" % (fm.group(1), fm.group(2), fm.group(3), fm.group(4), fm.group(5), fm.group(6), fm.group(7)) +# (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(fm.group(1)), fm.group(2), float(fm.group(3)), float(fm.group(4)), fm.group(5), float(fm.group(6)), float(fm.group(7))) + + (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(fm.group(2)), fm.group(3), float(fm.group(4)), float(fm.group(5)), fm.group(6), float(fm.group(7)), float(fm.group(8))) + + + ####no autovivification in python + if not nocdt.has_key(primary_match): + nocdt[primary_match]={} + if not nocdt[primary_match].has_key(secondary_match): + nocdt[primary_match][secondary_match]={} + if not nocdt[primary_match][secondary_match].has_key(startFirstMatch): + nocdt[primary_match][secondary_match][startFirstMatch]={} + if not nocdt[primary_match][secondary_match][startFirstMatch].has_key(endFirstMatch): + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if not 
nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch].has_key(startSecondMatch): + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if not nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch].has_key(endSecondMatch): + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + if reference.has_key(primary_match): + startFirstMatch=startFirstMatch/scale + endFirstMatch=endFirstMatch/scale + startSecondMatch=startSecondMatch/scale + endSecondMatch=endSecondMatch/scale + + print "%i-%i :: %i-%i" % (startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch) + + if not match.has_key(primary_match): + match[primary_match]={} + if not match[primary_match].has_key(secondary_match): + match[primary_match][secondary_match]={} + if not match[primary_match][secondary_match].has_key(startFirstMatch): + match[primary_match][secondary_match][startFirstMatch]={} + if not match[primary_match][secondary_match][startFirstMatch].has_key(endFirstMatch): + match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if not match[primary_match][secondary_match][startFirstMatch][endFirstMatch].has_key(startSecondMatch): + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if not match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch].has_key(endSecondMatch): + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + if not query_hit.has_key(primary_match): + query_hit[secondary_match]=int(0) + query_hit[secondary_match]=query_hit[secondary_match]+1 + + #else: + #print 
"NO RE:%s" % line + xmatch_obj.close() + + return nocdt, match, query_hit + +#--------------------------------------------- +def generateCoords(nocdt, size, leap, protein): + + freq={} + + pos_range=range(0,size,leap) + + for pos in pos_range: + print "%i out of %i" % (pos,size) + for reference in nocdt: + for comparison in nocdt[reference]: + start1_dict=nocdt[reference][comparison].keys() + start1_dict.sort() + for start1 in start1_dict: + end1_dict=nocdt[reference][comparison][start1].keys() + end1_dict.sort() + for end1 in end1_dict: + start2_dict=nocdt[reference][comparison][start1][end1].keys() + start2_dict.sort() + + (ss,ee) = (start1,end1) + + if protein: + size_ref = end1 - start1 + buffer = ((size_ref - (size_ref/3)) / 2) + ss = start1 + buffer + ee = end1 - buffer + + if((pos >= ss and pos <= ee) or (pos >= ee and pos <= ss)): + #print "%i >= %i and %i<=%i OR %i>=%i and %i<=%i" % (pos,ss,pos,ee,pos,ee,pos,ss) + for start2 in start2_dict: + end2_dict=nocdt[reference][comparison][start1][end1][start2].keys() + end2_dict.sort() + for end2 in end2_dict: + current_mismatch=float(nocdt[reference][comparison][start1][end1][start2][end2]) + if not freq.has_key(pos): + freq[pos]={} + if not freq[pos].has_key(current_mismatch): + freq[pos][current_mismatch]=int(0) + freq[pos][current_mismatch]=freq[pos][current_mismatch]+1 + return freq + +#--------------------------------------------- +def readFasta(file, scale): + + (head_match, previous_contig,seq_length) = (None,None,0) + L1={} + + + file_obj = open(file, 'r') + + for line in file_obj: + head_match_regex = re.compile('>(\S+)') + head_match = head_match_regex.match(line) + if head_match != None: + if (head_match != previous_contig and previous_contig != None): + (seq_length, scale)=(int(seq_length), int(scale)) + L1[previous_contig] = float(seq_length/scale) + seq_length = 0 #resets the sequence length + previous_contig = head_match.group(1) + + seq_subset_regex = re.compile('(.*)', re.I) + seq_subset = 
seq_subset_regex.match(line) + if seq_subset != None: + seq_length += len(seq_subset.group(1)) + + (seq_length, scale)=(int(seq_length), int(scale)) + L1[previous_contig] = float(seq_length/scale) #for the last sequence + + file_obj.close() + + + print "scaled down %s =%f total=%i " % (previous_contig, L1[previous_contig], seq_length) + + return (L1, seq_length) + +#--------------------------------------------- +def initColor(): + color={} + + #allocate colors + color["white"] = (255,255,255) + color["black"] = (0,0,0) + color["swamp"] = (150,150,30) + color["blue"] = (0,102,204) + color["yellow"] = (255,255,0) + color["cyan"] = (0,255,255) + color["purple"] = (255,0,255) + color["green"] = (100,250,25) + color["red"] = (250,25,75) + color["forrest"] = (25,175,0) + color["dirtyred"] = (200,0,120) + color["navy"] = (0,0,150) + color["dirtyyellow"] = (200,200,75) + color["grey"] = (153,153,153) + color["lightgrey"] = (220,220,220) + color["salmon"] = (255,153,153) + color["lightblue"] = (153,204,255) + color["orange"] = (255,153,51) + color["beige"] = (222,184,135) + + return color + +#--------------------------------------------- +def initGraph(): + data={} + + #default data points + data['width']=2400 + data['height']=1200 + data['ref_y']=250 + data['mis_bar']=50 + data['query_y']=70 + data['x']=100 + data['xlabel']=110 + data['bar_thick']=20 + data['query_thick']=15 + data['reference_thick']=15 + data['x_legend']=600 + data['y_legend']=750 + data['x_legend_picto']=100 + data['thick_up']=25 + data['thick_down']=40 + + return data + +#--------------------------------------------- +def drawRectangle(draw,start,end,y,thickness,bar_color,text,font,text_color): + + draw.rectangle((start,y,end,y+thickness), bar_color) + draw.text((start-80, y), text, font=font, fill=text_color) + +#--------------------------------------------- +def plotFrequency(freq,size,scale,draw,color,data,leap): + + pos_range=range(0,size,leap) + + for pos in pos_range: + if freq.has_key(pos): + 
freq_list=freq[pos] + previous=data['mis_bar'] + identity_range=range(99,-1,-1) + for id in identity_range: + cumul=int(0) + for freq_keys in freq_list: + if id >= freq_keys: + cumul += freq_list[freq_keys] + + if cumul<1: + color_now="white" + elif cumul==1: + color_now="blue" + elif cumul==2: + color_now="cyan" + elif cumul==3: + color_now="green" + elif cumul==4: + color_now="dirtyred" + elif cumul==5: + color_now="purple" + elif cumul==6: + color_now="salmon" + elif cumul==7: + color_now="orange" + elif cumul>=8: + color_now="yellow" + + extension=((200-(2*id))+data['mis_bar']) #y + compressed=(pos/scale)+data['x'] #x + + if color_now != "white": + #print "%i, %i, %i, %i %s" % (compressed,previous,compressed,extension,color_now) + draw.line((compressed,previous,compressed,extension),color[color_now]) + + previous = extension + + +#--------------------------------------------- +def drawRelationship(reference_list, query_list, match_list, scale, query_hit, mismatch, block_length, crossmatch_file, freq, reflength, leap, format, formatdict, protein): + + scaled_reflength=int(reflength/scale) + + ###Initialize new graph + data=initGraph() + + ###Get colors + color=initColor() + + ###Set Font + + ###Set Font + #medium_font=ImageFont.load_path("/home/rwarren/fonts/pil/helvB12.pil") + arial_18=ImageFont.truetype("/home/rwarren/fonts/truetype/arial.ttf",18) + arialb_18=ImageFont.truetype("/home/rwarren/fonts/truetype/arialbd.ttf",18) + arial_20=ImageFont.truetype("/home/rwarren/fonts/truetype/arial.ttf",20) + arialb_20=ImageFont.truetype("/home/rwarren/fonts/truetype/arialbd.ttf",20) + ariali_20=ImageFont.truetype("/home/rwarren/fonts/truetype/ariali.ttf",20) + arialbi_20=ImageFont.truetype("/home/rwarren/fonts/truetype/arialbi.ttf",20) + arialb_22=ImageFont.truetype("/home/rwarren/fonts/truetype/arialbd.ttf",22) + + ###Define Image + im = Image.new("RGB", (data['width'],data['height']),color['white']) + draw = ImageDraw.Draw(im) + + ###Draw Legend + 
date=commands.getstatusoutput("date") + + ###Picto Legend + draw.text((data['x_legend_picto']+50,data['y_legend']), "Legend", font=arialb_22, fill=color['black']) + y_legend = data['y_legend']+30 + draw.text((data['x_legend_picto'],y_legend), "Frequency Repeated", font=arialbi_20, fill=color['black']) + + #### + draw.text((data['x_legend'],y_legend), "Mismatch threshold %i" % mismatch, font=arial_20, fill=color['black']) + draw.text((data['x_legend'],y_legend+20), "Minimum Block Length=%i" % block_length, font=arial_20, fill=color['black']) + draw.text((data['x_legend'],y_legend+40), "Scale=1:%i" % scale, font=arial_20, fill=color['black']) + draw.text((data['x_legend'],y_legend+60), "%s" % date[1], font=arial_20, fill=color['black']) + draw.text((data['x_legend'],y_legend+80), "rwarren@bcgsc.ca", font=arial_20, fill=color['black']) + #### + + y_legend+=25 + draw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['blue']) + draw.text((data['x_legend_picto']+25,y_legend), "1X", font=arial_20, fill=color['black']) + y_legend+=25 + draw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['cyan']) + draw.text((data['x_legend_picto']+25,y_legend), "2X", font=arial_20, fill=color['black']) + y_legend+=25 + draw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['green']) + draw.text((data['x_legend_picto']+25,y_legend), "3X", font=arial_20, fill=color['black']) + y_legend+=25 + draw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['dirtyred']) + draw.text((data['x_legend_picto']+25,y_legend), "4X", font=arial_20, fill=color['black']) + y_legend+=25 + draw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['purple']) + 
draw.text((data['x_legend_picto']+25,y_legend), "5X", font=arial_20, fill=color['black']) + y_legend+=25 + draw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['salmon']) + draw.text((data['x_legend_picto']+25,y_legend), "6X", font=arial_20, fill=color['black']) + y_legend+=25 + draw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['orange']) + draw.text((data['x_legend_picto']+25,y_legend), "7X", font=arial_20, fill=color['black']) + y_legend+=25 + draw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['yellow']) + draw.text((data['x_legend_picto']+25,y_legend), "8X and over", font=arial_20, fill=color['black']) + y_legend+=40 + + draw.text((data['x_legend_picto'],y_legend), "Collinear Blocks", font=arialbi_20, fill=color['black']) + y_legend+=30 + + draw.polygon((data['x_legend_picto']-5,y_legend,data['x_legend_picto'],y_legend+25,data['x_legend_picto']+25,y_legend+25,data['x_legend_picto']+20,y_legend), outline=color['navy'], fill=color['lightblue']) + draw.text((data['x_legend_picto']+30,y_legend), "Direct", font=arial_20, fill=color['black']) + + y_legend+=30 + draw.polygon((data['x_legend_picto']-5,y_legend,data['x_legend_picto']+25,y_legend+25,data['x_legend_picto']-5,y_legend+25,data['x_legend_picto']+25,y_legend), outline=color['purple'], fill=color['salmon']) + draw.text((data['x_legend_picto']+30,y_legend), "Inverted", font=arial_20, fill=color['black']) + + y_legend+=40 + + draw.text((data['x_legend_picto'],y_legend), "Other", font=arialbi_20, fill=color['black']) + y_legend+=30 + + draw.rectangle((data['x_legend_picto']-5,y_legend+5,data['x_legend_picto']+25,y_legend+7), fill=color['red']) + draw.text((data['x_legend_picto']+30,y_legend), "Mismatch threshold", font=arial_20, fill=color['black']) + + #### + for ref in reference_list: + 
init_coord=int(data['x']) + last_coord=int(data['x']+reference_list[ref]) + + drawRectangle(draw,init_coord, last_coord,data['ref_y'],data['reference_thick'],color['black'],ref,arialb_18,color['black']) + x_range=range(init_coord, last_coord, 100) + + for position in x_range: + draw.rectangle((position,data['thick_up'],position+2,data['thick_down']),color['black']) + base_number=int(((position-data['x'])*scale)/1000) + draw.text((position-10, data['thick_up']-25), "%i kb" % base_number, font=arial_18, fill=color['black']) + + ###Mismatch Axis + identity=int(0) + grid_range=range(data['mis_bar'], data['ref_y'], 20) + + for grid in grid_range: + draw.rectangle((data['x'],grid,data['x']+scaled_reflength+5,grid+2),color['lightgrey']) + draw.text((data['x']+scaled_reflength+10, grid-7), "%i " % identity, font=arial_18, fill=color['black']) + identity += 10 + + draw.text((data['x']+scaled_reflength+60, 150), "% Identity", font=arial_18, fill=color['black']) + + ###Draw Repeat Frequency + plotFrequency(freq,reflength,scale,draw,color,data,leap) + + ###Draw Threshold + threshold_line= data['mis_bar'] + (200-(2*mismatch)) + draw.rectangle((data['x'],threshold_line,data['x']+scaled_reflength+5,threshold_line+2), color['red']) + + ###Draw Query & Collinear blocks + (decay, current_position, LCB)=(350, data['x'], 10) + + for match in match_list: + allhit=match_list[match] + for hit in allhit: + start1_list=allhit[hit] + stop=current_position + query_list[hit] + if match != hit: + drawRectangle(draw,current_position,stop,data['ref_y']+decay,data['query_thick'],color['black'], hit, arialb_18, color['black']) + s1_list_sort=start1_list.keys() + s1_list_sort.sort() + for start1 in s1_list_sort: + end1_list=start1_list[start1] + e1_list_sort=end1_list.keys() + e1_list_sort.sort() + for end1 in e1_list_sort: + start2_list=end1_list[end1] + s2_list_sort=start2_list.keys() + s2_list_sort.sort() + for start2 in s2_list_sort: + end2_list=start2_list[start2] + 
e2_list_sort=end2_list.keys() + e2_list_sort.sort() + for end2 in e2_list_sort: + outline_color="forrest" + fill_color="lightblue" + + if start2 > end2: + outline_color="purple" + fill_color="salmon" + else: + outline_color="navy" + fill_color="lightblue" + ###draw ORF on upper + draw.rectangle((data['x']+start1,data['ref_y'],data['x']+end1,data['ref_y']+data['reference_thick']), outline=color["black"], fill=color["lightgrey"]) + size_ref = end1 - start1 + size_qry = end2 - start2 + buf_ref = ((size_ref - (size_ref/3)) / 2) + buf_qry = ((size_qry - (size_qry/3)) / 2) + ss1 = start1 + buf_ref + ee1 = end1 - buf_ref + ss2 = start2 + buf_qry + ee2 = end2 - buf_qry + print "%s (%i-%i) hits %s :: mismatch %.2f target(%i) block %i target (%i) " % (match,start1,end1,hit,end2_list[end2],mismatch,size_ref,block_length) + + if match == hit: + if start1 <= start2: + + repeat_size = start2 - start1 + size_chunk = int(decay * repeat_size / scaled_reflength) + print "%i %i %i" % (size_chunk, repeat_size, scaled_reflength) + size_chunk += 50 + + if protein: + draw.rectangle((data['x']+ss1,data['ref_y']+data['reference_thick']+7,data['x']+ee1,data['ref_y']+data['reference_thick']+17), outline=color["black"], fill=color["red"]) + if end2_list[end2] <= mismatch: ###does it pass the mismatch cutoff? + draw.arc((data['x']+ss1,data['ref_y']+data['reference_thick']+15-size_chunk,data['x']+ss2,data['ref_y']+data['reference_thick']+17+size_chunk),360,180, color[outline_color]) + else: + if end2_list[end2] <= mismatch: ###does it pass the mismatch cutoff? 
+ draw.arc((data['x']+start1,data['ref_y']+data['reference_thick']-size_chunk,data['x']+start2,data['ref_y']+data['reference_thick']+size_chunk),360,180, color[outline_color]) + else: + draw.rectangle((data['x']+start2,data['ref_y']+decay,data['x']+end2,data['ref_y']+decay+data['reference_thick']), outline=color["black"], fill=color["lightgrey"]) + + if protein: + draw.rectangle((data['x']+ss1,data['ref_y']+data['reference_thick']+7,data['x']+ee1,data['ref_y']+data['reference_thick']+17), outline=color["black"], fill=color["red"]) + draw.rectangle((data['x']+ss2,data['ref_y']+decay-17,data['x']+ee2,data['ref_y']+decay-7), outline=color["black"], fill=color["red"]) + + if end2_list[end2] <= mismatch: ###does it pass the mismatch cutoff? + draw.polygon((data['x']+ss1,data['ref_y']+data['reference_thick']+17,data['x']+ss2,data['ref_y']+decay-17,data['x']+ee2,data['ref_y']+decay-17,data['x']+ee1,data['ref_y']+data['reference_thick']+17), outline=color[outline_color], fill=color[fill_color]) + else: + if end2_list[end2] <= mismatch: ###does it pass the mismatch cutoff? + draw.polygon((data['x']+start1,data['ref_y']+data['reference_thick'],data['x']+start2,data['ref_y']+decay,data['x']+end2,data['ref_y']+decay,data['x']+end1,data['ref_y']+data['reference_thick']), outline=color[outline_color], fill=color[fill_color]) + + + #enhancer = ImageEnhance.Sharpness(im) + #for i in range(8): + # factor = i / 4.0 + # enhancer.enhance(factor).show("Sharpness %f" % factor) + + ###getFileName + #xm_regex = re.compile('(\S+)\.\S+') + #xm_name = xm_regex.match(crossmatch_file) + #file = xm_name.group(1) + "_m" + str(mismatch) + "_b" + str(block_length) + "_l" + str(leap) + "_s" + str(scale) + "." + format + file = crossmatch_file + "_m" + str(mismatch) + "_b" + str(block_length) + "_l" + str(leap) + "_s" + str(scale) + "." + format + print "Saving %s..." % file + im.save(open(file, 'wb'), formatdict[format]) + print "done." 
def main():
    """Command-line entry point: validate the options, then parse, analyse and draw.

    Options (all read from sys.argv):
      -x cross_match output file      -s reference fasta      -q query fasta
      -m mismatch threshold (0-99)    -r minimum block length -c scale
      -l leap (sliding-window step)   -f image format         -p 1/0 protein mode

    Prints the usage text and exits with status 1 when a required option is
    missing, malformed or out of range.
    """
    formatdict = {'png':'PNG','gif':'GIF','tiff':'TIFF','bmp':'BMP','jpeg':'JPEG','ps':'EPS'}
    # Derived from formatdict so the help text can never advertise a format
    # (the old text listed "pdf") that the validation below then rejects.
    known_formats = ", ".join(sorted(formatdict))

    def usage():
        """Print the option summary and terminate with exit status 1."""
        print("Usage: %s" % sys.argv[0])
        print("-x crossmatch file")
        print("-s reference genome fasta file")
        print("-q query contig/genome fasta file")
        print("-m mismatch threshold (e.g. -m 10 allows representation of repeats having up to 10% mismatch)")
        print("-r length of similarity block to display")
        print("-c scale (for displaying the image)")
        print("-l leap to evaluate repeat frequency (smaller numbers will increase the resolution, but will affect drastically the run time. recommended -l=50")
        print("-f file format (%s) NOTE: the png, ps, tiff and bmp are much better.\n" % known_formats)
        print("-p transform bacterial ORF into protein (i.e. plot alignment between ORF products? 1/0)\n")
        print("* Files for the -s and -q options must correspond to fasta files used to run cross_match")
        sys.exit(1)

    try:
        opts, args = getopt.getopt(sys.argv[1:], "x:s:q:m:r:c:l:f:p:")
    except getopt.GetoptError:
        # Unknown switch or missing argument: show the help instead of a traceback.
        usage()

    (crossmatch_file, reference_file, query_file, format)=(None,None,None,None)
    # None (rather than 0) means "not supplied", so an explicit "-m 0"
    # (exact matches only) is no longer mistaken for a missing option.
    (mismatch, block_length, scale, leap)=(None,None,None,None)
    protein=0

    try:
        for o, v in opts:
            if o == "-x":
                crossmatch_file=str(v)
            elif o == "-s":
                reference_file=str(v)
            elif o == "-q":
                query_file=str(v)
            elif o == "-m":
                mismatch=int(v)
            elif o == "-r":
                block_length=int(v)
            elif o == "-c":
                scale=int(v)
            elif o == "-l":
                leap=int(v)
            elif o == "-f":
                format=str(v)
            elif o == "-p":
                protein=int(v)
    except ValueError:
        print("-m, -r, -c, -l and -p require integer values")
        usage()

    # block_length, scale and leap keep their historical rule: 0 is treated
    # the same as "not supplied".
    if (crossmatch_file is None or reference_file is None or query_file is None
            or mismatch is None or not block_length or not scale or not leap):
        usage()

    #====Graph Format
    if format not in formatdict:
        print("Not a valid Graph Format. Please Select: %s" % known_formats)
        sys.exit(1)

    #====Mismatch checks
    if (mismatch <0 or mismatch >99):
        print("-m must be a valid number between 0-99")
        sys.exit(1)

    #===Scale checks
    if (scale<1):
        print("Not a possible scale. Make sure you select a number >=1.")
        sys.exit(1)

    #====File checks
    checkFile(crossmatch_file)
    checkFile(reference_file)
    checkFile(query_file)

    #====Parse Fasta Files
    (reference, reflength)=readFasta(reference_file, scale)
    (query, qrylength)=readFasta(query_file, scale)

    print("Reading Crossmatch file...")
    (nocdt, match, query_hit)=readCrossMatch(crossmatch_file, mismatch, block_length, reference, scale)
    print("done.")
    print("Computing Repeat frequencies...")
    (freq)=generateCoords(nocdt, reflength, leap, protein)
    print("done.")
    print("Drawing repeats...")
    drawRelationship(reference, query, match, scale, query_hit, mismatch, block_length, crossmatch_file, freq, reflength, leap, format, formatdict, protein)
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +#INSTALL + +# A) If you're running this program outside the GSC, you will need to do the following before you can proceed: +# 1)Download python2.3 or 2.4 from: http://www.python.org/ and change the shebang line to reflect this +# 2)Download the Python Imaging Library (PIL) from: http://www.pythonware.com/products/pil/ +# 3)Copy true type fonts from c:\WINDOWS\Fonts to a unix directory and change the line truetype= below to reflect the location of your ttf +# 4)Change the sys.path.append line below to reflect the location of PIL +# 5)Make sure cross_match is in your $PATH or change the line cross_match_exec= below +# 6)Copy the image pbp.gif to the same directory where the XMatchView.py program resides, make a fake gif with that name or comment the whole "###Just for fun code block" below + +# B) If you're running this program remotely, but on the GSC servers make sure you are running it on xhost01.bcgsc.ca +# C) A windows version of this program exists (XMatchView_win.py). However, you won't be able to run crossmatch with that version unless you have purchased cross_match for windows. 
class XMDialog(tkSimpleDialog.Dialog):
    """Modal dialog asking for the cross_match -minmatch and -minscore values.

    After the dialog closes, ``self.result`` is a dict: empty when nothing
    valid was confirmed, otherwise holding the integer entries "minmatch"
    and "minscore".
    """

    def body(self, master):
        """Lay out two labelled entry fields pre-filled with the defaults."""
        self.result = {}

        # Captions first, then the entry widgets, one grid row per option.
        for row, caption in enumerate(("Minmatch", "Minscore")):
            Label(master, text=caption).grid(row=row, sticky=W)

        self.e1 = Entry(master)
        self.e1.insert(0,default_minmatch)

        self.e2 = Entry(master)
        self.e2.insert(0,default_minscore)

        for row, field in enumerate((self.e1, self.e2)):
            field.grid(row=row, column=1)

        #return self.e1 # initial focus

    def validate(self):
        """Return 1 and fill ``self.result`` when both fields hold positive integers.

        Otherwise a warning box is shown and 0 is returned, which keeps the
        dialog open.
        """
        try:
            minmatch = int(self.e1.get())
            minscore = int(self.e2.get())
        except ValueError:
            tkMessageBox.showwarning(
                "Bad input",
                "Illegal data type, please try again"
            )
            return 0

        if not (minmatch > 0 and minscore > 0):
            tkMessageBox.showwarning(
                "Bad input",
                "Illegal values, please try again"
            )
            return 0

        self.result["minmatch"] = minmatch
        self.result["minscore"] = minscore
        return 1
+ ) + return 0 + elif (scale < 1): + tkMessageBox.showwarning( + "Bad input", + "Illegal scale value.\nMust be smaller than, or equal 1:1" + ) + return 0 + elif (leap < 1): + tkMessageBox.showwarning( + "Bad input", + "Illegal sliding window leap value.\nMust be larger than zero." + ) + return 0 + + except ValueError: + tkMessageBox.showwarning( + "Bad input", + "Illegal data type, please try again" + ) + return 0 + + +#=============================================================== +class MatchViz: + def __init__(self, parent): + ###root becomes parent + self.myParent = parent + self.im = None + + self.crossMatchFile = None + self.fasta_query = None + self.fasta_reference = None + self.graph_file = None + + self.xm_exec=cross_match_exec + self.minmatch=default_minmatch + self.minscore=default_minscore + self.mismatch=default_mismatch + self.scale=default_scale + self.block=default_block + self.leap=default_leap + self.width=default_width + self.height=default_height + self.angle=default_angle + self.format=default_format + + self.menubar = Menu(parent) #Container Menu + + self.oldCursor=parent["cursor"] + + ####SCROLL + self.scrollbarx = Scrollbar(parent, orient='horizontal') + self.scrollbary = Scrollbar(parent, orient='vertical') + self.scrollbary.pack(side=RIGHT, fill=Y) + self.scrollbarx.pack(side=BOTTOM, fill=X) + + ####CANVAS + self.can = Canvas(parent, width=1200, height=900, background='white', xscrollcommand=self.scrollbarx.set, yscrollcommand=self.scrollbary.set, scrollregion=(0, 0, default_width, default_height)) + + ####FILE Menu (pull down) + self.filemenu = Menu(self.menubar, tearoff=0) + self.filemenu.add_command(label="Open CrossMatch Output", underline=5, command=self.openCM) + self.filemenu.add_command(label="Open Graph", underline=0, command=self.openGraph) + self.filemenu.add_separator() + self.filemenu.add_command(label="Select Reference Fasta", underline=5, command=self.openFaR) + self.filemenu.add_command(label="Select Query Fasta", 
underline=5, command=self.openFaQ) + self.filemenu.add_separator() + self.filemenu.add_command(label="Exit", underline=1, command=parent.quit) + self.menubar.add_cascade(label="File", underline=0, menu=self.filemenu) + + ####OPTION Menu + self.optmenu = Menu(self.menubar, tearoff=0) + self.optmenu.add_command(label="CrossMatch", underline=5, command=self.optionXM) + self.optmenu.add_separator() + self.optmenu.add_command(label="Graph", underline=0, command=self.optionXG) + self.menubar.add_cascade(label="Option", underline=0, menu=self.optmenu) + + ####VIEW Menu + self.viewmenu = Menu(self.menubar, tearoff=0) + self.viewmenu.add_command(label="Zoom In", underline=5, command=self.zoomIn) + self.viewmenu.add_command(label="Zoom Out", underline=5, command=self.zoomOut) + self.viewmenu.add_separator() + self.CrossMatchWindow=BooleanVar() + self.viewmenu.add_checkbutton(label="CrossMatch Window", variable=self.CrossMatchWindow, command=self.showCM) + #self.viewmenu.add_command(label="Rotate Clock Wise", command=self.rotate) + self.menubar.add_cascade(label="View", underline=0, menu=self.viewmenu) + + ####TOOL Menu + self.toolmenu = Menu(self.menubar, tearoff=0) + self.toolmenu.add_command(label="Run CrossMatch", underline=0, command=self.runXM) + self.filemenu.add_separator() + self.toolmenu.add_command(label="Draw Repeat Graph", underline=0, command=self.runXG) + self.menubar.add_cascade(label="Tool", underline=0, menu=self.toolmenu) + + ####HELP Menu + self.helpmenu = Menu(self.menubar, tearoff=0) + self.helpmenu.add_command(label="About", underline=0, command=self.help) + self.menubar.add_cascade(label="Help", underline=0, menu=self.helpmenu) + + ####Just for fun: + self.default_image = Image.open('/home/rwarren/python/Development/SeqDev/bin/pbp.gif') + self.default_open = ImageTk.PhotoImage(self.default_image) + self.label = Label(image=self.default_open) + self.label.pack() + + + self.scrollbarx.config(command=self.can.xview) + 
self.scrollbary.config(command=self.can.yview) + + self.myParent.config(menu=self.menubar) + + self.can.pack() + + #--------------------------------------------- + def runXM(self): + + if (self.fasta_query == None): + tkMessageBox.showwarning("File missing", "You must open a\nquery fasta file.") + elif (self.fasta_reference == None): + tkMessageBox.showwarning("File missing", "You must open a\nreference fasta file.") + else: + self.crossMatchFile = tkFileDialog.asksaveasfilename(defaultextension = ".rep", filetypes = [("CrossMatch", "*.rep"), ("CrossMatch", "*.txt"), ("CrossMatch", "*.screen")]) + command=self.xm_exec + " " + self.fasta_reference + " " + self.fasta_query + " -minmatch " + str(self.minmatch) + " -minscore " + str(self.minscore) +" -masklevel 101 >" + self.crossMatchFile + if (self.crossMatchFile != '' and self.crossMatchFile != None): + if (tkMessageBox.askokcancel("Proceed?", "Run %s ?" % command) and self.crossMatchFile): + + self.myParent["cursor"]="watch" + self.myParent.update() + + (status, out)=commands.getstatusoutput(command) + print "%s" % out + + self.CrossMatchWindow.set(1) + self.showCM() + + self.myParent["cursor"]=self.oldCursor + self.myParent.update() + + #--------------------------------------------- + def runXG(self): + + if (self.fasta_query == None): + tkMessageBox.showwarning("File missing", "You must open a\nquery fasta file.") + elif (self.fasta_reference == None): + tkMessageBox.showwarning("File missing", "You must open a\nreference fasta file.") + elif (self.crossMatchFile == None): + tkMessageBox.showwarning("File missing", "You must open a\nCrossMatch file.") + elif (tkMessageBox.askokcancel("Proceed?", "Visualize Collinear Blocks? \nScale 1:%s \nMismatch threshold = %s \nMin. 
Block Length = %s \nSliding window leap = %s \nGraph format = %s" % (self.scale, self.mismatch, self.block, self.leap, self.format))): + #====Parse Fasta Files + self.myParent["cursor"]="watch" + self.myParent.update() + + (reference, reflength)=self.readFasta(self.fasta_reference, self.scale) + (query, qrylength)=self.readFasta(self.fasta_query, self.scale) + print "Reading Crossmatch file..." + (nocdt, match, query_hit)=self.readCrossMatch(self.crossMatchFile, self.mismatch, self.block, reference, self.scale, reflength, qrylength) + print "done.\nComputing Repeat frequencies..." + (freq)=self.generateCoords(nocdt, reflength, self.leap) + print "done.\nDrawing Collinear Blocks..." + self.graph_file=self.drawRelationship(reference, query, match, self.scale, query_hit, self.mismatch, self.block, self.crossMatchFile, freq, reflength, self.leap, self.format, formatdict) + + self.im = Image.open(self.graph_file) + self.photo_open = ImageTk.PhotoImage(self.im) + self.can.delete("repeatgraph") + self.can.create_image(1, 1, anchor=NW, image=self.photo_open, tag="repeatgraph") + self.can.pack() + + self.myParent["cursor"]=self.oldCursor + self.myParent.update() + + #--------------------------------------------- + def zoomIn(self): + + if self.im != None: + self.width *= 1.5 + self.height *= 1.5 + + self.myParent["cursor"]="watch" + self.myParent.update() + + self.can.delete("repeatgraph") + + self.zoomin = ImageTk.PhotoImage(self.im.resize((int(self.width),int(self.height)))) + self.can.create_image(1, 1, anchor=NW, image=self.zoomin, tag="repeatgraph") + self.can.pack() + + self.myParent["cursor"]=self.oldCursor + self.myParent.update() + + #--------------------------------------------- + def zoomOut(self): + + if self.im != None: + self.width /= 1.5 + self.height /= 1.5 + + self.myParent["cursor"]="watch" + self.myParent.update() + + self.can.delete("repeatgraph") + + self.zoomout = ImageTk.PhotoImage(self.im.resize((int(self.width),int(self.height)))) + 
self.can.create_image(1, 1, anchor=NW, image=self.zoomout, tag="repeatgraph") + self.can.pack() + + self.myParent["cursor"]=self.oldCursor + self.myParent.update() + + #--------------------------------------------- + def rotate(self): + + if self.im != None: + self.angle += 90 + + self.rotate = ImageTk.PhotoImage(self.im.rotate(self.angle)) + + self.can.delete("repeatgraph") + + self.can.create_image(1, 1, anchor=NW, image=self.rotate, tag="repeatgraph") + self.can.pack() + + #--------------------------------------------- + def openCM(self): + self.crossMatchFile=tkFileDialog.askopenfilename(defaultextension = ".rep", filetypes = [("CrossMatch", "*.rep"), ("CrossMatch", "*.txt"), ("CrossMatch", "*.screen")]) + + if (self.crossMatchFile != '' and self.crossMatchFile != None): + self.CrossMatchWindow.set(1) + self.showCM() + + #--------------------------------------------- + def showCM(self): + + if (self.CrossMatchWindow.get()): + + self.top = Toplevel() + self.top.destroy() + self.top = Toplevel() + self.MainFrame=Frame(self.top) + self.top.title(self.crossMatchFile) + self.TextBox=Text(self.MainFrame) + + if (self.crossMatchFile==None or self.crossMatchFile==""): + return + try: + File=open(self.crossMatchFile,"r") + NewText=File.read() + File.close() + self.FileName=self.crossMatchFile + #self.top.title(self.crossMatchFile) + except IOError: + tkMessageBox.showerror("Read error...", + "Could not read from '%s'" % self.crossMatchFile) + return + + self.ClearText() + self.TextBox.insert(END,NewText) + self.TextBox.pack(fill=BOTH,expand=YES) + self.MainFrame.pack(fill=BOTH,expand=YES) + else: + self.top.destroy() + + #--------------------------------------------- + def ClearText(self): + self.TextBox.delete("1.0",END) + + #--------------------------------------------- + def openFaQ(self): + self.fasta_query=tkFileDialog.askopenfilename(defaultextension = ".fa", filetypes = [("fasta", "*.fa"), ("fasta", "*.fasta"), ("fasta", "*.txt")]) + + 
#--------------------------------------------- + def openFaR(self): + self.fasta_reference=tkFileDialog.askopenfilename(defaultextension = ".fa", filetypes = [("fasta", "*.fa"), ("fasta", "*.fasta"), ("fasta", "*.txt")]) + + #--------------------------------------------- + def optionXM(self): + xmdialog = XMDialog(self.myParent, title="CrossMatch Options") + + if xmdialog.result.has_key("minmatch") and xmdialog.result.has_key("minscore"): + self.minmatch=xmdialog.result["minmatch"] + self.minscore=xmdialog.result["minscore"] + + #--------------------------------------------- + def optionXG(self): + xgdialog = XGDialog(self.myParent, title="Graphics Options") + + if xgdialog.result.has_key("mismatch") and xgdialog.result.has_key("block") and xgdialog.result.has_key("scale") and xgdialog.result.has_key("leap"): + self.mismatch=xgdialog.result["mismatch"] + self.block=xgdialog.result["block"] + self.scale=xgdialog.result["scale"] + self.leap=xgdialog.result["leap"] + self.format=xgdialog.result["format"] + + #--------------------------------------------- + def help(self): + + tkMessageBox.showinfo("About", "XMatchView.py\nCopyright 2004-2006\nRene Warren") + + #--------------------------------------------- + def openGraph(self): + self.graph_file=tkFileDialog.askopenfilename(defaultextension = ".fa", filetypes = [("GIF", "*.gif"), ("PNG", "*.png"), ("TIFF", "*.tiff"), ("BMP", "*.bmp"), ("JPEG", "*.jp*g"), ("postcript EPS", "*.ps")]) + + if (self.graph_file != '' and self.graph_file != None): + + self.myParent["cursor"]="watch" + self.myParent.update() + + self.im = Image.open(self.graph_file) + self.photo_open = ImageTk.PhotoImage(self.im) + self.can.delete("repeatgraph") + self.can.create_image(1, 1, anchor=NW, image=self.photo_open, tag="repeatgraph") + self.can.pack() + + self.myParent["cursor"]=self.oldCursor + self.myParent.update() + + #--------------------------------------------- + def 
readCrossMatch(self,crossmatch_file,mismatch,block_length,reference,scale,reflength,qrylength): + + (nocdt,match,query_hit)=({},{},{}) + + xmatch_obj=open(crossmatch_file, 'r') + + for line in xmatch_obj: + ###reverse matches + rev_regex = re.compile("(\s+)?\d+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+C\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)") + rm = rev_regex.match(line) + ###forward matches + fwd_regex = re.compile("(\s+)?\d+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+") + fm = fwd_regex.match(line) + + if rm != None: + #print "GR: %s" % line + #print "REVERSE: %s %s %s %s %s %s %s" % (fm.group(1), fm.group(2), fm.group(3), fm.group(4), fm.group(5), fm.group(6), fm.group(7)) + + #(percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(rm.group(1)), rm.group(2), float(rm.group(3)), float(rm.group(4)), rm.group(5), float(rm.group(6)), float(rm.group(7))) + (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(rm.group(2)), rm.group(3), int(rm.group(4)), int(rm.group(5)), rm.group(6), int(rm.group(7)), int(rm.group(8))) + + repeat_size = endFirstMatch - startFirstMatch + 1 + print "%i-%i l=%i q=%i rsize=%i" % (endFirstMatch,startFirstMatch,reflength,qrylength,repeat_size) + + if(repeat_size >= reflength-100): + continue + elif (repeat_size >= block_length) and (percentMis <= mismatch): ### REPLACE BY "else:" if you want to see all repeats+frequency + + ####no autovivification in python + if not nocdt.has_key(primary_match): + nocdt[primary_match]={} + if not nocdt[primary_match].has_key(secondary_match): + nocdt[primary_match][secondary_match]={} + if not nocdt[primary_match][secondary_match].has_key(startFirstMatch): + nocdt[primary_match][secondary_match][startFirstMatch]={} + if not nocdt[primary_match][secondary_match][startFirstMatch].has_key(endFirstMatch): + 
nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if not nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch].has_key(startSecondMatch): + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if not nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch].has_key(endSecondMatch): + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + + if percentMis > mismatch: + continue #will not display alignment lines below threshold + elif (primary_match == secondary_match) and (startSecondMatch == startFirstMatch): ###prevents exact matches + continue + elif (repeat_size < block_length) or (repeat_size >= reflength-100): + continue #will skip smaller alignment + else: + if reference.has_key(primary_match): + startFirstMatch=int(startFirstMatch/scale) + endFirstMatch=int(endFirstMatch/scale) + startSecondMatch=int(startSecondMatch/scale) + endSecondMatch=int(endSecondMatch/scale) + + if not match.has_key(primary_match): + match[primary_match]={} + if not match[primary_match].has_key(secondary_match): + match[primary_match][secondary_match]={} + if not match[primary_match][secondary_match].has_key(startFirstMatch): + match[primary_match][secondary_match][startFirstMatch]={} + if not match[primary_match][secondary_match][startFirstMatch].has_key(endFirstMatch): + match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if not match[primary_match][secondary_match][startFirstMatch][endFirstMatch].has_key(startSecondMatch): + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if not match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch].has_key(endSecondMatch): + 
match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + if not query_hit.has_key(primary_match): + query_hit[secondary_match]=int(0) + + query_hit[secondary_match]=query_hit[secondary_match]+1 + + + ###forward matches + elif fm != None: + + (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(fm.group(2)), fm.group(3), int(fm.group(4)), int(fm.group(5)), fm.group(6), int(fm.group(7)), int(fm.group(8))) + + repeat_size = endFirstMatch - startFirstMatch + 1 + print "%i-%i l=%i q=%i rsize=%i" % (endFirstMatch,startFirstMatch,reflength,qrylength,repeat_size) + + if(repeat_size >= reflength-100): + continue + elif (repeat_size >= block_length) and (percentMis <= mismatch): ### REPLACE BY "else:" if you want to see all repeats+frequency + + ####no autovivification in python + if not nocdt.has_key(primary_match): + nocdt[primary_match]={} + if not nocdt[primary_match].has_key(secondary_match): + nocdt[primary_match][secondary_match]={} + if not nocdt[primary_match][secondary_match].has_key(startFirstMatch): + nocdt[primary_match][secondary_match][startFirstMatch]={} + if not nocdt[primary_match][secondary_match][startFirstMatch].has_key(endFirstMatch): + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if not nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch].has_key(startSecondMatch): + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if not nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch].has_key(endSecondMatch): + nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + 
nocdt[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + + if percentMis > mismatch: + continue #will not display alignment lines below threshold + elif (primary_match == secondary_match) and (startSecondMatch == startFirstMatch): ###prevents exact matches + continue + elif (repeat_size < block_length) or (repeat_size >= reflength-100): + continue #will skip smaller alignment + else: + if reference.has_key(primary_match): + startFirstMatch=int(startFirstMatch/scale) + endFirstMatch=int(endFirstMatch/scale) + startSecondMatch=int(startSecondMatch/scale) + endSecondMatch=int(endSecondMatch/scale) + + if not match.has_key(primary_match): + match[primary_match]={} + if not match[primary_match].has_key(secondary_match): + match[primary_match][secondary_match]={} + if not match[primary_match][secondary_match].has_key(startFirstMatch): + match[primary_match][secondary_match][startFirstMatch]={} + if not match[primary_match][secondary_match][startFirstMatch].has_key(endFirstMatch): + match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if not match[primary_match][secondary_match][startFirstMatch][endFirstMatch].has_key(startSecondMatch): + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if not match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch].has_key(endSecondMatch): + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + if not query_hit.has_key(primary_match): + query_hit[secondary_match]=int(0) + query_hit[secondary_match]=query_hit[secondary_match]+1 + + return nocdt, match, query_hit + + #--------------------------------------------- + def generateCoords(self, nocdt, size, leap): + + freq={} + + pos_range=range(0,size,leap) + + for 
def generateCoords(self, nocdt, size, leap):
    """Count, per sampled position, the alignments that cover it.

    nocdt is the nested table
    nocdt[reference][comparison][start1][end1][start2][end2] = percent mismatch.
    Positions 0, leap, 2*leap, ... below size are tested against every
    [start1, end1] span; the span may be given in either orientation.

    Returns freq[pos][percent_mismatch] = number of alignments covering pos
    with that mismatch value. Positions covered by nothing are absent.
    """
    # Flatten the table once (same sorted order the nested loops used to
    # walk for every single position) instead of re-sorting per position.
    spans = []
    for reference in nocdt:
        for comparison in nocdt[reference]:
            by_start1 = nocdt[reference][comparison]
            for start1 in sorted(by_start1):
                by_end1 = by_start1[start1]
                for end1 in sorted(by_end1):
                    by_start2 = by_end1[end1]
                    mismatches = [float(by_start2[start2][end2])
                                  for start2 in sorted(by_start2)
                                  for end2 in sorted(by_start2[start2])]
                    if mismatches:
                        spans.append((min(start1, end1), max(start1, end1), mismatches))

    freq = {}
    for pos in range(0, size, leap):
        # single-argument print(...) is valid in both Python 2 and 3
        print("%i out of %i bases" % (pos, size))
        for (low, high, mismatches) in spans:
            if low <= pos <= high:
                counts = freq.setdefault(pos, {})
                for current_mismatch in mismatches:
                    counts[current_mismatch] = counts.get(current_mismatch, 0) + 1
    return freq
def readFasta(self, file, scale):
    """Read a FASTA file and return the scaled length of every sequence.

    file  -- path of the FASTA file
    scale -- number of bases per pixel (converted with int())

    Returns (L1, seq_length): L1 maps each sequence name (first word after
    '>') to float(length // scale); seq_length is the unscaled length of the
    last sequence in the file.
    """
    header_regex = re.compile(r'>(\S+)')
    residue_regex = re.compile(r'^(\S+)$', re.I)
    scale = int(scale)

    L1 = {}
    previous_contig = None
    seq_length = 0

    with open(file, 'r') as file_obj:
        for line in file_obj:
            head_match = header_regex.match(line)
            if head_match is not None:
                if previous_contig is not None:
                    # floor division keeps the Python 2 int/int result
                    L1[previous_contig] = float(seq_length // scale)
                seq_length = 0  # resets the sequence length
                previous_contig = head_match.group(1)
                # A header without whitespace also matches the residue
                # pattern; never count it as sequence.
                continue

            seq_subset = residue_regex.match(line)
            if seq_subset is not None:
                seq_length += len(seq_subset.group(1))

    L1[previous_contig] = float(seq_length // scale)  # for the last sequence

    print("scaled down %s =%f total=%i " % (previous_contig, L1[previous_contig], seq_length))

    return (L1, seq_length)

#---------------------------------------------
def initColor(self):
    """Return the palette: colour name -> (R, G, B) tuple."""
    return {
        "white": (255,255,255),
        "black": (0,0,0),
        "swamp": (150,150,30),
        "blue": (0,102,204),
        "yellow": (255,255,0),
        "cyan": (0,255,255),
        "purple": (255,0,255),
        "green": (100,250,25),
        "red": (250,25,75),
        "forrest": (25,175,0),
        "dirtyred": (200,0,120),
        "navy": (0,0,150),
        "dirtyyellow": (200,200,75),
        "grey": (153,153,153),
        "lightgrey": (220,220,220),
        "salmon": (255,153,153),
        "lightblue": (153,204,255),
        "orange": (255,153,51),
        "beige": (222,184,135),
    }

#---------------------------------------------
def initGraph(self):
    """Return the layout constants (pixel coordinates and thicknesses).

    width and height come from the module-level default_width and
    default_height, which are defined outside this method.
    """
    return {
        'width': default_width,
        'height': default_height,
        'ref_y': 250,
        'mis_bar': 50,
        'query_y': 70,
        'x': 20,
        'xlabel': 110,
        'bar_thick': 20,
        'query_thick': 15,
        'reference_thick': 15,
        'x_legend': 600,
        'y_legend': 750,
        'x_legend_picto': 100,
        'thick_up': 25,
        'thick_down': 40,
    }

#---------------------------------------------
def drawRectangle(self,draw,start,end,y,thickness,bar_color,text,font,text_color):
    """Draw a filled bar from start to end at height y and label it.

    The label is written 5 pixels right of the bar end, 2 pixels above y.
    """
    draw.rectangle((start, y, end, y + thickness), bar_color)
    draw.text((end + 5, y - 2), text, font=font, fill=text_color)
def plotFrequency(self,freq,size,scale,draw,color,data,leap):
    """Draw the repeat-frequency histogram above the reference bar.

    freq  -- freq[pos][percent_mismatch] = count
    size, leap -- positions 0, leap, 2*leap, ... below size are plotted
    scale -- divisor turning a base position into an x offset
    draw  -- object providing line(xy, fill)
    color -- colour name -> colour value
    data  -- layout dict; 'mis_bar' (top y) and 'x' (left margin) are read

    For every identity step from 99 down to 0, a vertical segment two
    pixels tall is drawn in the colour for the number of alignments whose
    mismatch value is <= that step. Nothing is drawn when the number is 0.
    """
    # colour for 1, 2, ... 7 and "8 or more" covering alignments
    depth_colors = ("blue", "cyan", "green", "dirtyred",
                    "purple", "salmon", "orange", "yellow")

    for pos in range(0, size, leap):
        if pos not in freq:
            continue

        freq_list = freq[pos]
        compressed = (pos/scale)+data['x'] #x
        previous = data['mis_bar']

        for identity in range(99, -1, -1):
            cumul = sum(count for (level, count) in freq_list.items() if identity >= level)
            extension = ((200-(2*identity))+data['mis_bar']) #y

            if cumul >= 1:
                color_now = depth_colors[min(int(cumul), 8) - 1]
                draw.line((compressed,previous,compressed,extension),color[color_now])

            # the next segment starts where this step ended, drawn or not
            previous = extension
def drawRelationship(self,reference_list, query_list, match_list, scale, query_hit, mismatch, block_length, crossmatch_file, freq, reflength, leap, format, formatdict):
    """Render the alignment figure and save it next to the alignment file.

    reference_list -- reference name -> scaled length
    query_list     -- query name -> scaled length
    match_list     -- match[ref][qry][start1][end1][start2][end2] (scaled)
    freq           -- table passed on to plotFrequency
    format         -- key into formatdict, also used as file extension
    query_hit is accepted for interface compatibility; it is not read here.

    Fonts come from the module-level names arial, arial_bold and
    arial_bold_italic, which are defined outside this method.

    Returns the name of the image file written.
    """
    scaled_reflength=int(reflength/scale)

    ###Initialize new graph
    data=self.initGraph()

    ###Get colors
    color=self.initColor()
    black=color['black']

    ###Set Font (only the faces that are actually drawn with)
    arial_18=ImageFont.truetype(arial,18)
    arialb_18=ImageFont.truetype(arial_bold,18)
    arial_20=ImageFont.truetype(arial,20)
    arialbi_20=ImageFont.truetype(arial_bold_italic,20)
    arialb_22=ImageFont.truetype(arial_bold,22)

    ###Define Image
    im = Image.new("RGB", (data['width'],data['height']),color['white'])
    draw = ImageDraw.Draw(im)

    ###Draw Legend
    date=commands.getstatusoutput("date")
    picto_x=data['x_legend_picto']

    ###Picto Legend
    draw.text((picto_x+50,data['y_legend']), "Legend", font=arialb_22, fill=black)
    y_legend = data['y_legend']+30
    draw.text((picto_x,y_legend), "Frequency Repeated", font=arialbi_20, fill=black)

    ###Run parameters, one row every 20 pixels
    captions = ("Mismatch threshold %i" % mismatch,
                "Minimum Block Length=%i" % block_length,
                "Scale=1:%i" % scale,
                "%s" % date[1],
                "rwarren@bcgsc.ca")
    for (row, caption) in enumerate(captions):
        draw.text((data['x_legend'],y_legend+20*row), caption, font=arial_20, fill=black)

    ###Frequency swatches, same colours plotFrequency uses
    swatches = (("blue","1X"), ("cyan","2X"), ("green","3X"), ("dirtyred","4X"),
                ("purple","5X"), ("salmon","6X"), ("orange","7X"), ("yellow","8X and over"))
    for (swatch_color, label) in swatches:
        y_legend+=25
        draw.rectangle((picto_x,y_legend,picto_x+20,y_legend+20), outline=black, fill=color[swatch_color])
        draw.text((picto_x+25,y_legend), label, font=arial_20, fill=black)
    y_legend+=40

    draw.text((picto_x,y_legend), "Collinear Blocks", font=arialbi_20, fill=black)
    y_legend+=30

    draw.polygon((picto_x-5,y_legend,picto_x,y_legend+25,picto_x+25,y_legend+25,picto_x+20,y_legend), outline=color['navy'], fill=color['lightblue'])
    draw.text((picto_x+30,y_legend), "Direct", font=arial_20, fill=black)

    y_legend+=30
    draw.polygon((picto_x-5,y_legend,picto_x+25,y_legend+25,picto_x-5,y_legend+25,picto_x+25,y_legend), outline=color['purple'], fill=color['salmon'])
    draw.text((picto_x+30,y_legend), "Inverted", font=arial_20, fill=black)

    y_legend+=40

    draw.text((picto_x,y_legend), "Other", font=arialbi_20, fill=black)
    y_legend+=30

    draw.rectangle((picto_x-5,y_legend+5,picto_x+25,y_legend+7), fill=color['red'])
    draw.text((picto_x+30,y_legend), "Mismatch threshold", font=arial_20, fill=black)

    ###Reference bar(s) with a tick every 100 pixels
    for ref in reference_list:
        init_coord=int(data['x'])
        last_coord=int(data['x']+reference_list[ref])

        self.drawRectangle(draw,init_coord, last_coord,data['ref_y'],data['reference_thick'],black,ref,arialb_18,black)

        for position in range(init_coord, last_coord, 100):
            draw.rectangle((position,data['thick_up'],position+2,data['thick_down']),black)
            base_number=int(((position-data['x'])*scale)/1000)
            draw.text((position-10, data['thick_up']-25), "%i kb" % base_number, font=arial_18, fill=black)

    ###Mismatch Axis
    identity=0
    for grid in range(data['mis_bar'], data['ref_y'], 20):
        draw.rectangle((data['x'],grid,data['x']+scaled_reflength+5,grid+2),color['lightgrey'])
        draw.text((data['x']+scaled_reflength+10, grid-7), "%i " % identity, font=arial_18, fill=black)
        identity += 10

    draw.text((data['x']+scaled_reflength+60, 150), "% Identity", font=arial_18, fill=black)

    ###Draw Repeat Frequency
    self.plotFrequency(freq,reflength,scale,draw,color,data,leap)

    ###Draw Threshold
    threshold_line= data['mis_bar'] + (200-(2*mismatch))
    draw.rectangle((data['x'],threshold_line,data['x']+scaled_reflength+5,threshold_line+2), color['red'])

    ###Draw Query & Collinear blocks
    (decay, current_position, LCB)=(350, data['x'], 10)
    ref_bottom=data['ref_y']+data['reference_thick']
    query_top=data['ref_y']+decay

    for match in match_list:
        allhit=match_list[match]
        for hit in allhit:
            start1_list=allhit[hit]
            stop=current_position + query_list[hit]
            if match != hit:
                self.drawRectangle(draw,current_position,stop,query_top,data['query_thick'],black, hit, arialb_18, black)

            for start1 in sorted(start1_list):
                end1_list=start1_list[start1]
                for end1 in sorted(end1_list):
                    start2_list=end1_list[end1]
                    for start2 in sorted(start2_list):
                        for end2 in sorted(start2_list[start2]):
                            # inverted blocks have their second span reversed
                            if start2 > end2:
                                (outline_color, fill_color)=("purple", "salmon")
                            else:
                                (outline_color, fill_color)=("navy", "lightblue")

                            draw.rectangle((data['x']+start1,ref_bottom-LCB,data['x']+end1,ref_bottom), color[fill_color])
                            print("%s-%s S1:%i S2:%i" % (match,hit,start1,start2))

                            if match == hit:
                                # self-alignment: arc between the two copies
                                if start1 <= start2:
                                    repeat_size = start2 - start1
                                    size_chunk = int(decay * repeat_size / scaled_reflength)
                                    print("%i %i %i" % (size_chunk, repeat_size, scaled_reflength))
                                    size_chunk += 50

                                    draw.arc((data['x']+start1,ref_bottom-size_chunk,data['x']+start2,ref_bottom+size_chunk),360,180, color[outline_color])
                            else:
                                draw.rectangle((current_position+start2,query_top,current_position+end2,query_top+LCB), color[fill_color])
                                draw.polygon((data['x']+start1,ref_bottom,current_position+start2,query_top,current_position+end2,query_top,data['x']+end1,ref_bottom), outline=color[outline_color], fill=color[fill_color])

    ###getFileName
    out_name = crossmatch_file + "_m" + str(mismatch) + "_b" + str(block_length) + "_l" + str(leap) + "_s" + str(scale) + "." + format
    print("Saving %s..." % out_name)
    # close the output handle even if the encoder raises
    with open(out_name, 'wb') as handle:
        im.save(handle, formatdict[format])
    print("done.")

    return out_name
+ return file + + +#--------------------------------------------- +# display the menu + +root = Tk() +root.title(sys.argv[0:]) +matchviz = MatchViz(root) +root.mainloop() diff --git a/v1.2.4/v0.2/pbp.gif b/v1.2.4/v0.2/pbp.gif new file mode 100644 index 0000000..2689807 Binary files /dev/null and b/v1.2.4/v0.2/pbp.gif differ diff --git a/v1.2.4/xmatchview-conifer.py b/v1.2.4/xmatchview-conifer.py new file mode 100755 index 0000000..99eefd9 --- /dev/null +++ b/v1.2.4/xmatchview-conifer.py @@ -0,0 +1,1109 @@ +#!/usr/bin/env python3 +# xmatchview-conifer.py +# Visualizing genome synteny with an evergreen representation +# Rene L Warren 2005-2020 + +import sys +import os +import getopt +import re +import csv +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFont +from PIL import ImageEnhance +from PIL import PSDraw +import subprocess + +#--------------------------------------------- +def checkFile(file): + + print("Checking input %s" % file) + if not os.path.exists(file): + print("File %s" % file + " is not valid") + sys.exit(1) + else: + print("exists.") + +#--------------------------------------------- +def readGFF(file,scale): + + feature = {} + (start,end) = (0,0) + + with open(file) as fd: + rd = csv.reader(fd, delimiter="\t", quotechar='"') + for row in rd: + id = row[0] + start = float(int(row[3])/scale) + end = float(int(row[4])/scale) + print("__%s__ - __%s__ <<<<<" % (start,end)) + + color = row[9] + if color == None: + color = "black" + + if id not in feature: + feature[id] = {} + if start not in feature[id]: + feature[id][start] = {} + if 'end' not in feature[id][start]: + feature[id][start]['end'] = "" + if 'color' not in feature[id][start]: + feature[id][start]['color'] = "" + + feature[id][start]['end'] = end + feature[id][start]['color'] = color + print("INITIALIZED %s : %i,%i with %s" %(row[0],start,end,color)) + + return feature + +#--------------------------------------------- +def 
def readPAF(paf_file,mismatch,block_length,reference,query,scale):
    """Parse a PAF alignment file into the nested table used for plotting.

    Lines are matched against the leading PAF columns
    qryname qrylen qrystart qryend strand hitname hitlen hitstart hitend matches block;
    lines that do not fit are ignored.

    mismatch     -- alignments with a higher percent mismatch are dropped
    block_length -- alignments whose query span (qryend - qrystart) is
                    shorter are dropped
    reference, query -- name -> dict; the 'offset_len' entry is added to the
                    scaled coordinates. Alignments naming a sequence missing
                    from either dict are dropped.
    scale        -- divisor applied to every coordinate

    Percent mismatch is 100 * (alignLen - matches) / alignLen with
    alignLen = hitend - hitstart + 1. For '-' strand alignments the hit
    coordinates are stored end-first so they can be told apart later.

    Returns match[qry][hit][start1][end1][start2][end2] = percent mismatch.
    Prints a message and exits with status 1 when no line looks like an
    alignment.
    """
    ###                      qryname       qrystart qryend  orient  hitname       hitstart hitend match  block
    ###                      1             2        3       4       5             6        7      8
    paf_regex = re.compile(r"(\S+)\s+\S+\s+(\d+)\s+(\d+)\s+([+-])\s+(\S+)\s+\S+\s+(\d+)\s+(\d+)\s+(\S+)\s+\S+")

    match = {}
    found_alignment = False

    with open(paf_file, 'r') as xmatch_obj:
        for line in xmatch_obj:
            hit = paf_regex.match(line)
            if hit is None:
                continue
            found_alignment = True

            (hit_start, hit_end) = (float(hit.group(6)), float(hit.group(7)))
            alignLen = hit_end - hit_start + 1
            percentMis = 100 * float((alignLen - float(hit.group(8))) / alignLen)

            (primary_match, secondary_match) = (str(hit.group(1)), str(hit.group(5)))
            (startFirstMatch, endFirstMatch) = (float(hit.group(2)), float(hit.group(3)))
            if hit.group(4) == "-":
                ### needs start/end reversed to plot reverse align
                (startSecondMatch, endSecondMatch) = (hit_end, hit_start)
            else:
                (startSecondMatch, endSecondMatch) = (hit_start, hit_end)

            if percentMis > mismatch:
                continue #will not display alignment lines below threshold
            if (endFirstMatch - startFirstMatch) < block_length:
                continue #will skip smaller alignment
            if primary_match not in query or secondary_match not in reference:
                continue

            startFirstMatch = (startFirstMatch/scale) + query[primary_match]['offset_len']
            endFirstMatch = (endFirstMatch/scale) + query[primary_match]['offset_len']
            startSecondMatch = (startSecondMatch/scale) + reference[secondary_match]['offset_len']
            endSecondMatch = (endSecondMatch/scale) + reference[secondary_match]['offset_len']

            print("%i-%i :: %i-%i" % (startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch))

            ####no autovivification in python
            second_spans = (match.setdefault(primary_match, {})
                                 .setdefault(secondary_match, {})
                                 .setdefault(startFirstMatch, {})
                                 .setdefault(endFirstMatch, {})
                                 .setdefault(startSecondMatch, {}))
            second_spans[endSecondMatch] = percentMis

    if not found_alignment:
        print("There are no alignments to plot. Make sure your file %s is reporting alignments -- fatal." % paf_file)
        sys.exit(1)

    return match
def readCrossMatch(crossmatch_file,mismatch,block_length,reference,query,scale):
    """Parse cross_match output into the nested table used for plotting.

    Two line layouts are recognised (leading whitespace allowed):

      score %sub %del %ins  query start end (rest) C  ref (rest) end start   reverse
      score %sub %del %ins  query start end (rest)    ref start end (rest)   forward

    The %sub column is used as the percent mismatch. A line is ignored when
    its query or reference name is the string "0".

    mismatch     -- alignments with a higher percent mismatch are dropped
    block_length -- alignments whose query span (end - start) is shorter
                    are dropped
    reference, query -- name -> dict; the 'offset_len' entry is added to the
                    scaled coordinates. Alignments naming a sequence missing
                    from either dict are dropped.
    scale        -- divisor applied to every coordinate

    Returns match[qry][ref][start1][end1][start2][end2] = percent mismatch.
    Reverse alignments keep the file's column order, so their start2 is
    larger than their end2. Prints a message and exits with status 1 when
    no alignment line is found.
    """
    ###reverse matches            s.i.     qry
    rev_regex = re.compile(r"(\s+)?\d+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+C\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)")
    ###forward matches
    fwd_regex = re.compile(r"(\s+)?\d+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+")

    match = {}
    found_alignment = False

    with open(crossmatch_file, 'r') as xmatch_obj:
        for line in xmatch_obj:
            # The reverse layout is tried first; a line it rejects may still
            # be accepted by the forward layout.
            hit = rev_regex.match(line)
            label = "REVERSE"
            if hit is None or hit.group(3) == "0" or hit.group(6) == "0":
                hit = fwd_regex.match(line)
                label = "FORWARD"
                if hit is None or hit.group(3) == "0" or hit.group(6) == "0":
                    continue
            found_alignment = True

            ### has to be in this order to plot reverse align in diff color
            (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(hit.group(2)), hit.group(3), float(hit.group(4)), float(hit.group(5)), hit.group(6), float(hit.group(7)), float(hit.group(8)))

            if percentMis > mismatch:
                continue #will not display alignment lines below threshold
            if (endFirstMatch - startFirstMatch) < block_length:
                continue #will skip smaller alignment
            if primary_match not in query or secondary_match not in reference:
                continue

            startFirstMatch = (startFirstMatch/scale) + query[primary_match]['offset_len']
            endFirstMatch = (endFirstMatch/scale) + query[primary_match]['offset_len']
            startSecondMatch = (startSecondMatch/scale) + reference[secondary_match]['offset_len']
            endSecondMatch = (endSecondMatch/scale) + reference[secondary_match]['offset_len']

            print("%s %i-%i :: %i-%i" % (label,startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch))

            ####no autovivification in python
            second_spans = (match.setdefault(primary_match, {})
                                 .setdefault(secondary_match, {})
                                 .setdefault(startFirstMatch, {})
                                 .setdefault(endFirstMatch, {})
                                 .setdefault(startSecondMatch, {}))
            second_spans[endSecondMatch] = percentMis

    if not found_alignment:
        print("There are no alignments to plot. Make sure your file %s is reporting alignments -- fatal." % crossmatch_file)
        sys.exit(1)

    return match
def generateCoords(nocdt, size, leap, protein):
    """Count, per sampled position, the alignments that cover it.

    nocdt   -- nocdt[reference][comparison][start1][end1][start2][end2]
               = percent mismatch
    size, leap -- positions 0, leap, 2*leap, ... below size are sampled
    protein -- when true, each [start1, end1] span is first shrunk
               symmetrically to one third of its length

    A span may be given in either orientation. Returns
    freq[pos][percent_mismatch] = number of alignments covering pos with
    that mismatch value. Positions covered by nothing are absent.
    """
    # Flatten the table once, in the sorted order the nested loops used to
    # walk it for every position; nothing here depends on the position.
    spans = []
    for reference in nocdt:
        for comparison in nocdt[reference]:
            by_start1 = nocdt[reference][comparison]
            for start1 in sorted(by_start1):
                by_end1 = by_start1[start1]
                for end1 in sorted(by_end1):
                    by_start2 = by_end1[end1]

                    (ss,ee) = (start1,end1)
                    if protein:
                        size_ref = end1 - start1
                        buffer = ((size_ref - (size_ref/3)) / 2)
                        ss = start1 + buffer
                        ee = end1 - buffer

                    mismatches = [float(by_start2[start2][end2])
                                  for start2 in sorted(by_start2)
                                  for end2 in sorted(by_start2[start2])]
                    if mismatches:
                        spans.append((min(ss, ee), max(ss, ee), mismatches))

    freq = {}
    for pos in range(0, size, leap):
        print("%i out of %i" % (pos,size))
        for (low, high, mismatches) in spans:
            if low <= pos <= high:
                counts = freq.setdefault(pos, {})
                for current_mismatch in mismatches:
                    counts[current_mismatch] = counts.get(current_mismatch, 0) + 1
    return freq
def findOccurences(s, ch):
    """Return the 0-based indices at which character ch occurs in s."""
    return [i for i, letter in enumerate(s) if letter == ch]

#---------------------------------------------
def readFasta(file, scale):
    """Read a (multi-)FASTA file and lay its sequences out end to end.

    file  -- path of the FASTA file
    scale -- number of bases per pixel (converted with int())

    Returns (order, L1, tot_length):
      order      -- sequence names (first word after '>') in file order
      L1[name]   -- dict with
                    'scaled_len': sequence length / scale
                    'offset_len': summed length of the preceding sequences / scale
                    'npos': indices of 'N'/'n' in the last non-empty sequence
                            line of the record (so only complete for
                            sequences written on one line)
      tot_length -- summed unscaled length of all sequences

    A file without any header line yields ([], {}, 0).
    """
    header_regex = re.compile(r'>(\S+)')
    scale = int(scale)

    # (name, length, npos) for every record, in file order
    records = []
    (previous_contig, seq_length, npos) = (None, 0, [])

    with open(file, 'r') as file_obj:
        for line in file_obj:
            head_match = header_regex.match(line)
            if head_match is not None:
                if previous_contig is not None:
                    records.append((previous_contig, seq_length, npos))
                # start the new record clean so nothing leaks over from
                # the previous one
                (previous_contig, seq_length, npos) = (head_match.group(1), 0, [])
                continue

            residues = line.rstrip("\n")
            if not residues:
                # a blank line carries no residues and must not wipe the
                # N positions recorded so far
                continue
            seq_length += len(residues)
            npos = findOccurences(residues.upper(), "N")

    if previous_contig is not None:
        records.append((previous_contig, seq_length, npos))

    L1 = {}
    order = []
    tot_length = 0

    for (contig, length, n_positions) in records:
        L1[contig] = {
            'scaled_len': float(length/scale),
            'offset_len': float(tot_length/scale),  # first ID is offset at 0
            'npos': n_positions,
        }
        print("NPOS: (Ns only tracked on 1-line sequences)")
        print(n_positions)

        order.append(contig)
        print("%s length = %i bp, scaled to %.0f pixels" % (contig,length,L1[contig]['scaled_len']))
        tot_length += length

    scaled_tot_len = float(tot_length/scale)
    print("Total length = %i bp, scaled to : %.0f pixels " % (tot_length,scaled_tot_len))

    return (order, L1, tot_length)
def initColor(alpha):
    """Return the palette: colour name -> (R, G, B, A) tuple.

    Names ending in 't' (forestt, green1t..green10t, red1t..red10t, brownt,
    beiget) use the given alpha; every other colour is fully opaque (255).
    """
    return {
        "white": (255,255,255,255),
        "black": (0,0,0,255),
        "swamp": (150,150,30,255),
        "blue": (0,102,204,255),
        "yellow": (255,255,0,255),
        "cyan": (0,255,255,255),
        "purple": (255,0,255,255),
        "lime": (57,255,20,255),
        "green": (100,250,25,255),
        "red": (250,25,75,255),
        "forest": (0,100,0,255),
        "dirtyred": (200,0,120,255),
        "navy": (0,0,150,255),
        "dirtyyellow": (200,200,75,255),
        "grey": (153,153,153,255),
        "lightgrey": (220,220,220,255),
        "salmon": (255,153,153,255),
        "lightblue": (153,204,255,255),
        "orange": (255,153,51,255),
        "forestt": (0,100,0,alpha),

        # translucent green ramp, light to dark
        "green1t": (223,238,218,alpha),
        "green2t": (208,221,203,alpha),
        "green3t": (184,212,178,alpha),
        "green4t": (162,203,155,alpha),
        "green5t": (141,186,127,alpha),
        "green6t": (119,176,108,alpha),
        "green7t": (98,166,92,alpha),
        "green8t": (72,146,73,alpha),
        "green9t": (21,119,40,alpha),
        "green10t": (0,82,33,alpha),

        # translucent red ramp, light to dark
        "red1t": (252,227,229,alpha),
        "red2t": (249,214,215,alpha),
        "red3t": (244,187,188,alpha),
        "red4t": (240,161,161,alpha),
        "red5t": (235,134,134,alpha),
        "red6t": (231,107,108,alpha),
        "red7t": (226,81,81,alpha),
        "red8t": (222,54,54,alpha),
        "red9t": (217,27,27,alpha),
        "red10t": (213,1,1,alpha),

        # opaque green ramp, light to dark
        "green1": (247,252,245,255),
        "green2": (229,245,224,255),
        "green3": (199,233,192,255),
        "green4": (161,217,155,255),
        "green5": (116,196,118,255),
        "green6": (65,171,93,255),
        "green7": (35,139,69,255),
        "green8": (0,109,44,255),
        "green9": (0,68,27,255),

        "brown": (83,49,24,255),
        "brownt": (83,49,24,alpha),
        "beige": (210,180,140,255),
        "beiget": (210,180,140,alpha),
    }
(data['height'] / 1.5) + data['skew']=300 #400 ### CHANGE THIS FOR THE PITCH OF THE TREE + data['decay']=120 #200### THIS IS THE SPACE BETWEEN BOTH SIDES, TOP OF TREE + data['ref_y_skew']=data['ref_y']-data['skew'] ###DON'T CHANGE THIS + data['mis_bar']=50 + data['query_y']=70 + data['x']=100 + data['xlabel']=110 + data['bar_thick']=20 + data['query_thick']=15 + data['reference_thick']=15 + data['x_legend'] = data['width'] - 600 + data['y_legend'] = data['height'] - 100 ###WAS 1500XXX + data['x_legend_picto'] = data['width'] / 1.5 + data['tick_up']=data['ref_y_skew'] - 120 + data['tick_down']=data['tick_up'] + 20 + + return data + +#--------------------------------------------- +def drawRectangle(draw,start,end,y,thickness,bar_color,text,font,text_color): + + draw.rectangle((start,y,end,y+thickness), bar_color) + draw.text((start-80, y), text, font=font, fill=text_color) + +#--------------------------------------------- +def plotFrequency(freq,size,scale,draw,color,data,leap): + + pos_range=list(range(0,size,leap)) + + for pos in pos_range: + if pos in freq: + freq_list=freq[pos] + previous=data['mis_bar'] + identity_range=list(range(9,-1,-1)) + for id in identity_range: + cumul=int(0) + for freq_keys in freq_list: + if id >= freq_keys: + cumul += freq_list[freq_keys] + + if cumul<1: + color_now="white" + elif cumul==1: + color_now="blue" + elif cumul==2: + color_now="cyan" + elif cumul==3: + color_now="green" + elif cumul==4: + color_now="dirtyred" + elif cumul==5: + color_now="purple" + elif cumul==6: + color_now="salmon" + elif cumul==7: + color_now="orange" + elif cumul>=8: + color_now="yellow" + + extension=((200-(20*id))+data['mis_bar']) #y + compressed=(pos/scale)+data['x'] #x + + if color_now != "white": + #print "%i, %i, %i, %i %s" % (compressed,previous,compressed,extension,color_now) + draw.line((compressed,previous,compressed,extension),color[color_now]) + + previous = extension + + +#--------------------------------------------- +def 
#---------------------------------------------
def _identityLevel(seqid):
    """Map a percent identity to its colour level: 10 for >=99 down to 1 for anything below 60."""
    for level, floor in ((10, 99), (9, 95), (8, 90), (7, 85), (6, 80), (5, 75), (4, 70), (3, 65), (2, 60)):
        if seqid >= floor:
            return level
    return 1

#---------------------------------------------
def _loadFonts(fontpath):
    """Return the fonts the plot uses: TrueType if arial.ttf is in *fontpath*, else PIL bitmap fonts, else PIL's default."""
    default = ImageFont.load_default()
    fonts = dict.fromkeys(('seq_label', 'legend', 'tick', 'unit', 'legend_title', 'trunk'), default)

    if os.path.exists(fontpath + "/arial.ttf"):     ### truetype first, they look better
        bold = fontpath + "/arialbd.ttf"
        fonts['seq_label'] = ImageFont.truetype(bold, 32)
        fonts['legend'] = ImageFont.truetype(bold, 30)
        fonts['tick'] = ImageFont.truetype(bold, 40)
        fonts['unit'] = ImageFont.truetype(bold, 40)
        fonts['legend_title'] = ImageFont.truetype(fontpath + "/arialbi.ttf", 34)
        fonts['trunk'] = ImageFont.truetype(bold, 78)
    elif os.path.exists(fontpath + "/helvR14.pil"): ### settle for PIL fonts (limited sizes)
        regular = ImageFont.load_path(fontpath + "/helvR24.pil")
        bold = ImageFont.load_path(fontpath + "/helvB24.pil")
        fonts['seq_label'] = bold
        fonts['legend'] = regular
        fonts['tick'] = regular
        fonts['unit'] = bold
        fonts['legend_title'] = ImageFont.load_path(fontpath + "/helvBO24.pil")
        fonts['trunk'] = regular

    return fonts

#---------------------------------------------
def _iterHits(match_list):
    """Yield (qry, hit, start1, end1, start2, end2, percent_mismatch) for every alignment, coordinates sorted at each level."""
    for qry, allhit in match_list.items():
        for hit, start1_list in allhit.items():
            for start1 in sorted(start1_list):
                end1_list = start1_list[start1]
                for end1 in sorted(end1_list):
                    start2_list = end1_list[end1]
                    for start2 in sorted(start2_list):
                        end2_list = start2_list[start2]
                        for end2 in sorted(end2_list):
                            yield (qry, hit, start1, end1, start2, end2, end2_list[end2])

#---------------------------------------------
def drawRelationship(reference_list, order_ref, query_list, order_qry, match_list, scale, mismatch, block_length, alignment_file, reflength, format, formatdict, protein, label, alpha, refgff, qrygff, qrylength, fontpath):
    """Render the conifer-style synteny plot and save it to disk.

    The reference is drawn as the upper branch and the query as the lower
    branch of a sideways tree; alignments passing the *mismatch* cutoff are
    drawn as translucent ribbons between them (green = forward, red =
    inverted, shade = identity). The canvas is rotated and a kbp scale and
    legend are added before saving.

    reference_list / query_list map sequence id -> dict with 'offset_len',
    'scaled_len' and optionally 'npos' (N positions, in bp).
    order_ref / order_qry give the drawing order of those ids.
    match_list is nested as qry -> hit -> start1 -> end1 -> start2 -> end2
    -> percent mismatch, coordinates already scaled to pixels.
    refgff / qrygff map sequence id -> {scaled start: {'end', 'color'}}.
    *protein* is accepted for interface compatibility and is not used.

    Returns the name of the image file written. Calls sys.exit(1) when no
    alignment passes the mismatch cutoff.
    """
    scaled_reflength = float(reflength/scale)
    scaled_qrylength = float(qrylength/scale)

    ###Capture last coordinates of relationships
    (u2max, v2max, x2max, y2max) = (0, 0, 0, 0)

    data = initGraph()
    color = initColor(alpha)
    fonts = _loadFonts(fontpath)

    canvas = (data['width'], data['height'])
    ref_thick = data['reference_thick']
    qry_thick = data['query_thick']
    decay = data['decay']
    skew = data['skew']

    ###Define Image: shapes are drawn on a scratch layer (poly) and alpha-pasted onto back
    back = Image.new("RGBA", canvas, (0,0,0,0))
    poly = Image.new("RGBA", canvas)
    draw = ImageDraw.Draw(poly)
    ticklabel = Image.new("RGBA", canvas)

    back = back.rotate(90)

    ### REFERENCE slope calculations -- IMPORTANT, USED THROUGHOUT
    x1ref = data['x']
    x2ref = data['x'] + scaled_reflength
    y1ref = data['ref_y']
    y2ref = data['ref_y'] - skew
    mrref = (y2ref - y1ref) / (x2ref - x1ref)
    brref = y2ref - (mrref * x2ref)

    ###QRY slope calculations -- IMPORTANT, USED THROUGHOUT
    x1qry = data['x']
    x2qry = data['x'] + scaled_qrylength
    y1qry = data['ref_y'] + decay
    y2qry = data['ref_y'] + decay + skew
    mqqry = (y2qry - y1qry) / (x2qry - x1qry)
    bqqry = y2qry - (mqqry * x2qry)

    ####REFERENCE
    last_ref_coord = data['x']   # stays at the left margin if there is nothing to draw
    for ref in order_ref:
        scaled_offset_len = 0
        if ref in reference_list:
            scaled_offset_len = reference_list[ref]['offset_len']

        a1 = data['x'] + scaled_offset_len
        a2 = a1 + reference_list[ref]['scaled_len']
        b1 = (mrref * a1) + brref
        b2 = (mrref * a2) + brref
        init_coord = int(a1)
        last_ref_coord = int(a2)

        draw.polygon((a1,b1,a1,b1+ref_thick,a2,b2+ref_thick,a2,b2), outline=color['brown'], fill=color['brown'])###references rect
        draw.text((init_coord+5, data['ref_y_skew']-53), ref, font=fonts['seq_label'], fill=color['grey'])###label for ref

        ###REF gene model: features/exons on the outer side of the reference
        if ref in refgff:
            for exstart in refgff[ref]:
                feat = refgff[ref][exstart]
                l1 = data['x'] + exstart + scaled_offset_len
                l2 = data['x'] + feat['end'] + scaled_offset_len
                m1 = (mrref * l1) + brref
                m2 = (mrref * l2) + brref
                draw.polygon((l1,m1-11,l1,m1,l2,m2,l2,m2-11), outline=feat['color'], fill=feat['color'])

        ###ticks on reference
        draw.line((a1,b1-50,a1,b1), color['grey'], width=1)
        draw.line((a2,b2-50,a2,b2), color['grey'], width=1)

        back.paste(poly, mask=poly)
        del draw
        poly = Image.new("RGBA", canvas)
        draw = ImageDraw.Draw(poly)

    ####Draw Query
    last_qry_coord = data['x']
    for qry in order_qry:
        scaled_offset_len = 0
        if qry in query_list:
            scaled_offset_len = query_list[qry]['offset_len']

        a1 = data['x'] + scaled_offset_len
        a2 = a1 + query_list[qry]['scaled_len']
        b1 = (mqqry * a1) + bqqry
        b2 = (mqqry * a2) + bqqry
        init_coord = int(a1)
        last_qry_coord = int(a2)

        ### draw one side of tree
        draw.polygon((a1,b1,a1,b1+qry_thick,a2,b2+qry_thick,a2,b2), outline=color['brown'], fill=color['brown'])###queries rect
        draw.text((init_coord+5, data['ref_y']+decay+skew+36), qry, font=fonts['seq_label'], fill=color['grey'])### label for query

        ###QRY gene model: features/exons on the outer side of the query
        if qry in qrygff:
            for exstart in qrygff[qry]:
                feat = qrygff[qry][exstart]
                l1 = data['x'] + exstart + scaled_offset_len
                l2 = data['x'] + feat['end'] + scaled_offset_len
                m1 = (mqqry * l1) + bqqry
                m2 = (mqqry * l2) + bqqry
                draw.polygon((l1,m1+qry_thick+1,l1,m1+qry_thick+11,l2,m2+qry_thick+11,l2,m2+qry_thick+1), outline=feat['color'], fill=feat['color'])

        ###ticks on query
        draw.line((a1,b1+qry_thick,a1,b1+qry_thick+50), color['grey'], width=1)
        draw.line((a2,b2+qry_thick,a2,b2+qry_thick+50), color['grey'], width=1)

        back.paste(poly, mask=poly)
        del draw
        poly = Image.new("RGBA", canvas)
        draw = ImageDraw.Draw(poly)

    ###DRAW BLOCKS
    plotflag = 0
    for (qry, hit, start1, end1, start2, end2, percent_mis) in _iterHits(match_list):
        seqid = 100 - percent_mis
        print("si=%.2f mis=%.2f" % (seqid,percent_mis))

        ### forward hits are green, inverted hits (start2 >= end2) are red
        shade = "green" if start2 < end2 else "red"
        fill_color = "%s%it" % (shade, _identityLevel(seqid))
        outline_color = shade + "10t"

        size_ref = end2 - start2
        print("%s (%i-%i) hits %s :: mismatch %.2f target(%i) block %i target (%i) " % (qry,start1,end1,hit,percent_mis,mismatch,size_ref,block_length))

        if percent_mis <= mismatch: ###does it pass the mismatch cutoff?
            x1 = data['x'] + start1
            x2 = data['x'] + end1
            y1 = (mqqry * x1) + bqqry
            y2 = (mqqry * x2) + bqqry
            print("x1=%i y1=%i x2=%i y2=%i M=%.2f B=%.2f " % (x1,y1,x2,y2,mqqry,bqqry))
            u1 = data['x'] + start2
            u2 = data['x'] + end2
            v1 = (mrref * u1) + brref
            v2 = (mrref * u2) + brref
            print("u1=%i v1=%i u2=%i v2=%i M=%.2f B=%.2f " % (u1,v1,u2,v2,mrref,brref))

            ### remember the right-most ribbon edge, the trunk is anchored on it
            if x2 > x2max:
                (u2max, v2max, x2max, y2max) = (u2, v2, x2, y2)

            ### LINES (ribbon between both sides)
            draw.polygon((u2,v2+ref_thick,x2,y2,x1,y1,u1,v1+ref_thick), outline=color[outline_color], fill=color[fill_color])
            ### REPEAT FEATURE
            draw.polygon((x1,y1,x1,y1+ref_thick,x2,y2+ref_thick,x2,y2), outline=color[outline_color], fill=color[fill_color])###colinear block on query
            back.paste(poly, mask=poly)
            ### the layer is deliberately NOT cleared here: the second paste re-applies ribbon + query block
            draw.polygon((u1,v1,u2,v2,u2,v2+ref_thick,u1,v1+ref_thick), outline=color[outline_color], fill=color[fill_color])###colinear block on reference
            back.paste(poly, mask=poly)
            del draw
            poly = Image.new("RGBA", canvas)
            draw = ImageDraw.Draw(poly)
            plotflag = 1

    if plotflag == 0:
        print("It looks like there is nothing to plot, try increasing -m 99 -- FATAL")
        sys.exit(1)

    ### draw Ns on reference
    for ref in order_ref:
        scaled_offset_len = 0
        if ref in reference_list:
            scaled_offset_len = reference_list[ref]['offset_len']

        if 'npos' in reference_list[ref]:
            for npos in reference_list[ref]['npos']:
                nx = data['x'] + (npos/scale) + scaled_offset_len
                ny = (mrref * nx) + brref
                draw.line((nx,ny,nx,ny+ref_thick-1), color['red'], width=2)

    ### draw Ns on query
    for qry in order_qry:
        scaled_offset_len = 0
        if qry in query_list:
            scaled_offset_len = query_list[qry]['offset_len']

        if 'npos' in query_list[qry]:
            for npos in query_list[qry]['npos']:
                nx = data['x'] + (npos/scale) + scaled_offset_len
                ny = (mqqry * nx) + bqqry
                draw.line((nx,ny+2,nx,ny+ref_thick+1), color['red'], width=2)

    ### calculate placement of tree trunk/label
    if label != "":
        charwidth = 55 ### approximate character width in pixels
        totaltrunklength = (len(label) + 1) * charwidth ### the 1 is for a one-character buffer before/after
        addbuffer = 10 #this is sometimes necessary when trunk overlaps with lines

        ytrunk1 = y1ref
        ytrunk2 = y1ref + decay
        rise = y2max - v2max
        run = x2max - u2max + 1
        if run == 0 or rise == 0:
            ### degenerate edge (vertical or flat): anchor the trunk at the ribbon's x instead of dividing by zero
            xtrunk1 = xtrunk2 = x2max
        else:
            mtrunk = rise / run
            btrunk = y2max - (mtrunk * x2max)
            xtrunk1 = (ytrunk1 - btrunk) / mtrunk
            xtrunk2 = (ytrunk2 - btrunk) / mtrunk

        draw.polygon((xtrunk1+addbuffer,ytrunk1+ref_thick,xtrunk1+totaltrunklength+addbuffer,ytrunk1+ref_thick,xtrunk1+totaltrunklength+addbuffer,y1ref+decay-5,xtrunk2+addbuffer,y1ref+decay-5), outline=color['brown'], fill=color['brown'])###trunk
        draw.text((xtrunk1+charwidth+addbuffer,data['ref_y']+(decay/4)), label, font=fonts['trunk'], fill=color['beige'])###label

    ### always flush the scratch layer so the N markers show even without a trunk label
    back.paste(poly, mask=poly)
    ### end trunk code

    ### FINAL IMAGE PROCESSING
    ### rotate plot to be able to place scale
    back = back.rotate(270)
    del draw
    drawtl = ImageDraw.Draw(ticklabel)
    last_coord = max(last_ref_coord, last_qry_coord)
    x_picto = data['x_legend_picto']

    ###ticks every 100 pixels, labelled in kbp
    x_range = range(data['x'], int(last_coord), 100)
    for position in x_range:
        drawtl.rectangle((x_picto,position+5,x_picto+15,position+8), color['black'])

    if reflength >= 10000:
        for position in x_range:
            base_number = int(((position-data['x'])*scale)/1000)
            drawtl.text((x_picto+25,position-18), "%i" % base_number, font=fonts['tick'], fill=color['black'])
    else:
        for position in x_range:
            base_number = float((position-data['x']) * scale) / 1000
            drawtl.text((x_picto+25,position-15), "%.1f" % base_number, font=fonts['tick'], fill=color['black'])

    drawtl.text((x_picto+25,last_coord+25), "kbp", font=fonts['unit'], fill=color['black'])

    ###Picto Legend
    y_legend = last_coord + 100
    drawtl.text((x_picto,y_legend), "Sequence identity (%)", font=fonts['legend_title'], fill=color['black'])

    legend_rows = ((10, "99-100"), (9, "95-98"), (8, "90-94"), (7, "85-89"), (6, "80-84"),
                   (5, "75-79"), (4, "70-74"), (3, "65-69"), (2, "60-64"), (1, "<60"))
    y_legend += 40
    for row, (level, caption) in enumerate(legend_rows):
        y_row = y_legend + (30 * row)
        green = color["green%it" % level]
        red = color["red%it" % level]
        drawtl.rectangle((x_picto,y_row,x_picto+30,y_row+30), outline=green, fill=green)
        drawtl.rectangle((x_picto+31,y_row,x_picto+61,y_row+30), outline=red, fill=red)
        drawtl.text((x_picto+65,y_row), caption, font=fonts['legend'], fill=color['black'])

    y_legend += (30 * (len(legend_rows) - 1)) + 35
    identity_threshold = 100 - mismatch
    drawtl.text((x_picto,y_legend), "Minimum identity threshold : %i %%" % identity_threshold, font=fonts['legend'], fill=color['black'])
    drawtl.text((x_picto,y_legend+30), "Minimum block length : %i bp" % block_length, font=fonts['legend'], fill=color['black'])
    drawtl.text((x_picto,y_legend+60), "Transparency : %i" % alpha, font=fonts['legend'], fill=color['black'])
    drawtl.text((x_picto,y_legend+90), "Scale (pixel:bp) 1:%i" % scale, font=fonts['legend'], fill=color['black'])

    back.paste(ticklabel, mask=ticklabel)
    del drawtl

    file = "xmvconifer-" + alignment_file + "_m" + str(mismatch) + "_b" + str(block_length) + "_c" + str(scale) + "." + format
    print("Saving %s..." % file)
    ### context manager so the output handle is flushed and closed even if save() raises
    with open(file, 'wb') as handle:
        back.save(handle, formatdict[format])
    print("done.")
    return file
#---------------------------------------------
def _printUsage(prog):
    """Print the command-line help; *prog* is the script name shown on the first line."""
    print("Usage: %s v1.2.4" % prog)
    print("-x alignment file (cross_match .rep or Pairwise mApping Format .paf) ")
    print("-s reference genome fasta file")
    print("-q query contig/genome fasta file")
    print("-e reference features (eg. exons) coordinates GFF tsv file (start end) - optional")
    print("-y query features (eg. exons) coordinates GFF tsv file (start end) - optional")
    print("-m maximum mismatch threshold (e.g. -m 10 allows representation of repeats having up to 10% mismatch")
    print("-b minimum length (bp) of similarity block to display")
    print("-c scale (pixel to basepair scale, for displaying the image)")
    print("-l label for the tree trunk (6 characters or less for best result)")
    print("-a alpha value, from 0 (transparent) to 255 (solid, default)")
    print("-f output image file format (png, tiff, jpeg, or gif) NOTE: png and tiff recommended.")
    print("-p full path to the directory with fonts on your system (please refer to the documentation for fonts used)")
    print("* Files for the -s and -q options must correspond to fasta files used to run cross_match")

#---------------------------------------------
def main():
    """Parse the command line, validate the options, read the inputs and draw the plot.

    Required options: -x, -s, -q, -m, -b, -c. Prints the usage text and
    exits with status 1 when one of them is missing or an option is
    unknown; exits with status 1 on any failed validation.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "x:s:q:m:r:c:l:f:p:a:b:e:y:")
    except getopt.GetoptError as err:
        # unknown option / missing argument: show help rather than a traceback
        print(err)
        _printUsage(sys.argv[0])
        sys.exit(1)

    (ref_gff_file, qry_gff_file, alignment_file, reference_file, query_file, format, fontpath)=(None,None,None,None,None,"png","")
    # None (not 0) marks "option not given", so that -m 0 and -b 0 are accepted
    (mismatch, block_length, scale)=(None,None,None)
    (protein, alpha, label)=(0,255,"")
    formatdict = {'png':'PNG','gif':'GIF','tiff':'TIFF','jpeg':'JPEG'}

    for o, v in opts:
        if o == "-x":
            alignment_file=str(v)
        elif o == "-s":
            reference_file=str(v)
        elif o == "-q":
            query_file=str(v)
        elif o == "-m":
            mismatch=int(v)
        elif o == "-b":
            block_length=int(v)
        elif o == "-c":
            scale=int(v)
        elif o == "-l":
            label=str(v)
        elif o == "-f":
            format=str(v)
        elif o == "-e":
            ref_gff_file=str(v)
        elif o == "-y":
            qry_gff_file=str(v)
        elif o == "-a":
            alpha = int(v)
        elif o == "-p":
            fontpath=str(v)

    required = (alignment_file, reference_file, query_file, mismatch, block_length, scale)
    if any(value is None for value in required):
        _printUsage(sys.argv[0])
        sys.exit(1)

    #====Graph Format
    if format not in formatdict:
        print("Not a valid Graph Format. Please Select: png, tiff, jpeg, or gif. NOTE: png and tiff recommended.")
        sys.exit(1)

    #====Mismatch checks
    if (mismatch <0 or mismatch >99):
        print("-m must be a valid number between 0-99")
        sys.exit(1)

    #====Alpha checks
    if (alpha<0 or alpha >255):
        print("-a must be a valid number between 0-255")
        sys.exit(1)

    #===Scale checks
    if (scale<1):
        print("Not a possible scale. Make sure you select a number >1.")
        sys.exit(1)

    #====File checks
    checkFile(alignment_file)
    checkFile(reference_file)
    checkFile(query_file)

    ###OPTIONAL, FOR FEATURES REPRESENTATION
    (refgff,qrygff) = ({},{})
    if ref_gff_file is not None:
        checkFile(ref_gff_file)
        print("Reading reference feature file %s ..." % (ref_gff_file))
        refgff=readGFF(ref_gff_file,scale)
        print("done.")

    if qry_gff_file is not None:
        checkFile(qry_gff_file)
        print("Reading query feature file %s ..." % (qry_gff_file))
        qrygff=readGFF(qry_gff_file,scale)
        print("done.")

    #====Parse Fasta Files
    (order_ref, reference, reflength)=readFasta(reference_file, scale)
    (order_qry, query, qrylength)=readFasta(query_file, scale)

    #====Raise error if features out of bounds
    data=initGraph()
    if reflength / data['width'] > scale:
        estscale = int(reflength / data['width']) + 1
        sys.exit("\n\n! The reference sequence is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (estscale))
    if qrylength / data['width'] > scale:
        estscale = int(qrylength / data['width']) + 1
        sys.exit("\n\n! The query sequence is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (estscale))

    print("Reading alignment file %s ..." % (alignment_file))
    if alignment_file.endswith("rep"):
        match = readCrossMatch(alignment_file, mismatch, block_length, reference, query, scale)
    elif alignment_file.endswith("paf"):
        match = readPAF(alignment_file, mismatch, block_length, reference, query, scale)
    else:
        print("The alignment file provided (-x %s) does not end in .rep (cross_match) or .paf (PAF) -- fatal" % alignment_file)
        sys.exit(1)
    print("done.")
    print("Drawing ...")
    drawRelationship(reference, order_ref, query, order_qry, match, scale, mismatch, block_length, alignment_file, reflength, format, formatdict, protein, label, alpha, refgff, qrygff, qrylength, fontpath)

#---------------------------------------------
#Main Call
# Guarded so the module can be imported; a successful run now ends with
# exit status 0 (failures inside main() still exit with status 1).
if __name__ == "__main__":
    main()
#---------------------------------------------
def checkFile(file):
    """Report whether *file* exists; exit with status 1 if it does not."""
    print("Checking input %s" % file)
    if not os.path.exists(file):
        print("File %s" % file + " is not valid")
        sys.exit(1)
    else:
        print("exists.")

#---------------------------------------------
def readGFF(file,scale):
    """Read a tab-separated feature file into {seq id: {start: {'end', 'color'}}}.

    Column 1 is the sequence id, columns 4 and 5 are the start/end in bp
    (both divided by *scale*), and the optional column 10 is the colour.
    Rows without a (non-empty) colour column default to "black". Blank
    lines and lines starting with "#" are skipped. A later row with the
    same id and start replaces the earlier one.
    """
    feature = {}

    # newline='' is what the csv module asks for when handing it a file object
    with open(file, newline='') as fd:
        rd = csv.reader(fd, delimiter="\t", quotechar='"')
        for row in rd:
            # blank line or GFF comment/directive line ("##gff-version", ...)
            if not row or row[0].startswith("#"):
                continue

            seq_id = row[0]
            start = float(int(row[3])/scale)
            end = float(int(row[4])/scale)

            # standard GFF stops at 9 columns; the colour column is an extension
            color = row[9] if len(row) > 9 and row[9] else "black"

            feature.setdefault(seq_id, {})[start] = {'end': end, 'color': color}

    return feature
#print "=== %.2f === %.2f ===" % (alignLen,percentMis) + #sys.exit(1) + (primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(str(rm.group(1)), float(rm.group(2)), float(rm.group(3)), str(rm.group(4)), float(rm.group(6)), float(rm.group(5))) ### needs 5/6 reversed to plot reverse align + + if percentMis > mismatch: + continue #will not display alignment lines below threshold + elif (endFirstMatch - startFirstMatch) < block_length: + continue #will skip smaller alignment + else: + if primary_match in query and secondary_match in reference: + startFirstMatch = (startFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + endFirstMatch = (endFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + startSecondMatch = (startSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + endSecondMatch = (endSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + + #print "%i-%i :: %i-%i" % (startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch) + + if primary_match not in match: + match[primary_match]={} + if secondary_match not in match[primary_match]: + match[primary_match][secondary_match]={} + if startFirstMatch not in match[primary_match][secondary_match]: + match[primary_match][secondary_match][startFirstMatch]={} + if endFirstMatch not in match[primary_match][secondary_match][startFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if startSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if endSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + 
match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + ###forward matches + elif fm != None: + ctline = 1 + #print "GF: %s" % line + #print "FORWARD: %s %s %s %s %s %s %s %s" % (fm.group(1), fm.group(2), fm.group(3), fm.group(4), fm.group(5), fm.group(6), fm.group(7), fm.group(8)) + + alignLen = float(fm.group(6)) - float(fm.group(5)) + 1 + percentMis = 100 * float(( alignLen - float(fm.group(7))) / alignLen ) + #percentMis2 = 100 * float((float(fm.group(8)) - float(fm.group(7))) / float(fm.group(8))) + #print "=== %.2f === %.2f === %.2f" % (alignLen,percentMis,percentMis2) + #sys.exit(1) + (primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(str(fm.group(1)), float(fm.group(2)), float(fm.group(3)), str(fm.group(4)), float(fm.group(5)), float(fm.group(6))) + + if percentMis > mismatch: + continue #will not display alignment lines below threshold + elif (endFirstMatch - startFirstMatch) < block_length: + continue #will skip smaller alignment + else: + if primary_match in query and secondary_match in reference: + startFirstMatch = (startFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + endFirstMatch = (endFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + startSecondMatch = (startSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + endSecondMatch = (endSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + + #print "%i-%i :: %i-%i" % (startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch) + + if primary_match not in match: + match[primary_match]={} + if secondary_match not in match[primary_match]: + match[primary_match][secondary_match]={} + if startFirstMatch not in match[primary_match][secondary_match]: + match[primary_match][secondary_match][startFirstMatch]={} + if endFirstMatch not in match[primary_match][secondary_match][startFirstMatch]: + 
match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if startSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if endSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + #else: + #print "NO RE:%s" % line + xmatch_obj.close() + + if ctline == 0 : + print("There are no alignments to plot. Make sure your file %s is reporting alignments -- fatal." % paf_file) + sys.exit(1) + + return match + +#--------------------------------------------- +def readCrossMatch(crossmatch_file,mismatch,block_length,query,reference,scale): + + match = {} + ctline = 0 + + xmatch_obj=open(crossmatch_file, 'r') + + for line in xmatch_obj: + + # Query start end R Ref end start + # 10 8.70 0.00 0.00 JN039333.1_Picea_abies 440 462 (2149) C KT263970.1_Picea_sitchensis (1851) 524 502 + # start end + # 16 4.55 0.00 0.00 JN039333.1_Picea_abies 484 505 (2106) KT263970.1_Picea_sitchensis 341 362 (2013) + + ###reverse matches s.i. 
qry + rev_regex = re.compile("(\s+)?\d+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+C\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)") + rm = rev_regex.match(line) + ###forward matches + fwd_regex = re.compile("(\s+)?\d+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+") + fm = fwd_regex.match(line) + + if rm != None and rm.group(3) != "0" and rm.group(6) != "0": + ctline = 1 + #print "GR: %s" % line + #print "REVERSE: %s %s %s %s %s %s %s" % (rm.group(1), rm.group(2), rm.group(3), rm.group(4), rm.group(5), rm.group(6), rm.group(7)) + + #(percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(rm.group(1)), rm.group(2), float(rm.group(3)), float(rm.group(4)), rm.group(5), float(rm.group(6)), float(rm.group(7))) + (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(rm.group(2)), rm.group(3), float(rm.group(4)), float(rm.group(5)), rm.group(6), float(rm.group(7)), float(rm.group(8)))### has to be in this order to plot reverse align in diff color + + if percentMis > mismatch: + continue #will not display alignment lines below threshold + elif (endFirstMatch - startFirstMatch) < block_length: + continue #will skip smaller alignment + else: + if primary_match in query and secondary_match in reference: + startFirstMatch = (startFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + endFirstMatch = (endFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + startSecondMatch = (startSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + endSecondMatch = (endSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + + #print "REVERSE %i-%i :: %i-%i" % (startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch) + + if primary_match not in match: + match[primary_match]={} + if secondary_match not in match[primary_match]: + match[primary_match][secondary_match]={} + if startFirstMatch not 
in match[primary_match][secondary_match]: + match[primary_match][secondary_match][startFirstMatch]={} + if endFirstMatch not in match[primary_match][secondary_match][startFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if startSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if endSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + + ###forward matches + elif fm != None and fm.group(3) != "0" and fm.group(6) != "0": + ctline = 1 + #print "GF: %s" % line + #print "FORWARD: %s %s %s %s %s %s %s" % (fm.group(1), fm.group(2), fm.group(3), fm.group(4), fm.group(5), fm.group(6), fm.group(7)) +# (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(fm.group(1)), fm.group(2), float(fm.group(3)), float(fm.group(4)), fm.group(5), float(fm.group(6)), float(fm.group(7))) + + (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(fm.group(2)), fm.group(3), float(fm.group(4)), float(fm.group(5)), fm.group(6), float(fm.group(7)), float(fm.group(8))) + + if percentMis > mismatch: + continue #will not display alignment lines below threshold + elif (endFirstMatch - startFirstMatch) < block_length: + continue #will skip smaller alignment + else: + if primary_match in query and secondary_match in reference: + startFirstMatch = (startFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + endFirstMatch = (endFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + startSecondMatch = 
(startSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + endSecondMatch = (endSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + + #print "FORWARD %i-%i :: %i-%i" % (startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch) + + if primary_match not in match: + match[primary_match]={} + if secondary_match not in match[primary_match]: + match[primary_match][secondary_match]={} + if startFirstMatch not in match[primary_match][secondary_match]: + match[primary_match][secondary_match][startFirstMatch]={} + if endFirstMatch not in match[primary_match][secondary_match][startFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if startSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if endSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + #else: + #print "NO RE:%s" % line + xmatch_obj.close() + + if ctline == 0 : + print("There are no alignments to plot. Make sure your file %s is reporting alignments -- fatal." 
% crossmatch_file) + sys.exit(1) + + return match + + +#--------------------------------------------- +def findOccurences(s, ch): + return [i for i, letter in enumerate(s) if letter == ch] + + +#--------------------------------------------- +def readText(file, scale): + + L1={} + order=[]#RLW + npos={} #RLW + tot_length = 0 #RLW + buffer = scale * 3 + print("Reading %s..\n" % file) + with open(file) as fd: + rd = csv.reader(fd, delimiter=":", quotechar='"') + for row in rd: + if row[0] not in L1:#RLW + L1[row[0]]={} #RLW + L1[row[0]]['scaled_len'] = float(int(row[1])/scale) + L1[row[0]]['offset_len'] = float(tot_length/scale) #RLW first ID will be offset at 0 as it should + tot_length += (int(row[1]) + buffer) + order.append(row[0]) + + scaled_tot_len = float(tot_length/scale) #RLW + print("Total length = %i bp, scaled to : %.0f pixels " % (tot_length,scaled_tot_len)) #RLW + + return (order, L1, tot_length) + + +#--------------------------------------------- +def readFasta(file, scale): + + (head_match, previous_contig,seq_length) = (None,None,0) + L1={} + order=[]#RLW + npos={} #RLW + tot_length = 0 #RLW + buffer = scale * 3 + print("Reading %s..\n" % file) + + file_obj = open(file, 'r') + + for line in file_obj: + head_match_regex = re.compile('>(\S+)') + head_match = head_match_regex.match(line) + if head_match != None: + if (head_match != previous_contig and previous_contig != None): + (seq_length, scale)=(int(seq_length), int(scale)) + if previous_contig not in L1:#RLW + L1[previous_contig]={} #RLW + L1[previous_contig]['scaled_len'] = float(seq_length/scale) #RLW + L1[previous_contig]['offset_len'] = float(tot_length/scale) #RLW first ID will be offset at 0 as it should + L1[previous_contig]['npos'] = npos + #print "NPOS: (Ns only tracked on 1-line sequences)" + #print(npos) + + order.append(previous_contig) #RLW + #print "%s length = %i bp, scaled to %.0f pixels" % (previous_contig,seq_length,L1[previous_contig]['scaled_len']) #RLW + tot_length += 
(seq_length + buffer) + seq_length = 0 #resets the sequence length + previous_contig = head_match.group(1) + else: + seq_subset_regex = re.compile('(.*)', re.I) + seq_subset = seq_subset_regex.match(line) + if seq_subset != None: + seq_length += len(seq_subset.group(1)) + npos=findOccurences(seq_subset.group(1).upper(), "N") + + (seq_length, scale)=(int(seq_length), int(scale)) + if previous_contig not in L1: #RLW + L1[previous_contig]={} #RLW + L1[previous_contig]['scaled_len'] = float(seq_length/scale) #RLW + L1[previous_contig]['offset_len'] = float(tot_length/scale) #RLW + L1[previous_contig]['npos'] = npos + #print "NPOS: (Ns only tracked on 1-line sequences)" + #print(npos) + + order.append(previous_contig) #RLW + #print "%s length = %i bp, scaled to %.0f pixels" % (previous_contig,seq_length,L1[previous_contig]['scaled_len']) #RLW + tot_length += (seq_length + buffer) #RLW + + file_obj.close() + + scaled_tot_len = float(tot_length/scale) #RLW + print("Total length = %i bp, scaled to : %.0f pixels " % (tot_length,scaled_tot_len)) #RLW + + return (order, L1, tot_length) + + +#--------------------------------------------- +def initColor(alpha): + color={} + + #allocate colors + color["white"] = (255,255,255,255) + #color["black"] = (255,255,255,255)#(0,0,0,255) + color["black"] = (0,0,0,255) + color["swamp"] = (150,150,30,255) + color["blue"] = (0,102,204,255) + color["yellow"] = (255,255,0,255) + color["cyan"] = (0,255,255,255) + color["purple"] = (255,0,255,255) + color["lime"] = (57,255,20,255) ### XXX + color["green"] = (100,250,25,255) + color["red"] = (250,25,75,255) + color["forest"] = (0,100,0,255) + color["dirtyred"] = (200,0,120,255) + color["navy"] = (0,0,150,255) + color["dirtyyellow"] = (200,200,75,255) + color["grey"] = (153,153,153,255) + color["lightgrey"] = (220,220,220,255) + color["salmon"] = (255,153,153,255) + color["lightblue"] = (153,204,255,255) + color["orange"] = (255,153,51,255) + color["forestt"] = (0,100,0,alpha) + + 
color["green1t"] = (223,238,218,alpha) + color["green2t"] = (208,221,203,alpha) + color["green3t"] = (184,212,178,alpha) + color["green4t"] = (162,203,155,alpha) + color["green5t"] = (141,186,127,alpha) + color["green6t"] = (119,176,108,alpha) + color["green7t"] = (98,166,92,alpha) + color["green8t"] = (72,146,73,alpha) + color["green9t"] = (21,119,40,alpha) + color["green10t"] = (0,82,33,alpha) + + color["red1t"] = (252,227,229,alpha) + color["red2t"] = (249,214,215,alpha) + color["red3t"] = (244,187,188,alpha) + color["red4t"] = (240,161,161,alpha) + color["red5t"] = (235,134,134,alpha) + color["red6t"] = (231,107,108,alpha) + color["red7t"] = (226,81,81,alpha) + color["red8t"] = (222,54,54,alpha) + color["red9t"] = (217,27,27,alpha) + color["red10t"] = (213,1,1,alpha) + + color["green1"] = (247,252,245,255) + color["green2"] = (229,245,224,255) + color["green3"] = (199,233,192,255) + color["green4"] = (161,217,155,255) + color["green5"] = (116,196,118,255) + color["green6"] = (65,171,93,255) + color["green7"] = (35,139,69,255) + color["green8"] = (0,109,44,255) + color["green9"] = (0,68,27,255) + color["brown"] = (83,49,24,255) + color["brownt"] = (83,49,24,alpha) + color["beige"] = (210,180,140,255) + color["beiget"] = (210,180,140,alpha) + return color + +#--------------------------------------------- +def initGraph(): + data={} + + #default data points + data['width']=4000 #5000 + data['height']=4000 #5000 + data['mid']=data['height']/2 + data['midwidth']=data['width']/2 + + data['ref_y'] = (data['height'] / 1.5) + data['skew']=300 #400 ### CHANGE THIS FOR THE PITCH OF THE TREE + data['decay']=120 #200### THIS IS THE SPACE BETWEEN BOTH SIDES, TOP OF TREE + data['ref_y_skew']=data['ref_y']-data['skew'] ###DON'T CHANGE THIS + data['mis_bar']=50 + data['query_y']=70 + data['x']=100 + data['xlabel']=110 + data['bar_thick']=20 + data['query_thick']=15 + data['reference_thick']=15 + data['x_legend'] = data['width'] - 600 + data['y_legend'] = data['height'] - 100 
###WAS 1500XXX + data['x_legend_picto'] = data['width'] / 1.5 + data['tick_up']=data['ref_y_skew'] - 120 + data['tick_down']=data['tick_up'] + 20 + + return data + + +#--------------------------------------------- +def drawRelationship(info,gff1,gff2,gff3,order1,order2,order3,ref1,ref2,ref3,length1,length2,length3,match1,match2,match3,scale,seqidentity,mismatch,block_length,alpha): + + ###Initialize new graph + data=initGraph() + + ###var + xlegend = 0 + ylegend = 0 + output = "xmv-hive_i" + str(seqidentity) + "_b" + str(block_length) + "_c" + str(scale) + "_a" + str(alpha) + ".svg" + + ### write to svg + # xml / SVG + xml = open(output,"w+") + xml.write("\n") + xml.write("\n") + xml.write("\n" % (data['width'], data['height'])) + + ### draw axes + # axis 1 + y1 = data['mid'] - 50 + x1 = data['midwidth'] + xt = 0 + yt = 0 + + for ref in order1: #RLW + scaled_offset_len = 0 #RLW + if ref in ref1: #RLW + scaled_offset_len = ref1[ref]['offset_len'] #RLW + y1 = data['mid'] - 50 - scaled_offset_len + y2 = y1 - ref1[ref]['scaled_len'] + x2 = x1 + xt = x2 + 5 + yt = y2 - 5 + + xml.write("\n" % (x1,y1,x2,y2)) + + ####draw features/exons on side of ref + ###REF gene model + #print " 1 %s %i %i" % (ref,ref1[ref]['scaled_len'],ref1[ref]['offset_len']) + if ref in gff1: + print(" 1 %s" % ref) + for exstart in gff1[ref]: + exend = gff1[ref][exstart]['end'] + gy1 = y1 - (exstart) + gy2 = y1 - (exend) + xml.write("\n" % (x1,gy1,x2,gy2,gff1[ref][exstart]['color'])) + ylegend = yt + x1 = x2 + xml.write("%s\n" % (xt,yt,info['1'])) + ########################################### + # axis 2 + x1 = data['midwidth'] + 50 + y1 = data['mid'] + 50 + xtick1 = x1 - 6 + xtick2 = x1 + 6 + ytick1 = y1 + 10 + ytick2 = y1 - 10 + xt = 0 + yt = 0 + + for ref in order2: #RLW + scaled_offset_len = 0 #RLW + if ref in ref2: #RLW + scaled_offset_len = ref2[ref]['offset_len'] #RLW + #print "%s %f %f<<<\n" % (ref,ref2[ref]['scaled_len'],scaled_offset_len) + x1 = data['midwidth'] + 50 + (math.sin(45) * 
scaled_offset_len) + y1 = data['mid'] + 50 + (math.cos(45) * scaled_offset_len) + x2 = (math.sin(45) * ref2[ref]['scaled_len']) + x1 + y2 = (math.cos(45) * ref2[ref]['scaled_len']) + y1 + xt = x2 + 5 + yt = y2 + 5 + + xml.write("\n" % (x1,y1,x2,y2)) + + ####draw features/exons on side of ref + ###REF gene model + #print "%s" % ref + if ref in gff2: + #print "%s" % ref + for exstart in gff2[ref]: + exend = gff2[ref][exstart]['end'] + gx1 = (math.sin(45) * (exstart)) + x1 + gy1 = (math.cos(45) * (exstart)) + y1 + gx2 = (math.sin(45) * (exend)) + x1 + gy2 = (math.cos(45) * (exend)) + y1 + l2 = y1 - (exend + scaled_offset_len) + xml.write("\n" % (gx1,gy1,gx2,gy2,gff2[ref][exstart]['color'])) + xlegend = xt + xml.write("%s\n" % (xt,yt,info['2'])) + ########################################### + # axis 3 + x1 = data['midwidth'] - 50 + y1 = data['mid'] + 50 + xt = 0 + yt = 0 + + for ref in order3: #RLW + scaled_offset_len = 0 #RLW + if ref in ref3: #RLW + scaled_offset_len = ref3[ref]['offset_len'] #RLW + x1 = data['midwidth'] - 50 + (math.sin(-45) * scaled_offset_len) + y1 = data['mid'] + 50 + (math.cos(-45) * scaled_offset_len) + x2 = (math.sin(-45) * ref3[ref]['scaled_len']) + x1 + y2 = (math.cos(-45) * ref3[ref]['scaled_len']) + y1 + + ###axis 3 label coordinate + xt = x2 - 19 * len(info['3']) + yt = y2 + 5 + + ###axis + xml.write("\n" % (x1,y1,x2,y2)) + + ####draw features/exons on side of ref + ###REF gene model + #print "%s" % ref + if ref in gff3: + #print "%s" % ref + for exstart in gff3[ref]: + exend = gff3[ref][exstart]['end'] + gx1 = (math.sin(-45) * (exstart)) + x1 + gy1 = (math.cos(-45) * (exstart)) + y1 + gx2 = (math.sin(-45) * (exend)) + x1 + gy2 = (math.cos(-45) * (exend)) + y1 + l2 = y1 - (exend + scaled_offset_len) + xml.write("\n" % (gx1,gy1,gx2,gy2,gff3[ref][exstart]['color'])) + ### axis 3 label + xml.write("%s\n" % (xt,yt,info['3'])) + ####### END AXES + # Begin ALIGNMENTS + + xq1 = data['midwidth'] + 5 + xq2 = xq1 + + ###DRAW ALIGN 1vs2 + for qry in 
match1: + allhit = match1[qry] + for hit in allhit: + start1_list = allhit[hit] + s1_list_sort=list(start1_list.keys()) + s1_list_sort.sort() + for start1 in s1_list_sort: + end1_list=start1_list[start1] + e1_list_sort=list(end1_list.keys()) + e1_list_sort.sort() + for end1 in e1_list_sort: + start2_list=end1_list[end1] + s2_list_sort=list(start2_list.keys()) + s2_list_sort.sort() + for start2 in s2_list_sort: + end2_list=start2_list[start2] + e2_list_sort=list(end2_list.keys()) + e2_list_sort.sort() + for end2 in e2_list_sort: + seqid = 100 - end2_list[end2] + + if start2 < end2: + if seqid == 100: + fill_color="#005824" + elif seqid >= 90: + fill_color="#238b45" + elif seqid >= 80: + fill_color="#41ae76" + elif seqid >= 70: + fill_color="#66c2a4" + elif seqid >= 60: + fill_color="#99d8c9" + elif seqid >= 50: + fill_color="#ccece6" + elif seqid < 50: + fill_color="#edf8fb" + + else:#### inverted hits + if seqid == 100: + fill_color="#99000d" + elif seqid >= 90: + fill_color="#cb181d" + elif seqid >= 80: + fill_color="#ef3b2c" + elif seqid >= 70: + fill_color="#fb6a4a" + elif seqid >= 60: + fill_color="#fc9272" + elif seqid >= 50: + fill_color="#fcbba1" + elif seqid < 50: + fill_color="#fee5d9" + + if end2_list[end2] <= mismatch: ###does it pass the mismatch cutoff? 
+ + ### LINES + yq1 = data['mid'] - 50 - start1 + yq2 = data['mid'] - 50 - end1 + + ### axis2 + xstart_axis2 = data['midwidth'] + 50 + ystart_axis2 = data['mid'] + 45 + xt1 = (math.sin(45)*start2) + xstart_axis2 + yt1 = (math.cos(45)*start2) + ystart_axis2 + xt2 = (math.sin(45)*end2) + xstart_axis2 + yt2 = (math.cos(45)*end2) + ystart_axis2 + ### for the bezier point + xt1e = xt1 + xt2e = xt2 + yq1e = ((yt1 - yq1)/3) + yq1 + yq2e = ((yt2 - yq2)/3) + yq2 + + xml.write("\n" % (xq2,yq2,xq1,yq1,xt1e,yq1e,xt1,yt1,xt2,yt2,xt2e,yq2e,xq2,yq2,fill_color,fill_color,alpha)) + + ###DRAW ALIGN 1vs3 + ### axis 1 + xq1 = data['midwidth'] - 5 + xq2 = xq1 + + for qry in match2: + allhit = match2[qry] + for hit in allhit: + start1_list = allhit[hit] + s1_list_sort=list(start1_list.keys()) + s1_list_sort.sort() + for start1 in s1_list_sort: + end1_list=start1_list[start1] + e1_list_sort=list(end1_list.keys()) + e1_list_sort.sort() + for end1 in e1_list_sort: + start2_list=end1_list[end1] + s2_list_sort=list(start2_list.keys()) + s2_list_sort.sort() + for start2 in s2_list_sort: + end2_list=start2_list[start2] + e2_list_sort=list(end2_list.keys()) + e2_list_sort.sort() + for end2 in e2_list_sort: + seqid = 100 - end2_list[end2] + + if start2 < end2: + if seqid == 100: + fill_color="#005824" + elif seqid >= 90: + fill_color="#238b45" + elif seqid >= 80: + fill_color="#41ae76" + elif seqid >= 70: + fill_color="#66c2a4" + elif seqid >= 60: + fill_color="#99d8c9" + elif seqid >= 50: + fill_color="#ccece6" + elif seqid < 50: + fill_color="#edf8fb" + + else:#### inverted hits + if seqid == 100: + fill_color="#99000d" + elif seqid >= 90: + fill_color="#cb181d" + elif seqid >= 80: + fill_color="#ef3b2c" + elif seqid >= 70: + fill_color="#fb6a4a" + elif seqid >= 60: + fill_color="#fc9272" + elif seqid >= 50: + fill_color="#fcbba1" + elif seqid < 50: + fill_color="#fee5d9" + + if end2_list[end2] <= mismatch: ###does it pass the mismatch cutoff? 
+ + ### LINES + yq1 = data['mid'] - 50 - start1 + yq2 = data['mid'] - 50 - end1 + + ### axis3 + xstart_axis3 = data['midwidth'] - 50 + ystart_axis3 = data['mid'] + 45 + xt1 = (math.sin(-45)*start2) + xstart_axis3 + yt1 = (math.cos(-45)*start2) + ystart_axis3 + xt2 = (math.sin(-45)*end2) + xstart_axis3 + yt2 = (math.cos(-45)*end2) + ystart_axis3 + ### for the bezier point + xt1e = xt1 + xt2e = xt2 + yq1e = ((yt1 - yq1)/3) + yq1 + yq2e = ((yt2 - yq2)/3) + yq2 + + xml.write("\n" % (xq2,yq2,xq1,yq1,xt1e,yq1e,xt1,yt1,xt2,yt2,xt2e,yq2e,xq2,yq2,fill_color,fill_color,alpha)) + + ###DRAW ALIGN 3vs2 + for qry in match3: + allhit = match3[qry] + for hit in allhit: + start1_list = allhit[hit] + s1_list_sort=list(start1_list.keys()) + s1_list_sort.sort() + for start1 in s1_list_sort: + end1_list=start1_list[start1] + e1_list_sort=list(end1_list.keys()) + e1_list_sort.sort() + for end1 in e1_list_sort: + start2_list=end1_list[end1] + s2_list_sort=list(start2_list.keys()) + s2_list_sort.sort() + for start2 in s2_list_sort: + end2_list=start2_list[start2] + e2_list_sort=list(end2_list.keys()) + e2_list_sort.sort() + for end2 in e2_list_sort: + seqid = 100 - end2_list[end2] + + if start2 < end2: + if seqid == 100: + fill_color="#005824" + elif seqid >= 90: + fill_color="#238b45" + elif seqid >= 80: + fill_color="#41ae76" + elif seqid >= 70: + fill_color="#66c2a4" + elif seqid >= 60: + fill_color="#99d8c9" + elif seqid >= 50: + fill_color="#ccece6" + elif seqid < 50: + fill_color="#edf8fb" + + else:#### inverted hits + if seqid == 100: + fill_color="#99000d" + elif seqid >= 90: + fill_color="#cb181d" + elif seqid >= 80: + fill_color="#ef3b2c" + elif seqid >= 70: + fill_color="#fb6a4a" + elif seqid >= 60: + fill_color="#fc9272" + elif seqid >= 50: + fill_color="#fcbba1" + elif seqid < 50: + fill_color="#fee5d9" + + if end2_list[end2] <= mismatch: ###does it pass the mismatch cutoff? 
+ + ### axis2 + xstart_axis2 = data['midwidth'] + 50 + ystart_axis2 = data['mid'] + 55 + xq1 = (math.sin(45)*start2) + xstart_axis2 + yq1 = (math.cos(45)*start2) + ystart_axis2 + xq2 = (math.sin(45)*end2) + xstart_axis2 + yq2 = (math.cos(45)*end2) + ystart_axis2 + + ### axis3 + xstart_axis3 = data['midwidth'] - 50 + ystart_axis3 = data['mid'] + 55 + xt1 = (math.sin(-45)*start1) + xstart_axis3 + yt1 = (math.cos(-45)*start1) + ystart_axis3 + xt2 = (math.sin(-45)*end1) + xstart_axis3 + yt2 = (math.cos(-45)*end1) + ystart_axis3 + + #print "xq1 %f xt1 %f\n" % (xq1,xt1) + ### for the bezier point XXXX + #if (xq1 - data['midwidth']) > (data['midwidth'] - xt1): + # x1e = data['midwidth'] + (xq1-data['midwidth'])/30 + #else: + # x1e = data['midwidth'] - (data['midwidth'] - xt1)/30 + #x2e = x1e + x1e = xq1 - ((xq1 - xt1)/2) + x2e = x1e + if yq1 > yt1: + #y1e = yq1 + #y2e = yq2 + y1e = yq1 + (yq1 - ystart_axis3)*0.85 + (0.015 * yq1) + y2e = yq2 + (yq2 - ystart_axis3)*0.85 + (0.015 * yq2) + else: + #y1e = yt1 + #y2e = yt2 + y1e = yt1 + (yt1 - ystart_axis3)*0.85 + (0.015 * yt1) + y2e = yt2 + (yt2 - ystart_axis3)*0.85 + (0.015 * yt2) + + + xml.write("\n" % (xq2,yq2,xq1,yq1,x1e,y1e,xt1,yt1,xt2,yt2,x2e,y2e,xq2,yq2,fill_color,fill_color,alpha)) + + ######### END PLOT ALIGNMENTS + ### DRAW LEGEND + + fcol = ["#005824","#238b45","#41ae76","#66c2a4","#99d8c9","#ccece6","#edf8fb"] + rcol = ["#99000d","#cb181d","#ef3b2c","#fb6a4a","#fc9272","#fcbba1","#fee5d9"] + sic = ["100","90+","80+","70+","60+","50+","0-49"] + + xt = xlegend + yt = ylegend + + xtl = xt + 30 + xtl2 = xtl + 85 + xtl3 = xtl2 + 100 + xtl4 = xtl2 + 50 + yt2 = yt + 50 + + xml.write("Sequence identity (%%)\n" % (xt,yt)) + xml.write("Forward | Reverse\n" % (xtl,yt2)) + + el = 0 + for fc in fcol: + yt2 += 30 + + xml.write("\n"% (xtl2,yt2,fc,alpha)) + ytt = yt2 + 20 + xml.write("%s\n" % (xtl3,ytt,sic[el])) + xml.write("\n" % (xtl4,yt2,rcol[el],alpha)) + el+=1 + + #### DONE PLOTTING + ####### WRAP XML and CLOSE FILES + 
xml.write("") + print("xmatchview-hive svg output graph in %s\n" % (output)) + xml.close() + + + + +#--------------------------------------------- +def main(): + opts, args = getopt.getopt(sys.argv[1:], "x:y:z:q:r:s:d:e:f:i:b:c:a:") + + (gff_file1, gff_file2, gff_file3, align_file1, align_file2, align_file3, txt_file1, txt_file2, txt_file3)=(None,None,None,None,None,None,None,None,None) + (seqidentity, block_length, scale, protein, alpha)=(0,0,0,0,0.75) + + for o, v in opts: + if o == "-x": + align_file1=str(v) + if o == "-y": + align_file2=str(v) + if o == "-z": + align_file3=str(v) + if o == "-q": + txt_file1=str(v) + if o == "-r": + txt_file2=str(v) + if o == "-s": + txt_file3=str(v) + if o == "-i": + seqidentity=int(v) + if o == "-b": + block_length=int(v) + if o == "-c": + scale=int(v) + if o == "-d": + gff_file1=str(v) + if o == "-e": + gff_file2=str(v) + if o == "-f": + gff_file3=str(v) + if o == "-a": + alpha = float(v) + + if (txt_file1 == None or txt_file2 == None or txt_file3 == None or align_file1 == None or align_file2 == None or align_file3 == None or block_length == 0 or scale == 0): + print("Usage: %s v1.2.4" % (sys.argv[0:])) + print("-x alignment file [1 vs. 2] (cross_match .rep or Pairwise mApping Format .paf)") + print("-y alignment file [1 vs. 3] (cross_match .rep or Pairwise mApping Format .paf)") + print("-z alignment file [3 vs. 2] (cross_match .rep or Pairwise mApping Format .paf)") + print("-q genome text file 1 (format NAME:LENGTH)") + print("-r genome text file 2 (format NAME:LENGTH)") + print("-s genome text file 3 (format NAME:LENGTH)") + print("-d features (eg. exons) coordinates GFF tsv file 1 (start end) - optional") + print("-e features (eg. exons) coordinates GFF tsv file 2 (start end) - optional") + print("-f features (eg. exons) coordinates GFF tsv file 3 (start end) - optional") + print("-i sequence identity threshold (e.g. 
-i 90 will show colinear blocks >= 90% sequence identity)") + print("-b minimum length (bp) of similarity block to display") + print("-c scale (pixel to basepair scale, for displaying the image)") + #print "-l label for the tree trunk (6 characters or less for best result)" + print("-a alpha value, from 0 (transparent) to 1 (solid, default)") + #print "-f output image file format (png, tiff, jpeg, or gif) NOTE: png and tiff recommended." + #print "-p full path to the directory with fonts on your system (please refer to the documentation for fonts used)" + #print "-z transform bacterial ORF into protein (i.e. plot alignment between ORF products? 1/0) DEPRECATED\n" + print("* Files for the -q, -r and -s options must include header_names:base_length, with names that correspond to those in fasta files used to run cross_match or minimap2\n") + print("! Ensure the config.txt file exists in your run directory") + sys.exit(1) + + #====seqidentity checks + if (seqidentity <0 or seqidentity >100): + print("-i must be a valid number between 0-100") + sys.exit(1) + + #====Alpha checks + if (alpha<0 or alpha >1): + print("-a must be a valid number between 0-1") + sys.exit(1) + + #===Scale checks + if (scale<1): + print("Not a possible scale. Make sure you select a number >1.") + sys.exit(1) + + #====File checks + checkFile(txt_file1) + checkFile(txt_file2) + checkFile(txt_file3) + checkFile(align_file1) + checkFile(align_file2) + checkFile(align_file3) + checkFile("config.txt") + + #====Parse config.txt + info = {} + print("Reading configuration..\n") + with open("config.txt") as fd: + rd = csv.reader(fd, delimiter=":", quotechar='"') + for row in rd: + info[row[0]] = row[1] + print("axis %s = %s\n" %(row[0],row[1])) + + + ###OPTIONAL, FOR FEATURES REPRESENTATION + (gff1,gff2,gff3) = ({},{},{}) + + if(gff_file1 != None): + checkFile(gff_file1) + print("Reading reference feature file %s ..." 
% (gff_file1)) + gff1=readGFF(gff_file1,scale) + print("done.") + + if(gff_file2 != None): + checkFile(gff_file2) + print("Reading reference feature file %s ..." % (gff_file2)) + gff2=readGFF(gff_file2,scale) + print("done.") + + if(gff_file3 != None): + checkFile(gff_file3) + print("Reading reference feature file %s ..." % (gff_file3)) + gff3=readGFF(gff_file3,scale) + print("done.") + + #====Parse Fasta Files + (order1, ref1, length1)=readText(txt_file1, scale) + (order2, ref2, length2)=readText(txt_file2, scale) + (order3, ref3, length3)=readText(txt_file3, scale) + + #====Raise error if features out of bounds + data=initGraph() + if length1 / data['width'] > scale: + estscale = int(length1 / data['width']) + 1 + sys.exit("\n\n! The sequence in %s is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (txt_file1, estscale)) + if length2 / data['width'] > scale: + estscale = int(length2 / data['width']) + 1 + sys.exit("\n\n! The sequence in %s is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (txt_file2, estscale)) + if length3 / data['width'] > scale: + estscale = int(length3 / data['width']) + 1 + sys.exit("\n\n! The sequence in %s is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (txt_file3, estscale)) + + mismatch = 100 - seqidentity + + #====Parse Alignment files + print("Reading alignment file 1/3 %s ..." % (align_file1)) + (match1, match2, match3) = ({},{},{}) + if align_file1.endswith("rep"): + match1 = readCrossMatch(align_file1, mismatch, block_length, ref1, ref2, scale) + elif align_file1.endswith("paf"): + match1 = readPAF(align_file1, mismatch, block_length, ref1, ref2, scale) + else: + print("The alignment file provided (-x %s) does not end in .rep (cross_match) or .paf (PAF) -- fatal" % align_file1) + sys.exit(1) + print("done.") + + print("Reading alignment file 2/3 %s ..." 
% (align_file2)) + match = {} + if align_file2.endswith("rep"): + match2 = readCrossMatch(align_file2, mismatch, block_length, ref1, ref3, scale) + elif align_file2.endswith("paf"): + match2 = readPAF(align_file2, mismatch, block_length, ref1, ref3, scale) + else: + print("The alignment file provided (-x %s) does not end in .rep (cross_match) or .paf (PAF) -- fatal" % align_file2) + sys.exit(1) + print("done.") + + print("Reading alignment file 3/3 %s ..." % (align_file3)) + match = {} + if align_file3.endswith("rep"): + match3 = readCrossMatch(align_file3, mismatch, block_length, ref3, ref2, scale) + elif align_file3.endswith("paf"): + match3 = readPAF(align_file3, mismatch, block_length, ref3, ref2, scale) + else: + print("The alignment file provided (-x %s) does not end in .rep (cross_match) or .paf (PAF) -- fatal" % align_file3) + sys.exit(1) + print("done.") + + print("Drawing ...") + drawRelationship(info,gff1,gff2,gff3,order1,order2,order3,ref1,ref2,ref3,length1,length2,length3,match1,match2,match3,scale,seqidentity,mismatch,block_length,alpha) + +#--------------------------------------------- +#Main Call + +main() +sys.exit(1) + + diff --git a/v1.2.4/xmatchview.py b/v1.2.4/xmatchview.py new file mode 100755 index 0000000..dcbe978 --- /dev/null +++ b/v1.2.4/xmatchview.py @@ -0,0 +1,1099 @@ +#!/usr/bin/env python3 +# xmatchview.py +# Visualizing genome synteny +# Rene L Warren 2005-2020 + +import sys +import os +import getopt +import re +import csv +# import Image ## https://stackoverflow.com/questions/17451711/typeerror-when-resizing-an-image-with-pil-in-python +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFont +from PIL import ImageEnhance +from PIL import PSDraw +import subprocess + +#--------------------------------------------- +def checkFile(file): + + print("Checking input %s" % file) + if not os.path.exists(file): + print("File %s" % file + " is not valid") + sys.exit(1) + else: + print("exists.") + 
+#--------------------------------------------- +def readGFF(file,scale): + + feature = {} + (start,end) = (0,0) + + with open(file) as fd: + rd = csv.reader(fd, delimiter="\t", quotechar='"') + for row in rd: + id = row[0] + start = float(int(row[3])/scale) + end = float(int(row[4])/scale) + print("__%s__ - __%s__ <<<<<" % (start,end)) + + color = row[9] + if color == None: + color = "yellow" + + if id not in feature: + feature[id] = {} + if start not in feature[id]: + feature[id][start] = {} + if 'end' not in feature[id][start]: + feature[id][start]['end'] = "" + if 'color' not in feature[id][start]: + feature[id][start]['color'] = "" + + feature[id][start]['end'] = end + feature[id][start]['color'] = color + print("INITIALIZED %s : %i,%i with %s" %(row[0],start,end,color)) + + return feature + +#--------------------------------------------- +def readPAF(paf_file,mismatch,block_length,reference,query,scale): + + (nocdt,match)=({},{}) + ctline = 0 + + xmatch_obj=open(paf_file, 'r') + + for line in xmatch_obj: + ### reverse matches qryname qrystart qryend orient hitname hitstart hitend match block + ### 1 2 3 4 5 6 7 8 + rev_regex = re.compile("(\S+)\s+\S+\s+(\d+)\s+(\d+)\s+\-\s+(\S+)\s+\S+\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)") + rm = rev_regex.match(line) + ###forward matches + + fwd_regex = re.compile("(\S+)\s+\S+\s+(\d+)\s+(\d+)\s+\+\s+(\S+)\s+\S+\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)") + fm = fwd_regex.match(line) + + if rm != None: + ctline = 1 + #print "GR: %s" % line + #print "REVERSE: %s %s %s %s %s %s %s %s" % (rm.group(1), rm.group(2), rm.group(3), rm.group(4), rm.group(5), rm.group(6), rm.group(7), rm.group(8)) + + alignLen = float(rm.group(6)) - float(rm.group(5)) + 1 + percentMis = 100 * float(( alignLen - float(rm.group(7))) / alignLen ) + #print "=== %.2f === %.2f ===" % (alignLen,percentMis) + #sys.exit(1) + (primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(str(rm.group(1)), float(rm.group(2)), 
float(rm.group(3)), str(rm.group(4)), float(rm.group(6)), float(rm.group(5))) ### order of 5 and 6 is important to plot alignments in different colors + + if (primary_match == secondary_match) and (startSecondMatch == startFirstMatch) and (endSecondMatch == endFirstMatch): + print("Exact match over full coordinates, ignoring...") + else: + + if primary_match in query and secondary_match in reference: #RLW after reading FASTA for each + startFirstMatchS = startFirstMatch + query[primary_match]['offset'] #RLW + endFirstMatchS = endFirstMatch + query[primary_match]['offset'] #RLW + startSecondMatchS = startSecondMatch + reference[secondary_match]['offset'] #RLW + endSecondMatchS = endSecondMatch + reference[secondary_match]['offset'] #RLW + + ####no autovivification in python + if primary_match not in nocdt: + nocdt[primary_match]={} + if secondary_match not in nocdt[primary_match]: + nocdt[primary_match][secondary_match]={} + if startFirstMatchS not in nocdt[primary_match][secondary_match]: + nocdt[primary_match][secondary_match][startFirstMatchS]={} + if endFirstMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS]={} + if startSecondMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS]={} + if endSecondMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS][endSecondMatchS]={} + + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS][endSecondMatchS]=percentMis + + if percentMis > mismatch: + continue #will not display alignment lines below threshold + elif (primary_match == secondary_match) and (startSecondMatch == startFirstMatch) and (endSecondMatch == endFirstMatch): + 
print("Exact match over full coordinates, ignoring...") + elif (endFirstMatch - startFirstMatch) < block_length: + continue #will skip smaller alignment + else: + if primary_match in query and secondary_match in reference: + startFirstMatch = (startFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + endFirstMatch = (endFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + startSecondMatch = (startSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + endSecondMatch = (endSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + + print("%i-%i :: %i-%i" % (startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch)) + + if primary_match not in match: + match[primary_match]={} + if secondary_match not in match[primary_match]: + match[primary_match][secondary_match]={} + if startFirstMatch not in match[primary_match][secondary_match]: + match[primary_match][secondary_match][startFirstMatch]={} + if endFirstMatch not in match[primary_match][secondary_match][startFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if startSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if endSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + + ###forward matches + elif fm != None: + ctline = 1 + #print "GF: %s" % line + #print "FORWARD: %s %s %s %s %s %s %s %s" % (fm.group(1), fm.group(2), fm.group(3), fm.group(4), fm.group(5), fm.group(6), fm.group(7), fm.group(8)) + + alignLen = float(fm.group(6)) - float(fm.group(5)) + 1 + percentMis = 100 * float(( alignLen - float(fm.group(7))) / 
alignLen ) + #percentMis2 = 100 * float((float(fm.group(8)) - float(fm.group(7))) / float(fm.group(8))) + #print "=== %.2f === %.2f === %.2f" % (alignLen,percentMis,percentMis2) + #sys.exit(1) + (primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(str(fm.group(1)), float(fm.group(2)), float(fm.group(3)), str(fm.group(4)), float(fm.group(5)), float(fm.group(6))) + + if (primary_match == secondary_match) and (startSecondMatch == startFirstMatch) and (endSecondMatch == endFirstMatch): + print("Exact match over full coordinates, ignoring...") + else: + + if primary_match in query and secondary_match in reference: #RLW after reading FASTA for each + startFirstMatchS = startFirstMatch + query[primary_match]['offset'] #RLW + endFirstMatchS = endFirstMatch + query[primary_match]['offset'] #RLW + startSecondMatchS = startSecondMatch + reference[secondary_match]['offset'] #RLW + endSecondMatchS = endSecondMatch + reference[secondary_match]['offset'] #RLW + + ####no autovivification in python + if primary_match not in nocdt: + nocdt[primary_match]={} + if secondary_match not in nocdt[primary_match]: + nocdt[primary_match][secondary_match]={} + if startFirstMatchS not in nocdt[primary_match][secondary_match]: + nocdt[primary_match][secondary_match][startFirstMatchS]={} + if endFirstMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS]={} + if startSecondMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS]={} + if endSecondMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS][endSecondMatchS]={} + + 
nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS][endSecondMatchS]=percentMis + + + if percentMis > mismatch: + continue #will not display alignment lines below threshold + elif (primary_match == secondary_match) and (startSecondMatch == startFirstMatch) and (endSecondMatch == endFirstMatch): + print("Exact match over full coordinates, ignoring...") + elif (endFirstMatch - startFirstMatch) < block_length: + continue #will skip smaller alignment + else: + if primary_match in query and secondary_match in reference: + startFirstMatch = (startFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + endFirstMatch = (endFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + startSecondMatch = (startSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + endSecondMatch = (endSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + + print("%i-%i :: %i-%i" % (startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch)) + + if primary_match not in match: + match[primary_match]={} + if secondary_match not in match[primary_match]: + match[primary_match][secondary_match]={} + if startFirstMatch not in match[primary_match][secondary_match]: + match[primary_match][secondary_match][startFirstMatch]={} + if endFirstMatch not in match[primary_match][secondary_match][startFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if startSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if endSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + + #else: + 
#print "NO RE:%s" % line + xmatch_obj.close() + + if ctline == 0 : + print("There are no alignments to plot. Make sure your file %s is reporting alignments -- fatal." % paf_file) + sys.exit(1) + + return nocdt, match + +#--------------------------------------------- +def readCrossMatch(crossmatch_file,mismatch,block_length,reference,query,scale): + + (nocdt,match)=({},{}) + ctline = 0 + + xmatch_obj=open(crossmatch_file, 'r') + + for line in xmatch_obj: + + # Query start end R Ref end start + # 10 8.70 0.00 0.00 JN039333.1_Picea_abies 440 462 (2149) C KT263970.1_Picea_sitchensis (1851) 524 502 + # start end + # 16 4.55 0.00 0.00 JN039333.1_Picea_abies 484 505 (2106) KT263970.1_Picea_sitchensis 341 362 (2013) + + ###reverse matches s.i. qry + rev_regex = re.compile("(\s+)?\d+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+C\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)") + rm = rev_regex.match(line) + ###forward matches + fwd_regex = re.compile("(\s+)?\d+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+\S+") + fm = fwd_regex.match(line) + + if rm != None and rm.group(3) != "0" and rm.group(6) != "0": + ctline = 1 + #print "GR: %s" % line + #print "REVERSE: %s %s %s %s %s %s %s" % (rm.group(1), rm.group(2), rm.group(3), rm.group(4), rm.group(5), rm.group(6), rm.group(7)) + + #(percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(rm.group(1)), rm.group(2), float(rm.group(3)), float(rm.group(4)), rm.group(5), float(rm.group(6)), float(rm.group(7))) + (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(rm.group(2)), rm.group(3), float(rm.group(4)), float(rm.group(5)), rm.group(6), float(rm.group(7)), float(rm.group(8)))### has to be in this order to plot reverse align in diff color + + if (primary_match == secondary_match) and (startSecondMatch == startFirstMatch) and (endSecondMatch == endFirstMatch): + print("Exact match 
over full coordinates, ignoring...") + else: + + if primary_match in query and secondary_match in reference: #RLW after reading FASTA for each + startFirstMatchS = startFirstMatch + query[primary_match]['offset'] #RLW + endFirstMatchS = endFirstMatch + query[primary_match]['offset'] #RLW + startSecondMatchS = startSecondMatch + reference[secondary_match]['offset'] #RLW + endSecondMatchS = endSecondMatch + reference[secondary_match]['offset'] #RLW + + ####no autovivification in python + if primary_match not in nocdt: + nocdt[primary_match]={} + if secondary_match not in nocdt[primary_match]: + nocdt[primary_match][secondary_match]={} + if startFirstMatchS not in nocdt[primary_match][secondary_match]: + nocdt[primary_match][secondary_match][startFirstMatchS]={} + if endFirstMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS]={} + if startSecondMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS]={} + if endSecondMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS][endSecondMatchS]={} + + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS][endSecondMatchS]=percentMis + + if percentMis > mismatch: + continue #will not display alignment lines below threshold + elif (primary_match == secondary_match) and (startSecondMatch == startFirstMatch) and (endSecondMatch == endFirstMatch): + print("Exact match over full coordinates, ignoring...") + elif (endFirstMatch - startFirstMatch) < block_length: + continue #will skip smaller alignment + else: + if primary_match in query and secondary_match in reference: + startFirstMatch = (startFirstMatch/scale) + 
query[primary_match]['offset_len'] #RLW + endFirstMatch = (endFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + startSecondMatch = (startSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + endSecondMatch = (endSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + + print("%i-%i :: %i-%i" % (startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch)) + + if primary_match not in match: + match[primary_match]={} + if secondary_match not in match[primary_match]: + match[primary_match][secondary_match]={} + if startFirstMatch not in match[primary_match][secondary_match]: + match[primary_match][secondary_match][startFirstMatch]={} + if endFirstMatch not in match[primary_match][secondary_match][startFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if startSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if endSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + + ###forward matches + elif fm != None and fm.group(3) != "0" and fm.group(6) != "0": + ctline = 1 + #print "GF: %s" % line + #print "FORWARD: %s %s %s %s %s %s %s" % (fm.group(1), fm.group(2), fm.group(3), fm.group(4), fm.group(5), fm.group(6), fm.group(7)) +# (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, endSecondMatch)=(float(fm.group(1)), fm.group(2), float(fm.group(3)), float(fm.group(4)), fm.group(5), float(fm.group(6)), float(fm.group(7))) + + (percentMis, primary_match, startFirstMatch, endFirstMatch, secondary_match, startSecondMatch, 
endSecondMatch)=(float(fm.group(2)), fm.group(3), float(fm.group(4)), float(fm.group(5)), fm.group(6), float(fm.group(7)), float(fm.group(8))) + + if (primary_match == secondary_match) and (startSecondMatch == startFirstMatch) and (endSecondMatch == endFirstMatch): + print("Exact match over full coordinates, ignoring...") + else: + + if primary_match in query and secondary_match in reference: #RLW after reading FASTA for each + startFirstMatchS = startFirstMatch + query[primary_match]['offset'] #RLW + endFirstMatchS = endFirstMatch + query[primary_match]['offset'] #RLW + startSecondMatchS = startSecondMatch + reference[secondary_match]['offset'] #RLW + endSecondMatchS = endSecondMatch + reference[secondary_match]['offset'] #RLW + + ####no autovivification in python + if primary_match not in nocdt: + nocdt[primary_match]={} + if secondary_match not in nocdt[primary_match]: + nocdt[primary_match][secondary_match]={} + if startFirstMatchS not in nocdt[primary_match][secondary_match]: + nocdt[primary_match][secondary_match][startFirstMatchS]={} + if endFirstMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS]={} + if startSecondMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS]={} + if endSecondMatchS not in nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS]: + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS][endSecondMatchS]={} + + nocdt[primary_match][secondary_match][startFirstMatchS][endFirstMatchS][startSecondMatchS][endSecondMatchS]=percentMis + + + if percentMis > mismatch: + continue #will not display alignment lines below threshold + elif (primary_match == secondary_match) and (startSecondMatch == startFirstMatch) and (endSecondMatch == endFirstMatch): + 
print("Exact match over full coordinates, ignoring...") + elif (endFirstMatch - startFirstMatch) < block_length: + continue #will skip smaller alignment + else: + if primary_match in query and secondary_match in reference: + startFirstMatch = (startFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + endFirstMatch = (endFirstMatch/scale) + query[primary_match]['offset_len'] #RLW + startSecondMatch = (startSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + endSecondMatch = (endSecondMatch/scale) + reference[secondary_match]['offset_len'] #RLW + + print("%i-%i :: %i-%i" % (startFirstMatch,endFirstMatch,startSecondMatch,endSecondMatch)) + + if primary_match not in match: + match[primary_match]={} + if secondary_match not in match[primary_match]: + match[primary_match][secondary_match]={} + if startFirstMatch not in match[primary_match][secondary_match]: + match[primary_match][secondary_match][startFirstMatch]={} + if endFirstMatch not in match[primary_match][secondary_match][startFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch]={} + if startSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]={} + if endSecondMatch not in match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch]: + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]={} + + match[primary_match][secondary_match][startFirstMatch][endFirstMatch][startSecondMatch][endSecondMatch]=percentMis + + #else: + #print "NO RE:%s" % line + xmatch_obj.close() + + if ctline == 0 : + print("There are no alignments to plot. Make sure your file %s is reporting alignments -- fatal." 
% crossmatch_file) + sys.exit(1) + + return nocdt, match + +#--------------------------------------------- +def generateCoords(nocdt, size, leap, protein): + + freq={} + + pos_range=list(range(0,size,leap)) + + for pos in pos_range: + print("%i out of %i" % (pos,size)) + for query in nocdt: + for comparison in nocdt[query]: + start1_dict=list(nocdt[query][comparison].keys()) + start1_dict.sort() + for start1 in start1_dict: + end1_dict=list(nocdt[query][comparison][start1].keys()) + end1_dict.sort() + for end1 in end1_dict: + start2_dict=list(nocdt[query][comparison][start1][end1].keys()) + start2_dict.sort() + for start2 in start2_dict: + end2_dict=list(nocdt[query][comparison][start1][end1][start2].keys()) + end2_dict.sort() + for end2 in end2_dict: + (ss,ee) = (start2,end2) + if protein: + size_ref = end2 - start2 + buffer = ((size_ref - (size_ref/3)) / 2) + ss = start2 + buffer + ee = end2 - buffer + + if((pos >= ss and pos <= ee) or (pos >= ee and pos <= ss)): + current_mismatch=float(nocdt[query][comparison][start1][end1][start2][end2]) + if pos not in freq: + freq[pos]={} + if current_mismatch not in freq[pos]: + freq[pos][current_mismatch]=int(0) + freq[pos][current_mismatch]=freq[pos][current_mismatch]+1 + return freq + +#--------------------------------------------- +def findOccurences(s, ch): + return [i for i, letter in enumerate(s) if letter == ch] + +#--------------------------------------------- +def readFasta(file, scale): + + (head_match, previous_contig,seq_length) = (None,None,0) + L1={} + order=[]#RLW + npos={} #RLW + zpos={} + tot_length = 0 #RLW + + file_obj = open(file, 'r') + + for line in file_obj: + head_match_regex = re.compile('>(\S+)') + head_match = head_match_regex.match(line) + if head_match != None: + if (head_match != previous_contig and previous_contig != None): + (seq_length, scale)=(int(seq_length), int(scale)) + if previous_contig not in L1:#RLW + L1[previous_contig]={} #RLW + L1[previous_contig]['scaled_len'] = 
float(seq_length/scale) #RLW + L1[previous_contig]['offset_len'] = float(tot_length/scale) #RLW first ID will be offset at 0 as it should + L1[previous_contig]['offset'] = tot_length #RLW + L1[previous_contig]['npos'] = npos + L1[previous_contig]['zpos'] = zpos + print("NPOS: (Ns or Zs only tracked on 1-line sequences)") + print(npos) + + order.append(previous_contig) #RLW + print("%s length = %i bp, scaled to %.0f pixels" % (previous_contig,seq_length,L1[previous_contig]['scaled_len'])) #RLW + tot_length += seq_length #RLW + seq_length = 0 #resets the sequence length + previous_contig = head_match.group(1) + else: + seq_subset_regex = re.compile('(.*)', re.I) + seq_subset = seq_subset_regex.match(line) + if seq_subset != None: + seq_length += len(seq_subset.group(1)) + npos=findOccurences(seq_subset.group(1).upper(), "N") + zpos=findOccurences(seq_subset.group(1).upper(), "Z") + + (seq_length, scale)=(int(seq_length), int(scale)) + if previous_contig not in L1: #RLW + L1[previous_contig]={} #RLW + L1[previous_contig]['scaled_len'] = float(seq_length/scale) #RLW + L1[previous_contig]['offset_len'] = float(tot_length/scale) #RLW + L1[previous_contig]['offset'] = tot_length #RLW + L1[previous_contig]['npos'] = npos + L1[previous_contig]['zpos'] = zpos + print("NPOS: (Ns or Zs only tracked on 1-line sequences)") + print(npos) + + order.append(previous_contig) #RLW + print("%s length = %i bp, scaled to %.0f pixels" % (previous_contig,seq_length,L1[previous_contig]['scaled_len'])) #RLW + tot_length += seq_length #RLW + + file_obj.close() + + scaled_tot_len = float(tot_length/scale) #RLW + print("Total length = %i bp, scaled to : %.0f pixels " % (tot_length,scaled_tot_len)) #RLW + + return (order, L1, tot_length) #RLW tot_length + +#--------------------------------------------- +def initColor(alpha): + color={} + + #allocate colors + color["white"] = (255,255,255,255) + color["black"] = (0,0,0,255) + color["swamp"] = (150,150,30,255) + color["blue"] = (0,102,204,255) + 
color["yellow"] = (255,255,0,255) + color["cyan"] = (0,255,255,255) + color["purple"] = (255,0,255,alpha) + color["green"] = (100,250,25,255) + color["lime"] = (57,255,20,255) + color["red"] = (250,25,75,255) + color["sarin"] = (255,66,0,255) + color["forest"] = (25,175,0,255) + color["dirtyred"] = (200,0,120,255) + color["navy"] = (0,0,150,alpha) + color["dirtyyellow"] = (200,200,75,255) + color["grey"] = (153,153,153,255) + color["lightgrey"] = (220,220,220,355) + color["salmon"] = (255,153,153,alpha) + color["lightblue"] = (153,204,255,alpha) + color["orange"] = (255,153,51,255) + color["beige"] = (222,184,135,255) + + return color + +#--------------------------------------------- +def initGraph(): + data={} + + #default data points + data['width']=2400 + data['height']=1200 + data['ref_y']=300 #250 + data['mis_bar']=50 + data['query_y']=70 + data['x']=50 + data['xlabel']=110 + data['bar_thick']=20 + data['query_thick']=15 + data['reference_thick']=15 + data['x_legend']=600 + data['y_legend']=750 + data['x_legend_picto']=100 + data['tick_up']=25 + data['tick_down']=40 + + return data + +#--------------------------------------------- +def drawRectangle(draw,start,end,y,thickness,bar_color,text,font,text_color): + + draw.rectangle((start,y,end,y+thickness), bar_color) + +#--------------------------------------------- +def drawRefText(draw,start,end,y,thickness,bar_color,text,font,text_color): + + #draw.text((start-80, y-2), text, font=font, fill=text_color)###position of SEQUENCE label + draw.text((start+2, y-25), text, font=font, fill=text_color)###position of SEQUENCE label + draw.line((start,y-21,start,y-1),text_color,width=1) + draw.line((end,y-21,end,y-1),text_color,width=1) + +#--------------------------------------------- +def drawQryText(draw,start,end,y,thickness,bar_color,text,font,text_color): + + #draw.text((start-80, y-2), text, font=font, fill=text_color)###position of SEQUENCE label + draw.text((start+2, y+17), text, font=font, 
fill=text_color)###position of SEQUENCE label + draw.line((start,y+thickness+1,start,y+thickness+20),text_color,width=1) + draw.line((end,y+thickness+1,end,y+thickness+20),text_color,width=1) + + +#--------------------------------------------- +def plotFrequency(freq,size,scale,draw,color,data,leap): + + pos_range=list(range(0,size,leap)) + + for pos in pos_range: + if pos in freq: + freq_list=freq[pos] + previous=data['mis_bar'] + identity_range=list(range(99,-1,-1))## RESTRICT SI AXIS was 9 + for id in identity_range: + cumul=int(0) + for freq_keys in freq_list: + if id >= freq_keys: + cumul += freq_list[freq_keys] + + if cumul<1: + color_now="white" + elif cumul==1: + color_now="blue" + elif cumul==2: + color_now="cyan" + elif cumul==3: + color_now="green" + elif cumul==4: + color_now="orange" + elif cumul>=5: + color_now="dirtyred" + #elif cumul==6: + # color_now="salmon" + #elif cumul==7: + # color_now="orange" + #elif cumul>=8: + # color_now="yellow" + + extension=((200-(2*id))+data['mis_bar']) #RESTRICT SI AXIS y was 20 + compressed=(pos/scale)+data['x'] #x + + if color_now != "white": + #print "%i, %i, %i, %i %s" % (compressed,previous,compressed,extension,color_now) + draw.line((compressed,previous,compressed,extension),color[color_now]) + + previous = extension + + +#--------------------------------------------- +def drawRelationship(reference_list, order_ref, query_list, order_qry, match_list, scale, mismatch, block_length, alignment_file, freq, reflength, leap, format, formatdict, protein, alpha, refgff, qrygff, qrylength, fontpath): + + scaled_reflength=int(reflength/scale) + + ###Initialize new graph + data=initGraph() + + ###Get colors + color=initColor(alpha) + + ###Set Font + arialfont = fontpath + "/arial.ttf" + pilfont = fontpath + "/helvR14.pil" + + #default all font sizes to default (it is quite small, you must provide a valid path for best results) + font_18=ImageFont.load_default() + font_20=ImageFont.load_default() + 
fontb_20=ImageFont.load_default() + fontbi_20=ImageFont.load_default() + fontb_22=ImageFont.load_default() + font_24=ImageFont.load_default() + fontb_24=ImageFont.load_default() + + if os.path.exists(arialfont): ### Will check for truetype first, they look better + ###Set Font (truetype) + font_18=ImageFont.truetype(fontpath + "/arial.ttf",18) + font_20=ImageFont.truetype(fontpath + "/arial.ttf",20) + fontb_20=ImageFont.truetype(fontpath + "/arialbd.ttf",20) + fontbi_20=ImageFont.truetype(fontpath + "/arialbi.ttf",20) + fontb_22=ImageFont.truetype(fontpath + "/arialbd.ttf",22) + font_24=ImageFont.truetype(fontpath + "/arial.ttf",24) + fontb_24=ImageFont.truetype(fontpath + "/arialbd.ttf",24) + elif os.path.exists(pilfont): ### Will settle for PIL font, if ttf do not exist. Otherwise, sticking with default. + ###Set font (pil) (sizes are limited, made to be compatible with TT fonts) + font_18=ImageFont.load_path(fontpath + "/helvR14.pil") + font_20=ImageFont.load_path(fontpath + "/helvR18.pil") + fontb_20=ImageFont.load_path(fontpath + "/helvB18.pil") + fontbi_20=ImageFont.load_path(fontpath + "/helvBO18.pil") + fontb_22=ImageFont.load_path(fontpath + "/helvB24.pil") + font_24=ImageFont.load_path(fontpath + "/helvR24.pil") + fontb_24=ImageFont.load_path(fontpath + "/helvB24.pil") + + ###Define Image + back = Image.new("RGBA", (data['width'],data['height']),(0,0,0,0)) + bdraw = ImageDraw.Draw(back) + + poly = Image.new("RGBA", (data['width'],data['height'])) + draw = ImageDraw.Draw(poly) + + ###Draw Legend + date=subprocess.getstatusoutput("date") + + ###Picto Legend + bdraw.text((data['x_legend_picto']+280,data['y_legend']), "Legend", font=fontb_24, fill=color['black']) + y_legend = data['y_legend']+30 + bdraw.text((data['x_legend_picto'],y_legend), "Frequency Repeated", font=fontbi_20, fill=color['black']) + + #### + bdraw.text((data['x_legend'],y_legend), "Mismatch threshold : %i %%" % mismatch, font=font_20, fill=color['black']) + 
bdraw.text((data['x_legend'],y_legend+20), "Minimum block length : %i bp" % block_length, font=font_20, fill=color['black']) + bdraw.text((data['x_legend'],y_legend+40), "Scale (pixel:bp) 1:%i" % scale, font=font_20, fill=color['black']) + #bdraw.text((data['x_legend'],y_legend+60), "%s" % date[1], font=font_20, fill=color['black']) + #### + + y_legend+=25 + bdraw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['blue']) + bdraw.text((data['x_legend_picto']+25,y_legend), "Single copy", font=font_20, fill=color['black']) + y_legend+=25 + bdraw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['cyan']) + bdraw.text((data['x_legend_picto']+25,y_legend), "2X", font=font_20, fill=color['black']) + y_legend+=25 + bdraw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['green']) + bdraw.text((data['x_legend_picto']+25,y_legend), "3X", font=font_20, fill=color['black']) + y_legend+=25 + bdraw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['orange']) + bdraw.text((data['x_legend_picto']+25,y_legend), "4X", font=font_20, fill=color['black']) + y_legend+=25 + bdraw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['dirtyred']) + bdraw.text((data['x_legend_picto']+25,y_legend), "5X and over", font=font_20, fill=color['black']) + y_legend+=25 + #bdraw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['salmon']) + #bdraw.text((data['x_legend_picto']+25,y_legend), "6X", font=font_20, fill=color['black']) + #y_legend+=25 + #bdraw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['orange']) + 
#bdraw.text((data['x_legend_picto']+25,y_legend), "7X", font=font_20, fill=color['black']) + #y_legend+=25 + #bdraw.rectangle((data['x_legend_picto'],y_legend,data['x_legend_picto']+20,y_legend+20), outline=color['black'], fill=color['yellow']) + #bdraw.text((data['x_legend_picto']+25,y_legend), "8X and over", font=font_20, fill=color['black']) + y_legend+=40 + + bdraw.text((data['x_legend_picto'],y_legend), "Collinear Blocks", font=fontbi_20, fill=color['black']) + y_legend+=30 + + bdraw.polygon((data['x_legend_picto']-5,y_legend,data['x_legend_picto'],y_legend+25,data['x_legend_picto']+25,y_legend+25,data['x_legend_picto']+20,y_legend), outline=color['navy'], fill=color['lightblue']) + bdraw.text((data['x_legend_picto']+30,y_legend), "Direct", font=font_20, fill=color['black']) + + y_legend+=30 + bdraw.polygon((data['x_legend_picto']-5,y_legend,data['x_legend_picto']+25,y_legend+25,data['x_legend_picto']-5,y_legend+25,data['x_legend_picto']+25,y_legend), outline=color['purple'], fill=color['salmon']) + bdraw.text((data['x_legend_picto']+30,y_legend), "Inverted", font=font_20, fill=color['black']) + + y_legend+=40 + + bdraw.text((data['x_legend_picto'],y_legend), "Other", font=fontbi_20, fill=color['black']) + y_legend+=30 + + bdraw.rectangle((data['x_legend_picto']-5,y_legend+5,data['x_legend_picto']+25,y_legend+7), fill=color['red']) + bdraw.text((data['x_legend_picto']+30,y_legend), "Mismatch threshold", font=font_20, fill=color['black']) + y_legend+=30 + + bdraw.rectangle((data['x_legend_picto']-5,y_legend,data['x_legend_picto']+25,y_legend+(data['reference_thick']/2)), outline=color['black'], fill=color['yellow']) + bdraw.text((data['x_legend_picto']+30,y_legend), "Sequence features", font=font_20, fill=color['black']) + y_legend+=30 + + + bdraw.rectangle((data['x_legend_picto']+5,y_legend,data['x_legend_picto']+10,y_legend+data['reference_thick']), outline=color['red'], fill=color['red']) + bdraw.text((data['x_legend_picto']+30,y_legend), "Ambiguous bases 
(Ns)", font=font_20, fill=color['black']) + y_legend+=30 + + ####Draw Reference + for ref in order_ref: #RLW + scaled_offset_len = 0 #RLW + if ref in reference_list: #RLW + scaled_offset_len = reference_list[ref]['offset_len'] #RLW + + init_coord=int(data['x']+scaled_offset_len) #RLW + last_coord=int(data['x']+scaled_offset_len+reference_list[ref]['scaled_len']) #RLW + + ### draw top rectangle for REFERENCE + drawRectangle(bdraw, init_coord, last_coord,data['ref_y'],data['reference_thick'],color['black'],ref,fontb_20,color['black']) + drawRefText(bdraw, init_coord, last_coord,data['ref_y'],data['reference_thick'],color['black'],ref,fontb_20,color['black']) + + ###DRAW histo + x_range=list(range(data['x'], last_coord, 100)) + ### draw kbp scale + if reflength >= 10000: + for position in x_range: + base_number=int(((position-data['x'])*scale)/1000) + bdraw.rectangle((position,data['tick_up'],position+2,data['tick_down']),color['black']) + bdraw.text((position-5, data['tick_up']-25), "%i" % base_number, font=font_20, fill=color['black']) + else: + for position in x_range: + base_number=(position-data['x']) * scale + base_number=float(base_number) + base_number=base_number/1000 + bdraw.rectangle((position,data['tick_up'],position+2,data['tick_down']),color['black']) + #print "%i %i %i >>> %.2f <<< %i,%i" % (data['x'],position,scale,base_number,data['x_legend_picto'],position) + bdraw.text((position-5, data['tick_up']-25), "%.1f" % base_number, font=font_24, fill=color['black']) + + bdraw.text((data['x']+scaled_reflength+60,data['tick_up']-25), "kbp", font=fontb_24, fill=color['black']) + ###Mismatch Axis + #identity=int(0) + identity = int(0) ### RESTRICT SI AXIS was 90 + grid_range=list(range(data['mis_bar'], data['ref_y']-30, 20)) + + ###Draw grid + for grid in grid_range: + bdraw.rectangle((data['x'],grid,data['x']+scaled_reflength+5,grid+2),color['lightgrey']) + bdraw.text((data['x']+scaled_reflength+10, grid-7), "%i " % identity, font=font_18, fill=color['black']) 
+ identity += 10 ### RESTRICT SI AXIS was 1 + + ###Draw grid metric + bdraw.text((data['x']+scaled_reflength+60, 150), "% Identity", font=font_18, fill=color['black']) + + ###Draw Repeat Frequency + plotFrequency(freq,reflength,scale,bdraw,color,data,leap) + + ###Draw Threshold + threshold_line= data['mis_bar'] + (200-(2*mismatch)) + draw.rectangle((data['x'],threshold_line,data['x']+scaled_reflength+5,threshold_line+2), color['red']) + + ###Draw Query & Collinear blocks + (current_position, LCB)=(data['x'], 10) + + ####Draw Query (only if not in reference list) + decay = 350 + for qry in order_qry: #RLW + + scaled_offset_len = 0 #RLW + if qry in query_list: #RLW + scaled_offset_len = query_list[qry]['offset_len'] #RLW + + init_coord=int(data['x']+scaled_offset_len) #RLW + last_coord=int(data['x']+scaled_offset_len+query_list[qry]['scaled_len']) #RLW + + ### draw bottom rectangle for QUERY + if qry not in order_ref: ### only draw if query not same as ref ##### REMOVE WHEN THEY HAVE SAME NAMES + drawRectangle(bdraw, init_coord, last_coord,data['ref_y']+decay,data['query_thick'],color['black'],qry,fontb_20,color['black']) + drawQryText(bdraw, init_coord, last_coord,data['ref_y']+decay,data['query_thick'],color['black'],qry,fontb_20,color['black']) + + plotflag = 0 + ###Draw blocks and relationships + for match in match_list: + allhit=match_list[match] + for hit in allhit: + start1_list=allhit[hit] + stop=current_position + query_list[match]['offset_len'] + #if match != hit: + # drawRectangle(bdraw,current_position,stop,data['ref_y']+decay,data['query_thick'],color['black'], hit, fontb_20, color['black']) + s1_list_sort=list(start1_list.keys()) + s1_list_sort.sort() + for start1 in s1_list_sort: + end1_list=start1_list[start1] + e1_list_sort=list(end1_list.keys()) + e1_list_sort.sort() + for end1 in e1_list_sort: + start2_list=end1_list[end1] + s2_list_sort=list(start2_list.keys()) + s2_list_sort.sort() + for start2 in s2_list_sort: + end2_list=start2_list[start2] + 
e2_list_sort=list(end2_list.keys()) + e2_list_sort.sort() + for end2 in e2_list_sort: + + if start2 > end2: + outline_color="purple" + fill_color="salmon" + else: + outline_color="navy" + fill_color="lightblue" + ###draw ORF on upper + size_qry = end1 - start1 + size_ref = end2 - start2 + buf_ref = ((size_ref - (size_ref/3)) / 2) + buf_qry = ((size_qry - (size_qry/3)) / 2) + ss1 = start1 + buf_qry + ee1 = end1 - buf_qry + ss2 = start2 + buf_ref + ee2 = end2 - buf_ref + print("%s (%i-%i) hits %s :: mismatch %.2f target(%i) block %i target (%i) " % (match,start1,end1,hit,end2_list[end2],mismatch,size_ref,block_length)) + + if match == hit:### COMPARE 1 SEQUENCE AGAINST ITSELF + print("SAME QUERY AND HIT NAME...WILL COMPARE AGAINST ITSELF"); + if start1 <= start2: + + repeat_size = start2 - start1 + size_chunk = int(decay * repeat_size / scaled_reflength) + print("%i %i %i" % (size_chunk, repeat_size, scaled_reflength)) + size_chunk += 50 + + if protein: + if end2_list[end2] <= mismatch: ###does it pass the mismatch cutoff? + ss1 = int(ss1) + ss2 = int(ss2) + bdraw.rectangle((data['x']+ss1,data['ref_y']+data['reference_thick']+7,data['x']+ee1,data['ref_y']+data['reference_thick']+17), outline=color["black"], fill=color["red"])###REF1 + bdraw.rectangle((data['x']+ss2,data['ref_y']+data['reference_thick']+7,data['x']+ee2,data['ref_y']+data['reference_thick']+17), outline=color["black"], fill=color["red"])###REF2 + draw.arc((data['x']+ss1,data['ref_y']+data['reference_thick']+15-size_chunk,data['x']+ss2,data['ref_y']+data['reference_thick']+17+size_chunk),360,180, color[outline_color]) + back.paste(poly, mask=poly) + del draw + poly = Image.new("RGBA", (data['width'],data['height'])) + draw = ImageDraw.Draw(poly) + plotflag = 1 + else: + if end2_list[end2] <= mismatch: ###does it pass the mismatch cutoff? 
+ start1 = int(start1) + start2 = int(start2) + draw.rectangle((data['x']+start1,data['ref_y'],data['x']+end1,data['ref_y']+data['reference_thick']), outline=color[outline_color], fill=color[fill_color])###REF LEFT REPEAT COLINEAR BLOCKS + draw.rectangle((data['x']+start2,data['ref_y'],data['x']+end2,data['ref_y']+data['reference_thick']), outline=color[outline_color], fill=color[fill_color])###REF RIGHT REPEAT COLINEAR BLOCKS + draw.arc((data['x']+start1,data['ref_y']+data['reference_thick']-size_chunk,data['x']+start2,data['ref_y']+data['reference_thick']+size_chunk),360,180, color[outline_color])###DRAW ARC AT REPEAT EDGE ONLY + back.paste(poly, mask=poly) + del draw + poly = Image.new("RGBA", (data['width'],data['height'])) + draw = ImageDraw.Draw(poly) + plotflag = 1 + + else : #COMPARE + + if protein: + + if end2_list[end2] <= mismatch: ###does it pass the mismatch cutoff? + draw.polygon((data['x']+ss1,data['ref_y']+data['reference_thick']+17,data['x']+ss2,data['ref_y']+decay-17,data['x']+ee2,data['ref_y']+decay-17,data['x']+ee1,data['ref_y']+data['reference_thick']+17), outline=color[outline_color], fill=color[fill_color]) + draw.rectangle((data['x']+ss1,data['ref_y']+data['reference_thick']+7,data['x']+ee1,data['ref_y']+data['reference_thick']+17), outline=color["black"], fill=color["red"]) ###REF + draw.rectangle((data['x']+ss2,data['ref_y']+decay-17,data['x']+ee2,data['ref_y']+decay-7), outline=color["black"], fill=color["red"])###QRY + back.paste(poly, mask=poly) + del draw + poly = Image.new("RGBA", (data['width'],data['height'])) + draw = ImageDraw.Draw(poly) + plotflag = 1 + else: + if end2_list[end2] <= mismatch: ###does it pass the mismatch cutoff? 
+ draw.polygon((data['x']+start2,data['ref_y']+data['reference_thick'],data['x']+start1,data['ref_y']+decay,data['x']+end1,data['ref_y']+decay,data['x']+end2,data['ref_y']+data['reference_thick']), outline=color[outline_color], fill=color[fill_color]) + draw.rectangle((data['x']+start2,data['ref_y'],data['x']+end2,data['ref_y']+data['reference_thick']), outline=color[outline_color], fill=color[fill_color]) ###REF COLINEAR BLOCKS + draw.rectangle((data['x']+start1,data['ref_y']+decay,data['x']+end1,data['ref_y']+decay+data['reference_thick']), outline=color[outline_color], fill=color[fill_color])###QRY COLINEAR BLOCKS + back.paste(poly, mask=poly) + del draw + poly = Image.new("RGBA", (data['width'],data['height'])) + draw = ImageDraw.Draw(poly) + plotflag = 1 + + #if plotflag==0: + # print "It looks like there is nothing to plot, try increasing -m 99 -- FATAL" + # sys.exit(1) + + #enhancer = ImageEnhance.Sharpness(im) + #for i in range(8): + # factor = i / 4.0 + # enhancer.enhance(factor).show("Sharpness %f" % factor) + + ### draw features on reference + for ref in order_ref: + + scaled_offset_len = 0 #RLW + if ref in reference_list: #RLW + scaled_offset_len = reference_list[ref]['offset_len'] #RLW + + if ref in refgff: + for scaledexstart in refgff[ref]: + exstart = data['x'] + scaledexstart + scaled_offset_len + exend = data['x'] + refgff[ref][scaledexstart]['end'] + scaled_offset_len + draw.rectangle((exstart,data['ref_y'],exend,data['ref_y']+(data['reference_thick']/2)),outline=refgff[ref][scaledexstart]['color'], fill=refgff[ref][scaledexstart]['color'])###features/exons + if 'npos' in reference_list[ref]: + for nstart in reference_list[ref]['npos']: ### draw Ns + nstart = data['x'] + (nstart/scale) + scaled_offset_len + draw.line((nstart,data['ref_y']+1,nstart,data['ref_y']+data['reference_thick']-1),color['red'],width=1)###reference + if 'zpos' in reference_list[ref]: ### draw Zs + for nstart in reference_list[ref]['zpos']: + nstart = data['x'] + 
(nstart/scale) + scaled_offset_len + draw.line((nstart,data['ref_y']+1,nstart,data['ref_y']+data['reference_thick']-1),color['lime'],width=1) + + + ### draw features on query + for qry in order_qry: + + if qry not in order_ref: ### only draw if query not same as ref + + scaled_offset_len = 0 #RLW + if qry in query_list: #RLW + scaled_offset_len = query_list[qry]['offset_len'] #RLW + + if qry in qrygff: + for scaledexstart in qrygff[qry]: + exstart = data['x'] + scaledexstart + scaled_offset_len + exend = data['x'] + qrygff[qry][scaledexstart]['end'] + scaled_offset_len + draw.rectangle((exstart,data['ref_y']+decay+(data['reference_thick']/2)+1,exend,data['ref_y']+decay+data['reference_thick']),outline=qrygff[qry][scaledexstart]['color'], fill=qrygff[qry][scaledexstart]['color'])###features/exons + if 'npos' in query_list[qry]: ### draw Ns + for nstart in query_list[qry]['npos']: + nstart = data['x'] + (nstart/scale) + scaled_offset_len + draw.line((nstart,data['ref_y']+decay+1,nstart,data['ref_y']+decay+data['reference_thick']-1),color['red'],width=1) + if 'zpos' in query_list[qry]: ### draw Zs + for nstart in query_list[qry]['zpos']: + nstart = data['x'] + (nstart/scale) + scaled_offset_len + draw.line((nstart,data['ref_y']+decay+1,nstart,data['ref_y']+decay+data['reference_thick']-1),color['lime'],width=1) + + + #### FINAL IMAGE PROCESSING + back.paste(poly, mask=poly) + file = "xmv-" + alignment_file + "_m" + str(mismatch) + "_b" + str(block_length) + "_r" + str(leap) + "_c" + str(scale) + "." + format + print("Saving %s..." 
#---------------------------------------------
def _usage():
    """Print the command-line help text for xmatchview to stdout."""
    # sys.argv[0] (the script name) rather than the whole argv list, which
    # would be rendered as a Python list repr.
    print("Usage: %s v1.2.4" % sys.argv[0])
    print("-x alignment file (cross_match .rep or Pairwise mApping Format .paf) ")
    print("-s reference genome fasta file")
    print("-q query contig/genome fasta file")
    print("-e reference features (eg. exons) coordinates GFF tsv file (start end) - optional")
    print("-y query features (eg. exons) coordinates GFF tsv file (start end) - optional")
    print("-m mismatch threshold (e.g. -m 10 allows representation of repeats having up to 10% mismatch")
    print("-b length (bp) of similarity block to display")
    print("-c scale (pixel to basepair scale, for displaying the image)")
    print("-r leap (bp) to evaluate repeat frequency (smaller numbers will increase the resolution, but will affect drastically the run time. recommended -r=50)")
    print("-a alpha value, from 0 (transparent) to 255 (solid, default)")
    print("-f output image file format (png, tiff, jpeg, or gif) NOTE: png and tiff recommended.")
    print("-p full path to the directory with fonts on your system (please refer to the documentation for fonts used)")
    #print "-z transform bacterial ORF into protein (i.e. plot alignment between ORF products? 1/0) -not fully tested-";
    print("* Files for the -s and -q options must correspond to fasta files used to run cross_match")


#---------------------------------------------
def main():
    """Command-line entry point: parse options, validate them, draw the plot.

    Reads options from sys.argv, validates them, loads the optional feature
    (GFF) files, the reference/query FASTA files and the alignment file
    (.rep or .paf), computes repeat frequencies and hands everything to
    drawRelationship().

    Every validation failure prints a message and terminates the process
    through sys.exit() with a non-zero status (SystemExit).
    """
    # Option flag -> name of the setting it fills in.
    int_options = {
        "-m": "mismatch",
        "-b": "block_length",
        "-c": "scale",
        "-r": "leap",
        "-a": "alpha",
    }
    str_options = {
        "-x": "alignment_file",
        "-s": "reference_file",
        "-q": "query_file",
        "-f": "format",
        "-e": "ref_gff_file",
        "-y": "qry_gff_file",
        "-p": "fontpath",
    }
    settings = {
        "ref_gff_file": None,
        "qry_gff_file": None,
        "alignment_file": None,
        "reference_file": None,
        "query_file": None,
        "format": "png",
        "fontpath": "",
        "mismatch": 0,
        "block_length": 0,
        "scale": 0,
        "leap": 0,
        "alpha": 255,
    }
    # The -z (protein) option is disabled in this version, so the flag
    # always stays 0; it is still forwarded to the downstream functions.
    protein = 0
    formatdict = {'png':'PNG','gif':'GIF','tiff':'TIFF','jpeg':'JPEG'}

    try:
        opts, _ = getopt.getopt(sys.argv[1:], "x:s:q:m:r:c:b:f:p:e:y:a:")
    except getopt.GetoptError as err:
        # Unknown flag or missing argument: show help instead of a traceback.
        print("Error: %s" % err)
        _usage()
        sys.exit(1)

    for flag, raw in opts:
        if flag in int_options:
            try:
                settings[int_options[flag]] = int(raw)
            except ValueError:
                print("%s requires an integer value, got '%s' -- fatal" % (flag, raw))
                sys.exit(1)
        elif flag in str_options:
            settings[str_options[flag]] = str(raw)

    alignment_file = settings["alignment_file"]
    reference_file = settings["reference_file"]
    query_file = settings["query_file"]
    ref_gff_file = settings["ref_gff_file"]
    qry_gff_file = settings["qry_gff_file"]
    image_format = settings["format"]
    fontpath = settings["fontpath"]
    mismatch = settings["mismatch"]
    block_length = settings["block_length"]
    scale = settings["scale"]
    leap = settings["leap"]
    alpha = settings["alpha"]

    # A value of 0 for -m/-b/-c/-r is treated the same as "not supplied".
    required_missing = (
        alignment_file is None
        or reference_file is None
        or query_file is None
        or mismatch == 0
        or block_length == 0
        or scale == 0
        or leap == 0
    )
    if required_missing:
        _usage()
        sys.exit(1)

    #====Graph Format
    if image_format not in formatdict:
        print("Not a valid Graph Format. Please Select: png, tiff, jpeg, or gif. NOTE: png and tiff recommended.")
        sys.exit(1)

    #====Mismatch checks
    if mismatch < 0 or mismatch > 99:
        print("-m must be a valid number between 0-99")
        sys.exit(1)

    #===Scale checks
    if scale < 1:
        print("Not a possible scale. Make sure you select a number >=1.")
        sys.exit(1)

    #====Alpha checks
    if alpha < 0 or alpha > 255:
        print("-a must be a valid number between 0-255")
        sys.exit(1)

    #====File checks
    checkFile(alignment_file)
    checkFile(reference_file)
    checkFile(query_file)

    ###OPTIONAL, FOR FEATURES REPRESENTATION
    (refgff, qrygff) = ({}, {})
    if ref_gff_file is not None:
        checkFile(ref_gff_file)
        print("Reading reference feature file %s ..." % (ref_gff_file))
        refgff = readGFF(ref_gff_file, scale)
        print("done.")

    if qry_gff_file is not None:
        checkFile(qry_gff_file)
        print("Reading query feature file %s ..." % (qry_gff_file))
        qrygff = readGFF(qry_gff_file, scale)
        print("done.")

    #====Parse Fasta Files
    (order_ref, reference, reflength) = readFasta(reference_file, scale)
    (order_qry, query, qrylength) = readFasta(query_file, scale)

    #====Raise error if features out of bounds
    data = initGraph()
    if reflength / data['width'] > scale:
        estscale = int(reflength / data['width']) + 1
        sys.exit("\n\n! The reference sequence is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (estscale))
    if qrylength / data['width'] > scale:
        estscale = int(qrylength / data['width']) + 1
        sys.exit("\n\n! The query sequence is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (estscale))

    print("Reading alignment file %s ..." % (alignment_file))
    (nocdt, match) = ({}, {})
    if alignment_file.endswith("rep"):
        (nocdt, match) = readCrossMatch(alignment_file, mismatch, block_length, reference, query, scale)
    elif alignment_file.endswith("paf"):
        (nocdt, match) = readPAF(alignment_file, mismatch, block_length, reference, query, scale)
    else:
        print("The alignment file provided (-x %s) does not end in .rep (cross_match) or .paf (PAF) -- fatal" % alignment_file)
        sys.exit(1)

    print("done.")
    print("Computing Repeat frequencies...")
    freq = generateCoords(nocdt, reflength, leap, protein)
    print("done.")
    print("Drawing repeats...")
    drawRelationship(reference, order_ref, query, order_qry, match, scale, mismatch, block_length, alignment_file, freq, reflength, leap, image_format, formatdict, protein, alpha, refgff, qrygff, qrylength, fontpath)

#---------------------------------------------
#Main Call

# Run only when executed as a script, and let a successful run finish with
# exit status 0 (failures above already exit with a non-zero status).
if __name__ == "__main__":
    main()
d786286..99eefd9 100755 --- a/xmatchview-conifer.py +++ b/xmatchview-conifer.py @@ -1017,7 +1017,7 @@ def main(): fontpath=str(v) if (alignment_file == None or reference_file == None or query_file == None or mismatch == 0 or block_length == 0 or scale ==0): - print("Usage: %s v1.2.3" % (sys.argv[0:])) + print("Usage: %s v1.2.4" % (sys.argv[0:])) print("-x alignment file (cross_match .rep or Pairwise mApping Format .paf) ") print("-s reference genome fasta file") print("-q query contig/genome fasta file") @@ -1077,6 +1077,15 @@ def main(): (order_ref, reference, reflength)=readFasta(reference_file, scale) (order_qry, query, qrylength)=readFasta(query_file, scale) + #====Raise error if features out of bounds + data=initGraph() + if reflength / data['width'] > scale: + estscale = int(reflength / data['width']) + 1 + sys.exit("\n\n! The reference sequence is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (estscale)) + if qrylength / data['width'] > scale: + estscale = int(qrylength / data['width']) + 1 + sys.exit("\n\n! The query sequence is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (estscale)) + print("Reading alignment file %s ..." % (alignment_file)) match = {} if alignment_file.endswith("rep"): diff --git a/xmatchview-hive.py b/xmatchview-hive.py index 3c0cce5..a85daab 100755 --- a/xmatchview-hive.py +++ b/xmatchview-hive.py @@ -910,7 +910,7 @@ def main(): alpha = float(v) if (txt_file1 == None or txt_file2 == None or txt_file3 == None or align_file1 == None or align_file2 == None or align_file3 == None or block_length == 0 or scale == 0): - print("Usage: %s v1.2.3" % (sys.argv[0:])) + print("Usage: %s v1.2.4" % (sys.argv[0:])) print("-x alignment file [1 vs. 2] (cross_match .rep or Pairwise mApping Format .paf)") print("-y alignment file [1 vs. 3] (cross_match .rep or Pairwise mApping Format .paf)") print("-z alignment file [3 vs. 
2] (cross_match .rep or Pairwise mApping Format .paf)") @@ -992,6 +992,18 @@ def main(): (order2, ref2, length2)=readText(txt_file2, scale) (order3, ref3, length3)=readText(txt_file3, scale) + #====Raise error if features out of bounds + data=initGraph() + if length1 / data['width'] > scale: + estscale = int(length1 / data['width']) + 1 + sys.exit("\n\n! The sequence in %s is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (txt_file1, estscale)) + if length2 / data['width'] > scale: + estscale = int(length2 / data['width']) + 1 + sys.exit("\n\n! The sequence in %s is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (txt_file2, estscale)) + if length3 / data['width'] > scale: + estscale = int(length3 / data['width']) + 1 + sys.exit("\n\n! The sequence in %s is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (txt_file3, estscale)) + mismatch = 100 - seqidentity #====Parse Alignment files diff --git a/xmatchview.py b/xmatchview.py index 743afdd..dcbe978 100755 --- a/xmatchview.py +++ b/xmatchview.py @@ -1004,7 +1004,7 @@ def main(): fontpath=str(v) if (alignment_file == None or reference_file == None or query_file == None or mismatch == 0 or block_length == 0 or scale ==0 or leap == 0): - print("Usage: %s v1.2.3" % (sys.argv[0:])) + print("Usage: %s v1.2.4" % (sys.argv[0:])) print("-x alignment file (cross_match .rep or Pairwise mApping Format .paf) ") print("-s reference genome fasta file") print("-q query contig/genome fasta file") @@ -1064,6 +1064,15 @@ def main(): (order_ref, reference, reflength)=readFasta(reference_file, scale) (order_qry, query, qrylength)=readFasta(query_file, scale) + #====Raise error if features out of bounds + data=initGraph() + if reflength / data['width'] > scale: + estscale = int(reflength / data['width']) + 1 + sys.exit("\n\n! 
The reference sequence is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (estscale)) + if qrylength / data['width'] > scale: + estscale = int(qrylength / data['width']) + 1 + sys.exit("\n\n! The query sequence is predicted to extend beyond the plot width, you must increase the scale to at least %i --fatal." % (estscale)) + print("Reading alignment file %s ..." % (alignment_file)) (nocdt, match) = ({},{}) if alignment_file.endswith("rep"):