diff --git a/FragMisincorporation_plot.pdf b/FragMisincorporation_plot.pdf
deleted file mode 100644
index e972fb4..0000000
Binary files a/FragMisincorporation_plot.pdf and /dev/null differ
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..ed6195c
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,18 @@
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
+# copies of the Software, and to permit persons to whom the Software is 
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+# SOFTWARE.
+
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..e217680
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,13 @@
+# file GENERATED by distutils, do NOT edit
+README.txt
+setup.py
+bin/mapDamage
+mapdamage/__init__.py
+mapdamage/align.py
+mapdamage/composition.py
+mapdamage/parseoptions.py
+mapdamage/rscript.py
+mapdamage/seq.py
+mapdamage/tables.py
+mapdamage/version.py
+mapdamage/Rscripts/mapDamage.R
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..ba889eb
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include mapdamage/Rscripts/mapDamage.R
diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..52fe42f
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,90 @@
+Introduction
+------------
+
+mapDamage is a Perl script that tracks DNA damage patterns among ancient DNA sequencing reads generated by Next-Generation Sequencing platforms. 
+In addition to the mapDamage Perl script, SAMtools, BEDtools and R are mandatory and must be present in your $PATH.
+
+http://sourceforge.net/projects/samtools/files/samtools/
+
+http://code.google.com/p/bedtools/downloads/list
+
+http://www.r-project.org/
+
+The original page with examples, datasets and result files is there:
+
+http://geogenetics.ku.dk/publications/mapdamage/
+
+News in version 0.3.6
+---------------------
+
+* exchange colors for adenine/cytosine in fragmentation patterns, could be confusing with G>A in blue
+* bug fix with BAM files obtained from tmap and bowtie
+* hard clipping support
+
+Usage
+-----
+
+Three main commands can be used: map, merge and plot. The -c option in map allows to chain all the three steps. A detailed description is presented below:
+
+* 1. map
+
+    ./mapDamage.pl map -i input_SAM -d directory -r reference_fasta -c -t my_title
+
+    REQUIRED
+        -i : input SAM-file without header. SAM format, version 1.4 is described in this pdf file
+        -r : input reference fasta file, with headers identical to the ones in the input_SAM file
+
+    OPTIONAL
+        -l : maximal read length, in nucleotides to consider [default: 70]
+        -a : size, in nucleotides, of the genomic region to be retrieved before and after reads [default: 10]
+        -d : folder for writing output-files, if not already present, this folder will be automatically created [default name: results_input_SAM.filtered]
+        -j : second input SAM-file without header; reads present in both SAM files will not be processed
+        -u : removes all non-unique reads based on the X1 and XT tags from the BWA mapper
+        -k : keep bed file and fasta file containing all the genomic regions [default: delete these files]
+        -f : outputs all read aligned against the reference genome, in a fasta file format
+        -c : complete analysis, map will be automatically followed by both merge and plot steps [default: not active]
+
+        when -c is active, the following are available to control the plot step:
+        -b : the number of reference nucleotides to consider for ploting base composition in the region located upstream and downstream of every read [default: 10]
+        -y : graphical y-axis limit for nucleotide misincorporation frequencies [default: 0.30]
+        -m : read length, in nucleotides, considered for plotting nucleotide misincorporations [default: 25]
+
+* 2. merge
+
+    ./mapDamage.pl merge -d directory
+
+    REQUIRED
+        -d : folder with output-files generated by the map command
+
+        OPTIONAL
+        -c : complete analysis, merge will be automatically followed by the plot step [default: not active]
+
+* 3. plot
+
+    ./mapDamage.pl plot -d directory
+
+    REQUIRED
+        -d : folder with output-files generated by the merge command
+
+    OPTIONAL
+        -l : read length, in nucleotides, considered for plotting nucleotide misincorporations [default: 25]
+        -m : graphical y-axis limit for nucleotide misincorporation frequencies [default: 0.30]
+        -a : the number of reference nucleotides to consider for ploting base composition in the region located upstream and downstream of every read [default: 10]
+        -t : title used for both graph and filename [default: folder name without extension]
+
+An example of FragMisincorporation_plot.pdf  can be downloaded [here](https://github.com/ginolhac/mapDamage/blob/master/FragMisincorporation_plot.pdf?raw=true)
+
+
+Citation
+--------
+If you use this script, please cite the following publication: Ginolhac A, Rasmussen M, Gilbert MT, Willerslev E, Orlando L.
+mapDamage: testing for damage patterns in ancient DNA sequences. Bioinformatics. 2011 27(15):2153-5
+http://bioinformatics.oxfordjournals.org/content/27/15/2153
+
+Limitations
+-----------
+The positions used in fragmentation files are not compatible with read length > 99999 nucleotides. This is compatible with all sequencing technologies currently available.
+Contact
+-------
+Please report bugs and suggest possible improvements to Aurelien Ginolhac by email. 
+
diff --git a/bin/mapDamage b/bin/mapDamage
new file mode 100755
index 0000000..c5ed8c3
--- /dev/null
+++ b/bin/mapDamage
@@ -0,0 +1,212 @@
+#!/usr/bin/env python
+# -*- coding: ASCII -*-
+from __future__ import print_function
+
+import sys
+import os
+
+# check if pysam if available
+module = "pysam"
+url = "http://code.google.com/p/pysam/"
+try:
+  __import__(module)
+
+except ImportError, e:
+  sys.stderr.write("Error: Could not import required module '%s':\n\t- %s\n" % (module,e))
+  sys.stderr.write("       If module is not installed, please download from '%s'.\n" % (url,))
+  sys.stderr.write("       A local install may be performed using the following command:\n")
+  sys.stderr.write("       $ python setup.py install --user\n\n")
+  sys.exit(1)
+
+import pysam
+
+import mapdamage
+from mapdamage.version import __version__
+
+
+"""
+
+plot damage patterns from a SAM/BAM file
+
+:Author: Aurelien Ginolhac
+:Contact: aginolhac@snm.ku.dk
+:Date: November 2012
+:Type: tool
+:Input: SAM/BAM
+:Output: tabulated tables, pdf
+
+"""
+
+
+def getCoordinates(read):
+  
+  if read.is_reverse: 
+    fivep = read.aend
+    threep = read.pos
+  else:
+    fivep = read.pos
+    threep = read.aend
+  return(fivep, threep)
+
+
+def getAround(coord, chrom, reflengths, lg, ref):
+  
+  i = j = 0
+  before = after = ""
+  if min(coord) - lg > 0:
+    i = min(coord) - lg
+  if max(coord) + lg > reflengths[chrom]:
+    j = reflengths[chrom]
+  else:
+    j = max(coord) + lg 
+
+  before = ref.fetch(chrom, i, min(coord))
+  after = ref.fetch(chrom, max(coord), j)
+  
+  return(before, after)
+
+
+def writeFasta(read,ref, seq, refseq, start, end, before, after, fout):
+  if read.is_reverse:
+    std = '-'
+  else:
+    std = '+'
+  # output coordinate in 1-based offset
+  fout.write(">%s:%d-%d\n%s\n" % (ref, start-len(before), start, before))
+  fout.write(">%s:%d-%d\n%s\n>%s_%s\n%s\n" % (ref, start+1, end+1, refseq, read.qname, std, seq))
+  fout.write(">%s:%d-%d\n%s\n" % (ref, end+2, end+2+len(after), after))
+
+
+
+""" main """
+
+def main(argv):
+
+  options = mapdamage.parseoptions.options(argv)
+  if not options:
+    return 1
+  
+  # plot using R if results folder already done
+  if options.plotonly:
+    mapdamage.rscript.plot(options)
+    return 0
+
+  # open mapped reads and corresponding reference
+  in_bam = pysam.Samfile(options.filename)
+  ref = pysam.Fastafile(options.ref)
+
+  # for misincorporation patterns, to record mismaches, from 5'-ends
+  misincorp = {}
+  # for fragmentation patterns, record base compositions in the reference around
+  dnacomp = {}
+
+  # fetch all references and associated lengths in nucleotides
+  reflengths = dict(zip(in_bam.references, in_bam.lengths))
+  # initialize table of misincorporations with zeros
+  misincorp = mapdamage.tables.initializeMut(misincorp, reflengths, options.length)
+  # initialise base composition tables with zeros
+  dnacomp =  mapdamage.tables.initializeComp(dnacomp, reflengths, options.around, options.length)
+
+
+  if not options.quiet:
+    print("\tReading from '%s'" % options.filename)
+    if options.verbose:
+      print("\t%d references are assumed in SAM/BAM file, for a total of %d nucleotides" % (len(reflengths), sum(reflengths.values())))
+    print("\tWriting results to '%s/'" % options.folder)
+
+  # reference in BAM must be in the fasta reference file
+  #if not referenceCheck():
+    #sys.stderr.write("Error: %s reference was not found in the reference file provided\n" % (chrom))
+  #  return 1
+
+
+  # open file handler to write alignements in fasta format
+  if options.fasta:
+    # use name of the sam/bam filename without extension
+    ffasta = os.path.splitext(os.path.basename(options.filename))[0]+'.fasta'
+    if not options.quiet:
+      print("\tWriting alignments in '%s'" % ffasta)
+    ff = open(options.folder+"/"+ffasta,"w")
+
+  counter = filtered = 0
+
+  # main loop
+  for read in in_bam:
+    counter += 1
+    
+    if read.is_unmapped: 
+        filtered +=1
+        continue
+
+    # external coordinates 5' and 3' , 0-based offset
+    coordinate = getCoordinates(read)
+    # fetch reference name, chromosome or contig names 
+    chrom = in_bam.getrname(read.tid)
+ 
+    (before, after) = getAround(coordinate, chrom, reflengths, options.around, ref)
+    refseq = ref.fetch(chrom, min(coordinate), max(coordinate))
+    # read.query contains aligned sequences while read.seq is the read itself 
+    seq = read.query 
+ 
+    # add gaps according to the cigar string
+    (seq, refseq) =  mapdamage.align.align(read.cigar, seq, refseq)
+    
+    # reverse complement read and reference when mapped reverse strand
+    if read.is_reverse:
+      refseq = refseq.translate(mapdamage.seq.table)[::-1]
+      seq = seq.translate(mapdamage.seq.table)[::-1]
+      beforerev = after.translate(mapdamage.seq.table)[::-1]
+      after = before.translate(mapdamage.seq.table)[::-1]
+      before = beforerev
+
+    # record soft clippinp when present
+    if len(mapdamage.align.parseCigar(read.cigar, 4)) > 0:
+      mapdamage.align.recordSoftClipping(mapdamage.align.parseCigar(read.cigar, 4), read, chrom, misincorp, options.length)
+    
+    # count misincorparations by comparing read and reference base by base
+    mapdamage.align.getMis(read, seq, refseq, chrom, options.length, misincorp, '5p')
+    # do the same with sequences align to 3'-ends
+    mapdamage.align.getMis(read, seq[::-1], refseq[::-1], chrom, options.length, misincorp, '3p') 
+
+    # compute base composition for genomic regions
+    mapdamage.composition.countRefComp(read, chrom, before, after, dnacomp)
+    # compute base composition for reads
+    mapdamage.composition.countReadComp(read, chrom, options.length, dnacomp)
+
+
+    if options.fasta:
+      writeFasta(read, chrom, seq, refseq, min(coordinate), max(coordinate), before, after, ff)
+
+    if options.verbose:
+      if counter % 50000 == 0:
+        print("\t%10d reads processed (%d unmapped)" % (counter, filtered))
+
+  if not options.quiet:
+    print("\tDone. %d reads read (%d unmapped)" % (counter, filtered))
+
+  # close file handlers
+  in_bam.close()
+  ref.close()
+  if options.fasta:
+    ff.close()
+
+  # open file handlers to output results
+  fmut = open(options.folder+"/"+"misincorporation.txt", 'w')
+  fcomp = open(options.folder+"/"+"dnacomp.txt", 'w')
+
+  # write summary tables to disk
+  mapdamage.tables.printMut(misincorp, options, fmut)
+  mapdamage.tables.printComp(dnacomp, options, fcomp)
+
+  # close file handlers
+  fmut.close()
+  fcomp.close()
+
+  # plot using R
+  if not options.nor:
+    mapdamage.rscript.plot(options)
+
+  return 0
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv[1:]))
diff --git a/dist/mapdamage-0.3.7.tar.gz b/dist/mapdamage-0.3.7.tar.gz
new file mode 100644
index 0000000..cb55f74
Binary files /dev/null and b/dist/mapdamage-0.3.7.tar.gz differ
diff --git a/mapDamage.pl b/mapDamage.pl
deleted file mode 100755
index 0aa3285..0000000
--- a/mapDamage.pl
+++ /dev/null
@@ -1,1574 +0,0 @@
-#!/usr/bin/perl -w
-
-
-# Add option -c to chained the three commands in a row with standard plot
-# added the plot options to be passed from map ->merge -> plot
-# solved read start and end + $around fall apart the reference extremi
-# exchange colors for adenine/cytosine fragmentation patterns, could be confusing with G>A in blue
-
-# This program is a free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation
-
-# BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-# FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
-# OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-# PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-# OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
-# TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
-# PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-# REPAIR OR CORRECTION.
-
-# Authors: Aurelien Ginolhac & Ludovic Orlando
-# Center of Geogenetics
-# University of Copenhagen, Denmark
-# contact: aginolhac@snm.ku.dk
-
-# If you use this script, please cite the associated publication:
-# Ginolhac A, Rasmussen M, Gilbert MT, Willerslev E, Orlando L. 2011
-# mapDamage: testing for damage patterns in ancient DNA sequences. Bioinformatics. 2011 27(15):2153-5
-
-use strict;
-use warnings;
-use Getopt::Std;
-
-my $version = "0.3.6";
-
-&printUsage if (@ARGV < 1);
-
-my $command = shift(@ARGV);
-my %func = (map=>\&map, merge=>\&merge, plot=>\&plot);
-
-die("Unknown command \"$command\", expected: map | merge | plot\n") if (!defined($func{$command}));
-
-&{$func{$command}};
-exit(0);
-
-# Main map subroutine ###
-
-sub map {
-
-my %opts = (i=>undef, d=>undef, l=>70, a=>10, f=>undef, r=>undef, j=>undef, u=>undef, k=>undef, c=>undef, m=>undef, b=>undef, t=>undef, n=>-1 );
-
-getopts('i:d:r:fl:a:j:ukcm:b:y:t:n:',\%opts);
-
-if(defined($opts{i}) and defined($opts{r})) { # mandatory options
-    
-    
-my $inputAncient = $opts{i};
-
-my $folder;
-my $dbFasta = $opts{r};
-if(defined($opts{a})){
-	if($opts{a} !~/^[0-9]+$/ ) {die("around is expected to be an integrer\n");}
-	if($opts{a} > 100) {print"Warning: the around option was set to $opts{a}, are you sure to retrieve such large regions? continuing anyway\n";}
-	if($opts{a} < 0) {die("The around position must be > -1 nucleotide\n");}
-}
-if(defined($opts{l})){
-	if($opts{l} !~/^[0-9]+$/) {die("length is expected to be an integrer\n");}
-	if($opts{l} < 1) {die("The length must be > 1 nucleotide\n");}
-	if($opts{l} > 99998) {die("The read length choosen, $opts{l}, is too large. Must be < 99998 nucleotides\n");}
-}
-
-
-my $cpus = $opts{t};
-my $around = $opts{a};
-my $seqLength = $opts{l};
-my $read = {};
-my $read2 = {};
-my $lengthDistrib = {};
-my $pattern = 'nuclMis';
-my $chromo	= 'hitPerChrom';
-my $summary = 'dnaFrag';
-my $classN = 'readComp';
-my $misinc  = 'nuclSumMis';
-my $types	= 'nuclComp';
-my $typesRev	= 'nuclCompReverse';
-my $max=0;
-my $cpt = 0;
-my $index = 0;
-my $i = 0;
-my $j=0;
-
-
-&checkExecutable;
-
-# exit if files are empty
-&checkFileEmpty($inputAncient);
-&checkFileEmpty($dbFasta);	# inspect reference file, size
-my $reference = &checkFastaRef($dbFasta); # inspect reference file, format
-
-
-# First, exclude hits which matched both on ancient & second genome
-# write no second hits in a new file
-# first will be indexed by the read name 
-
-my $fileOUT = $inputAncient.'.filtered';
-$fileOUT =~ s/\.bam/.sam/;
-my $unique = 0;
-my $first; 
-if( defined($opts{u})) { # if user want to remove non-unique hits ie., presence of suboptimal hits in X1 tag
-	$unique = 1;
-	$first = readSAMfile($inputAncient,$unique);
-	&writeSamFile ($first,$fileOUT);
-}
-else { # if nothing to remove, just use the file
-	$fileOUT = $inputAncient;
-	if($fileOUT =~ /\.bam$/) {$fileOUT = "samtools view $inputAncient | "; }
-}
-
-
-if(defined($opts{j})) {
-	if(!defined($opts{u})) {
-		$first = readSAMfile($inputAncient,$unique);
-		$fileOUT = $inputAncient.".filtered";
-		$fileOUT =~ s/\.bam/.sam/;
-	}
-	
-	my $inputFile  = $opts{j};
-	&checkFileEmpty($inputFile);
-	# second will be indexed by the read name 
-	my $second  = readSAMfile($inputFile,$unique);
-	# now compare 
-	&compare2SamFiles ($first,$second,$fileOUT);
-	undef($second); # free memory
-}
-
-# free memory
-undef($first);
-
-# filtered file written, trimmed for multiple hits or / and second file
-open(F_READ,"$fileOUT") or die ("Cannot open main file: $fileOUT\n");
-
-$index=0; 
-
-# initializing lengthDistrib & opening file to write distrib
-$lengthDistrib = initializeHash(0,1000,'+','-',%$lengthDistrib);
-
-while ( my $line = <F_READ>  ){ # store in RAM read information
-		
-	chomp($line);
-	next if ( $line  =~ m/^@/); 	# if SAM file provided with hearder, skip it
-	my @temp = split(/\t/,$line);
-	next if($temp[1] & 0x0004) ; # if no filtering was performed, skip at least unmapped read
-	### reading SAM information per read, 0-offset
-	# 0 read name, 
-	# 1 strand
-	# 2 genomic reference
-	# 3 position, strand forward 1-based leftmost
-	# 4 score alignement MAQ
-	# 5 CIGAR, alignment code
-	# 6 mate reference, forget about it 
-	# 7 1-based position of mate, forget about it
-	# 8 inferred insert size, forget about it
-	# 9 read sequence
-	# 10 read quality phred score
-	$index++;	# increment counter, read will be indexed by 1-offset
-
-	# Reservoir sampling, limit number of reads to value set via -n if positive
-	if (($opts{n} > 0) && ($index > $opts{n})) {
-	    $cpt=int(rand($index)) + 1;
-	    next if ($cpt > $opts{n});
-	} 
-	else {
-	    $cpt=$index;
-	}
-	
-	if($temp[1] & 0x0010) {	$read->{$cpt}{strand} = 16; } # at this step, unmapped read were removed
-	else { $read->{$cpt}{strand} = 0; }
-	$temp[2] =~ s/[:\|\s]//g;  # character '|' or others must be removed because of their interpretation by the shell 
-	# the reference name must be same
-	die("The reference $temp[2] doesn't correspond to any chromosomes in the reference, be sure to map and run mapDamage with the same reference file\n") if (!defined( $reference->{$temp[2]}));
-	$read->{$cpt}{ref} = $temp[2];	
-	$read->{$cpt}{name} = $temp[0]; # keep track of read name
-
-	if ( $temp[3] =~ m/([0-9]+)/ ) {
-		$read->{$cpt}{startPos} = $1; 
-	}
-	else { print "Problem with SAM format(position field)! $temp[3]\n";exit 1 }
-	if(defined($temp[5] )){
-		if ( $temp[5] =~ m/([0-9]+[MIDNSHP]+)/ ) { $read->{$cpt}{cigar} = $temp[5]; }
-	}
-	else { print "Problem with SAM format(CIGAR field)! $line\n";exit 1 }
-	if ( $temp[9] =~ m/([acgtnACGTN]+)/ ) { $read->{$cpt}{seq} = uc($1); } # put sequence in upper case
-	else { print "Problem with SAM format(sequence field)! $temp[9]\n";exit 1 }
-	my $posEnd = $temp[3] + length($temp[9]); 
-	$read->{$cpt}{endPos} = $posEnd;
-}
-close F_READ;
-print keys(%$read) . " reads put into RAM\n";
-
-
-foreach my $value (values(%$read)) {
-    # compute size length 
-    my $lg = length($value->{seq});
-    if($max < $lg) {
-	$max = $lg;
-    }
-
-    if ( $value->{strand} & 0x0010) {
-	$lengthDistrib->{'-'}{$lg}++;	
-    }	
-    else {
-	$lengthDistrib->{'+'}{$lg}++;
-    }	    
-}				
-
-
-
-# before writing results, create folder
-# with user name if provided via -d option
-if (defined($opts{d})) {$folder = $opts{d};}
-elsif(defined($opts{u})){ $folder = "results_".$opts{i}.".filtered"; $folder =~ s/\.bam/.sam/;}
-else {$folder  = "results_".$opts{i}; $folder =~ s/\.bam/.sam/;}
-mkdir($folder);
-
-# Cannot proceed if the BAM was empty
-if ($cpt == 0) {
-    print "Cannot proceed: Input was did not contain any records.\n";
-    open(FLAG,">$folder/README") or die "Cannot open file: $folder/README\n";
-    print FLAG "Could not generate tables: Input was did not contain any records.\n";
-    close FLAG;
-    exit 0;
-}
-
-# write Distriblength one file per strand
-open(PLUS,">$folder/lengthDistribStrd+.txt") or die "Cannot open file: $folder/lengthDistribStrd+.txt\n";
-print PLUS "Length\tOccurences\n";
-foreach my $lg (0..$max) {
-	print PLUS " $lg\t".$lengthDistrib->{'+'}{$lg}."\n";
-}
-close PLUS;
-open(MINUS,">$folder/lengthDistribStrd-.txt") or die "Cannot open file: $folder/lengthDistribStrd-.txt\n";
-print MINUS "Length\tOccurences\n";
-foreach my $lg (0..$max) {
-	print MINUS " $lg\t".$lengthDistrib->{'-'}{$lg}."\n";
-}
-close MINUS;
-
-# aware user if computation doesn't encompass the whole data, ie reads length > 70 bp
-if ( $max > $seqLength ) {	
-	print "#######\nWARNING! some reads are larger than the choosen length for computation ($max > $seqLength)\n#######\n";
-}
-print "recovery of genomic regions (-$around to +$around nt around match alignment)\n";
-open(F_FASTA,"$dbFasta") or die "Cannot open file: $dbFasta\n";
-
-my $bedFile = "$folder/genomic_regions.bed";
-open(BED, ">$bedFile") or die ("Can't write the bed file $bedFile\n");
-
-foreach my $key (sort {$a <=> $b} keys(%$read))	{
-	
-	# first round of looking at the CIGAR field to get the exact genomic coordinates to be retrieved
-	my (@CIG) =  $read->{$key}{cigar} =~ m/\d+\w/g;
-   
-	my $nbDel = 0;
-	my $nbIns =0;
-  
-	foreach my $var (@CIG)   {  # foreach group of integrer[MDI] ie, Match, Deletion or Insertion
-	
-		if (my ($I) = $var =~ m/(\d+)[IS]/)  {$nbIns  += $I;  } # if an insertion or a soft clipping, count how many
-		elsif (my ($D) = $var =~ m/(\d+)D/)  { $nbDel  += $D;  } # if an deletion, count how many
-	}
-	# then define les region limits to retrieve
-	my $start = ($read->{$key}{startPos}-$around-1); # bedtools is 0-offset based
-	if( $start < 0 ) {
-		#print "Problem with starting position, set to zero!($start for $key ".$read->{$key}{startPos}.")\n";
-		$start = 0;
-		#print "for $read->{$key}{name} left! $read->{$key}{startPos} flag $read->{$key}{strand}\n ";
-		$read->{$key}{shortLeft} = $read->{$key}{startPos} - 1; # keep track of how many bases we actually get
-	}
-	# for end position, adjust for gaps, ie., minus gap number in reference, add gap number in read
-	my $end = ($read->{$key}{endPos}+$around+$nbDel-$nbIns-1);
-	my $ref = $read->{$key}{ref};
-	# if the reach the reference end, stop when needed
-	if($end >= $reference->{$ref}) {
-		$end = $reference->{$ref}-1 ;
-		#print "I for $read->{$key}{name} right! $end and endpos ".($read->{$key}{endPos}+$nbDel-$nbIns)." pr ref $ref flag $read->{$key}{strand}\n";
-		$read->{$key}{shortRight} = abs($end - ($read->{$key}{endPos}+$nbDel-$nbIns) + 1); # keep track of how many bases we actually get
-		
-	}
-	# soft clipping at right position could be missing, adjust for indels
-	if(($read->{$key}{endPos}+$nbDel-$nbIns) >  $reference->{$ref} ) {
-		if ( $read->{$key}{strand} & 0x0010 ) {
-			$read->{$key}{short5} = ($read->{$key}{endPos}+$nbDel-$nbIns) -  $reference->{$ref};
-		}
-		else {
-			$read->{$key}{short3} = ($read->{$key}{endPos}+$nbDel-$nbIns) -  $reference->{$ref};
-		}
-		$read->{$key}{shortRight} = 0 ;
-		#print "II for $read->{$key}{name} ".($read->{$key}{endPos}+$nbDel-$nbIns)." sup $read->{$key}{ref}, $read->{$key}{short5} ###################\n"; 
-	}
-	print BED "$ref\t$start\t$end\n";
-}
-close BED;
-print "Genomic regions were written in $bedFile\n";
-print "Retrieving genomic regions using fastaFromBed, could take a while...\n";
-&checkFileEmpty($bedFile);
-open(BED, "$bedFile") or die("Cannot read file $bedFile\n");
-my $bedFasta = "$folder/genomic_sequences.tab";
-system("fastaFromBed -fi $dbFasta -bed $bedFile -fo $bedFasta -tab");
-&checkFileEmpty($bedFasta);
-print "All genomic regions retrieved and written in $bedFasta, reading this file...\n";
-open(BFASTA, "$bedFasta") or die ("Cannot read file $bedFasta\n");
-$cpt = 0;
-my $reference2 = {};
-while ( my $line = <BFASTA>) {
-	
-	$cpt++;
-	chomp($line);
-	my ($ref_coord, $seq) = split(/\t/, $line);
-	my @ref = split(/:/, $ref_coord);
-	if ( $seq eq "") { 
-		die("Problem reading genomic sequence! ($dbFasta $ref[0])\n"); # exit if sequence empty
-	}	
-	if ( $read->{$cpt}{ref} ne $ref[0]) { # QC is we read in a same order
-		die("Problem reading genomic sequence! ($ref[0] doesn't match ".$read->{$cpt}{ref}." for ".$read->{$cpt}{name}.")\n"); 
-	}
-	$reference2->{$ref[0]} = $reference->{$ref[0]}; # keep only the references we need if many
-	$seq = uc($seq); # MUST BE IN UPPERCASE FOR COMPARISON READ<->REFERENCE
-	$seq =~ s/[\n\r\e\t]//g;	 # erase carrier return or any space characters if any
-	my $seq2 = $read->{$cpt}{seq};
-	my (@CIG) =  $read->{$cpt}{cigar} =~ m/\d+\w/g;
-   
-	my $nbDel = 0;
-	my $nbIns =0;	
-	my $curr_pos = 0; 
-	my $c5p=0;
-	my $before = $around;
-	my $after = $around;
-	if(defined($read->{$cpt}{shortRight})) { 
-		if ( $read->{$cpt}{strand} & 0x0010 ) {
-			$before = $read->{$cpt}{shortRight};
-		}
-		else {
-			$after = $read->{$cpt}{shortRight};
-		}
-		#print "for $read->{$cpt}{name} revcomp $before $after\n ";
-	}
-	
-	if(defined($read->{$cpt}{shortLeft})) { 
-		if ( $read->{$cpt}{strand} & 0x0010 ) {
-			$after = $read->{$cpt}{shortLeft};
-		}
-		else {
-			$before = $read->{$cpt}{shortLeft};
-		}
-		#print "for $read->{$cpt}{name} $before $after\n ";
-	}
-	
-	foreach my $var (@CIG) { 
-		
-		if (my ($M) = $var =~ m/(\d+)[MX=]/) { $curr_pos += $M; } # match, only update the current position
-		elsif (my ($I) = $var =~ m/(\d+)I/)  {
-			
-			if(($curr_pos+$before-$c5p) > length($seq)) {
-				substr($seq,length($seq),0,"-" x $I); 
-			}
-			else {
-				substr($seq,($curr_pos+$before-$c5p),0,"-" x $I);  #insert x gap in the REFERENCE since insertion. Must correct with $around and soft clipping
-			}
-			$curr_pos  += $I;
-			$nbIns += $I;
-			#print "insert $I gaps at $curr_pos - $c5p ".($curr_pos+$before-$c5p)."\n";
-		}
-		elsif (my ($D) = $var =~ m/(\d+)D/) {
-			substr($seq2,$curr_pos,0,"-" x $D); #insert x gap in the READ since deletion
-			$curr_pos  += $D;
-			$nbDel += $D;
-		}  
-		elsif( my ($S) = $var =~ m/(\d+)S/) {
-			
-			if( $curr_pos == 0 ) {
-				$c5p=$S;
-				if($read->{$cpt}{strand} & 0x0010 ) { $read->{$cpt}{softClip3} = $S; }
-				else {  $read->{$cpt}{softClip5} = $S;}
-			}
-			else {	if($read->{$cpt}{strand} & 0x0010 ) { $read->{$cpt}{softClip5} = $S; }
-				else {  $read->{$cpt}{softClip3} = $S;}
-			}
-			$curr_pos  += $S;
-		}
-	}
-
-	# if read matches complementary reverse seq
-	# same for read sequence which was reverse complemented
-	if ( $read->{$cpt}{strand} & 0x0010 ) {
-		# reverse REFERENCE sequence
-		$seq =~ tr/ACGT/TGCA/;
-		$read->{$cpt}{seqRef} = reverse( $seq );
-		# reverse READ sequence
-		$seq2 =~ tr/ACGT/TGCA/;
-		$read->{$cpt}{seq} = reverse( $seq2 );
-	}
-	else {
-		$read->{$cpt}{seqRef} = $seq;
-		$read->{$cpt}{seq} = $seq2;
-	}
-	
-	# create after and before regions for handy computation
-	# {before} will contain the $around nucleotides just before the read
-	# and {after} will contain the $around nucleotides just after the read
-	$read->{$cpt}{before} = substr($read->{$cpt}{seqRef},0,$before); 
-	my $summ = (length($read->{$cpt}{seqRef})-$after);
-	$read->{$cpt}{after} = substr($read->{$cpt}{seqRef},$summ,$after);
-	if(($cpt%100000) == 0) {printf("%8d genomic regions computed\n",$cpt); }
-}
-close BFASTA;
-
-if(!defined($opts{k})) { 
-	system("rm $bedFasta $bedFile");
-	print"$bedFasta $bedFile deleted (use -k to keep these files)\n";
-}
-print "All genomic regions put in memory, computing and writing results in $folder\n";
-close (F_FASTA); # can close the reference genome file
- 
-if( defined($opts{f})) {
-	# if -f option, open fasta file handle
-	open(FILEFASTA,">$inputAncient.fasta") or die "Cannot open file: $dbFasta\n";
-	foreach my $key (sort {$a <=> $b} keys(%$read)) {
-		die("The ref sequence for ".$read->{$key}{name}." wasn't retrieved\n") if (!defined($read->{$key}{before}));
-		my $clip5p = 0;
-		my $clip3p = 0;
-		if(defined($read->{$key}{softClip5})) { # adjust the alignment with clipped bases
-			$clip5p = $read->{$key}{softClip5};
-		}
-		if(defined($read->{$key}{softClip3})) { 
-			$clip3p = $read->{$key}{softClip3};
-		}
-		#print "".$read->{$key}{name}." CIG $read->{$key}{cigar} std $read->{$key}{strand} lg ".length($read->{$key}{seq})." clip5 $clip5p clip3 $clip3p\n";
-		print FILEFASTA ">Ref_before\n".$read->{$key}{before}."\n";
-		print FILEFASTA ">Ref_".$read->{$key}{ref}."\n";
-		my $ref = $read->{$key}{seqRef};
-		
-		if(defined($read->{$key}{short5})) {
-			substr($ref,0,0,'-' x $read->{$key}{short5});  
-		}
-		if(defined($read->{$key}{short3})) {
-			substr($ref,length($ref),0,'-' x $read->{$key}{short3});
-		}
-		print FILEFASTA "$ref\n";
-		# strand is appended to the read name
-		print FILEFASTA ">".$read->{$key}{name}."_".$read->{$key}{strand}."\n";
-		my $gap = 0;
-		while ($gap < length($read->{$key}{before}) - $clip5p){
-			print FILEFASTA "-";
-			$gap++
-		}
-		print FILEFASTA "".$read->{$key}{seq}."\n>Ref_after\n";
-		$gap = 0;
-		while ($gap <(length($read->{$key}{before})+length($read->{$key}{seq})-$clip3p-$clip5p)){
-			print FILEFASTA "-";
-			$gap++
-		}
-		print FILEFASTA "".$read->{$key}{after}."\n";
-	}
-	close (FILEFASTA);
-	print"Alignments were written in the fasta file: $inputAncient.fasta\n";
-	
-}
-$cpt = 0;
-undef($reference); # useless now
-
-
-### Compute damage tracking ###
-
-# Compute per chromosome
-foreach my $chr (sort {$a cmp $b } keys(%$reference2)) {
-
-	$cpt++;
-	### Composition for each read
-	my $file_OUT	= $classN.'_'.$chr.'.txt';
-	open(FILE_OUT,">$folder/$file_OUT") or die ("Cannot write result file\n");
-	print FILE_OUT "Seq\tLength\tAl\tCl\tGl\tTl\tdell\tinsl\tAr\tCr\tGr\tTr\tdelr\tinsr\n";
-
-	### Hits per chromosome
-	my $filehit = $folder.'/'.$chromo.'_'.$chr.'_'.$around.'_'.$seqLength.'.txt';
-	open(FILEHIT,">$filehit") or die ("Cannot write result file\n");
-	print FILEHIT "Chr\tStrd\tNb\tLength\tMax\n";
-	my $read2 = {}; 
-	# new hash readRev which contains reads and ref in the reverse WITHOUT complementation
-	# in order to align left and avoid stair-like effects with reads of different lengths
-	my $readRev = {};
-	my $cptMinus=0;
-	my $cptPlus=0;
-	my $totLengthMinus=0;
-	my $totLengthPlus=0;
-	my $chrLength= 0;
-	# fetch chr length
-	$chrLength = $reference2->{$chr};
-	
-	foreach my $key (keys(%$read)) {
-			
-		# look for read which have mapped to the current chromosome
-		if(defined($chr) and defined($read->{$key}{ref})){
-			if ( $read->{$key}{ref} eq $chr ){
-				
-				if(defined($read->{$key}{softClip5})) {$read2->{$key}{softClip5}= $read->{$key}{softClip5} ; }
-				if(defined($read->{$key}{softClip3})) {$read2->{$key}{softClip3}= $read->{$key}{softClip3} ; }
-				$read2->{$key}{before} = $read->{$key}{before};
-				$read2->{$key}{after} = $read->{$key}{after};
-				$readRev->{$key}{seq} = reverse($read->{$key}{seq});
-				$readRev->{$key}{seqRef} = reverse($read->{$key}{seqRef});
-				# count hit per chr on both strands
-				if($read->{$key}{strand} & 0x0010){
-					$cptMinus++;
-					$totLengthMinus += length($read->{$key}{seq});
-					if(defined($read2->{$key}{softClip5})) { # don't count soft clipping
-						$totLengthMinus -= $read2->{$key}{softClip5};
-					}
-					if(defined($read2->{$key}{softClip3})) {
-						$totLengthMinus -= $read2->{$key}{softClip3};
-					}
-				}
-				else { 
-					$cptPlus++;
-					$totLengthPlus += length($read->{$key}{seq});
-					if(defined($read2->{$key}{softClip5})) {
-						$totLengthPlus -= $read2->{$key}{softClip5};
-					}
-					if(defined($read2->{$key}{softClip3})) {
-						$totLengthPlus -= $read2->{$key}{softClip3};
-					}
-				}
-			}
-		}
-		
-	}
-	print FILEHIT "$chr\t+\t$cptPlus\t$totLengthPlus\t$chrLength\n"; # write hitPerChrom data
-	print FILEHIT "$chr\t-\t$cptMinus\t$totLengthMinus\t$chrLength\n";
-	close (FILEHIT);
-	
-	# hash table which will contain the composition of read and around 
-	# nuclCompRev will only contain reversed read to align from 3'
-	my %nuclComp;
-	my %nuclCompRev;
-	foreach my  $strd (0,16){ ### Initializing both hash tables
-		foreach my $i (1..$around){
-			foreach my $base ('A','C','G','T','-','+','sum'){ # '-' means deletion in the read, '+' means insertion in the read (= gap in the reference sequence)
-				$j = 99999+$i;
-				${${$nuclComp{-$i}}{$strd}}{$base} = 0;
-				${${$nuclComp{$j}}{$strd}}{$base} = 0;	
-			}
-		}
-	}	
-	foreach my $base ('A','C','G','T','-','+','sum'){
-	
-		foreach my $i (1..$seqLength){
-		
-			foreach my $strd (0,16){
-			
-				${${$nuclComp{$i}}{$strd}}{$base} = 0;
-				# same for reverted
-				${${$nuclCompRev{$i}}{$strd}}{$base} = 0;
-			
-			}
-		}
-	}	
-	
-	foreach my $key (keys(%$read2)) { # do the damage tracking for the read of current chromosome
-	
-		### nuclComp, nuclCompReverse files and readComp files
-		
-		my %localACGT;	# hash indexed by position, per strand and per base for the read for readComp file
-		my %regionACGT;	# idem for region, ie before + read + after for readComp file
-		
-		foreach my $letter ('A','C','G','T','-','+'){ # initialize Hash
-			$localACGT{$letter} = 0;
-			$regionACGT{$letter} = 0;
-		}
-		my @temp = split(//,$read->{$key}{before}); # compute base composition of region BEFORE
-		my $strand = $read->{$key}{strand};
-		foreach my $i (1..scalar(@temp))	{
-			${${$nuclComp{-scalar(@temp)+$i-1}}{$strand}}{$temp[$i-1]}++; 
-			#print "before read $read->{$key}{name} letter $temp[$i-1] pos $i record (".(-scalar(@temp)+$i-1).")\n"
-		}
-		@temp = split(//,$read->{$key}{after}); # compute base composition of region AFTER
-		foreach my $i (1..scalar(@temp))	{
-			${${$nuclComp{99999+$i}}{$strand}}{$temp[$i-1]}++; 
-			#print "after read $read->{$key}{name} letter $temp[$i-1] pos $i (".(99999+$i).")\n"
-		}
-		
-		my $nbDel=0;
-		my $start=0;
-		my $end=0;
-		if(defined($read->{$key}{softClip5})) { $start = $read->{$key}{softClip5} ;}
-		if(defined($read->{$key}{softClip3})) { $end = $read->{$key}{softClip3} ;}
-		#print " $start-1, ".length($read->{$key}{seq})." - $start - $end  \n";
-		my $cleanSeq= substr($read->{$key}{seq}, $start, (length($read->{$key}{seq}) - $start - $end +1) );
-		@temp = split(//,$cleanSeq); # compute base composition for read sequence
-		my @tempRev = reverse(@temp);	# reverse sequence for nuclCompRev
-
-		my $newSeqLength = $seqLength; # avoid access to not allocated table, adjust to the read length
-		if ( scalar(@temp) < $seqLength ) {
-			$newSeqLength = scalar(@temp)-1;
-		}
-		#print "st $start end $end $newSeqLength lg ".$#temp."\n";
-		foreach my $i (1..$newSeqLength){
-			${${$nuclComp{$i}}{$strand}}{$temp[$i-1]}++;
-			${${$nuclCompRev{$i}}{$strand}}{$tempRev[$i-1]}++;
-			$localACGT{$temp[$i-1]}++;
-			if( $temp[$i-1] eq '-'){
-				$nbDel++;	# count deletion to get number of letter in sequence, without gaps
-			}
-		}
-		# Counting of A, C, T, G and deletion ('-') done, now count insertion number ('+') into read sequences
-		# which means count the gap number into the REFERENCE sequence adjusted to the read sequence limits
-		die("The ref sequence for ".$read->{$key}{name}." wasn't retrieved\n") if (!defined($read->{$key}{seqRef}));
-		my @tempREF = split(//,$read->{$key}{seqRef});
-		my $nbIns=0; # reset counter
-		foreach my $i (0..length($read->{$key}{seqRef})-1){
-			#print "seq $key\tpos $i\t$tempREF[$i]\n";
-			if(!defined($tempREF[$i])) {print "!! $i name ".$read->{$key}{name}."\n";}
-			$regionACGT{$tempREF[$i]}++; # compute reference matches read
-			if( $tempREF[$i] eq '-'){
-				${${$nuclComp{$i}}{$strand}}{'+'}++;
-				$localACGT{'+'}++;
-				
-				$nbIns++;	# count insertion to get number of letter in reference, without gaps
-			}
-		}	
-		# do not count deletion in the length, neither soft clipped bases
-		my $long = (length($read->{$key}{seq})-$nbDel - $start - $end);
-		
-		foreach my $letter ('A','C','G','T','-','+'){
-			
-			if($long != 0){ # compute base computation
-				
-				$regionACGT{$letter} = $regionACGT{$letter}  /  (length($read->{$key}{seqRef}) - $nbIns) ;
-				$localACGT{$letter}  = $localACGT{$letter}  / $long;
-			}
-		}
-		print FILE_OUT "".$read->{$key}{name}."\t$long";
-		foreach my $letter ('A','C','G','T','-','+')	{ 
-			if(defined($localACGT{$letter})){printf FILE_OUT "\t%.13f",$localACGT{$letter}; }
-		}
-		foreach my $letter ('A','C','G','T','-','+')	{ 
-			if(defined($regionACGT{$letter})){ printf FILE_OUT "\t%.13f",$regionACGT{$letter}; }
-		}
-		print FILE_OUT "\n";
-	}
-	close(FILE_OUT);	
-
-	#   nucleotide composition
-	my $fileOUT = $summary.'_'.$chr.'_'.$around.'_'.$seqLength.'.txt';
-
-	open(FILEOUT,">$folder/$fileOUT") or die ("Cannot write result file\n");
-	print FILEOUT "Strd\tPos\tA\tC\tG\tT\tdel\tins\tTot";
-	
-	foreach my $i (1..$around){ # sum base composition per strand for before and after
-		foreach my $base ('A','C','G','T','-','+'){
-			foreach my $strd (0,16){
-																					  
-				if(defined(${${$nuclComp{-$i}}{$strd}}{sum}) and defined(${${$nuclComp{-$i}}{$strd}}{$base})){ # sum for before region
-					${${$nuclComp{-$i}}{$strd}}{sum} = ${${$nuclComp{-$i}}{$strd}}{sum} + ${${$nuclComp{-$i}}{$strd}}{$base};
-				}
-				if(defined(${${$nuclComp{99999+$i}}{$strd}}{sum}) and defined(${${$nuclComp{99999+$i}}{$strd}}{$base})){ # sum for after region
-					${${$nuclComp{99999+$i}}{$strd}}{sum} = ${${$nuclComp{99999+$i}}{$strd}}{sum} + ${${$nuclComp{99999+$i}}{$strd}}{$base};
-				}
-			}
-		}
-	}
-	# now write result in the file
-	foreach my $strd (0,16){
-		foreach my $i (1..$around)	{
-			print FILEOUT "\n$strd\t-$i"; 
-			foreach my $base ('A','C','G','T','-','+','sum')	{ # for before region
-				if(defined(${${$nuclComp{-$i}}{$strd}}{$base})){
-					print FILEOUT "\t${${$nuclComp{-$i}}{$strd}}{$base}"; 
-				}
-			}
-			$j = 99999+$i;
-			print FILEOUT "\n$strd\t$j"; 
-			foreach my $base ('A','C','G','T','-','+','sum')	{ # for after region
-				if(defined(${${$nuclComp{$j}}{$strd}}{$base})){
-					print FILEOUT "\t${${$nuclComp{$j}}{$strd}}{$base}"; 
-				}
-			}
-		}
-	}
-	# compute sum for sequence
-	foreach my $i (1..$seqLength)
-	{
-		foreach my $base ('A','C','G','T','-','+')
-		{
-			foreach my $strd (0,16)
-			{
-				if(defined(${${$nuclComp{$i}}{$strd}}{sum}) and defined( ${${$nuclComp{$i}}{$strd}}{$base})) {
-					${${$nuclComp{$i}}{$strd}}{sum} = ${${$nuclComp{$i}}{$strd}}{sum} + ${${$nuclComp{$i}}{$strd}}{$base};
-				}
-				if(defined(${${$nuclCompRev{$i}}{$strd}}{sum}) and defined( ${${$nuclCompRev{$i}}{$strd}}{$base})) {
-					${${$nuclCompRev{$i}}{$strd}}{sum} = ${${$nuclCompRev{$i}}{$strd}}{sum} + ${${$nuclCompRev{$i}}{$strd}}{$base};
-				}
-			}
-		}
-	}
-	# write results for nucleotide composition by strand
-	foreach my $strd (0,16)
-	{
-		foreach my $i (1..$seqLength)	
-		{
-			print FILEOUT "\n$strd\t$i"; 
-			foreach my $base ('A','C','G','T','-','+','sum')	{ 
-				if(defined(${${$nuclComp{$i}}{$strd}}{$base})){
-					print FILEOUT "\t${${$nuclComp{$i}}{$strd}}{$base}"; 
-				}
-			}
-		}
-	}
-	# write results for nucleotide composition by strand for reverse sequence
-	# arbitratry choose that this reversed sequence is from position 200000 to 200000 + $seqLength
-	foreach my $strd (0,16)
-	{
-		foreach my $i (1..$seqLength)	
-		{
-			print FILEOUT "\n$strd\t".($i+199999).""; 
-			foreach my $base ('A','C','G','T','-','+','sum')	{ 
-				if(defined(${${$nuclCompRev{$i}}{$strd}}{$base})){
-					print FILEOUT "\t${${$nuclCompRev{$i}}{$strd}}{$base}"; 
-				}
-			}
-		}
-	}
-	print FILEOUT "\n";
-	close(FILEOUT);
-
-	# Misincorporations
-	my %nuclmis;
-	my %basesG;
-	my %nuclmisRev;
-	my %basesGRev;
-	my $fileMismOUT	= $types.'_'.$chr.'.txt';
-	my $fileMismRevOUT	= $typesRev.'_'.$chr.'.txt';
-	open(FILEMISMOUT,">$folder/$fileMismOUT") or die ("Cannot write result file\n");
-	open(FILEMISMREVOUT,">$folder/$fileMismRevOUT") or die ("Cannot write result file\n");
-	# initialising
-	foreach my $key  ('A','C','G','T')	{ 
-		foreach my $position (0..$seqLength-1)	{ 
-			${$basesG{$position}}{$key} = 0;
-			${$basesGRev{$position}}{$key} = 0;
-		} 
-	}
-	foreach my  $key2 ('G>A','C>T','A>G','T>C','A>C','A>T','C>G','C>A','T>G','T>A','G>C','G>T','A>-','T>-','C>-','G>-','->A','->T','->C','->G','S') {
-		foreach my $position (0..$seqLength-1)	{ 
-			${$nuclmis{$key2}}{$position} = 0 ; 
-			${$nuclmisRev{$key2}}{$position} = 0 ; 
-		}
-	} 
-	
-	# file open for mismatch pattern
-	$fileOUT = $folder.'/'.$pattern.'_'.$chr.'_'.$around.'_'.$seqLength.'.txt';
-	open(FILEOUT,">$fileOUT") or die ("Cannot write result file\n");
-	my %mismPos;
-	my %total;
-	my $fileTOT = $folder.'/'.$misinc.'_'.$chr.'_'.$around.'_'.$seqLength.'.txt';
-	open(FILETOT,">$fileTOT") or die ("Cannot write result file\n");
-	
-	foreach my $strand(0,16){
-		foreach my  $key2 ('G>A','C>T','A>G','T>C','A>C','A>T','C>G','C>A','T>G','T>A','G>C','G>T','A>-','T>-','C>-','G>-','->A','->T','->C','->G','S') {
-			foreach my $position (0..$seqLength-1)	{ 
-				${${$mismPos{$key2}}{$position}}{$strand} = 0;
-	
-			}
-		}
-	}
-	# initializing hash tables to avoid missing values
-	foreach my $strand (0,16) {
-		foreach my $key2 ('G>A','C>T','A>G','T>C','A>C','A>T','C>G','C>A','T>G','T>A','G>C','G>T','A>-','T>-','C>-','G>-','->A','->T','->C','->G','S','total')
-		{
-			${$total{$key2}}{$strand}=0;
-		}
-	}
-		
-	foreach my $key (keys(%$read2)){
-		
-		my $strd = $read->{$key}{strand};	
-		my $clip5p=0; 
-		my $clip3p=0; 
-		if(defined($read->{$key}{softClip5})) { 
-			$clip5p = $read->{$key}{softClip5} ;
-			for (my $i=0; $i < $clip5p; $i++) {
-				${${$mismPos{'S'}}{$i}}{0}++;	
-				${$nuclmis{'S'}}{$i}++;
-				${$total{'S'}}{0}++;	
-				${$total{total}}{0}++;					
-			}
-		}
-		if(defined($read->{$key}{softClip3})) { 
-			$clip3p = $read->{$key}{softClip3} ;
-			for (my $i=0; $i < $clip3p; $i++) {
-				${${$mismPos{'S'}}{$i}}{16}++;	
-				${$total{'S'}}{16}++;		
-				${$nuclmisRev{'S'}}{$i}++;
-				${$total{total}}{16}++;	
-			}			
-		}
-
-		my $seqREF = substr($read->{$key}{seqRef},length($read->{$key}{before}),(length($read->{$key}{seqRef}) - length($read->{$key}{before}) - length($read->{$key}{after})));
-		my $seqREFRev = substr($readRev->{$key}{seqRef},length($read->{$key}{after}),(length($read->{$key}{seqRef}) - length($read->{$key}{before}) - length($read->{$key}{after})));
-		if(defined($read->{$key}{short5})) {
-			substr($seqREF,0,0,'N' x $read->{$key}{short5});  # correct for read outside the reference extremi, N won't be counted
-			substr($seqREFRev,length($seqREFRev),0,'N' x $read->{$key}{short5});
-		}
-		if(defined($read->{$key}{short3})) {
-			substr($seqREFRev,0,0,'N' x $read->{$key}{short3});  # correct for read outside the reference extremi, N won't be counted
-			substr($seqREF,length($seqREF),0,'N' x $read->{$key}{short3});
-		}
-		my @ref = split(//,$seqREF);
-		my @refRev = split(//,$seqREFRev);
-		my $cleanSeq =  substr($read->{$key}{seq},$clip5p,(length($read->{$key}{seq}) - $clip5p - $clip3p));
-		my @seq = split(//,$cleanSeq);
-		my $cleanSeqRev =  substr($readRev->{$key}{seq},$clip3p,(length($read->{$key}{seq}) - $clip5p - $clip3p));
-		my @seqRev = split(//,$cleanSeqRev);
-		my $newSeqLength = $seqLength; # avoid access to not allocated table, adjust to the read length
-		if ( scalar(@seq) < $seqLength ) {
-			$newSeqLength = scalar(@seq);
-		}
-		#print "ref @ref $clip5p\nseq @seq $clip3p\nrevref @refRev\nrevseq @seqRev\n".$read->{$key}{short5}." ".$read->{$key}{short3}."\n";
-		foreach my $position (0..$newSeqLength-1){
-			#print "pos $position ref $ref[$position] and $seq[$position]\n";
-			if(defined($ref[$position]) and defined($refRev[$position]) and defined($seq[$position] ) and defined($seqRev[$position] ) ){
-				${$basesG{$position}}{$ref[$position]}++;	# compute base composition of reference
-				${$basesGRev{$position}}{$refRev[$position]}++;
-				# if base are different AND comparable = a letter in read sequence
-				if (( $ref[$position]  ne $seq[$position] ) and ($seq[$position] ne "")) {
-					my $mis = $ref[$position] .">".$seq[$position];	# hash is indexed by the mismatches ie letter>other letter. Letter are A, C, G, T and -. N are not taken into account
-					${${$mismPos{$mis}}{$position}}{$strd}++;	
-					${$nuclmis{$mis}}{$position}++;
-					${$total{$mis}}{$strd}++;						
-					${$total{total}}{$strd}++;	
-				}
-				# same for reverse nuclComp
-				if (( $refRev[$position]  ne $seqRev[$position] ) and ($seqRev[$position] ne "")) {
-					my $mis = $refRev[$position] .">".$seqRev[$position];				
-					${$nuclmisRev{$mis}}{$position}++;
-				}
-			}
-			else {
-				die("ref $ref[$position] refrev $refRev[$position] seq $seq[$position] revseq $seqRev[$position] not defined 5s $clip5p 3s $clip3p $read->{$key}{name} pos $position\n");
-			}
-		}		
-	}
-	print FILEMISMOUT "Pos\tA\tC\tG\tT\tTot\tG>A\tC>T\tA>G\tT>C\tA>C\tA>T\tC>G\tC>A\tT>G\tT>A\tG>C\tG>T\tA>-\tT>-\tC>-\tG>-\t->A\t->T\t->C\t->G\tS\n";
-	print FILEMISMREVOUT "Pos\tA\tC\tG\tT\tTot\tG>A\tC>T\tA>G\tT>C\tA>C\tA>T\tC>G\tC>A\tT>G\tT>A\tG>C\tG>T\tA>-\tT>-\tC>-\tG>-\t->A\t->T\t->C\t->G\tS\n";
-
-	foreach my $position (sort { $a <=> $b } (keys(%basesG)))
-	{
-		print FILEMISMOUT $position+1; 
-		print FILEMISMREVOUT $position+1; 
-		my $sumACGT = 0;
-		my $sumRevACGT = 0;
-		
-		foreach my $key ('A','C','G','T')	{ 
-			$sumACGT = $sumACGT + ${$basesG{$position}}{$key}; 
-			$sumRevACGT = $sumRevACGT + ${$basesGRev{$position}}{$key}; 
-			print FILEMISMOUT "\t${$basesG{$position}}{$key}"; 
-			print FILEMISMREVOUT "\t${$basesGRev{$position}}{$key}"; 
-		}
-		print FILEMISMOUT "\t$sumACGT";
-		print FILEMISMREVOUT "\t$sumRevACGT";
-		foreach my $key2 ('G>A','C>T','A>G','T>C','A>C','A>T','C>G','C>A','T>G','T>A','G>C','G>T','A>-','T>-','C>-','G>-','->A','->T','->C','->G','S') {
-			print FILEMISMOUT "\t${$nuclmis{$key2}}{$position}";
-			print FILEMISMREVOUT "\t${$nuclmisRev{$key2}}{$position}";
-		}
-		print FILEMISMOUT "\n";
-		print FILEMISMREVOUT "\n";
-	}
-	
-	close(FILEMISMOUT);	
-	close(FILEMISMREVOUT);
-	
-	# write results
-	print  FILEOUT "Strd\tPos";
-	print FILETOT "Strd";
-	foreach my $key  ('G>A','C>T','A>G','T>C','A>C','A>T','C>G','C>A','T>G','T>A','G>C','G>T','A>-','T>-','C>-','G>-','->A','->T','->C','->G','S') {
-		print FILEOUT "\t$key";
-		print FILETOT "\t$key";
-
-	}
-	print  FILEOUT "\n";
-	print FILETOT "\tTotal\n";
-	foreach my $strd (0,16){
-			
-		print FILETOT "$strd";
-		foreach my $position (0..$seqLength-1){
-			my $position2 = $position + 1; # NB. 0-offset shifted in 1-offset for final output 
-			print FILEOUT "$strd\t$position2";
-			foreach my $key ('G>A','C>T','A>G','T>C','A>C','A>T','C>G','C>A','T>G','T>A','G>C','G>T','A>-','T>-','C>-','G>-','->A','->T','->C','->G','S') {
-			
-				print FILEOUT "\t".${${$mismPos{$key}}{$position}}{$strd}."";
-			}	
-			print FILEOUT "\n";
-		}
-		foreach my $key2 ('G>A','C>T','A>G','T>C','A>C','A>T','C>G','C>A','T>G','T>A','G>C','G>T','A>-','T>-','C>-','G>-','->A','->T','->C','->G','S') {
-					
-			print FILETOT "\t".${$total{$key2}}{$strd}."";
-		}
-		print FILETOT "\t".${$total{total}}{$strd}."\n";
-	}
-	close (FILETOT);
-	close (FILEOUT);
-	# undef to be sure that read2 will be empty for the next analyzed chromosome
-	undef(%$read2);
-	printf ("Computation done for reference %5s [%2d/%2d]\n", $chr, $cpt,scalar(keys(%$reference2)) );
-}
-
-if(defined($opts{c}) ) {
-		
-	my $plotl; my $plota; my $ploty; my $plott;
-	if( defined($opts{m})) { $plotl = $opts{m};} else {$plotl = 25;}
-	if( defined($opts{b})) { $plota = $opts{b};} else {$plota = 10;}
-	if( defined($opts{y})) { $ploty = $opts{y};} else {$ploty = 0.3;}
-	if( defined($opts{t})) { $plott = $opts{t};} else {$plott = "plot";}
-	&merge($folder, $plotl, $plota, $ploty, $plott);
-}
-else {
-	print "\nmap terminated, you may use $0 merge -d $folder\n";
-}
-exit 0;
-}
-else{
-	&printUsageMap;
-}
-}
-
-## subroutine merge
-
-sub merge {
-
-my %opts = (d=>undef, c=>undef);
-
-if(defined($_[0]) ) {
-	$opts{d}= $_[0];
-	$opts{c}= 1;
-}
-
-getopts('d:',\%opts);
-
-if(defined($opts{d}) ) {
-
-my $folder 	= $opts{d};
-print "Merging files from $folder\n";
-
-my $fileNC = 'nuclComp.all.txt';
-my $fileNCR = 'nuclCompReverse.all.txt';
-my $fileDF = 'dnaFrag.all.txt';
-my $fileHIT = 'hitPerChrom.all.txt';
-my $fileRCOM = 'readComp.all.txt';
-my $fileSUM = 'nuclSumMis.all.txt';
-my $fileMIS = 'nuclMis.all.txt';
-
-### Reading sequences in $folder
-opendir(FILES, "$folder") or die "Can't open folder $folder\n";
-my @files = readdir(FILES); 
-closedir(FILES);
-
-my $hit=(); my $dFrag=(); my $mis =(); my $sumMis=(); my $nComp =(); my $nCompR =(); my $rComp =();
-
-foreach my $file (@files) { 
-
-	&checkFileEmpty($file);
-	# regex modified to accept more diverse reference names
-	if ( $file =~ m/^nuclComp_([0-9a-zA-Z]+).*\.txt/ ){		
-		$nComp = ReadFile($folder,$file,$nComp,$1); # add chromosome name 
-	}
-	if ( $file =~ m/^nuclCompReverse_([0-9a-zA-Z]+).*\.txt/ ){		
-		$nCompR = ReadFile($folder,$file,$nCompR,$1);
-	}
-	if ( $file =~ m/^dnaFrag_(.+)_[0-9]+_[0-9]+\.txt/ ){		
-		$dFrag = ReadFile($folder,$file,$dFrag,$1);
-	}
-	if ( $file =~ m/^hitPerChrom_/ ){		
-		$hit = ReadFile($folder,$file,$hit,"");
-	}
-	if ( $file =~ m/^readComp_(.+)\.txt/ ){		
-		$rComp = ReadFile($folder,$file,$rComp,$1);
-	}
-	if ( $file =~ m/^nuclMis_(.+)_[0-9]+_[0-9]+\.txt/ ){		
-		$mis = ReadFile($folder,$file,$mis,$1);
-	}
-	if ( $file =~ m/^nuclSumMis_(.+)_[0-9]+_[0-9]+\.txt/){		
-		$sumMis = ReadFile($folder,$file,$sumMis,$1);
-	}			
-}
-
-# nuclComp
-open(FILEOUT,">$folder/$fileNC") or die ("Cannot write result file\n");
-print FILEOUT "Chr\tPos\tA\tC\tG\tT\tTot\tG>A\tC>T\tA>G\tT>C\tA>C\tA>T\tC>G\tC>A\tT>G\tT>A\tG>C\tG>T\tA>-\tT>-\tC>-\tG>-\t->A\t->T\t->C\t->G\tS\n";
-foreach my $line (@$nComp)	{ print FILEOUT $line; }
-close(FILEOUT);
-print "Printing $fileNC done\n";
-
-#nuclCompReverse
-open(FILEOUT,">$folder/$fileNCR") or die ("Cannot write result file\n");
-print FILEOUT "Chr\tPos\tA\tC\tG\tT\tTot\tG>A\tC>T\tA>G\tT>C\tA>C\tA>T\tC>G\tC>A\tT>G\tT>A\tG>C\tG>T\tA>-\tT>-\tC>-\tG>-\t->A\t->T\t->C\t->G\tS\n";
-foreach my $line (@$nCompR)	{ print FILEOUT $line; }
-close(FILEOUT);
-print "Printing $fileNCR done\n";
-
-# hitPerChromosome
-open(FILEOUT,">$folder/$fileHIT") or die ("Cannot write result file\n");
-print FILEOUT "Chr\tStrd\tNb\tLength\tMax\n";
-foreach my $line (@$hit)	{ print FILEOUT $line; }
-close(FILEOUT);
-print "Printing $fileHIT done\n";
-
-# dnaFragmentation
-open(FILEOUT,">$folder/$fileDF") or die ("Cannot write result file\n");
-print FILEOUT "Chr\tStrd\tPos\tA\tC\tG\tT\tdel\tins\tTot\n";
-foreach my $line (@$dFrag)	{ print FILEOUT $line; }
-close(FILEOUT);
-print "Printing $fileDF done\n";
-
-# read Composition
-open(FILEOUT,">$folder/$fileRCOM") or die ("Cannot write result file\n");
-print FILEOUT "Chr\tSeq\tLength\tAl\tCl\tGl\tTl\tdell\tinsl\tAr\tCr\tGr\tTr\tdelr\tinsr\n";
-foreach my $line (@$rComp)	{ print FILEOUT $line; }
-close(FILEOUT);
-print "Printing $fileRCOM done\n";
-
-# Misincorporation
-open(FILEOUT,">$folder/$fileMIS") or die ("Cannot write result file\n");
-print FILEOUT "Chr\tStrd\tPos\tG>A\tC>T\tA>G\tT>C\tA>C\tA>T\tC>G\tC>A\tT>G\tT>A\tG>C\tG>T\tA>-\tT>-\tC>-\tG>-\t->A\t->T\t->C\t->G\tS\n";
-foreach my $line (@$mis)	{  print FILEOUT $line; }
-close(FILEOUT);
-
-# Sum Misincorporation
-open(FILEOUT,">$folder/$fileSUM") or die ("Cannot write result file\n");
-print "Printing $fileSUM ...\n";
-print FILEOUT "Chr\tStrd\tG>A\tC>T\tA>G\tT>C\tA>C\tA>T\tC>G\tC>A\tT>G\tT>A\tG>C\tG>T\tA>-\tT>-\tC>-\tG>-\t->A\t->T\t->C\t->G\tS\tTotal\n";
-foreach my $line (@$sumMis)	{ print FILEOUT $line; }
-close(FILEOUT);
-print "Printing $fileMIS done\n";
-
-if(defined($opts{c}) ) {
-	
-	my $plotl; my $plota; my $ploty; my $plott;
-	if( defined($_[1])) { $plotl = $_[1];} else {$plotl = 25;}
-	if( defined($_[2])) { $plota = $_[2];} else {$plota = 10;}
-	if( defined($_[3])) { $ploty = $_[3];} else {$ploty = 0.3;}
-	if( defined($_[4])) { $plott = $_[4]; } else {$plott = "plot";}
-	&plot($folder, $plotl, $plota, $ploty, $plott);
-}
-else {
-	print "\nmerge terminated, you may use $0 plot -d $folder\n"
-}
-exit 0;
-}
-else{
-	&printUsageMerge;
-}
-}
-
-## subroutine merge
-
-sub plot {
-
-my $output = `R --version`;
-die ("R is not present in your \$PATH, please install it on your system\n") if (!defined($output) or $output  !~ /version/) ;
-
-my %opts = (d=>undef, l=>25, a=>10, m=>0.3, t=>undef);
-
-getopts('d:l:a:m:t:',\%opts);
-
-if(defined($_[0]) ) {
-	$opts{d}= $_[0];
-	if( defined($_[1])) { $opts{l} = $_[1];}
-	if( defined($_[2])) { $opts{a} = $_[2];}
-	if( defined($_[3])) { $opts{m} = $_[3];}
-	if( defined($_[4])) { $opts{t} = $_[4];}
-}
-#print "length $opts{l} title set with $opts{t} and y-limit $opts{m}\n";
-if(defined($opts{d}) and -d $opts{d}) {
-    
-    
-my $folder = $opts{d};
-my $title ;
-if( $opts{d} =~ m/results_(.+)\.[sb]am/) {
-	$title = $1;
-}
-else {
-	$title = "plot";
-}
-
-if(defined($opts{t})) {
-	$opts{t} =~  s/[:\|\s]//g;
-	$title = $opts{t};
-}
-
-if($opts{a} != 20){
-	if($opts{a} !~/^[0-9]+$/ ) {die("around is expected to be an integrer\n");}
-	if($opts{a} > 100) {print"Warning: the around option was set to $opts{a}, are you sure to plot such large regions? continuing anyway\n";}
-	if($opts{a} < 1) {die("The around position must be > 1 nucleotide\n");}
-}
-if($opts{l} != 70){
-	if($opts{l} !~/^[0-9]+$/) {die("length is expected to be an integrer\n");}
-	if($opts{l} < 1) {die("The length must be > 1 nucleotide\n");}
-	if($opts{l} > 99998) {die("The read length choosen, $opts{l}, is too large. Must be < 99998 nucleotides\n");}
-}
-if($opts{m} != 0.3){
-	if($opts{m} !~/^[0-9\.]+$/) {die("y plot limit is expected to be a decimal number\n");}
-	if($opts{m} <= 0 or $opts{m} > 1) {die("y plot limit must be >= 0 and < 1\n");}
-}
-
-my $file = "$folder/mapDamage.R";
-open(FILEF,">$file") or die("Cannot open file $file\n");
-print FILEF "com<-read.csv(file=\"$folder/dnaFrag.all.txt\",sep=\"\\t\")\n";
-print FILEF "idx <- (com\$Pos > 99999) & (com\$Pos < 200000)\n";
-print FILEF "com\$Pos[idx] <- com\$Pos[idx] - 299999\n";
-print FILEF "idx <- (com\$Pos > 199999)\n";
-print FILEF "com\$Pos[idx] = -com\$Pos[idx] - 1\n";
-print FILEF "pdf(file=\"$folder/FragMisincorporation_$title.pdf\", title=\"mapDamage-$version $title\")\npar(oma=c(4,2,2,2),mar=c(1,2,1,1))\nlayout(matrix(c(1,2,3,4,5,6,7,8,9,9,10,10), 3, 4, byrow=TRUE))\n";
-# for letter A, before - read
-print FILEF "n<-numeric\nplot(com\$Pos,com\$A/com\$Tot,pch='.',xlim=c(-$opts{a},$opts{a}),ylim=c(0,0.5),col=\"blue\",main=\"A\",";
-print FILEF "cex.axis=0.8,las=2,xlab=\"\",ylab=\"\",lab=c(".(2*$opts{a}).",6,0.2), axes=FALSE)\n";
-print FILEF "axis(side=2,labels=TRUE,line=0,las=2,cex.axis=0.8)\naxis(side=1,labels=FALSE)\n";
-print FILEF "mtext(\"Frequency\",side=2,line=2.5,cex=0.7)\n";
-print FILEF "rect(0.5,0,$opts{a}.5,0.5,border=\"darkgrey\")\nsegments($opts{a}.5,0,$opts{a}.5,0.5,col=\"white\",lwd=2)\n";
-print FILEF "for (i in c(-$opts{a}:-1,1:$opts{a})) { n<-c(n,mean(com\$A[(com\$Pos==i)]/com\$Tot[(com\$Pos==i)],na.rm=T)) }\n";
-print FILEF "points(c(-$opts{a}:-1,1:$opts{a}),n[2:".(2*$opts{a}+1)."],pch=20,col=\"blue\",type=\"b\")\n";
-# for letter A, read - after
-print FILEF "n<-numeric\nplot(com\$Pos,com\$A/com\$Tot,pch='.',xlim=c(".(-200000-$opts{a}).",".(-200000+$opts{a})."),ylim=c(0,0.5),col=\"blue\",main=\"A\",";
-print FILEF "cex.axis=0.8,las=2,xlab=\"\",ylab=\"\",lab=c(".(2*$opts{a}).",6,0.2), axes=FALSE)\n";
-print FILEF "axis(side=4,labels=FALSE,line=0,las=2,cex.axis=0.8)\naxis(side=1,labels=FALSE)\n";
-print FILEF "rect(".(-200000-$opts{a}).".5,0,".(-200000).".5,0.5,border=\"darkgrey\")\nsegments(".(-200000-$opts{a}).".5,0,".(-200000-$opts{a}).".5,0.5,col=\"white\",lwd=2)\n";
-print FILEF "for (i in c(".(-200000-$opts{a}).":-200001,-199999:".(-200000+$opts{a}).")) { n<-c(n,mean(com\$A[(com\$Pos==i)]/com\$Tot[(com\$Pos==i)],na.rm=T)) }\n";
-print FILEF "points(c(".(-200000-$opts{a}).":-200001,-199999:".(-200000+$opts{a})."),n[2:".(2*$opts{a}+1)."],pch=20,col=\"blue\",type=\"b\")\n";
-print FILEF "mtext(\"$title\", side=3, line=1.2, cex=0.8)\n";
-#print FILEF "text(labels=c(\"$title\"), c(2,2),c(3,3))\n";
-# for letter C, before - read
-print FILEF "n<-numeric\nplot(com\$Pos,com\$C/com\$Tot,pch='.',xlim=c(-$opts{a},$opts{a}),ylim=c(0,0.5),col=\"green\",main=\"C\",";
-print FILEF "cex.axis=0.8,las=2,xlab=\"\",ylab=\"\",lab=c(".(2*$opts{a}).",6,0.2), axes=FALSE)\n";
-print FILEF "axis(side=2,labels=FALSE,line=0,las=2,cex.axis=0.8)\naxis(side=1,labels=FALSE)\n";
-print FILEF "rect(0.5,0,$opts{a}.5,0.5,border=\"darkgrey\")\nsegments($opts{a}.5,0,$opts{a}.5,0.5,col=\"white\",lwd=2)\n";
-print FILEF "for (i in c(-$opts{a}:-1,1:$opts{a})) { n<-c(n,mean(com\$C[(com\$Pos==i)]/com\$Tot[(com\$Pos==i)],na.rm=T)) }\n";
-print FILEF "points(c(-$opts{a}:-1,1:$opts{a}),n[2:".(2*$opts{a}+1)."],pch=20,col=\"green\",type=\"b\")\n";
-# for letter C, read - after
-print FILEF "n<-numeric\nplot(com\$Pos,com\$C/com\$Tot,pch='.',xlim=c(".(-200000-$opts{a}).",".(-200000+$opts{a})."),ylim=c(0,0.5),col=\"green\",main=\"C\",";
-print FILEF "cex.axis=0.8,las=2,xlab=\"\",ylab=\"\",lab=c(".(2*$opts{a}).",6,0.2), axes=FALSE)\n";
-print FILEF "axis(side=4,labels=TRUE,line=0,las=2,cex.axis=0.8)\naxis(side=1,labels=FALSE)\n";
-print FILEF "rect(".(-200000-$opts{a}).".5,0,".(-200000).".5,0.5,border=\"darkgrey\")\nsegments(".(-200000-$opts{a}).".5,0,".(-200000-$opts{a}).".5,0.5,col=\"white\",lwd=2)\n";
-print FILEF "for (i in c(".(-200000-$opts{a}).":-200001,-199999:".(-200000+$opts{a}).")) { n<-c(n,mean(com\$C[(com\$Pos==i)]/com\$Tot[(com\$Pos==i)],na.rm=T)) }\n";
-print FILEF "points(c(".(-200000-$opts{a}).":-200001,-199999:".(-200000+$opts{a})."),n[2:".(2*$opts{a}+1)."],pch=20,col=\"green\",type=\"b\")\n";
-# for letter G, before - read
-print FILEF "n<-numeric\nplot(com\$Pos,com\$G/com\$Tot,pch='.',xlim=c(-$opts{a},$opts{a}),ylim=c(0,0.5),col=\"black\",main=\"G\",";
-print FILEF "cex.axis=0.8,las=2,xlab=\"\",ylab=\"\",lab=c(".(2*$opts{a}).",6,0.2),axes=FALSE)\n";
-print FILEF "axis(side=2,labels=TRUE,line=0,las=2,cex.axis=0.8)\naxis(side=1,labels=TRUE,las=2,cex.axis=0.8)\n";
-print FILEF "rect(0.5,0,$opts{a}.5,0.5,border=\"darkgrey\")\nsegments($opts{a}.5,0,$opts{a}.5,0.5,col=\"white\",lwd=2)\n";
-print FILEF "mtext(\"Frequency\",side=2,line=2.5,cex=0.7)\n";
-print FILEF "for (i in c(-$opts{a}:-1,1:$opts{a})) { n<-c(n,mean(com\$G[(com\$Pos==i)]/com\$Tot[(com\$Pos==i)],na.rm=T)) }\n";
-print FILEF "points(c(-$opts{a}:-1,1:$opts{a}),n[2:".(2*$opts{a}+1)."],pch=20,col=\"black\",type=\"b\")\n";
-# for letter G, read - after
-print FILEF "n<-numeric\nplot(com\$Pos,com\$G/com\$Tot,pch='.',xlim=c(".(-200000-$opts{a}).",".(-200000+$opts{a})."),ylim=c(0,0.5),col=\"black\",main=\"G\",";
-print FILEF "cex.axis=0.8,las=2,xlab=\"\",ylab=\"\",lab=c(".(2*$opts{a}).",6,0.2), axes=FALSE)\n";
-print FILEF "axis(side=4,labels=FALSE)\n";
-print FILEF "axis(side=1,labels=seq(from=-$opts{a},to=$opts{a},by=1),at=seq(from=".(-200000-$opts{a}).",to=".(-200000+$opts{a}).",by=1),las=2,cex.axis=0.8)\n";
-print FILEF "rect(".(-200000-$opts{a}).".5,0,".(-200000).".5,0.5,border=\"darkgrey\")\nsegments(".(-200000-$opts{a}).".5,0,".(-200000-$opts{a}).".5,0.5,col=\"white\",lwd=2)\n";
-print FILEF "for (i in c(".(-200000-$opts{a}).":-200001,-199999:".(-200000+$opts{a}).")) { n<-c(n,mean(com\$G[(com\$Pos==i)]/com\$Tot[(com\$Pos==i)],na.rm=T)) }\n";
-print FILEF "points(c(".(-200000-$opts{a}).":-200001,-199999:".(-200000+$opts{a})."),n[2:".(2*$opts{a}+1)."],pch=20,col=\"black\",type=\"b\")\n";
-# for letter T, before - read
-print FILEF "n<-numeric\nplot(com\$Pos,com\$T/com\$Tot,pch='.',xlim=c(-$opts{a},$opts{a}),ylim=c(0,0.5),col=\"red\",main=\"T\",";
-print FILEF "cex.axis=0.8,las=2,xlab=\"\",ylab=\"\",lab=c(".(2*$opts{a}).",6,0.2), axes=FALSE)\n";
-print FILEF "axis(side=2,labels=FALSE)\naxis(side=1,labels=TRUE,las=2,cex.axis=0.8)\n";
-print FILEF "rect(0.5,0,$opts{a}.5,0.5,border=\"darkgrey\")\nsegments($opts{a}.5,0,$opts{a}.5,0.5,col=\"white\",lwd=2)\n";
-print FILEF "for (i in c(-$opts{a}:-1,1:$opts{a})) { n<-c(n,mean(com\$T[(com\$Pos==i)]/com\$Tot[(com\$Pos==i)],na.rm=T)) }\n";
-print FILEF "points(c(-$opts{a}:-1,1:$opts{a}),n[2:".(2*$opts{a}+1)."],pch=20,col=\"red\",type=\"b\")\n";
-# for letter T, read - after
-print FILEF "n<-numeric\nplot(com\$Pos,com\$T/com\$Tot,pch='.',xlim=c(".(-200000-$opts{a}).",".(-200000+$opts{a})."),ylim=c(0,0.5),col=\"red\",main=\"T\",";
-print FILEF "cex.axis=0.8,las=2,xlab=\"\",ylab=\"\",lab=c(".(2*$opts{a}).",6,0.2),axes=FALSE)\n";
-print FILEF "axis(side=4,labels=TRUE,line=0,las=2,cex.axis=0.8)\n";
-print FILEF "axis(side=1,labels=seq(from=-$opts{a},to=$opts{a},by=1),at=seq(from=".(-200000-$opts{a}).",to=".(-200000+$opts{a}).",by=1),las=2,cex.axis=0.8)\n";
-print FILEF "rect(".(-200000-$opts{a}).".5,0,".(-200000).".5,0.5,border=\"darkgrey\")\nsegments(".(-200000-$opts{a}).".5,0,".(-200000-$opts{a}).".5,0.5,col=\"white\",lwd=2)\n";
-print FILEF "for (i in c(".(-200000-$opts{a}).":-200001,-199999:".(-200000+$opts{a}).")) { n<-c(n,mean(com\$T[(com\$Pos==i)]/com\$Tot[(com\$Pos==i)],na.rm=T)) }\n";
-print FILEF "points(c(".(-200000-$opts{a}).":-200001,-199999:".(-200000+$opts{a})."),n[2:".(2*$opts{a}+1)."],pch=20,col=\"red\",type=\"b\")\n\n\n";
-
-
-### Misincorporation patterns
-print FILEF "nucl<-read.csv(file=\"$folder/nuclComp.all.txt\",sep=\"\\t\",check.names = FALSE)\n";
-print FILEF "vec<-(1:$opts{l})\nsumhit<-data.frame(cbind(vec,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))\n";
-#print FILEF "idx <- nucl\$A==0\n";
-#print FILEF "nucl\$A[idx] <- 10^10\n";
-#print FILEF "idx <- nucl\$C==0\n";
-#print FILEF "nucl\$C[idx] <- 10^10\n";
-#print FILEF "idx <- nucl\$G==0\n";
-#print FILEF "nucl\$G[idx] <- 10^10\n";
-#print FILEF "idx <- nucl\$T==0\n";
-#print FILEF "nucl\$T[idx] <- 10^10\n";
-#print FILEF "idx <- nucl\$Tot==0\n";
-#print FILEF "nucl\$Tot[idx] <- 10^10\n";
-print FILEF "insertion<-0\n";
-print FILEF "for (i in vec)	{\n";
-print FILEF "sumhit[i,2]<-sum(nucl[(nucl\$Pos == i), \"G>A\"])/sum(nucl[(nucl\$Pos == i), \"G\"])\n";
-print FILEF "sumhit[i,3]<-sum(nucl[(nucl\$Pos == i), \"C>T\"])/sum(nucl[(nucl\$Pos == i), \"C\"])\n";
-print FILEF "sumhit[i,4]<-sum(nucl[(nucl\$Pos == i), \"A>G\"])/sum(nucl[(nucl\$Pos == i), \"A\"])\n";
-print FILEF "sumhit[i,5]<-sum(nucl[(nucl\$Pos == i), \"T>C\"])/sum(nucl[(nucl\$Pos == i), \"T\"])\n";
-print FILEF "sumhit[i,6]<-sum(nucl[(nucl\$Pos == i), \"A>C\"])/sum(nucl[(nucl\$Pos == i), \"A\"])\n";
-print FILEF "sumhit[i,7]<-sum(nucl[(nucl\$Pos == i), \"A>T\"])/sum(nucl[(nucl\$Pos == i), \"A\"])\n";
-print FILEF "sumhit[i,8]<-sum(nucl[(nucl\$Pos == i), \"C>G\"])/sum(nucl[(nucl\$Pos == i), \"C\"])\n";
-print FILEF "sumhit[i,9]<-sum(nucl[(nucl\$Pos == i), \"C>A\"])/sum(nucl[(nucl\$Pos == i), \"C\"])\n";
-print FILEF "sumhit[i,10]<-sum(nucl[(nucl\$Pos == i), \"T>G\"])/sum(nucl[(nucl\$Pos == i), \"T\"])\n";
-print FILEF "sumhit[i,11]<-sum(nucl[(nucl\$Pos == i), \"T>A\"])/sum(nucl[(nucl\$Pos == i), \"T\"])\n";
-print FILEF "sumhit[i,12]<-sum(nucl[(nucl\$Pos == i), \"G>C\"])/sum(nucl[(nucl\$Pos == i), \"G\"])\n";
-print FILEF "sumhit[i,13]<-sum(nucl[(nucl\$Pos == i), \"G>T\"])/sum(nucl[(nucl\$Pos == i), \"G\"])\n";
-print FILEF "sumhit[i,14]<-sum(nucl[(nucl\$Pos == i), \"A>-\"])/sum(nucl[(nucl\$Pos == i), \"A\"])\n";
-print FILEF "sumhit[i,15]<-sum(nucl[(nucl\$Pos == i), \"T>-\"])/sum(nucl[(nucl\$Pos == i), \"T\"])\n";
-print FILEF "sumhit[i,16]<-sum(nucl[(nucl\$Pos == i), \"C>-\"])/sum(nucl[(nucl\$Pos == i), \"C\"])\n";
-print FILEF "sumhit[i,17]<-sum(nucl[(nucl\$Pos == i), \"G>-\"])/sum(nucl[(nucl\$Pos == i), \"G\"])\n";
-print FILEF "sumhit[i,22]<-sum(nucl[(nucl\$Pos == i), \"S\"])/sum(nucl[(nucl\$Pos == i), \"Tot\"])\n";
-print FILEF "NTs <- c(\"A\", \"C\", \"G\", \"T\")\n";
-print FILEF "for (j in seq(NTs)) {\n";
-print FILEF "insertion[j]<-sprintf(\"->%s\", NTs[j])\n";
-print FILEF "sumhit[i,17 + j] <-sum(nucl[(nucl\$Pos == i), insertion[j]]) /sum(nucl[(nucl\$Pos == i), NTs])\n";
-print FILEF "}\n}\n";
-# write a table for C>T frequencies
-print FILEF "write.table(sumhit[,3], file=\"$folder/5pCtoT_freq.txt\", sep=\"\\t\", quote=FALSE, col.names=c(\"pos\\t5pC>T\"))\n";
-# write G>A
-print FILEF "write.table(sumhit[,2], file=\"$folder/5pGtoA_freq.txt\", sep=\"\\t\", quote=FALSE, col.names=c(\"pos\\t5pG>A\"))\n";
-print FILEF "plot(sumhit\$vec,sumhit\$V4,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"grey\",lwd=1,type=\"l\",xlab=\"\",ylab=\"\",cex.lab=0.8,cex.axis=0.6,las=2,lab=c($opts{l},11,0.1),axes=FALSE)\n";
-print FILEF "axis(side=1,labels=TRUE,las=2,cex.axis=0.8)\n";
-if( $opts{m} > 0.1) { print FILEF "axis(side=2,labels=TRUE,las=2,cex.axis=0.8)\n"; }
-else { print FILEF "axis(side=2,labels=seq(from=0,to=$opts{m},by=0.01),at=seq(from=0,to=$opts{m},by=0.01),las=2,cex.axis=0.8)\n"; }
-print FILEF "rect(0.5,-0.01,$opts{l}.5,$opts{m},border=\"darkgrey\")\nsegments($opts{l}.5,-0.01,$opts{l}.5,$opts{m},col=\"white\",lwd=2)\n";
-print FILEF "mtext(\"Frequency\",side=2,line=2.8,cex=0.7)\n";
-print FILEF"lines(sumhit\$vec,sumhit\$V5,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF"lines(sumhit\$vec,sumhit\$V6,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF"lines(sumhit\$vec,sumhit\$V7,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF"lines(sumhit\$vec,sumhit\$V8,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF"lines(sumhit\$vec,sumhit\$V9,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF"lines(sumhit\$vec,sumhit\$V10,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF"lines(sumhit\$vec,sumhit\$V11,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF"lines(sumhit\$vec,sumhit\$V12,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF"lines(sumhit\$vec,sumhit\$V13,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-# soft clipping in orange
-#print FILEF"lines(sumhit\$vec,(sumhit\$V22),xlim=c(1,5),ylim=c(0,$opts{m}),col=\"orange\",lwd=1)\n";
-# deletions in green
-print FILEF"lines(sumhit\$vec,(sumhit\$V14+sumhit\$V15+sumhit\$V16+sumhit\$V17),xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"green\",lwd=1)\n";
-# insertions in purple
-print FILEF"lines(sumhit\$vec,(sumhit\$V18+sumhit\$V19+sumhit\$V20+sumhit\$V21),xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"purple\",lwd=1)\n";
-# G>A in blue
-print FILEF"lines(sumhit\$vec,sumhit\$V2,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"blue\",lwd=2)\n";
-# C>T in red
-print FILEF"lines(sumhit\$vec,sumhit\$V3,xlim=c(1,$opts{l}),ylim=c(0,$opts{m}),col=\"red\",lwd=2)\n";
-
-# do the same at 3'-end
-print FILEF "nucl<-read.csv(file=\"$folder/nuclCompReverse.all.txt\",sep=\"\\t\",check.names = FALSE)\n";
-print FILEF "vec<-(1:$opts{l})\nsumhit<-data.frame(cbind(vec,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))\n";
-#print FILEF "idx <- nucl\$A==0\n";
-#print FILEF "nucl\$A[idx] <- 10^10\n";
-#print FILEF "idx <- nucl\$C==0\n";
-#print FILEF "nucl\$C[idx] <- 10^10\n";
-#print FILEF "idx <- nucl\$G==0\n";
-#print FILEF "nucl\$G[idx] <- 10^10\n";
-#print FILEF "idx <- nucl\$T==0\n";
-#print FILEF "nucl\$T[idx] <- 10^10\n";
-#print FILEF "idx <- nucl\$Tot==0\n";
-#print FILEF "nucl\$Tot[idx] <- 10^10\n";
-print FILEF "for (i in vec)	{\n";
-print FILEF "sumhit[i,2]<-sum(nucl[(nucl\$Pos == i), \"G>A\"])/sum(nucl[(nucl\$Pos == i), \"G\"])\n";
-print FILEF "sumhit[i,3]<-sum(nucl[(nucl\$Pos == i), \"C>T\"])/sum(nucl[(nucl\$Pos == i), \"C\"])\n";
-print FILEF "sumhit[i,4]<-sum(nucl[(nucl\$Pos == i), \"A>G\"])/sum(nucl[(nucl\$Pos == i), \"A\"])\n";
-print FILEF "sumhit[i,5]<-sum(nucl[(nucl\$Pos == i), \"T>C\"])/sum(nucl[(nucl\$Pos == i), \"T\"])\n";
-print FILEF "sumhit[i,6]<-sum(nucl[(nucl\$Pos == i), \"A>C\"])/sum(nucl[(nucl\$Pos == i), \"A\"])\n";
-print FILEF "sumhit[i,7]<-sum(nucl[(nucl\$Pos == i), \"A>T\"])/sum(nucl[(nucl\$Pos == i), \"A\"])\n";
-print FILEF "sumhit[i,8]<-sum(nucl[(nucl\$Pos == i), \"C>G\"])/sum(nucl[(nucl\$Pos == i), \"C\"])\n";
-print FILEF "sumhit[i,9]<-sum(nucl[(nucl\$Pos == i), \"C>A\"])/sum(nucl[(nucl\$Pos == i), \"C\"])\n";
-print FILEF "sumhit[i,10]<-sum(nucl[(nucl\$Pos == i), \"T>G\"])/sum(nucl[(nucl\$Pos == i), \"T\"])\n";
-print FILEF "sumhit[i,11]<-sum(nucl[(nucl\$Pos == i), \"T>A\"])/sum(nucl[(nucl\$Pos == i), \"T\"])\n";
-print FILEF "sumhit[i,12]<-sum(nucl[(nucl\$Pos == i), \"G>C\"])/sum(nucl[(nucl\$Pos == i), \"G\"])\n";
-print FILEF "sumhit[i,13]<-sum(nucl[(nucl\$Pos == i), \"G>T\"])/sum(nucl[(nucl\$Pos == i), \"G\"])\n";
-print FILEF "sumhit[i,14]<-sum(nucl[(nucl\$Pos == i), \"A>-\"])/sum(nucl[(nucl\$Pos == i), \"A\"])\n";
-print FILEF "sumhit[i,15]<-sum(nucl[(nucl\$Pos == i), \"T>-\"])/sum(nucl[(nucl\$Pos == i), \"T\"])\n";
-print FILEF "sumhit[i,16]<-sum(nucl[(nucl\$Pos == i), \"C>-\"])/sum(nucl[(nucl\$Pos == i), \"C\"])\n";
-print FILEF "sumhit[i,17]<-sum(nucl[(nucl\$Pos == i), \"G>-\"])/sum(nucl[(nucl\$Pos == i), \"G\"])\n";
-print FILEF "sumhit[i,22]<-sum(nucl[(nucl\$Pos == i), \"S\"])/sum(nucl[(nucl\$Pos == i), \"Tot\"])\n";
-print FILEF "for (j in seq(NTs)) {\n";
-print FILEF "insertion[j]<-sprintf(\"->%s\", NTs[j])\n";
-print FILEF "sumhit[i,17 + j] <-sum(nucl[(nucl\$Pos == i), insertion[j]]) /sum(nucl[(nucl\$Pos == i), NTs])\n";
-print FILEF "}\n}\n";
-# write a table for G>A frequencies
-print FILEF "write.table(sumhit[,2], file=\"$folder/3pGtoA_freq.txt\", sep=\"\\t\", quote=FALSE, col.names=c(\"pos\\t3pG>A\"))\n";
-print FILEF "plot(-sumhit\$vec,sumhit\$V4,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"grey\",lwd=1,type=\"l\",xlab=\"\",ylab=\"\",las=2,lab=c($opts{l},11,0.1),axes=FALSE)\n";
-print FILEF "axis(side=1,labels=TRUE,las=2,cex.axis=0.8)\n";
-if( $opts{m} > 0.1) { print FILEF "axis(side=4,labels=TRUE,las=2,cex.axis=0.8)\n"; }
-else { print FILEF "axis(side=4,labels=seq(from=0,to=$opts{m},by=0.01),at=seq(from=0,to=$opts{m},by=0.01),las=2,cex.axis=0.8)\n"; }
-print FILEF "rect(-$opts{l}.5,-0.01,-0.5,$opts{m},border=\"darkgrey\")\nsegments(-$opts{l}.5,-0.01,-$opts{l}.5,$opts{m},col=\"white\",lwd=2)\n";
-print FILEF "lines(-sumhit\$vec,sumhit\$V5,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF "lines(-sumhit\$vec,sumhit\$V6,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF "lines(-sumhit\$vec,sumhit\$V7,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF "lines(-sumhit\$vec,sumhit\$V8,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF "lines(-sumhit\$vec,sumhit\$V9,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF "lines(-sumhit\$vec,sumhit\$V10,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF "lines(-sumhit\$vec,sumhit\$V11,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF "lines(-sumhit\$vec,sumhit\$V12,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-print FILEF "lines(-sumhit\$vec,sumhit\$V13,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"grey\",lwd=1)\n";
-# soft clipping in orange
-#print FILEF"lines(-sumhit\$vec,(sumhit\$V22),xlim=c(-5,-1),ylim=c(0,$opts{m}),col=\"orange\",lwd=1)\n";
-# insertions
-print FILEF "lines(-sumhit\$vec,(sumhit\$V14+sumhit\$V15+sumhit\$V16+sumhit\$V17),xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"green\",lwd=1)\n";
-# insertions
-print FILEF "lines(-sumhit\$vec,(sumhit\$V18+sumhit\$V19+sumhit\$V20+sumhit\$V21),xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"purple\",lwd=1)\n\n";
-# C>T in red
-print FILEF "lines(-sumhit\$vec,sumhit\$V3,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"red\",lwd=2)\n";
-# G>A in blue
-print FILEF "lines(-sumhit\$vec,sumhit\$V2,xlim=c(-$opts{l},-1),ylim=c(0,$opts{m}),col=\"blue\",lwd=2)\n";
-print FILEF "dev.off()\n";
-close (FILEF);
-print "plotting using R, it may take a while...\n";
-system("Rscript $folder/mapDamage.R $folder/mapDamage.Rout");
-print "pdf FragMisincorporation_$title.pdf generated\n";
-exit 0;
-}
-else{
-	&printUsagePlot;
-}
-
-}
-######################################
-#               SECONDARY SUBROUTINES DEFINITION
-######################################
-
-sub checkFileEmpty {
-	
-	my $file = $_[0];
-	die("file $file empty! -> Exit\n") if ( -z $file ) ; # exit if file is empty
-}
-
-sub readSAMfile {  # for map
-
-	my $file = $_[0];
-	my $uniq = $_[1];
-	if ( $uniq == 1 ) { print "Reading $file, remove unmapped reads and reads with alternative hits according to BWA (XT and X1 tags)\n"; }
-	my $read_ref = {};
-	my $line = ();
-	my $input = $file;
-	my $cpt=0;
-	my $i=0;
-	my$j=0;
-	my $flag = 0;
-	my $pair=0;
-		 
-	if($file =~ /\.bam$/) {$file = "samtools view $file | "; } 
-	
-	open(FILESAM,"$file") or die("Cannot open file $input\n");
-	while (my $line = <FILESAM>){
-
-		chomp($line);
-		next if ( $line  =~ m/^@/); 	# if SAM file provided with hearder, skip it
-		my @field = split("\t",$line);
-		
-		if ( defined($field[5]) and $field[5] =~ m/([0-9]+[MIDNSHPX=])+|\*/ ) { # check if it looks like a SAM file using the CIGAR field
-			
-			my $nbBestHit = 0; # intialize like unique hits if BWA wasn't used for mapping.
-			my $xt = 'U';
-			if($line =~ m/XT:A:(\w+)\s/) { $xt = $1;} 
-			if($line =~ m/X1:i:(\d+)\s/) { $nbBestHit = $1;} 
-			
-			
-			if ( $field[1] & 0x0004 ) {	# remove unmapped reads 
-					$j++;
-			}
-			elsif ( $uniq == 1 and ($nbBestHit > 0 or $xt ne 'U') ) { # removed non unique hits ie., reads with suboptimal hits or declared non unique by BWA
-					$j++
-			}
-			else{
-				if(defined($read_ref->{$field[0]}{sam})) {
-					# paired-end run, change name, hash is indexed by read name, must be an unique key
-					$field[0].='/2';
-					$pair++;
-				}
-				$read_ref->{$field[0]}{sam} = $line; 	
-				$i++;
-			}
-		$cpt++;
-		}
-		else { die("Problem with SAM format (CIGAR field $field[5])!\n"); exit; }
-		if ($cpt % 500000 == 0 && $cpt !=0) { printf"%8d reads put in RAM ($j trashed, $pair pairs found)/%8d analysed\n",$i,$cpt ;}	
-		
-	}
-	close FILESAM;
-	printf("OK, %8d reads put in RAM ($j trashed, $pair pairs found)\n",$i) ;	
-	return($read_ref);
-}
-
-sub compare2SamFiles { # for map
-	
-	my $first = $_[0];
-	my $second = $_[1];
-	my $fileOUT = $_[2];
-	my $cpt=0;
-	my $i=0;
-	my$j=0;
-	
-	open(FILEOUT,">$fileOUT") or die("Cannot write file $fileOUT\n");
-	foreach  my $key (keys(%$first)) {
-	
-		if( exists( $second->{$key} ) ) {
-			# read present in both files, do nothing
-			$j++;
-		}
-		else{ 
-			# if read doesn't exist in second, write whole SAM line in a separate file
-			$i++;
-			if(defined($first->{$key}{sam})){print FILEOUT"".$first->{$key}{sam} ."\n";}
-		}
-		if ($cpt % 10000 == 0 && $cpt !=0) { printf"%8d reads specific to file $first /%8d analysed\n",$i,$cpt ;}	
-		$cpt++;
-	}
-	print "$i reads were written to $fileOUT ($j reads in both files removed)\n";
-	# close file pointers
-	close (FILEOUT);
-}
-
-sub writeSamFile { # for map
-	
-	my $read = $_[0];
-	my $file =  $_[1];
-	my $i = 0;
-	open(FILEOUT,">$file") or die("Cannot write output file $file\n");
-	foreach  my $key (keys(%$read)) {
-		
-		if(defined($read->{$key}{sam})){
-			print FILEOUT"".$read->{$key}{sam} ."\n";
-			$i++;
-		}
-	}
-	close (FILEOUT);
-	print"$i reads written to file $file\n";
-}
-
-sub initializeHash { # for map
-	my $a = $_[0];
-	my $b = $_[1];
-	my $c = $_[2];
-	my $d = $_[3];
-	my $hash = $_[4];
-	
-	foreach my $pos ($a..$b) {
-		foreach my $str ($c,$d){
-			$hash->{$str}{$pos}=0;
-		}
-	}
-	return($hash);
-}	
-sub checkExecutable {
-     
-        my $check = 0;
-	$check+=1 if `which samtools 2>&1` =~ /samtools$/;
-	die ("samtools is not present in your \$PATH\n") unless $check == 1; 
-	$check+=1 if `which bedtools 2>&1` =~ /bedtools$/;
-	$check+=1  if `which fastaFromBed 2>&1` =~ /fastaFromBed$/;
-	die ("bedtools is not present in your \$PATH\n") unless $check > 1;
-}
-sub checkFastaRef { # for map
-	
-	print "@_\n";
-
-	my $ref =  $_[0];
-	my $h = {};
-	my $cpt = 0;
-	my $sum = 0;
-	# built reference index if not present
-	if (!-e $ref.".fai") { system("samtools faidx $ref");}
-	open(F,$ref.".fai") or die ("Cannot open reference index $ref.fai");
-	while (my $line = <F>) {
-		chomp($line);
-		my @field = split("\t",$line,);
-		# first field is the name of  one or several chromosomes in reference
-		# second field is the length in nucleotide 
-		if(defined($field[0]) and defined($field[1]) and ($field[1] =~ m/^[0-9]+$/)){  # does it look like a good fai file
-			  
-			$field[0] =~ s/[:\|\s]//g;  # character '|' or others must be removed because of their interpretation by the shell and samtools command
-			$h->{$field[0]} = $field[1];
-			$cpt++;
-			$sum = $sum +  $field[1];
-		} else{ die ("The format of reference index file $ref.fai is not correct, you should erase it");}
-	}
-	print"The reference is assumed to contain $cpt chromosome(s) and $sum nucleotides in total\n";
-	return($h);
-}
-
-sub ReadFile	{ # for merge
-	
-	my $d = $_[0];
-	my $file = $_[1];
-	my $result = $_[2];
-	my $chr = $_[3];
-	open(FILE,"<$d/$file") or die("Cannot open file $d/$file\n");
-	my $lineNB = 1;
-	while ( my $line = <FILE> )	{
-		
-		if ( $lineNB != 1 ) {# skip the header
-			if(defined($line) and defined($chr)) {
-				chomp($line);
-				if($chr eq "") {
-					$line = $line."\n"; # just add the newline character (for hitPerChrom files)
-				}
-				else {
-					$line = $chr."\t".$line."\n"; # add the chromosome information
-				}
-				push(@$result, $line);
-			}
-		}
-		$lineNB++;
-	}
-	close(FILE);
-	return($result);
-}
-
-sub printUsageMap{
-    print "
-USAGE: $0 map -i input_SAM/BAM -d directory -r reference_fasta -l read_length -a reference_around
-	-i : input SAM/BAM-file
-	-r : input reference fasta file, with headers identical to the ones in the input_SAM/BAM file
-		
-	OPTIONAL
-	-l : maximal read length, in nucleotides to consider [default: 70]
-	-a : size, in nucleotides, of the genomic region to be retrieved before and after reads [default: 10]
-	-d : folder for writing output-files, if not already present, this folder will be automatically created [default name: results_input_SAM.filtered]
-	-j : second input SAM-file without header; reads present in both SAM files will not be processed
-	-u : removes all non-unique reads based on the X1 and XT tags from the BWA mapper
-	-k : keep bed file and fasta file containing all the genomic regions [default: delete these files]
-	-f : outputs all read aligned against the reference genome, in a fasta file format [default: not active]
-	-c : complete analysis, map will be automatically followed by both merge and plot steps [default: not active]
-	when -c is active, the following are available to control the plot step:
-	-b : the number of reference nucleotides to consider for ploting base composition in the region located upstream and downstream of every read [default: 10]
-	-y : graphical y-axis limit for nucleotide misincorporation frequencies [default: 0.30]
-	-m : read length, in nucleotides, considered for plotting nucleotide misincorporations [default: 25]
-        -n : use at most N randomly selected reads, if set to a positive value [default: -1]
-	\n";
-}
-
-sub printUsageMerge{
-    print "
-USAGE: $0 merge -d directory
-
-	-d :folder with output-files generated by the map command
-	
-	OPTIONAL
-	-c : complete analysis, merge will be automatically followed by the plot step [default: not active]
-	\n";
-}
-
-sub printUsagePlot{
-    print "
-USAGE: $0 plot -d directory
-
-	-d : folder with output-files generated by the merge command
-	
-	OPTIONAL
-	-l : read length, in nucleotides, considered for plotting nucleotide misincorporations [default: 25]
-	-m : graphical y-axis limit for nucleotide misincorporation frequencies [default: 0.30]
-	-a : the number of reference nucleotides to consider for ploting base composition in the region located upstream and downstream of every read [default: 10]
-	-t : title used for both graph and filename [default: folder name without extension]\n";
-}
-
- sub printUsage{
-    print "
-USAGE: $0 <command> <arguments>
-
-Commands:
-	map	main function, perform damage patterns [samtools and bedtools required] 
-	merge	merge all chromosomes
-	plot	plot the graphics [R environment required]\n";
-	exit 0;
-}
diff --git a/mapdamage/Rscripts/mapDamage.R b/mapdamage/Rscripts/mapDamage.R
new file mode 100644
index 0000000..50b9554
--- /dev/null
+++ b/mapdamage/Rscripts/mapDamage.R
@@ -0,0 +1,235 @@
+# R script mapdamage
+
+args <- commandArgs(trailingOnly = TRUE)
+
+#print(args)
+
+comp<-args[1]
+pdfout<-args[2]
+around<-as.numeric(args[3])
+misincorp<-args[4]
+lg<-as.numeric(args[5])
+ymax<-as.numeric(args[6])
+folder<-args[7]
+title<-args[8]
+version<-args[9]
+
+com<-read.table(file=comp,sep="\t", header=TRUE, as.is=TRUE)
+five=subset(com, End=="5p")
+three=subset(com, End=="3p")
+pdf(file=pdfout, title=paste("mapDamage-",version," plot"))
+par(oma=c(4,2,2,2),mar=c(1,2,1,1))
+layout(matrix(c(1,2,3,4,5,6,7,8,9,9,10,10), 3, 4, byrow=TRUE))
+# for letter A, before - read
+n<-numeric
+plot(five$Pos,five$A/five$Tot,pch='.',xlim=c(-around,around),ylim=c(0,0.5),col="blue",main="A",cex.axis=0.8,las=2,xlab="",ylab="",lab=c(2*around,6,0.2), axes=FALSE)
+axis(side=2,labels=TRUE,line=0,las=2,cex.axis=0.8)
+axis(side=1,labels=FALSE)
+mtext("Frequency",side=2,line=2.5,cex=0.7)
+rect(0.5,0,around+0.5,0.5,border="darkgrey")
+segments(around+0.5,0,around+0.5,0.5,col="white",lwd=2)
+for (i in c(-around:-1, 1:around)) { 
+  n<-c(n,mean(five$A[(five$Pos==i)]/five$Tot[(five$Pos==i)],na.rm=T)) 
+}
+points(c(-around:-1,1:around),n[2:((2*around)+1)],pch=20,col="blue",type="b")
+# for letter A, read - after
+n<-numeric
+plot(three$Pos,three$A/three$Tot,pch='.',xlim=c(-around,around),ylim=c(0,0.5),col="blue",main="A",cex.axis=0.8,las=2,xlab="",ylab="",lab=c(2*around,6,0.2), axes=FALSE)
+axis(side=4,labels=FALSE,line=0,las=2,cex.axis=0.8)
+axis(side=1,labels=FALSE)
+rect(-around-0.5,0,-0.5,0.5,border="darkgrey")
+segments(-around-0.5,0,-around-0.5,0.5,col="white",lwd=2)
+for (i in c(-around:-1, 1:around)) { 
+  n<-c(n,mean(three$A[(three$Pos==i)]/three$Tot[(three$Pos==i)],na.rm=T)) 
+}
+points(c(-around:-1,1:around),n[2:((2*around)+1)],pch=20,col="blue",type="b")							 
+mtext(title, side=3, line=1.2, cex=0.8)
+
+# for letter C, before - read
+n<-numeric
+plot(five$Pos,five$C/five$Tot,pch='.',xlim=c(-around,around),ylim=c(0,0.5),col="green",main="C",cex.axis=0.8,las=2,xlab="",ylab="",lab=c(2*around,6,0.2), axes=FALSE)
+axis(side=2,labels=FALSE,line=0,las=2,cex.axis=0.8)
+axis(side=1,labels=FALSE)
+rect(0.5,0,around+0.5,0.5,border="darkgrey")
+segments(around+0.5,0,around+0.5,0.5,col="white",lwd=2)
+for (i in c(-around:-1, 1:around)) { 
+  n<-c(n,mean(five$C[(five$Pos==i)]/five$Tot[(five$Pos==i)],na.rm=T)) 
+}
+points(c(-around:-1,1:around),n[2:((2*around)+1)],pch=20,col="green",type="b")
+# for letter C, read - after
+n<-numeric
+plot(three$Pos,three$C/three$Tot,pch='.',xlim=c(-around,around),ylim=c(0,0.5),col="green",main="C",cex.axis=0.8,las=2,xlab="",ylab="",lab=c(2*around,6,0.2), axes=FALSE)
+axis(side=4,labels=TRUE,line=0,las=2,cex.axis=0.8)
+axis(side=1,labels=FALSE)
+rect(-around-0.5,0,-0.5,0.5,border="darkgrey")
+segments(-around-0.5,0,-around-0.5,0.5,col="white",lwd=2)
+for (i in c(-around:-1, 1:around)) { 
+  n<-c(n,mean(three$C[(three$Pos==i)]/three$Tot[(three$Pos==i)],na.rm=T)) 
+}
+points(c(-around:-1,1:around),n[2:((2*around)+1)],pch=20,col="green",type="b")							 
+
+# for letter G, before - read
+n<-numeric
+plot(five$Pos,five$G/five$Tot,pch='.',xlim=c(-around,around),ylim=c(0,0.5),col="black",main="G",cex.axis=0.8,las=2,xlab="",ylab="",lab=c(2*around,6,0.2), axes=FALSE)
+axis(side=2,labels=TRUE,line=0,las=2,cex.axis=0.8)
+axis(side=1,labels=TRUE, las=2, cex.axis=0.6)
+mtext("Frequency",side=2,line=2.5,cex=0.7)
+rect(0.5,0,around+0.5,0.5,border="darkgrey")
+segments(around+0.5,0,around+0.5,0.5,col="white",lwd=2)
+for (i in c(-around:-1, 1:around)) { 
+  n<-c(n,mean(five$G[(five$Pos==i)]/five$Tot[(five$Pos==i)],na.rm=T)) 
+}
+points(c(-around:-1,1:around),n[2:((2*around)+1)],pch=20,col="black",type="b")
+# for letter G, read - after
+n<-numeric
+plot(three$Pos,three$G/three$Tot,pch='.',xlim=c(-around,around),ylim=c(0,0.5),col="black",main="G",cex.axis=0.8,las=2,xlab="",ylab="",lab=c(2*around,6,0.2), axes=FALSE)
+axis(side=4,labels=FALSE,line=0,las=2,cex.axis=0.8)
+axis(side=1,labels=TRUE, las=2, cex.axis=0.6)
+rect(-around-0.5,0,-0.5,0.5,border="darkgrey")
+segments(-around-0.5,0,-around-0.5,0.5,col="white",lwd=2)
+for (i in c(-around:-1, 1:around)) { 
+  n<-c(n,mean(three$G[(three$Pos==i)]/three$Tot[(three$Pos==i)],na.rm=T)) 
+}
+points(c(-around:-1,1:around),n[2:((2*around)+1)],pch=20,col="black",type="b")
+
+# for letter T, before - read
+n<-numeric
+plot(five$Pos,five$T/five$Tot,pch='.',xlim=c(-around,around),ylim=c(0,0.5),col="red",main="T",cex.axis=0.8,las=2,xlab="",ylab="",lab=c(2*around,6,0.2), axes=FALSE)
+axis(side=2,labels=FALSE,line=0,las=2,cex.axis=0.8)
+axis(side=1,labels=TRUE, las=2, cex.axis=0.6)
+rect(0.5,0,around+0.5,0.5,border="darkgrey")
+segments(around+0.5,0,around+0.5,0.5,col="white",lwd=2)
+for (i in c(-around:-1, 1:around)) { 
+  n<-c(n,mean(five$T[(five$Pos==i)]/five$Tot[(five$Pos==i)],na.rm=T)) 
+}
+points(c(-around:-1,1:around),n[2:((2*around)+1)],pch=20,col="red",type="b")
+# for letter T, read - after
+n<-numeric
+plot(three$Pos,three$T/three$Tot,pch='.',xlim=c(-around,around),ylim=c(0,0.5),col="red",main="T",cex.axis=0.8,las=2,xlab="",ylab="",lab=c(2*around,6,0.2), axes=FALSE)
+axis(side=4,labels=TRUE,line=0,las=2,cex.axis=0.8)
+axis(side=1,labels=TRUE, las=2, cex.axis=0.6)
+rect(-around-0.5,0,-0.5,0.5,border="darkgrey")
+segments(-around-0.5,0,-around-0.5,0.5,col="white",lwd=2)
+for (i in c(-around:-1, 1:around)) { 
+  n<-c(n,mean(three$T[(three$Pos==i)]/three$Tot[(three$Pos==i)],na.rm=T)) 
+}
+points(c(-around:-1,1:around),n[2:((2*around)+1)],pch=20,col="red",type="b")
+
+# Misincorporation patterns
+# first from 5'-ends
+mut<-read.table(file=misincorp,sep="\t",header=TRUE, check.names = FALSE)
+nucl<-subset(mut, End=="5p")
+vec<-(1:lg)
+sumhit<-data.frame(cbind(vec,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))
+insertion<-0
+for (i in vec)	{
+  sumhit[i,2]<-sum(nucl[(nucl$Pos == i), "G>A"])/sum(nucl[(nucl$Pos == i), "G"])
+  sumhit[i,3]<-sum(nucl[(nucl$Pos == i), "C>T"])/sum(nucl[(nucl$Pos == i), "C"])
+  sumhit[i,4]<-sum(nucl[(nucl$Pos == i), "A>G"])/sum(nucl[(nucl$Pos == i), "A"])
+  sumhit[i,5]<-sum(nucl[(nucl$Pos == i), "T>C"])/sum(nucl[(nucl$Pos == i), "T"])
+  sumhit[i,6]<-sum(nucl[(nucl$Pos == i), "A>C"])/sum(nucl[(nucl$Pos == i), "A"])
+  sumhit[i,7]<-sum(nucl[(nucl$Pos == i), "A>T"])/sum(nucl[(nucl$Pos == i), "A"])
+  sumhit[i,8]<-sum(nucl[(nucl$Pos == i), "C>G"])/sum(nucl[(nucl$Pos == i), "C"])
+  sumhit[i,9]<-sum(nucl[(nucl$Pos == i), "C>A"])/sum(nucl[(nucl$Pos == i), "C"])
+  sumhit[i,10]<-sum(nucl[(nucl$Pos == i), "T>G"])/sum(nucl[(nucl$Pos == i), "T"])
+  sumhit[i,11]<-sum(nucl[(nucl$Pos == i), "T>A"])/sum(nucl[(nucl$Pos == i), "T"])
+  sumhit[i,12]<-sum(nucl[(nucl$Pos == i), "G>C"])/sum(nucl[(nucl$Pos == i), "G"])
+  sumhit[i,13]<-sum(nucl[(nucl$Pos == i), "G>T"])/sum(nucl[(nucl$Pos == i), "G"])
+  sumhit[i,14]<-sum(nucl[(nucl$Pos == i), "A>-"])/sum(nucl[(nucl$Pos == i), "A"])
+  sumhit[i,15]<-sum(nucl[(nucl$Pos == i), "T>-"])/sum(nucl[(nucl$Pos == i), "T"])
+  sumhit[i,16]<-sum(nucl[(nucl$Pos == i), "C>-"])/sum(nucl[(nucl$Pos == i), "C"])
+  sumhit[i,17]<-sum(nucl[(nucl$Pos == i), "G>-"])/sum(nucl[(nucl$Pos == i), "G"])
+  sumhit[i,22]<-sum(nucl[(nucl$Pos == i), "S"])/sum(nucl[(nucl$Pos == i), "Tot"])
+  NTs <- c("A", "C", "G", "T")
+  for (j in seq(NTs)) {
+    insertion[j]<-sprintf("->%s", NTs[j])
+    sumhit[i,17 + j] <-sum(nucl[(nucl$Pos == i), insertion[j]]) /sum(nucl[(nucl$Pos == i), NTs])
+  }
+}
+# write a table for C>T frequencies
+write.table(sumhit[,3], file=paste(folder,"/5pCtoT_freq.txt",sep=""), sep="\t", quote=FALSE, col.names=c("pos\t5pC>T"))
+plot(sumhit$vec,sumhit$V4,xlim=c(1,lg),ylim=c(0,ymax),col="grey",lwd=1,type="l",xlab="",ylab="",cex.lab=0.8,cex.axis=0.6,las=2,lab=c(lg,11,0.1),axes=FALSE)
+axis(side=1,labels=TRUE,las=2,cex.axis=0.8)
+axis(side=2,labels=TRUE,las=2,cex.axis=0.8)
+rect(0.5,-0.01,lg+0.5,ymax,border="darkgrey")
+segments(lg+0.5,-0.01,lg+0.5,ymax,col="white",lwd=2)
+mtext("Frequency",side=2,line=2.8,cex=0.7)
+lines(sumhit$vec,sumhit$V5,xlim=c(1,lg),ylim=c(0,ymax),col="grey",lwd=1)
+lines(sumhit$vec,sumhit$V6,xlim=c(1,lg),ylim=c(0,ymax),col="grey",lwd=1)
+lines(sumhit$vec,sumhit$V7,xlim=c(1,lg),ylim=c(0,ymax),col="grey",lwd=1)
+lines(sumhit$vec,sumhit$V8,xlim=c(1,lg),ylim=c(0,ymax),col="grey",lwd=1)
+lines(sumhit$vec,sumhit$V9,xlim=c(1,lg),ylim=c(0,ymax),col="grey",lwd=1)
+lines(sumhit$vec,sumhit$V10,xlim=c(1,lg),ylim=c(0,ymax),col="grey",lwd=1)
+lines(sumhit$vec,sumhit$V11,xlim=c(1,lg),ylim=c(0,ymax),col="grey",lwd=1)
+lines(sumhit$vec,sumhit$V12,xlim=c(1,lg),ylim=c(0,ymax),col="grey",lwd=1)
+lines(sumhit$vec,sumhit$V13,xlim=c(1,lg),ylim=c(0,ymax),col="grey",lwd=1)
+# sofclipping in orange
+lines(sumhit$vec,sumhit$V22,xlim=c(1,lg),ylim=c(0,ymax),col="orange",lwd=1)
+# deletio in green
+lines(sumhit$vec,(sumhit$V14+sumhit$V15+sumhit$V16+sumhit$V17),xlim=c(1,lg),ylim=c(0,ymax),col="green",lwd=1)
+# insertions in pruple
+lines(sumhit$vec,(sumhit$V18+sumhit$V19+sumhit$V20+sumhit$V21),xlim=c(1,lg),ylim=c(0,ymax),col="purple",lwd=1)
+# G>A in blue
+lines(sumhit$vec,sumhit$V2,xlim=c(1,lg),ylim=c(0,ymax),col="blue",lwd=2)
+# C>T in red
+lines(sumhit$vec,sumhit$V3,xlim=c(1,lg),ylim=c(0,ymax),col="red",lwd=2)
+
+# then from 3'-ends
+nucl=subset(mut, End=="3p")
+vec<-(1:lg)
+sumhit<-data.frame(cbind(vec,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))
+insertion<-0
+for (i in vec)	{
+  sumhit[i,2]<-sum(nucl[(nucl$Pos == i), "G>A"])/sum(nucl[(nucl$Pos == i), "G"])
+  sumhit[i,3]<-sum(nucl[(nucl$Pos == i), "C>T"])/sum(nucl[(nucl$Pos == i), "C"])
+  sumhit[i,4]<-sum(nucl[(nucl$Pos == i), "A>G"])/sum(nucl[(nucl$Pos == i), "A"])
+  sumhit[i,5]<-sum(nucl[(nucl$Pos == i), "T>C"])/sum(nucl[(nucl$Pos == i), "T"])
+  sumhit[i,6]<-sum(nucl[(nucl$Pos == i), "A>C"])/sum(nucl[(nucl$Pos == i), "A"])
+  sumhit[i,7]<-sum(nucl[(nucl$Pos == i), "A>T"])/sum(nucl[(nucl$Pos == i), "A"])
+  sumhit[i,8]<-sum(nucl[(nucl$Pos == i), "C>G"])/sum(nucl[(nucl$Pos == i), "C"])
+  sumhit[i,9]<-sum(nucl[(nucl$Pos == i), "C>A"])/sum(nucl[(nucl$Pos == i), "C"])
+  sumhit[i,10]<-sum(nucl[(nucl$Pos == i), "T>G"])/sum(nucl[(nucl$Pos == i), "T"])
+  sumhit[i,11]<-sum(nucl[(nucl$Pos == i), "T>A"])/sum(nucl[(nucl$Pos == i), "T"])
+  sumhit[i,12]<-sum(nucl[(nucl$Pos == i), "G>C"])/sum(nucl[(nucl$Pos == i), "G"])
+  sumhit[i,13]<-sum(nucl[(nucl$Pos == i), "G>T"])/sum(nucl[(nucl$Pos == i), "G"])
+  sumhit[i,14]<-sum(nucl[(nucl$Pos == i), "A>-"])/sum(nucl[(nucl$Pos == i), "A"])
+  sumhit[i,15]<-sum(nucl[(nucl$Pos == i), "T>-"])/sum(nucl[(nucl$Pos == i), "T"])
+  sumhit[i,16]<-sum(nucl[(nucl$Pos == i), "C>-"])/sum(nucl[(nucl$Pos == i), "C"])
+  sumhit[i,17]<-sum(nucl[(nucl$Pos == i), "G>-"])/sum(nucl[(nucl$Pos == i), "G"])
+  sumhit[i,22]<-sum(nucl[(nucl$Pos == i), "S"])/sum(nucl[(nucl$Pos == i), "Tot"])
+  NTs <- c("A", "C", "G", "T")
+  for (j in seq(NTs)) {
+    insertion[j]<-sprintf("->%s", NTs[j])
+    sumhit[i,17 + j] <-sum(nucl[(nucl$Pos == i), insertion[j]]) /sum(nucl[(nucl$Pos == i), NTs])
+  }
+}
+#  write a table for G>A frequencies
+write.table(sumhit[,2], file=paste(folder,"/5pGtoA_freq.txt",sep=""), sep="\t", quote=FALSE, col.names=c("pos\t5pG>A"))
+plot(-sumhit$vec,sumhit$V4,xlim=c(-lg, -1),ylim=c(0,ymax),col="grey",lwd=1,type="l",xlab="",ylab="",cex.lab=0.8,cex.axis=0.6,las=2,lab=c(lg,11,0.1),axes=FALSE)
+axis(side=1,labels=TRUE,las=2,cex.axis=0.8)
+axis(side=4,labels=TRUE,las=2,cex.axis=0.8)
+rect(-lg-0.5,-0.01,-0.5,ymax,border="darkgrey")
+segments(-lg-0.5,-0.01,-lg-0.5,ymax,col="white",lwd=2)
+lines(-sumhit$vec,sumhit$V5,col="grey",lwd=1)
+lines(-sumhit$vec,sumhit$V6,col="grey",lwd=1)
+lines(-sumhit$vec,sumhit$V7,col="grey",lwd=1)
+lines(-sumhit$vec,sumhit$V8,col="grey",lwd=1)
+lines(-sumhit$vec,sumhit$V9,col="grey",lwd=1)
+lines(-sumhit$vec,sumhit$V10,col="grey",lwd=1)
+lines(-sumhit$vec,sumhit$V11,col="grey",lwd=1)
+lines(-sumhit$vec,sumhit$V12,col="grey",lwd=1)
+lines(-sumhit$vec,sumhit$V13,col="grey",lwd=1)
+# sofclipping in orange
+lines(-sumhit$vec,sumhit$V22,col="orange",lwd=1)
+# deletions in green
+lines(-sumhit$vec,(sumhit$V14+sumhit$V15+sumhit$V16+sumhit$V17),col="green",lwd=1)
+# insertions in pruple
+lines(-sumhit$vec,(sumhit$V18+sumhit$V19+sumhit$V20+sumhit$V21),col="purple",lwd=1)
+# G>A in blue
+lines(-sumhit$vec,sumhit$V2,col="blue",lwd=2)
+# C>T in red
+lines(-sumhit$vec,sumhit$V3,ylim=c(0,ymax),col="red",lwd=2)
+
+# graphics.off() calls dev.off() for all devices but doesn't return anything (avoid null device message)
+graphics.off()
+
diff --git a/mapdamage/__init__.py b/mapdamage/__init__.py
new file mode 100644
index 0000000..20c256b
--- /dev/null
+++ b/mapdamage/__init__.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+
+import parseoptions
+import seq
+import align
+import tables
+import composition
+import rscript
+import version
diff --git a/mapdamage/__init__.pyc b/mapdamage/__init__.pyc
new file mode 100644
index 0000000..fa46f3c
Binary files /dev/null and b/mapdamage/__init__.pyc differ
diff --git a/mapdamage/align.py b/mapdamage/align.py
new file mode 100644
index 0000000..fa32581
--- /dev/null
+++ b/mapdamage/align.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+
+import mapdamage
+import sys 
+
+
+# from Martin Kircher, description of CIGAR operations
+#O BAM Description
+#M 0 alignment match (can be a sequence match or mismatch)
+#I 1 insertion to the reference
+#D 2 deletion from the reference
+#N 3 skipped region from the reference
+#S 4 soft clipping (clipped sequences present in SEQ)
+#H 5 hard clipping (clipped sequences NOT present in SEQ)
+#P 6 padding (silent deletion from padded reference)
+#= 7 sequence match
+#X 8 sequence mismatch
+
+
+def align(cigarlist, seq, ref):
+
+  ins = parseCigar(cigarlist, 1)
+  lref=list(ref)
+  for nb,idx in ins:
+    lref[idx:idx] = ["-"]*nb 
+  ref = "".join(lref)
+  delet = parseCigar(cigarlist, 2)
+  lread = list(seq)
+  for nb,idx in delet:
+    lread[idx:idx] = ["-"]*nb 
+  seq = "".join(lread)
+  return(seq, ref)
+
+
+def getMis(read, seq, refseq, ref, length, tab, end):
+  if read.is_reverse:
+    std = '-'
+  else:
+    std = '+'
+  for (i, nt) in enumerate(zip(seq, refseq)):
+    if i > length-1:
+      continue
+    # print("pos %d compare %s and %s" % (i, nt[0], nt[1]))
+    mut = nt[1]+">"+nt[0] # mutation such as ref>read
+    if nt[1] in mapdamage.seq.letters:
+      # record base composition in the reference, only A, C, G, T
+      tab[ref][end][std]['Tot'][i] += 1
+      tab[ref][end][std][nt[1]][i] += 1
+      #print("+1 for %s pos %d std %s (%d)" % (nt[1], i, std, misincorp[ref][std][nt[1]][i] ))
+    try:
+      # discard identities
+      tab[ref][end][std][mut][i] += 1
+      #print("+1 for %s pos %d std %s (%d)" % (mut, i, std, misincorp[ref][std][mut][i] ))
+    except:
+      continue
+
+
+def parseCigar(cigarlist, op):
+  tlength = 0
+  coordinate = []
+  # for deletion, count matches, softclip and deletion 
+  if op == 2:
+      oplist = (0, 2, 4, 7, 8)
+  # for insertion and softclip count matches, softclip and insertions
+  else:
+      oplist = (0, 1, 4, 7, 8)
+  for operation,length in cigarlist:
+    if operation == op:
+        coordinate.append([length, tlength])
+    if operation in oplist: 
+        tlength+=length
+  return coordinate
+
+
+# to be simplified without ref as arguments and subtables
+
+def recordSoftClipping(sclip, read, ref, tab, lg):
+  #def update_table(left, right):
+  #      subtable = tab[ref][left]['-']['S']
+  #      for i in range(0,min(sclip[idx][0], lg)):
+  #        subtable[i] += 1
+  #      for i in range(min(len(read.seq)-sclip[idx][0], lg), min(len(read.seq), lg)):
+  #        tab[ref][right]['-']['S'][i] += 1
+
+  for idx in range(0,len(sclip)):
+    # for soft at left side
+    if idx == 0:
+      if read.is_reverse:
+        for i in range(0,min(sclip[idx][0], lg)):
+          tab[ref]['3p']['-']['S'][i] += 1
+        for i in range(min(len(read.seq)-sclip[idx][0], lg), min(len(read.seq), lg)):
+          tab[ref]['5p']['-']['S'][i] += 1
+      else: 
+        for i in range(0,min(sclip[idx][0], lg)):
+          tab[ref]['5p']['+']['S'][i] += 1
+        for i in range(min(len(read.seq)-sclip[idx][0], lg), min(len(read.seq), lg)):
+          tab[ref]['3p']['+']['S'][i] += 1
+    # for soft clip at right side
+    else:
+      if read.is_reverse:
+        for i in range(0,min(sclip[idx][0], lg)):
+          tab[ref]['5p']['-']['S'][i] += 1
+        for i in range(min(len(read.seq)-sclip[idx][0], lg), min(len(read.seq), lg)):
+          tab[ref]['3p']['-']['S'][i] += 1
+      else:
+        for i in range(0,min(sclip[idx][0], lg)):
+          tab[ref]['3p']['+']['S'][i] += 1
+        for i in range(min(len(read.seq)-sclip[idx][0], lg), min(len(read.seq), lg)):
+          tab[ref]['5p']['+']['S'][i] += 1
+
+
+
+
+
+
diff --git a/mapdamage/align.pyc b/mapdamage/align.pyc
new file mode 100644
index 0000000..1526a5d
Binary files /dev/null and b/mapdamage/align.pyc differ
diff --git a/mapdamage/composition.py b/mapdamage/composition.py
new file mode 100644
index 0000000..a35aa71
--- /dev/null
+++ b/mapdamage/composition.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+import mapdamage
+
+def countRefComp(read, chrom, before, after, comp):
+
+  if read.is_reverse:
+    std = '-'
+  else:
+    std = '+'
+
+  for (i, nt) in enumerate(before):
+    if nt in mapdamage.seq.letters:
+      # record base composition in the reference
+      j = i - len(before)
+      comp[chrom]['5p'][std]['Tot'][j] += 1
+      comp[chrom]['5p'][std][nt][j] += 1
+      #print("bef +1 for %s pos %d std %s (%d)" % (nt, j, std, comp[chrom][std][nt][j] ))
+
+  for (i, nt) in enumerate(after):
+    if nt in mapdamage.seq.letters:
+      # record base composition in the reference
+      comp[chrom]['3p'][std]['Tot'][i+1] += 1
+      comp[chrom]['3p'][std][nt][i+1] += 1
+      #print("aft +1 for %s pos %d std %s (%d)" % (nt, i+1, std, comp[chrom][std][nt][i+1] ))
+
+
+def countReadComp(read, chrom, lg, comp):
+
+  if read.is_reverse:
+    std = '-'
+  else:
+    std = '+'
+  # compute base composition for base aligned only
+  for (i, nt) in enumerate(read.query):
+    if nt in mapdamage.seq.letters and i < lg:
+      # record base composition
+      comp[chrom]['5p'][std]['Tot'][i+1] += 1
+      comp[chrom]['5p'][std][nt][i+1] += 1
+      #print("+1 for %s pos %d std %s (%d)" % (nt, i+1, std, comp[chrom][std][nt][i+1]))
+
+  rev = read.query[::-1]
+  for (i, nt) in enumerate(rev):
+    if nt in mapdamage.seq.letters and i < lg:
+      j = (i * -1)
+      j -= 1
+      # record base composition
+      comp[chrom]['3p'][std]['Tot'][j] += 1
+      comp[chrom]['3p'][std][nt][j] += 1
+      #print("rev +1 for %s pos %d, i %d, std %s (%d Tot %d)" % (nt, j, i, std, comp[chrom][std][nt][j], comp[chrom][std]['Tot'][j]))
+
+  #print("end tot pos -70, + %d, std - %d" % (comp[chrom]['+']['Tot'][-70], comp[chrom]['-']['Tot'][-70]))
+
+
diff --git a/mapdamage/composition.pyc b/mapdamage/composition.pyc
new file mode 100644
index 0000000..859feb2
Binary files /dev/null and b/mapdamage/composition.pyc differ
diff --git a/mapdamage/parseoptions.py b/mapdamage/parseoptions.py
new file mode 100644
index 0000000..e20dc46
--- /dev/null
+++ b/mapdamage/parseoptions.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+# -*- coding: ASCII -*-
+
+from optparse import OptionParser,OptionGroup, SUPPRESS_HELP
+import os
+import sys
+
+import mapdamage
+from mapdamage.version import __version__
+
+def fileExist(filename):
+
+  if os.path.exists(filename) and not os.path.isdir(filename):
+    return True
+  else:
+    sys.stderr.write("Error: '%s' is not a valid file\n" % (filename))
+    return None
+
+
+def checkModule(mod):
+
+  module = mod.keys()
+  url = mod[module[0]]
+  try:
+    __import__(module[0])
+
+  except ImportError, e:
+    sys.stderr.write("Error: Could not import required module '%s':\n\t- %s\n" % (module[0],e))
+    sys.stderr.write("       If module is not installed, please download from '%s'.\n" % (url,))
+    sys.stderr.write("       A local install may be performed using the following command:\n")
+    sys.stderr.write("       $ python setup.py install --user\n\n")
+    return None
+
+  return True
+
+
+
+def whereis(program):
+  for path in os.environ.get('PATH', '').split(':'):
+    if os.path.exists(os.path.join(path, program)) and \
+        not os.path.isdir(os.path.join(path, program)):
+      return os.path.join(path, program)
+  return None
+
+def checkPyVersion():
+  req_version = (2,5)
+  cur_version = sys.version_info
+
+  if cur_version >= req_version:
+   return True
+  else:
+   sys.stderr.write("Your Python interpreter is too old. Please consider upgrading to at least %d.%d\n" % (req_version[0], req_version[1]))
+   return None
+    
+
+def options(args):
+
+  parser = OptionParser("%prog [options] -i BAMfile -r reference.fasta", version=__version__, \
+          epilog="report bugs to aginolhac@snm.ku.dk")
+  args = OptionGroup(parser, "Mandatory arguments")
+  args.add_option("-r", "--reference", help="Reference file in FASTA format",\
+        action="store", dest="ref")
+  args.add_option("-i", "--input", help="SAM/BAM file, must contain a valid header", \
+        action="store", type="string", dest="filename")
+  parser.add_option_group(args)
+  group = OptionGroup(parser, "Options")
+  group.add_option("-l","--length",dest="length",help="read length, in nucleotides to consider [%default]",\
+          type = int, default=70,action="store")
+  group.add_option("-a","--around",dest="around",help="nucleotides to retrieve before/after reads [%default]",\
+          type = int, default=10,action="store")
+  #group.add_option("-p","--pipe",dest="pipe",help="Read BAM from a pipe, ie. the standard input",\
+  #      default=False,action="store_true")
+  group.add_option("-d", "--folder", help="folder name to store results [results_FILENAME]", \
+        action="store", type="string", dest="folder")
+  group.add_option("-f","--fasta",dest="fasta",help="Write alignments in a FASTA file",\
+        default=False,action="store_true")
+  group.add_option("--plotonly",dest="plotonly",help="Run only plotting from a valid result folder",\
+        default=False,action="store_true")
+  group.add_option("-q","--quiet",dest="quiet",help="Disable any output to stdout",\
+        default=False,action="store_true")
+  group.add_option("-v","--verbose",dest="verbose",help="Display progression information during parsing",\
+        default=False,action="store_true")
+  group.add_option("--noplot",dest="nor",help=SUPPRESS_HELP, default=False, action="store_true")
+
+  parser.add_option_group(group)
+  group2 = OptionGroup(parser, "Option graphics")
+  group2.add_option("-y","--ymax",dest="ymax",\
+          help="graphical y-axis limit for nucleotide misincorporation frequencies [%default]", type = float, default=0.3,action="store")
+  group2.add_option("-m","--readplot",dest="readplot",\
+          help="read length, in nucleotides, considered for plotting nucleotide misincorporations [%default]",\
+          type = int, default=25,action="store")
+  group2.add_option("-b","--refplot",dest="refplot",\
+          help="the number of reference nucleotides to consider for ploting base composition in the region located upstream and downstream of every read [%default]",\
+          type= int, default=10,action="store")
+  group2.add_option("-t","--title",dest="title",\
+          help="title used for both graph and filename [%default]",\
+          type = "string", default="plot",action="store")
+
+  parser.add_option_group(group2)
+  (options, args) = parser.parse_args()
+
+  # check python version
+  if not checkPyVersion():
+    return None
+
+  # check mandatory arguments
+  if not options.plotonly and not options.filename:   # if filename is not given
+    parser.error('SAM/BAM file not given')
+  if not options.plotonly and not options.ref:   # if filename is not given
+    parser.error('Reference file not given')
+  if not options.plotonly and (not fileExist(options.filename) or not fileExist(options.ref)):
+    return None
+
+  if options.plotonly and not options.folder:
+    parser.error('Folder not provided, required with --plotonly')
+
+  # check options 
+  if options.length < 0:
+    parser.error('length (-l) must be a positive integrer')
+  if options.around < 0:
+    parser.error('around (-a) must be a positive integrer')
+  if options.ymax <= 0 or options.ymax > 1:
+    parser.error('ymax (-b) must be an integrer beetween 0 and 1')
+  if options.readplot < 0:
+    parser.error('readplot (-m) must be a positive integrer')
+  if options.refplot < 0:
+    parser.error('refplot (-b) must be a positive integrer')
+  if options.refplot > options.around:
+    parser.error('refplot (-b) must be inferior to around (-a)')
+  if options.readplot > options.length:
+    parser.error('readplot (-m) must be inferior to length (-l)')
+
+  # check folder
+  if not options.folder and options.filename:
+    options.folder = "results_"+os.path.splitext(os.path.basename(options.filename))[0]
+  if os.path.isdir(options.folder):
+    if not options.quiet and not options.plotonly:
+      print("Warning, %s already exists" % options.folder)
+    if options.plotonly:
+      if not fileExist(options.folder+"/dnacomp.txt") or not fileExist(options.folder+"/misincorporation.txt"):
+        parser.error('folder %s is not a valid result folder' % options.folder)
+  else:
+    os.makedirs(options.folder, mode = 0700)
+    if options.plotonly:
+      sys.stderr.write("Error, %s does not exist while plot only was used\n" % options.folder)
+      return None
+
+
+  # check if the Rscript executable is present on the system
+  if not whereis('Rscript'):
+    print("Warning, Rscript is not in your PATH, plotting is disabled")
+    options.nor = True
+
+  return options
+
diff --git a/mapdamage/parseoptions.pyc b/mapdamage/parseoptions.pyc
new file mode 100644
index 0000000..a78880c
Binary files /dev/null and b/mapdamage/parseoptions.pyc differ
diff --git a/mapdamage/rscript.py b/mapdamage/rscript.py
new file mode 100644
index 0000000..f970c5a
--- /dev/null
+++ b/mapdamage/rscript.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+
+import os
+import subprocess
+from mapdamage.version import __version__
+
+def plot(op):
+
+  # comp<-args[1]
+  # pdfout<-args[2]
+  # around<-as.numeric(args[3])
+  # misincorp<-args[4]
+  # lg<-as.numeric(args[5])
+  # ymax<-as.numeric(args[6])
+  # folder<-args[7]
+  # title<-args[8]
+  # version<-args[9]
+
+  fmut = op.folder+"/"+"misincorporation.txt"
+  fcomp = op.folder+"/"+"dnacomp.txt"
+  title = op.folder+"/"+"Fragmisincorporation_"+op.title+".pdf"
+
+#os.path.dirname(__file__)
+  script = os.path.join(os.path.dirname(__file__), "Rscripts/mapDamage.R")
+  call = ["Rscript", script, fcomp, title, op.refplot, fmut, op.readplot, op.ymax, op.folder, op.title, __version__]
+  code = subprocess.call(map(str, call))
+  #print " ".join(map(str, call))
+
+  if code == 0:
+    print("pdf %s generated using R" % title)
+    return 0
+  else:
+    print("Error: plotting with R failed")
+    return 0
+      
+
+
diff --git a/mapdamage/rscript.pyc b/mapdamage/rscript.pyc
new file mode 100644
index 0000000..9a56abb
Binary files /dev/null and b/mapdamage/rscript.pyc differ
diff --git a/mapdamage/seq.py b/mapdamage/seq.py
new file mode 100644
index 0000000..52b603d
--- /dev/null
+++ b/mapdamage/seq.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+
+import string
+
+# from Martin Kircher, to complement DNA
+table = string.maketrans('TGCAMRWSYKVHDBtgcamrwsykvhdb','ACGTKYWSRMBDHVacgtkywsrmbdhv')
+
+letters = ('A','C','G','T','Tot')
+mutations = ('G>A','C>T','A>G','T>C','A>C','A>T','C>G','C>A','T>G',\
+        'T>A','G>C','G>T','A>-','T>-','C>-','G>-','->A','->T','->C','->G','S') 
+header=letters+mutations
+
+
diff --git a/mapdamage/seq.pyc b/mapdamage/seq.pyc
new file mode 100644
index 0000000..26e23c3
Binary files /dev/null and b/mapdamage/seq.pyc differ
diff --git a/mapdamage/tables.py b/mapdamage/tables.py
new file mode 100644
index 0000000..dba3a3d
--- /dev/null
+++ b/mapdamage/tables.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+import mapdamage
+import sys
+import collections
+from mapdamage.version import __version__
+
+
+class recursivedefaultdict(collections.defaultdict):
+  def __init__(self):
+    self.default_factory = type(self) 
+
+
+
+def initializeMut(tab, ref, lg):
+    
+  tab = recursivedefaultdict() 
+  for contig in ref:
+    for end in ('5p','3p'):
+      for std in ('+','-'):
+        for mut in mapdamage.seq.header:
+          tab[contig][end][std][mut] = collections.defaultdict(int)
+  
+  return(tab)
+
+def printMut(mut, op, out):
+  # print header
+  out.write("# table produced by mapDamage version %s\n" % __version__)
+  out.write("# using mapped file %s and %s as reference file\n" % (op.filename, op.ref))
+  out.write("# Chr: reference from sam/bam header, End: from which termini of DNA sequences, Std: strand of reads\n")
+  out.write("Chr\tEnd\tStd\tPos\t%s\t%s\n" % ("\t".join(mapdamage.seq.letters),"\t".join(mapdamage.seq.mutations)))
+  for ref in sorted(mut):
+    for end in mut[ref].keys():
+      for std in mut[ref][end].keys():
+        for i in range(op.length):
+          out.write("%s\t%s\t%s\t%d" % (ref, end, std, i+1))
+          for mis in mapdamage.seq.header:
+            out.write("\t%d" % mut[ref][end][std][mis][i])
+          out.write("\n")
+
+
+def initializeComp(tab, ref, around,lg):
+    
+  tab = recursivedefaultdict() 
+  for contig in ref:
+    for end in ('5p','3p'):
+      for std in ('+','-'):
+        for letters in mapdamage.seq.letters:
+          tab[contig][end][std][letters] = collections.defaultdict(int)
+
+  return(tab)
+
+
+def printComp(comp, op, out):
+  # print header
+  out.write("# table produced by mapDamage version %s\n" % __version__)
+  out.write("# using mapped file %s and %s as reference file\n" % (op.filename, op.ref))
+  out.write("# Chr: reference from sam/bam header, End: from which termini of DNA sequences, Std: strand of reads\n")
+  out.write("Chr\tEnd\tStd\tPos\t%s\n" % ("\t".join(mapdamage.seq.letters)))
+  for ref in sorted(comp):
+    for end in comp[ref].keys():
+      for std in comp[ref][end].keys():
+        if end == '5p':
+          for i in range((-1*op.around), (op.length+1)):
+            out.write("%s\t%s\t%s\t%d" % (ref, end, std, i))
+            for base in mapdamage.seq.letters:
+              out.write("\t%d" % comp[ref][end][std][base][i])
+            out.write("\n")
+        else:
+          for i in range((-1*op.length), (op.around+1)):
+            out.write("%s\t%s\t%s\t%d" % (ref, end, std, i))
+            for base in mapdamage.seq.letters:
+              out.write("\t%d" % comp[ref][end][std][base][i])
+            out.write("\n")
+
+
+
diff --git a/mapdamage/tables.pyc b/mapdamage/tables.pyc
new file mode 100644
index 0000000..5212815
Binary files /dev/null and b/mapdamage/tables.pyc differ
diff --git a/mapdamage/version.py b/mapdamage/version.py
new file mode 100644
index 0000000..fd7191a
--- /dev/null
+++ b/mapdamage/version.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+
+__version__ = "0.3.7"
+
diff --git a/mapdamage/version.pyc b/mapdamage/version.pyc
new file mode 100644
index 0000000..c0fb223
Binary files /dev/null and b/mapdamage/version.pyc differ
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..4422efa
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+
+from distutils.core import setup
+
+setup(
+    name='mapdamage',
+    version='0.3.7',
+    author='Aurelien Ginolhac',
+    author_email='aginolhac@snm.ku.dk',
+    packages=['mapdamage'],
+    package_data={'mapdamage': ['Rscripts/*.R']},
+    scripts=['bin/mapDamage'],
+    url='http://',
+    license='LICENSE.txt',
+    description='mapDamage is a python script that tracks DNA damage pattern among ancient DNA sequencing reads generated by Next-Generation Sequencing platforms',
+    long_description=open('README.txt').read(),
+)