diff --git a/.gitignore b/.gitignore index 1e536d7..07db275 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ # Autotools cruft (NEVER check this into VCS) Makefile Makefile.in +.version aclocal.m4 .deps/ .dirstamp @@ -17,37 +18,36 @@ configure depcomp install-sh missing +py-compile +ar-lib +/shorah # generated tarball shorah*.tar.bz2 # executables b2w -contain diri_sampler fil -freqEst # samtools lib libbam.a -# python/perl scripts are generated by Autoconf from *.{py,pl}.in -# files, replacing the shebang in the process -fas2read.pl -src/scripts/amplian.py -src/scripts/dec.py -src/scripts/mm.py -src/scripts/shorah.py -src/scripts/snv.py - # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class -py-compile # OS X temporary files that should never be committed .DS_Store *.swp *.lock profile + +# other intermediate stuff +build +.eggs +src/ShoRAH.egg-info/ + +# shorah output +shorah.log diff --git a/.travis.yml b/.travis.yml index 699457a..a6bc46a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,17 @@ -language: c cpp +language: c cpp python + +python: + - "3.6" # GSL and Autotools needed before_install: - sudo add-apt-repository ppa:ondrej/autotools -y - sudo apt-get update -qq - sudo apt-get install -y gsl-bin libgsl0-dev autoconf automake-1.15 pkg-config m4 + - source ~/virtualenv/python3.6/bin/activate compiler: - gcc before_script: autoreconf -vif -script: ./configure && make -j1 && make distcheck +script: ./configure && make -j1 distcheck diff --git a/CHANGELOG b/CHANGELOG.md similarity index 100% rename from CHANGELOG rename to CHANGELOG.md diff --git a/Makefile.am b/Makefile.am index d96df09..3e0ef0f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,48 +1,38 @@ # install scripts in ${bindir} bin_SCRIPTS = \ - src/scripts/amplian.py \ - src/scripts/dec.py \ - src/scripts/mm.py \ - src/scripts/shorah.py \ - src/scripts/snv.py \ - fas2read.pl + shorah CLEANFILES = $(bin_SCRIPTS) EXTRA_DIST = \ - src/scripts/amplian.py.in \ - src/scripts/dec.py.in \ - src/scripts/mm.py.in \ - src/scripts/shorah.py.in \ - src/scripts/snv.py.in \ - src/scripts/fas2read.pl.in \ - src/samtools/COPYING + .version \ + meson.build \ + setup.py \ + src/meson.build \ + src/cpp/meson.build \ + src/samtools/COPYING \ + src/samtools/meson.build +BUILT_SOURCES = .version -fas2read.pl: src/scripts/fas2read.pl.in Makefile - $(SED) 's,[@]perldir[@],$(perldir),g' $< > $@ +shorah: $(top_srcdir)/src/shorah/__main__.py Makefile + $(SED) 's,#!/usr/bin/env python3,#!/usr/bin/env $(PYTHON),g' $(top_srcdir)/src/shorah/__main__.py > $@ # actual C/C++ programs doing the real computation -bin_PROGRAMS = diri_sampler contain freqEst b2w fil +bin_PROGRAMS = diri_sampler b2w fil diri_sampler_SOURCES = \ - src/shorah/data_structures.hpp \ - src/shorah/dpm_sampler.hpp \ - src/shorah/dpm_sampler.cpp + src/cpp/data_structures.hpp \ + src/cpp/dpm_sampler.hpp \ + src/cpp/dpm_sampler.cpp diri_sampler_CPPFLAGS = $(GSL_CFLAGS) diri_sampler_LDADD = $(GSL_LIBS) -contain_SOURCES = \ - src/shorah/contain.cpp - -freqEst_SOURCES = \ - src/shorah/freqEst.cpp - b2w_SOURCES = \ - src/shorah/b2w.cpp + src/cpp/b2w.cpp b2w_CXXFLAGS = $(PTHREAD_CFLAGS) b2w_CPPFLAGS = -I$(top_srcdir)/src/samtools b2w_LDADD = libbam.a $(ZLIB_LIBS) $(PTHREAD_LIBS) fil_SOURCES = \ - src/shorah/fil.cpp + src/cpp/fil.cpp fil_CXXFLAGS = $(PTHREAD_CFLAGS) fil_CPPFLAGS = $(GSL_CFLAGS) -I$(top_srcdir)/src/samtools fil_LDADD = libbam.a $(ZLIB_LIBS) $(GSL_LIBS) $(PTHREAD_LIBS) @@ -85,19 +75,14 @@ libbam_a_SOURCES = \ libbam_a_CPPFLAGS = $(ZLIB_CFLAGS) 
-D_USE_KNETFILE libbam_a_CFLAGS = $(PTHREAD_CFLAGS) -# perl modules -dist_perl_DATA = \ - src/perl/DNA.pm \ - src/perl/NikoUtil.pm \ - src/perl/ReadUtil.pm \ - src/perl/Util.pm - # python modules -python_PYTHON = \ - src/python/shorah_dec.py \ - src/python/shorah_matching.py \ - src/python/shorah_mm.py \ - src/python/shorah_snv.py +dist_pkgpython_PYTHON = \ + src/shorah/__main__.py \ + src/shorah/amplicon.py \ + src/shorah/cli.py \ + src/shorah/shorah_snv.py \ + src/shorah/shotgun.py +pkgpython_PYTHON = .version # example files dist_pkgdata_DATA = \ @@ -110,3 +95,9 @@ dist_ampliconpkgdata_DATA = \ examples/amplicon_test/ampli_sorted.bam \ examples/amplicon_test/amplicon_reads.fastq \ examples/amplicon_test/reference.fasta + +# version handling +$(top_srcdir)/.version: + echo $(VERSION) > $@-t && mv $@-t $@ +dist-hook: + echo $(VERSION) > $(distdir)/.tarball-version diff --git a/README.md b/README.md index 1883942..ef828fe 100755 --- a/README.md +++ b/README.md @@ -132,10 +132,10 @@ These can be run one after the other, or one can invoke `shorah.py`, that runs the whole process from bam file to frequency estimation and SNV calling. ## Coding style -All changes to the C++ code in `src/shorah` should always be formatted according to the included `.clang-format` style by doing +All changes to the C++ code in `src/cpp` should always be formatted according to the included `.clang-format` style by doing - clang-format -style=file -i src/shorah/*.[ch]pp + clang-format -style=file -i src/cpp/*.[ch]pp in the root of the repository. -All changes to the python code in `src/python` and `src/scripts` should always be formatted conforming to the [PEP 8](https://www.python.org/dev/peps/pep-0008/) style guide. To this end, we advise to use [autopep8](https://pypi.python.org/pypi/autopep8). +All changes to the python code in `src/shorah` should always be formatted conforming to the [PEP 8](https://www.python.org/dev/peps/pep-0008/) style guide. To this end, we advise to use [autopep8](https://pypi.python.org/pypi/autopep8). diff --git a/build-aux/git-version-gen b/build-aux/git-version-gen new file mode 100755 index 0000000..6d073fc --- /dev/null +++ b/build-aux/git-version-gen @@ -0,0 +1,227 @@ +#!/bin/sh +# Print a version string. +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 2007-2018 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# This script is derived from GIT-VERSION-GEN from GIT: https://git-scm.com/. +# It may be run two ways: +# - from a git repository in which the "git describe" command below +# produces useful output (thus requiring at least one signed tag) +# - from a non-git-repo directory containing a .tarball-version file, which +# presumes this script is invoked like "./git-version-gen .tarball-version". 
+ +# In order to use intra-version strings in your project, you will need two +# separate generated version string files: +# +# .tarball-version - present only in a distribution tarball, and not in +# a checked-out repository. Created with contents that were learned at +# the last time autoconf was run, and used by git-version-gen. Must not +# be present in either $(srcdir) or $(builddir) for git-version-gen to +# give accurate answers during normal development with a checked out tree, +# but must be present in a tarball when there is no version control system. +# Therefore, it cannot be used in any dependencies. GNUmakefile has +# hooks to force a reconfigure at distribution time to get the value +# correct, without penalizing normal development with extra reconfigures. +# +# .version - present in a checked-out repository and in a distribution +# tarball. Usable in dependencies, particularly for files that don't +# want to depend on config.h but do want to track version changes. +# Delete this file prior to any autoconf run where you want to rebuild +# files to pick up a version string change; and leave it stale to +# minimize rebuild time after unrelated changes to configure sources. +# +# As with any generated file in a VC'd directory, you should add +# /.version to .gitignore, so that you don't accidentally commit it. +# .tarball-version is never generated in a VC'd directory, so needn't +# be listed there. +# +# Use the following line in your configure.ac, so that $(VERSION) will +# automatically be up-to-date each time configure is run (and note that +# since configure.ac no longer includes a version string, Makefile rules +# should not depend on configure.ac for version updates). +# +# AC_INIT([GNU project], +# m4_esyscmd([build-aux/git-version-gen .tarball-version]), +# [bug-project@example]) +# +# Then use the following lines in your Makefile.am, so that .version +# will be present for dependencies, and so that .version and +# .tarball-version will exist in distribution tarballs. +# +# EXTRA_DIST = $(top_srcdir)/.version +# BUILT_SOURCES = $(top_srcdir)/.version +# $(top_srcdir)/.version: +# echo $(VERSION) > $@-t && mv $@-t $@ +# dist-hook: +# echo $(VERSION) > $(distdir)/.tarball-version + + +me=$0 + +version="git-version-gen $scriptversion + +Copyright 2011 Free Software Foundation, Inc. +There is NO warranty. You may redistribute this software +under the terms of the GNU General Public License. +For more information about these matters, see the files named COPYING." + +usage="\ +Usage: $me [OPTION]... \$srcdir/.tarball-version [TAG-NORMALIZATION-SED-SCRIPT] +Print a version string. + +Options: + + --prefix PREFIX prefix of git tags (default 'v') + --fallback VERSION + fallback version to use if \"git --version\" fails + + --help display this help and exit + --version output version information and exit + +Running without arguments will suffice in most cases." + +prefix=v +fallback= + +while test $# -gt 0; do + case $1 in + --help) echo "$usage"; exit 0;; + --version) echo "$version"; exit 0;; + --prefix) shift; prefix=${1?};; + --fallback) shift; fallback=${1?};; + -*) + echo "$0: Unknown option '$1'." >&2 + echo "$0: Try '--help' for more information." >&2 + exit 1;; + *) + if test "x$tarball_version_file" = x; then + tarball_version_file="$1" + elif test "x$tag_sed_script" = x; then + tag_sed_script="$1" + else + echo "$0: extra non-option argument '$1'." 
>&2 + exit 1 + fi;; + esac + shift +done + +if test "x$tarball_version_file" = x; then + echo "$usage" + exit 1 +fi + +tag_sed_script="${tag_sed_script:-s/x/x/}" + +nl=' +' + +# Avoid meddling by environment variable of the same name. +v= +v_from_git= + +# First see if there is a tarball-only version file. +# then try "git describe", then default. +if test -f $tarball_version_file +then + v=`cat $tarball_version_file` || v= + case $v in + *$nl*) v= ;; # reject multi-line output + [0-9]*) ;; + *) v= ;; + esac + test "x$v" = x \ + && echo "$0: WARNING: $tarball_version_file is missing or damaged" 1>&2 +fi + +if test "x$v" != x +then + : # use $v +# Otherwise, if there is at least one git commit involving the working +# directory, and "git describe" output looks sensible, use that to +# derive a version string. +elif test "`git log -1 --pretty=format:x . 2>&1`" = x \ + && v=`git describe --abbrev=4 --match="$prefix*" HEAD 2>/dev/null \ + || git describe --abbrev=4 HEAD 2>/dev/null` \ + && v=`printf '%s\n' "$v" | sed "$tag_sed_script"` \ + && case $v in + $prefix[0-9]*) ;; + *) (exit 1) ;; + esac +then + # Is this a new git that lists number of commits since the last + # tag or the previous older version that did not? + # Newer: v6.10-77-g0f8faeb + # Older: v6.10-g0f8faeb + vprefix=`expr "X$v" : 'X\(.*\)-g[^-]*$'` || vprefix=$v + case $vprefix in + *-*) : git describe is probably okay three part flavor ;; + *) + : git describe is older two part flavor + # Recreate the number of commits and rewrite such that the + # result is the same as if we were using the newer version + # of git describe. + vtag=`echo "$v" | sed 's/-.*//'` + commit_list=`git rev-list "$vtag"..HEAD 2>/dev/null` \ + || { commit_list=failed; + echo "$0: WARNING: git rev-list failed" 1>&2; } + numcommits=`echo "$commit_list" | wc -l` + v=`echo "$v" | sed "s/\(.*\)-\(.*\)/\1-$numcommits-\2/"`; + test "$commit_list" = failed && v=UNKNOWN + ;; + esac + + # Change the penultimate "-" to ".", for version-comparing tools. + # Remove the "g" to save a byte. + v=`echo "$v" | sed 's/-\([^-]*\)-g\([^-]*\)$/.\1-\2/'`; + v_from_git=1 +elif test "x$fallback" = x || git --version >/dev/null 2>&1; then + v=UNKNOWN +else + v=$fallback +fi + +v=`echo "$v" |sed "s/^$prefix//"` + +# Test whether to append the "-dirty" suffix only if the version +# string we're using came from git. I.e., skip the test if it's "UNKNOWN" +# or if it came from .tarball-version. +if test "x$v_from_git" != x; then + # Don't declare a version "dirty" merely because a timestamp has changed. + git update-index --refresh > /dev/null 2>&1 + + dirty=`exec 2>/dev/null;git diff-index --name-only HEAD` || dirty= + case "$dirty" in + '') ;; + *) # Append the suffix only if there isn't one already. + case $v in + *-dirty) ;; + *) v="$v-dirty" ;; + esac ;; + esac +fi + +# Omit the trailing newline, so that m4_esyscmd can use the result directly. 
+printf %s "$v" + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/configure.ac b/configure.ac index 058fcc1..ed793c5 100644 --- a/configure.ac +++ b/configure.ac @@ -21,10 +21,10 @@ dnl https://github.com/cbg-ethz/shorah/releases dnl Initialise Autoconf AC_PREREQ([2.69]) AC_INIT( - [ShoRAH], - [1.1.1], + [shorah], + m4_esyscmd([build-aux/git-version-gen .tarball-version]), [osvaldo.zagordi@gmail.com]) -AC_CONFIG_SRCDIR([src/shorah/freqEst.cpp]) +AC_CONFIG_SRCDIR([src/cpp/dpm_sampler.cpp]) AC_CONFIG_MACRO_DIR([m4]) @@ -64,6 +64,7 @@ AC_PROG_SED AC_PROG_CC AC_PROG_CXX AC_PROG_RANLIB +AM_PROG_AR AC_LANG([C]) AX_COMPILER_VENDOR @@ -142,23 +143,14 @@ AM_SILENT_RULES([yes]) dnl ====================================== dnl Initialise Perl and Python directories dnl ====================================== -AM_PATH_PYTHON -AC_ARG_VAR([perldir], [path in which to install perl modules, '${libdir}/perl' by default]) -: ${perldir="${libdir}/perl"} +AM_PATH_PYTHON([3.5]) dnl ======== dnl Finalise dnl ======== -AC_CONFIG_FILES([ - Makefile - src/scripts/amplian.py - src/scripts/dec.py - src/scripts/mm.py - src/scripts/shorah.py - src/scripts/snv.py - ]) +AC_CONFIG_FILES([Makefile]) AC_OUTPUT @@ -169,12 +161,15 @@ dnl ============================ AC_MSG_RESULT([ ${PACKAGE_NAME} ${VERSION} - CC: ${CC} (${ax_cv_c_compiler_vendor}, ${ax_cv_c_compiler_version}) - CFLAGS: ${CFLAGS} + CC : ...................... ${CC} (${ax_cv_c_compiler_vendor}, ${ax_cv_c_compiler_version}) + CFLAGS : .................. ${CFLAGS} + + CXX : ..................... ${CXX} (${ax_cv_cxx_compiler_vendor}, ${ax_cv_cxx_compiler_version}) + CXXFLAGS : ................ ${CXXFLAGS} - CXX: ${CXX} (${ax_cv_cxx_compiler_vendor}, ${ax_cv_cxx_compiler_version}) - CXXFLAGS: ${CXXFLAGS} + CPPFLAGS : ................ ${CPPFLAGS} + LDFLAGS : ................. ${LDFLAGS} - CPPFLAGS: ${CPPFLAGS} - LDFLAGS: ${LDFLAGS} + PYTHON : .................. ${PYTHON} + Python version : .......... ${PYTHON_VERSION} ]) diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..e2ae37c --- /dev/null +++ b/meson.build @@ -0,0 +1,18 @@ +project('shorah', + ['c', 'cpp'], + version : '1.9.95', + default_options : [ + 'buildtype=release', + 'default_library=shared', + 'warning_level=3', + 'b_ndebug=if-release'], + license : 'GPL-2+') + +# info +as_version = meson.project_version() +ver_arr = as_version.split('.') +as_major_version = ver_arr[0] +as_minor_version = ver_arr[1] +as_micro_version = ver_arr[2] + +subdir('src') diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ae76819 --- /dev/null +++ b/setup.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +import glob +import sys +import os +import shutil + +from setuptools import setup, find_packages +from setuptools.command.test import test as TestCommand +from setuptools.command.develop import develop +from setuptools.command.install import install + +def move_files(): + """Identify the build directory and copy python files and executables into src/shorah.""" + try: + main_file = glob.glob('*/__main__.py')[0] + except IndexError: + sys.exit('No __main__.py found. 
Build first.') + build_dir = os.path.dirname(main_file) + for py_file in glob.glob('%s/*py' % build_dir): + shutil.copy(py_file, 'src/shorah/') + for exe in ['b2w', 'diri_sampler', 'fil']: + ppp = shutil.copy('%s/src/cpp/%s' % (build_dir, exe), 'src/shorah/bin/') + print(ppp) + +class CustomDevelop(develop): + """Subclassing develop to install files in the correct location.""" + def run(self): + move_files() + develop.run(self) + + +class CustomInstall(install): + """Subclassing develop to install files in the correct location.""" + def run(self): + move_files() + install.run(self) + +class PyTest(TestCommand): + user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")] + + def initialize_options(self): + TestCommand.initialize_options(self) + self.pytest_args = [] + + def finalize_options(self): + TestCommand.finalize_options(self) + self.test_args = [] + self.test_suite = True + + def run_tests(self): + #import here, cause outside the eggs aren't loaded + import pytest + errno = pytest.main(self.pytest_args) + sys.exit(errno) + + +setup( + use_scm_version=True, + setup_requires=['setuptools_scm', 'setuptools_scm_git_archive'], + install_requires=['setuptools_scm'], + tests_require=['pytest', 'flake8', 'pep257'], + cmdclass={ + 'test': PyTest, + 'install': CustomInstall, + 'develop': CustomDevelop + }, + name='ShoRAH', + description='SHOrt Reads Assembly into Haplotypes', + url='http://github.com/cbg-ethz/shorah', + # author='Osvaldo Zagordi', + # author_email='firstname.lastname@gmail.com', + packages=find_packages('src'), # include all packages under src + package_dir={'': 'src'}, # tell setuptools packages are under src + entry_points={ + 'console_scripts': ['shorah = shorah.cli:main'] + }, + license='GPL 3.0', + long_description=''' + ShoRAH is designed to analyse genetically heterogeneous samples. It provides error correction, + local haplotype reconstruction, and estimation of the frequency of the different genetic variants + present in a mixed sample. + ''', + project_urls={ + 'Documentation': 'http://cbg-ethz.github.io/shorah', + 'Source': 'https://github.com/cbg-ethz/shorah', + }, + classifiers=[ + # How mature is this project? Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable + 'Development Status :: 5 - Production/Stable', + # Indicate who your project is intended for + 'Intended Audience :: Developers', + 'Topic :: Scientific/Engineering :: Bio-Informatics', + 'Topic :: Scientific/Engineering :: Medical Science Apps.', + # Pick your license as you wish (should match "license" above) + 'License :: GPL 3.0', + # Specify the Python versions you support here. In particular, ensure + # that you indicate whether you support Python 2, Python 3 or both. 
+ # 'Programming Language :: Python :: 2', + # 'Programming Language :: Python :: 2.6', + # 'Programming Language :: Python :: 2.7', + # 'Programming Language :: Python :: 3', + # 'Programming Language :: Python :: 3.2', + # 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6'] +) diff --git a/src/shorah/b2w.cpp b/src/cpp/b2w.cpp similarity index 98% rename from src/shorah/b2w.cpp rename to src/cpp/b2w.cpp index 2733c3a..4a50de8 100644 --- a/src/shorah/b2w.cpp +++ b/src/cpp/b2w.cpp @@ -39,6 +39,8 @@ adapted from samtools/calDep.c #include "faidx.h" #include "sam.h" +#define UNUSED(expr) (void)(expr) + // data for fetch_func and pileup_func typedef struct { @@ -79,7 +81,8 @@ static int fetch_func1(const bam1_t* b, void* data) overlap = tmp->end - Rstart + 1; } if (overlap >= tmp->min_overlap) { - char rd[wSize + 1]; // store blank read segment of 'N's + //char rd[wSize + 1]; store blank read segment of 'N's + char *rd = new char[wSize + 1]; for (int i = 0; i < wSize; i++) { rd[i] = 'N'; } @@ -93,6 +96,7 @@ static int fetch_func1(const bam1_t* b, void* data) << '>' << bam1_qname(b) << ' ' << Rstart << '\n'; tmp->outFile // write read << rd << "\n"; + delete [] rd; } } return 0; @@ -102,6 +106,8 @@ static int fetch_func1(const bam1_t* b, void* data) static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t* pl, void* data) { tmpstruct_t* tmp = (tmpstruct_t*)data; + UNUSED(tid); + UNUSED(n); int i = pos - tmp->beg; // base position within window char* mm = (char*)tmp->rd + i; // location of base if (pos >= tmp->beg && pos <= tmp->end) { // make sure read position within window @@ -199,6 +205,7 @@ int main(int argc, char* argv[]) tmp.fai = fai_load(argv[optind + 1]); int iBuild = bam_index_build(argv[optind]); // generate bam index + UNUSED(iBuild); bam_index_t* idx; idx = bam_index_load(argv[optind]); // load bam index if (idx == 0) { diff --git a/src/shorah/data_structures.hpp b/src/cpp/data_structures.hpp similarity index 100% rename from src/shorah/data_structures.hpp rename to src/cpp/data_structures.hpp diff --git a/src/shorah/dpm_sampler.cpp b/src/cpp/dpm_sampler.cpp similarity index 99% rename from src/shorah/dpm_sampler.cpp rename to src/cpp/dpm_sampler.cpp index f6e72cc..71be74d 100644 --- a/src/shorah/dpm_sampler.cpp +++ b/src/cpp/dpm_sampler.cpp @@ -810,7 +810,7 @@ void build_assignment(std::ofstream& out_file) if (cn->next->size == 0) { #ifndef NDEBUG - printf("removing some node... p=%p\n", cn->next); + printf("removing some node... 
p=%p\n", (void *)cn->next); #endif remove_comp(&cn->next); } else { @@ -1300,7 +1300,7 @@ ssret* sample_class(unsigned int i, unsigned int step) temp = new int[J / 10 + 1]; #ifndef NDEBUG for (removed = 0; removed < q; removed++) - printf("c_ptr[%d]=%p\n", removed, c_ptr[removed]); + printf("c_ptr[%d]=%p\n", removed, (void *)c_ptr[removed]); removed = 0; printf("---------------------------------------------------\n"); printf("-----------> sampling class for %ith read <----------\n", i); @@ -1496,7 +1496,7 @@ ssret* sample_class(unsigned int i, unsigned int step) #ifndef NDEBUG for (j = 0; j <= st; j++) - printf("with P[%i] = %e to class %p\n", j, P[j], cl_ptr[j]); + printf("with P[%i] = %e to class %p\n", j, P[j], (void *)cl_ptr[j]); #endif g = gsl_ran_discrete_preproc(st + 1, P); @@ -1518,7 +1518,7 @@ ssret* sample_class(unsigned int i, unsigned int step) if (from_class == to_class) { #ifndef NDEBUG - printf("from %p to itself\n", from_class); + printf("from %p to itself\n", (void *)from_class); #endif // free(P); @@ -1532,7 +1532,7 @@ ssret* sample_class(unsigned int i, unsigned int step) else if (to_class != NULL) { #ifndef NDEBUG - printf("moving the read from %p to %p\n", from_class, to_class); + printf("moving the read from %p to %p\n", (void *)from_class, (void *)to_class); #endif c_ptr[i] = @@ -1554,7 +1554,7 @@ ssret* sample_class(unsigned int i, unsigned int step) else if (to_class == NULL) { #ifndef NDEBUG - printf("moving %i to a new class from %p\n", i, from_class); + printf("moving %i to a new class from %p\n", i, (void *)from_class); #endif remove_read(search_read(&from_class->rlist, i)); diff --git a/src/shorah/dpm_sampler.hpp b/src/cpp/dpm_sampler.hpp similarity index 100% rename from src/shorah/dpm_sampler.hpp rename to src/cpp/dpm_sampler.hpp diff --git a/src/shorah/fil.cpp b/src/cpp/fil.cpp similarity index 100% rename from src/shorah/fil.cpp rename to src/cpp/fil.cpp diff --git a/src/cpp/meson.build b/src/cpp/meson.build new file mode 100644 index 0000000..5295438 --- /dev/null +++ b/src/cpp/meson.build @@ -0,0 +1,44 @@ +message('Is popcnt available?') +code = ''' + #include + int main() { return _mm_popcnt_u64(0); }''' +cpp = meson.get_compiler('cpp') +success = cpp.links(code, args : '-mpopcnt', name : 'pocnt') +if success + message('popcnt intrinsics are available') + extra_cpp_args = ['-mpopcnt'] +else + message('popcnt is broken') + extra_cpp_args = [] +endif + +gsl_dep = dependency('gsl') +thread_dep = dependency('threads') + +dpm_exe = executable( + 'diri_sampler', files([ + 'data_structures.hpp', + 'dpm_sampler.hpp', + 'dpm_sampler.cpp']), + dependencies : [ gsl_dep, thread_dep ], + include_directories : bam_inc, + cpp_args : extra_cpp_args, + install : true) + +b2w_exe = executable( + 'b2w', files([ + 'b2w.cpp']), + dependencies : thread_dep, + include_directories : bam_inc, + cpp_args : extra_cpp_args, + link_with : bam_library, + install : true) + +fil_exe = executable( + 'fil', files([ + 'fil.cpp']), + dependencies : [ gsl_dep, thread_dep ], + include_directories : bam_inc, + cpp_args : extra_cpp_args, + link_with : bam_library, + install : true) diff --git a/src/meson.build b/src/meson.build new file mode 100644 index 0000000..b390df9 --- /dev/null +++ b/src/meson.build @@ -0,0 +1,2 @@ +subdir('samtools') +subdir('cpp') diff --git a/src/perl/DNA.pm b/src/perl/DNA.pm deleted file mode 100755 index 0ff403a..0000000 --- a/src/perl/DNA.pm +++ /dev/null @@ -1,761 +0,0 @@ -# DNA.pm - Utilities - -# Copyright 2007, 2008, 2009 -# Niko Beerenwinkel, -# Nicholas 
Eriksson, -# Moritz Gerstung, -# Lukas Geyrhofer, -# Osvaldo Zagordi, -# ETH Zurich - -# This file is part of ShoRAH. -# ShoRAH is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# ShoRAH is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with ShoRAH. If not, see . - -package DNA; - -use strict; -use NikoUtil; -use Util; -use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION); -use Exporter; - -$VERSION = 1.00; -@ISA = qw(Exporter); - -# autoexport: -@EXPORT = qw( - translate - makeCodons - reverseCodons - revComp - NW SW - generalizedSW - blastAlign - correctGaps - correctGaps2 - ambigString - ); -# export on request: -@EXPORT_OK = qw(); - -sub translate { - # input: - # dna string - # codon hash - # optional: if ambiguous, should we output just one aa or - # a whole block [CKEIF] ?? - - my $dna = shift; - my $codon = shift; - my $JUSTONEAA = 0; - if (@_ > 0) { - $JUSTONEAA = shift; - } - my $pro = ''; - my $cod; - my $choices = 1; - my $ambig = 0; - - for(my $i=0; $i < length($dna); $i+=3) { - $cod = substr($dna,$i,3); - if (length($cod) == 3) { - if (defined $codon->{$cod}) { - $pro .= $codon->{$cod}; - } else { - my $l = resolveCodon($cod); - my %poss; - foreach (@$l) { - if (defined ($codon->{$_})) { - $poss{$codon->{$_}} = 1; - } else { - $poss{' '} = 1; - } - } - my $tmp = join('', sort(keys(%poss))); - if (keys(%poss) > 1) { - if ($JUSTONEAA) { - $pro .= substr($tmp,0,1); - } else { - $pro .= '[' . $tmp . ']'; - } - $choices *= keys(%poss); - $ambig += 1; - } else { - $pro .= $tmp; - } -#print "#error : don't know codon ", substr($dna,$i,3),"\n"; - } - } - } - #print "$choices choices for protein\n"; - #print "$ambig ambiguous codons\n"; - #print "$pro\n"; - return $pro; -} - -sub resolveCodon { - #take codon with ambiguity codes and output list of all codons that could - #give it - my $c = shift; - my @l = split '', $c; - my @ans = ('', '', ''); - my @codons; - foreach my $p (0..2) { - if ($l[$p] eq 'A') { - $ans[$p] .= 'A'; - } elsif ($l[$p] eq 'C') { - $ans[$p] .= 'C'; - } elsif ($l[$p] eq 'G') { - $ans[$p] .= 'G'; - } elsif ($l[$p] eq 'T') { - $ans[$p] .= 'T'; - } elsif ($l[$p] eq 'R') { - $ans[$p] .= 'AG'; - } elsif ($l[$p] eq 'Y') { - $ans[$p] .= 'CT'; - } elsif ($l[$p] eq 'K') { - $ans[$p] .= 'GT'; - } elsif ($l[$p] eq 'M') { - $ans[$p] .= 'AC'; - } elsif ($l[$p] eq 'S') { - $ans[$p] .= 'CG'; - } elsif ($l[$p] eq 'W') { - $ans[$p] .= 'AT'; - } elsif ($l[$p] eq 'D') { - $ans[$p] .= 'AGT'; - } elsif ($l[$p] eq 'H') { - $ans[$p] .= 'ACT'; - } elsif ($l[$p] eq 'B') { - $ans[$p] .= 'CGT'; - } elsif ($l[$p] eq 'V') { - $ans[$p] .= 'ACG'; - } elsif ($l[$p] eq 'X') { - $ans[$p] .= 'ACGT'; - } elsif ($l[$p] eq 'N') { - $ans[$p] .= 'ACGT'; - } - } - #print "$c -> @ans\n"; - foreach my $i (0 .. length($ans[0])-1) { - foreach my $j (0 .. length($ans[1])-1) { - foreach my $k (0 .. length($ans[2])-1) { - push(@codons, substr($ans[0],$i,1) . substr($ans[1],$j,1) . 
substr($ans[2],$k,1)); - } - } - } - #print "Codon $c -> @codons\n"; - - return \@codons; -} - -sub makeCodons { - my %codon; - $codon{GCT} = $codon{GCC} = $codon{GCA} = $codon{GCG} = 'A'; - $codon{TTA} = $codon{TTG} = $codon{CTT} = $codon{CTC} = $codon{CTA} = $codon{CTG} = 'L'; - $codon{CGT} = $codon{CGC} = $codon{CGA} = $codon{CGG} = $codon{AGA} = $codon{AGG} = 'R'; - $codon{AAA} = $codon{AAG} = 'K'; - $codon{AAT} = $codon{AAC} = 'N'; - $codon{ATG} = 'M'; - $codon{GAT} = $codon{GAC} = 'D'; - $codon{TTT} = $codon{TTC} = 'F'; - $codon{TGT} = $codon{TGC} = 'C'; - $codon{CCT} = $codon{CCC} = $codon{CCA} = $codon{CCG} = 'P'; - $codon{CAA} = $codon{CAG} = 'Q'; - $codon{TCT} = $codon{TCC} = $codon{TCA} = $codon{TCG} = $codon{AGT} = $codon{AGC} = 'S'; - $codon{GAA} = $codon{GAG} = 'E'; - $codon{ACT} = $codon{ACC} = $codon{ACA} = $codon{ACG} = 'T'; - $codon{GGT} = $codon{GGC} = $codon{GGA} = $codon{GGG} = 'G'; - $codon{TGG} = 'W'; - $codon{CAT} = $codon{CAC} = 'H'; - $codon{TAT} = $codon{TAC} = 'Y'; - $codon{ATT} = $codon{ATC} = $codon{ATA} = 'I'; - $codon{GTT} = $codon{GTC} = $codon{GTA} = $codon{GTG} = 'V'; - #$codon{ATG} = '+'; - $codon{TAG} = $codon{TGA} = $codon{TAA} = '*'; - return \%codon; -} -sub reverseCodons { - my $codon = shift; - my %a2c; - #$a2c{A} = [GCT, GCC, GCA, GCG] - foreach my $cod (keys(%$codon)) { - push(@{$a2c{$$codon{$cod}}},$cod); - } - return \%a2c; -} - -sub revComp { - my $seq = $_[0]; - my $seqRC = ""; - my $I; - - for($I = length($seq)-1; $I >= 0; $I--) { - my $base = substr($seq, $I, 1); - if($base eq "A") { - $seqRC .= "T"; - } elsif($base eq "C") { - $seqRC .= "G"; - } elsif($base eq "G") { - $seqRC .= "C"; - } elsif($base eq "T") { - $seqRC .= "A"; - } else { - $seqRC .= $base; - } - } - return($seqRC); -} - - -sub trans { - my $x = shift; - if ($x eq 'A') { - return 0; - } elsif ($x eq 'C') { - return 1; - } elsif ($x eq 'G') { - return 2; - } else { - return 3; - } -} - -sub NW { - # be careful if using... .fixes went into SW - my $a = shift; - my $b = shift; - my $d; - my $S; - if (@_ > 0) { - $d = shift; #linear gap - $S = shift; #[][] = costs - } else { - $d = -400; - $S = [[91,-114,-31,-123],[-114,100,-125,-31],[-31,-125,100,-114],[-123,-31,-114,91]]; - } - - my @A = split('', $a); - my @B = split('', $b); - my ($i, $j, $choice1, $choice2, $choice3); - my $AlignmentA; - my $AlignmentB; - my @F; - - foreach $i (0..@A) { - $F[$i][0] = $d * $i; - } - foreach $j (0..@B) { - $F[0][$j] = $d * $j; - } - - foreach $i (1..@A) { - foreach $j (1..@B) { - $choice1 = $F[$i-1][$j-1] + $S->[trans($A[$i-1])][trans($B[$j-1])]; - $choice2 = $F[$i-1][$j] + $d; - $choice3 = $F[$i][$j-1] + $d; - $F[$i][$j] = max($choice1,$choice2,$choice3); - } - } - - #foreach $i (0..@A) { - # foreach $j (0..@B) { - # print "$F[$i][$j] "; - # } - # print "\n"; - #} - - $AlignmentA = ''; - $AlignmentB = ''; - $i = @A; - $j = @B; - - while ($i > 0 and $j > 0) { - my $Score = $F[$i][$j]; - my $ScoreDiag = $F[$i - 1][$j - 1]; - my $ScoreUp = $F[$i][$j - 1]; - my $ScoreLeft = $F[$i - 1][$j]; - my $Ai = trans($A[$i-1]); - my $Bj = trans($B[$j-1]); - - if ($Score == $ScoreDiag + $S->[$Ai][$Bj]) { - $AlignmentA = $A[$i-1] . $AlignmentA; - $AlignmentB = $B[$j-1] . $AlignmentB; - $i--; - $j--; - } elsif ($Score == $ScoreLeft + $d) { - $AlignmentA = $A[$i-1] . $AlignmentA; - $AlignmentB = '-' . $AlignmentB; - $i--; - } else { # (Score == ScoreUp + d) - $AlignmentA = '-' . $AlignmentA; - $AlignmentB = $B[$j-1] . $AlignmentB; - $j--; - } - } - while ($i > 0) { - $AlignmentA = $A[$i-1] . 
$AlignmentA; - $AlignmentB = '-' . $AlignmentB; - $i--; - } - while ($j > 0) { - $AlignmentA = '-' . $AlignmentA; - $AlignmentB = $B[$j-1] . $AlignmentB; - $j--; - } - return ($AlignmentA,$AlignmentB); -} - - -sub SW { - - #Smith Waterman local alignment - # assume that first sequence is the short one - my $a = shift; - my $b = shift; - my $d; - my $S; - if (@_ > 0) { - $d = shift; #linear gap - $S = shift; #[][] = costs - } else { - #$d = -100; - #$S = [[91,-114,-31,-123],[-114,100,-125,-31],[-31,-125,100,-114],[-123,-31,-114,91]]; - $d = -4; - $S = [[4,-4,-4,-4],[-4,4,-4,-4],[-4,-4,4,-4],[-4,-4,-4,4]]; - } - my $DEBUG = 0; - if (@_ > 0) { - $DEBUG = shift; - } - - - my @A = split('', $a); - my @B = split('', $b); - my @tA = map{trans($_)} @A; - my @tB = map{trans($_)} @B; - #my @tA = map {trans($_)} split ('', $a); - #my @tB = map {trans($_)} split ('', $b); - my ($i, $j); - my $AlignmentA; - my $AlignmentB; - my @F; - my ($largestF, $largesti,$largestj); - $largestF = 1; - - foreach $i (0..@A) { - $F[$i][0] = 0; - } - foreach $j (0..@B) { - $F[0][$j] = 0; - } - - - my ($c1, $c2, $c3, $m); - - foreach $i (1..@A) { - foreach $j (1..@B) { - $c1 = $F[$i-1][$j-1] + $S->[$tA[$i-1]][$tB[$j-1]]; - $c2 = $F[$i-1][$j] + $d; - $c3 = $F[$i][$j-1] + $d; - $m = $c1 > $c2 ? ($c1 > $c3 ? $c1 : $c3) : ($c2 > $c3 ? $c2 : $c3); - $F[$i][$j] = $m > 0 ? $m : 0; - #$F[$i][$j] = max($choice1,$choice2,$choice3,0); - - #$F[$i][$j] = max($F[$i-1][$j-1] + $S->[$tA[$i-1]][$tB[$j-1]], - # $F[$i-1][$j] + $d, $F[$i][$j-1] + $d, 0); - - if ($F[$i][$j] >= $largestF) { - #print "max at $i $j -- $F[$i][$j]\n"; - $largestF = $F[$i][$j]; - $largesti = $i; - $largestj = $j; - } - } - } - if ($DEBUG) { - foreach my $row (@F) { - print join("\t", @$row), "\n"; - } - } - - - #now trace back from the largest value in the matrix and build up the alignment - - $AlignmentA = ''; - $AlignmentB = ''; - $i = $largesti; - $j = $largestj; - #print "Largest F at $i, $j\n"; - # - # my $gapend = (length($a) - $largesti); - # if ($gapend > 0) { - # print "adding $gapend gaps at end\n"; - # $AlignmentA = 'x' x $gapend; - # #foreach my $x ($j .. ($j + $gapend - 1)) { - # # print "$x $B[$x]\n"; - # #} - # #$AlignmentB = lc(join('',@B[$j .. ($j + $gapend-1)])); - # $AlignmentB = 'x' x $gapend; - # } - # - while ($F[$i][$j] > 0) { - if ($DEBUG) { - print "$F[$i-1][$j-1] $F[$i-1][$j]\n$F[$i][$j-1] $F[$i][$j]\n"; - } - if ($F[$i][$j] == $F[$i-1][$j-1] + $S->[$tA[$i-1]][$tB[$j-1]]) { - $AlignmentA = $A[$i-1] . $AlignmentA; - $AlignmentB = $B[$j-1] . $AlignmentB; - $i--; $j--; - } elsif ($F[$i][$j] == $F[$i-1][$j] + $d) { - $AlignmentA = $A[$i-1] . $AlignmentA; - $AlignmentB = '-' . $AlignmentB; - $i--; - } else { - $AlignmentA = '-' . $AlignmentA; - $AlignmentB = $B[$j-1] . $AlignmentB; - $j--; - } - if ($DEBUG) { - print "$AlignmentA\n$AlignmentB\n"; - print '-' x 20, "\n"; - } - } - - if (0) { - - while ($F[$i][$j] > 0) { - if ($F[$i][$j] == $F[$i-1][$j] + $d) { - $AlignmentA = $A[$i-1] . $AlignmentA; - $AlignmentB = '-' . $AlignmentB; - $i--; - } elsif ($F[$i][$j] == $F[$i][$j-1] + $d) { - $AlignmentA = '-' . $AlignmentA; - $AlignmentB = $B[$j-1] . $AlignmentB; - $j--; - } elsif ($F[$i][$j] == $F[$i-1][$j-1] + $S->[$tA[$i-1]][$tB[$j-1]]) { - $AlignmentA = $A[$i-1] . $AlignmentA; - $AlignmentB = $B[$j-1] . $AlignmentB; - $i--; $j--; - } - print "$AlignmentA\n$AlignmentB\n"; - print '-' x 20, "\n"; - } -} - #while ($i > 0) { - # $AlignmentA = '-' . $AlignmentA; - # $AlignmentB = $B[$i] . $AlignmentB; - # #$AlignmentB = 'x' . 
$AlignmentB; - # $i--; - #} - - return ($AlignmentA,$AlignmentB, $j); -} - -sub generalizedSW { - - #Smith Waterman local alignment - #align a sequence to a quasi-sequence: - # - # so $a = 'AACCTT' - # and $b = [ {A => .5, C => .5}, ... ] - # assume that first sequence is the short one - # b is a list of lists, actually, in order ACGT - # b = [ [.5, .5, 0, 0], ... ] - my $a = shift; - my $b = shift; - # ltrIdx is hash A => 0, ... T => 3 - # alphabet is list [A C G T] - my $LtrIdx = shift; - my $alphabet = shift; - my $d; - my $S; - if (@_ > 0) { - $d = shift; #linear gap - $S = shift; #[][] = costs - } else { - #$d = -100; - #$S = [[91,-114,-31,-123],[-114,100,-125,-31],[-31,-125,100,-114],[-123,-31,-114,91]]; - $d = -4; - #$S = [[4,-8,-8,-8],[-8,4,-8,-8],[-8,-8,4,-8],[-8,-8,-8,4]]; - $S = [[4,-4,-4,-4],[-4,4,-4,-4],[-4,-4,4,-4],[-4,-4,-4,4]]; - } - my $DEBUG = 0; - if (@_ > 0) { - $DEBUG = shift; - } - - - if ($DEBUG) { - print "Aligning\n$a\n"; - print "[@$_], " foreach (@$b); - print "\n"; - } - my @A = split('', $a); - my @tA = map{$LtrIdx->{$_}} @A; - - my $numB = @$b; - #my @B = split('', $b); - #my @tB = map{trans($_)} @B; - - my @B = map {$alphabet->[maxind($_)]} @$b; - - my ($i, $j); - my $AlignmentA; - my $AlignmentB; - my @F; - my ($largestF, $largesti,$largestj); - $largestF = 1; - - foreach $i (0..@A) { - $F[$i][0] = 0; - } - foreach $j (0..$numB) { - $F[0][$j] = 0; - } - - - my ($c1, $c2, $c3, $m); - - foreach $i (1..@A) { - foreach $j (1..$numB) { - #$c1 = $F[$i-1][$j-1] + $S->[$tA[$i-1]][$tB[$j-1]]; - #$c1 = F[i-1][j-1] + sum_alphabet pr(b_j-1 = K)*S->[a_i-1][K] - $c1 = $F[$i-1][$j-1] + score($tA[$i-1], $b->[$j-1], $S); - - $c2 = $F[$i-1][$j] + $d; - $c3 = $F[$i][$j-1] + $d; - #print "$i $j = max($c1 $c2 $c3)\n"; - $m = $c1 > $c2 ? ($c1 > $c3 ? $c1 : $c3) : ($c2 > $c3 ? $c2 : $c3); - $F[$i][$j] = $m > 0 ? $m : 0; - #$F[$i][$j] = max($choice1,$choice2,$choice3,0); - - #$F[$i][$j] = max($F[$i-1][$j-1] + $S->[$tA[$i-1]][$tB[$j-1]], - # $F[$i-1][$j] + $d, $F[$i][$j-1] + $d, 0); - - if ($F[$i][$j] >= $largestF) { - #print "max at $i $j -- $F[$i][$j]\n"; - $largestF = $F[$i][$j]; - $largesti = $i; - $largestj = $j; - } - } - } - if ($DEBUG) { - foreach my $row (@F) { - print join("\t", @$row), "\n"; - } - } - - - #now trace back from the largest value in the matrix and build up the alignment - - $AlignmentA = ''; - $AlignmentB = ''; - $i = $largesti; - $j = $largestj; - $i = @A; - $j = $numB; - #print "Largest F at $i, $j\n"; - # - # my $gapend = (length($a) - $largesti); - # if ($gapend > 0) { - # print "adding $gapend gaps at end\n"; - # $AlignmentA = 'x' x $gapend; - # #foreach my $x ($j .. ($j + $gapend - 1)) { - # # print "$x $B[$x]\n"; - # #} - # #$AlignmentB = lc(join('',@B[$j .. ($j + $gapend-1)])); - # $AlignmentB = 'x' x $gapend; - # } - # - while ($F[$i][$j] > 0) { - if ($DEBUG) { - print "$F[$i-1][$j-1] $F[$i-1][$j]\n$F[$i][$j-1] $F[$i][$j]\n"; - } - if ($F[$i][$j] == $F[$i-1][$j-1] + score($tA[$i-1], $b->[$j-1], $S)) { - $AlignmentA = $A[$i-1] . $AlignmentA; - $AlignmentB = $B[$j-1] . $AlignmentB; - $i--; $j--; - } elsif ($F[$i][$j] == $F[$i-1][$j] + $d) { - $AlignmentA = $A[$i-1] . $AlignmentA; - $AlignmentB = '-' . $AlignmentB; - $i--; - } else { - $AlignmentA = '-' . $AlignmentA; - $AlignmentB = $B[$j-1] . 
$AlignmentB; - $j--; - } - if ($DEBUG) { - print "$AlignmentA\n$AlignmentB\n"; - print '-' x 20, "\n"; - } - } - - return ($AlignmentA,$AlignmentB, $j); -} - -sub blastAlign { -#s2 should be a subsequence of s1 - my $s1 = shift; - my $s2 = shift; - system("echo $s1 > /tmp/seq1"); - system("echo $s2 > /tmp/seq2"); - system("bl2seq -i /tmp/seq1 -j /tmp/seq2 -p blastn |grep -A 2 Query:"); -} - -sub score { - my $a = shift; - my $b = shift; - my $S = shift; - my $ans = 0; - foreach my $baseInd (0 .. @$b-1) { - $ans += $S->[$a][$baseInd] * $b->[$baseInd]; - } - #print "Score $a, [@$b] = $ans\n"; - return $ans; -} - -sub correctGaps { - my $reads = shift; - - my @AAlist; - foreach my $rd (@$reads) { - $rd->{seq} =~ s/N/-/g; - foreach my $pos ($$rd{st} .. $$rd{end}-1) { - ${$AAlist[$pos]}{substr($$rd{seq},$pos - $$rd{st}, 1)}++; - } - } - my @corInd; - my @cons; - - foreach my $i (0..@AAlist-1) { - #my @chars = keys(%{$AAlist[$i]}); - if ((defined ${$AAlist[$i]}{'-'}) and (keys(%{$AAlist[$i]}) == 2)) { - #foreach (sort{$AAlist[$i]{$b} <=> $AAlist[$i]{$a}}keys(%{$AAlist[$i]})) { - delete ${$AAlist[$i]}{'-'}; - push(@corInd, $i); - #my @c = keys(%{$AAlist[$i]}); - my @c = sort {$AAlist[$i]{$b} <=> $AAlist[$i]{$a}} keys(%{$AAlist[$i]}); - push(@cons, $c[0]); - } - } - - foreach my $r (@$reads) { - foreach my $i (0 .. @corInd-1) { - #if r->st <= corInd[i] <= r->end - #and r->seq . substr(i-start) == '-' - #correct to cons - if (($r->{st} <= $corInd[$i]) and ($r->{end} >= $corInd[$i])) { - if (substr($r->{seq},$corInd[$i] - $r->{st},1) eq '-') { - substr($r->{seq},$corInd[$i] - $r->{st},1) = $cons[$i]; - } - } - } - } -} - - - -sub correctGaps2 { - #take a set of reads (given as two lists, @head and @seq) - #and correct all gaps for which there is only one choice - my $head = shift; - my $seq = shift; - my $starts; - my $ends; - #fix starts and ends - my $n = @$head; - my $num = 0; - foreach my $i (0 .. $n-1) { - my ($left, $right) = split /\$/, $head->[$i]; - my ($id, $start, undef) = split / /, trim($left); - my $len = length($seq->[$i]); - push(@$starts, $start); - push(@$ends, $start + $len); - } - - my @AAlist; - foreach my $rd (0 .. $n-1) { - $seq->[$rd] =~ s/N/-/g; - #if ($seq->[$rd] =~ / /) { - # print "SPACE: read $rd == .$seq->[$rd].\n"; - #} - #print "read $rd starts at $starts->[$rd] ends $ends->[$rd]\n"; - #print "length ", length($seq->[$rd]), "\n"; - foreach my $pos ($starts->[$rd] .. $ends->[$rd] - 1) { - ${$AAlist[$pos]}{substr($seq->[$rd],$pos - $starts->[$rd], 1)}++; - } - } - my @corInd; - my @cons; - - foreach my $i (0..@AAlist-1) { - #my @chars = keys(%{$AAlist[$i]}); - #print "pos $i, keys = [", join(',', keys(%{$AAlist[$i]})), "]\n"; - if ((defined ${$AAlist[$i]}{'-'}) and (keys(%{$AAlist[$i]}) == 2)) { - #foreach (sort{$AAlist[$i]{$b} <=> $AAlist[$i]{$a}}keys(%{$AAlist[$i]})) - delete ${$AAlist[$i]}{'-'}; - push(@corInd, $i); - my @c = sort {$AAlist[$i]{$b} <=> $AAlist[$i]{$a}} keys(%{$AAlist[$i]}); - push(@cons, $c[0]); - } - } - #print "correcting at @corInd\n"; - - foreach my $r (0 .. $n-1) { - foreach my $i (0 .. @corInd-1) { - #if r->st <= corInd[i] <= r->end - #and r->seq . 
substr(i-start) == '-' - #correct to cons - if (($starts->[$r] <= $corInd[$i]) and ($ends->[$r] >= $corInd[$i])) { - if (substr($seq->[$r],$corInd[$i] - $starts->[$r],1) eq '-') { - substr($seq->[$r],$corInd[$i] - $starts->[$r],1) = $cons[$i]; - #print "Correct at $corInd[$i]\n"; - $num++; - } - } - } - } - return $num; -} - -sub ambigString { - #input: $a = asdf[123] - #output @b = ({a=>1} {s=>1} {d=>1} {f=>1} {1=>1, 2=>1, 3=>1}) - # - my $a = shift; - my @b; - - my $i = 0; - - while ($i < length($a)) { - my $chunk = ''; - if (substr($a,$i,1) ne '[') { - $chunk = substr($a,$i,1); - } else { - # find end of [...] block - while (substr($a,$i,1) ne ']') { - $chunk .= substr($a,$i,1); - $i++; - } - } - $i++; - my %tmp = map {$_ => 1.0/length($chunk)} split '', $chunk; - push(@b, \%tmp); - } - return \@b; -} - - -1; diff --git a/src/perl/NikoUtil.pm b/src/perl/NikoUtil.pm deleted file mode 100755 index 665d2f3..0000000 --- a/src/perl/NikoUtil.pm +++ /dev/null @@ -1,473 +0,0 @@ -# -# NikoUtil.pm - Utilities -# Copyright 2007, 2008, 2009 -# Niko Beerenwinkel, -# Nicholas Eriksson, -# Moritz Gerstung, -# Lukas Geyrhofer, -# Osvaldo Zagordi, -# ETH Zurich - -# This file is part of ShoRAH. -# ShoRAH is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# ShoRAH is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with ShoRAH. If not, see . - -package NikoUtil; - -use strict; -use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION); -use Exporter; - -$VERSION = 1.00; -@ISA = qw(Exporter); - -# autoexport: -@EXPORT = qw( - trim - is_numeric - round - intrand discr_rand - concat_list - read_fasta read_fastas print_fasta - print_matrix transpose exp_matrix - where_clause_list - hamming - hamming_distance alphabet - makeDistMatrix - random - Nmin Nmax -); - -# export on request: -@EXPORT_OK = qw(); - -sub concat_list { # concatenate lists - - my @lists = @_; # list of refs. to lists to be concatenated - -# concatenate all lists - my @all; - for (@lists) { - push @all, @{$_}; - } - -# count occurences in concatenation - my (@union, %count); - for (@all) { - $count{"@{$_}"}++; - push @union, $_ unless ($count{"@{$_}"} > 1); - } - - return \@union; # multiple occurences appear only once -} - - - -sub is_numeric { # Cookbook p. 44 - my $val = shift || ''; # return FALSE if $val is FALSE - - return ($val =~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/); -} - - - -sub round { # round to nearest integer - - my @input = @_; - - for (@input) { - $_ = int($_ + 0.5); - } - - return wantarray ? @input : $input[0]; -} - - - -sub intrand { - -# random integer number between and including $a and $b - - my $a = shift; - my $b = shift; - - return int($a) + int(rand($b-$a+1)); -} - - - -sub discr_rand { - -# random integers in given proportion - - my @proportion = @_; - my $n = scalar(@proportion); - - my @cum; - my $i; - - my $sum = 0.0; - for $i (0 .. $n - 1) { - $sum += $proportion[$i]; - } - $cum[0] = ($proportion[0] / $sum); - for $i (1 .. 
$n - 1) { - $cum[$i] = $cum[$i-1] + ($proportion[$i] / $sum); - } - - my $r = rand(); -#print "@cum ---> $r\n"; - - $i = 0; - while (($i < $n-1) && ($cum[$i] < $r)) { - $i++; - } - - return $i; -} - - - -sub trim { # remove white spaces to the left and right - - my @input = @_; - - for (@input) { - s/^\s+//; - s/\s+$//; - } - - return wantarray ? @input : $input[0]; -} - - - -sub read_fasta { - - my $filename = shift; - - my $sep = chr(13); # CR - - open FASTA, $filename - or die "Can't open fasta file $filename!\n"; - my @lines = ; - close FASTA; - - for my $i (0 .. $#lines) { - splice (@lines, $i, 1, (split /$sep/, $lines[$i])); - } - - return (substr($lines[0], 1), join('', @lines[1..$#lines])); -} - - - -sub read_fastas { - - my $filename = shift; - - my (@header, @seq); - my $i = -1; - - open FASTA, $filename - or die "Can't open fasta file -- $filename!\n"; - - while (my $line = ) { - next if ($line =~ /^#/); - if (substr($line, 0, 1) eq ">") { # header - $i++; - $header[$i] = trim(substr($line, 1)); - } - else { # sequence - die "Error - fasta file without header lines @ $line\n" if ($i == -1); - $seq[$i] .= trim($line); - } - } - - close FASTA; - - return (\@header, \@seq); - -} - - - -sub print_fasta { - - my $head = shift; - my $seq = shift; - - my $width = 60; - - my @seq = split //, $seq; - - my $fasta = ">$head\n"; - while (scalar(@seq) > 0) { - $fasta .= join('', splice(@seq, 0, $width)) . "\n"; - } - -#return $fasta . "\n"; - return $fasta; -} - - - -sub print_matrix { - -# print matrix (tab- and newline-separated) - - my @mat = @{shift @_}; # data im $mat[][] - my $row_label = shift; # row labels == first column to print - my $col_label = shift; # column labels == first row to print - my $mat_label = shift || "\\"; # upper leftmost cell - - print $mat_label, "\t" if ($row_label); - if ($col_label) { - print join("\t", @$col_label), "\n"; - } - - for my $i (0 .. $#mat) { - print $row_label->[$i], "\t" if ($row_label); - for my $j (0 .. $#{$mat[$i]}) { - if (defined($mat[$i][$j])) { - print $mat[$i][$j], "\t"; - } - else { - print "undef\t"; - } - } - print "\n"; - } -} - - - -sub transpose { - -# transpose matrix - - my @M = @{shift @_}; - - my @Mt; - for my $i (0 .. $#M) { - for my $j (0 .. $#{$M[0]}) { - $Mt[$j][$i] = $M[$i][$j]; - } - } - - return \@Mt; -} - - - -sub exp_matrix { - - my $mat = shift; - my $base = shift || 10; - - my $emat; - - for my $i (0 .. scalar(@$mat)-1) { - for my $j (0 .. scalar(@{$mat->[0]})-1) { - $emat->[$i][$j] = $base ** $mat->[$i][$j]; - } - } - - return $emat; -} - - - -sub where_clause_list { - - my $field = shift; - my @values = @_; - - my @cond = (); - - for (@values) { - if (is_numeric($_)) { - push @cond, sprintf "%s=%d", $field, $_; - } - else { - push @cond, sprintf "%s='%s'", $field, $_; - } - } - - return join(' or ', @cond); -} - - - - -sub Nmax { - -# Maximum of a vector containing undef's - - my @input = @_; - - my ($max, $argmax); - - for my $i (0 .. $#input) { - if (defined($input[$i])) { - if (! defined($argmax)) { - $max = $input[$i]; - $argmax = $i; - } - elsif ($input[$i] > $max) { # same action, but avoids warning - $max = $input[$i]; - $argmax = $i; - } - } - } - - return wantarray ? ($max, $argmax) : $max; -} - - -sub Nmin { - -# Minimum of a vector containing undef's - - my @input = @_; - - my ($min, $argmin); - - for my $i (0 .. $#input) { - if (defined($input[$i])) { - if (! 
defined($argmin)) { - $min = $input[$i]; - $argmin = $i; - } - elsif ($input[$i] < $min) { # same action, but avoids warning - $min = $input[$i]; - $argmin = $i; - } - } - } - return wantarray ? ($min, $argmin) : $min; -} - -sub hamming_distance { - - my $X = uc(shift); - my $Y = uc(shift); - - my $min; - if (length($X) > length($Y)) { - $min = length($Y); - } else { - $min = length($X); - } - - my $comparisons = 0; - my $dist = 0; - my ($x, $y); - for my $pos (0 .. $min - 1) { - $x = substr($X,$pos,1); - $y = substr($Y,$pos,1); - if (($x ne '.') && ($y ne '.')) { - $comparisons++; - if ($x ne $y) { - $dist++; - } - } - } - - #die "Incomparable strings:\n$X\n$Y\n" if ($comparisons == 0); - if ($comparisons == 0) { - #print "Error: incomparable strings:\n$X\n$Y\n"; - return 100; - } - - return ($dist / $comparisons); -} - -sub makeDistMatrix { - my $seqs = shift; - my @ans; - my $N = @$seqs; - foreach my $i (0..$N-1) { - foreach my $j (0..$N-1) { - $ans[$i][$j] = hamming($$seqs[$i],$$seqs[$j]); - } - #print "$i\n" if ($i % 10 == 0); - } - #print "distance matrix:\n"; - #print_matrix(\@ans); - #print "\n\n"; - return \@ans; -} - -sub hamming { - #hamming distance, absolute, not caring about '.' - my $s = shift; - my $t = shift; - my $diff = 0; - # compute numer of differences between $s and $t - #die "ERROR in hamming $s\n$t\n" if (length($s) != length($t)); - my $len = Nmin(length($s), length($t)); - foreach my $i (0.. $len-1) { - if (!(substr($s,$i,1) eq substr($t,$i,1))) { - $diff++; - } - } - return $diff; -} - - - -sub alphabet { - - my $type = uc(shift); - - my (@Alphabet, @Special); - - if ($type eq 'AA') { - @Alphabet = qw(A C D E F G H I K L M N P Q R S T V W Y); - @Special = ("-", ".", "*"); - } - elsif ($type eq 'NT') { - @Alphabet = qw(A C G T); - @Special = ("-", ".", "N"); - } - elsif ($type eq '01') { - @Alphabet = qw(0 1); - @Special = ("-", "."); - } - else { - die "Unknown alphabet -- $type!\n"; - } - - # indices: - my %LtrIdx; - for my $k (0 .. $#Special) { - $LtrIdx{$Special[$k]} = -$k - 1; - } - for my $k (0 .. $#Alphabet) { - $LtrIdx{$Alphabet[$k]} = $k; - } - - return (\@Alphabet, \@Special, \%LtrIdx); -} - -sub random { # generate an integer random number - - my $x = shift; # between $x - my $y = shift; # and $y (including both) - - return int(rand($y - $x + 1)) + $x; -} - - -1; diff --git a/src/perl/ReadUtil.pm b/src/perl/ReadUtil.pm deleted file mode 100755 index 644db85..0000000 --- a/src/perl/ReadUtil.pm +++ /dev/null @@ -1,219 +0,0 @@ -# utilities for reading files of reads -# -# Copyright 2007, 2008, 2009 -# Niko Beerenwinkel, -# Nicholas Eriksson, -# Moritz Gerstung, -# Lukas Geyrhofer, -# Osvaldo Zagordi, -# ETH Zurich - -# This file is part of ShoRAH. -# ShoRAH is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# ShoRAH is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with ShoRAH. If not, see . 
- -package ReadUtil; - -use strict; -use NikoUtil; -use DNA; -use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION); -use Exporter; - -$VERSION = 1.00; -@ISA = qw(Exporter); - -# autoexport: -@EXPORT = qw(read_ReadSim -readReadFile readFastaFile -compareToCons compareReadToCons -); -# export on request: -@EXPORT_OK = qw(); - - - -sub read_ReadSim { - my $ReadSimOut = shift; - my @readList; - - my ($h, $s) = read_fastas($ReadSimOut); - foreach my $i (0 .. @$s-1) { - # process $h->[$i] = read_0001|beg|2|length|92|forward|HIV - # and $s->[$i] = TCAAATCACTCTTTGGCAACGACCCCTCGTCGACAATAA - - #split on '|' doesn't work well, so first substitute spaces - $h->[$i] =~ s/\|/ /g; - my @head = split ' ', $h->[$i]; - - die "missing header $h->[$i]\n" if (@head <= 1); - my $seqID = $head[0]; - my $startpos = $head[2]; - my $dir = $head[5]; - my $source = $head[6]; - - my %read; - $read{id} = $seqID; - $read{st} = $startpos; - if ($dir eq 'forward') { - $read{seq} = $s->[$i]; - } else { - $read{seq} = revComp($s->[$i]); - } - $read{len} = length($s->[$i]); - $read{end} = $startpos + $read{len} - 1; - $read{dir} = $dir; - $read{source} = $source; - push(@readList, \%read); - } - - return \@readList; -} - -sub readReadFile { - # read a file of reads given by - # startpos sequence - # startpos sequence - # startpos sequence - # ... - my $file = $_[0]; - my @readList; - - if (! defined $file) { - die "Error: No fasta file specified\n"; - } - - if (! -r $file) { - die "Fasta file \"$file\" not readable\n"; - } - open(IN, "<$file") or die "Can't open $file: $!"; - while () { - chomp; - next if (/^#/); - my ($start, $seq) = split; - my %read; - $read{st} = $start; - $read{seq} = $seq; - $read{len} = length($seq); - $read{end} = $start + $read{len} - 1; - push(@readList, \%read); - } - return \@readList; -} - - - - -sub readFastaFile { - my $file = $_[0]; - my $seqID; - my $startpos; - my @readList; - my $seq = ''; - my @head; - - if (! defined $file) { - die "Error: No fasta file specified\n"; - } - - if (! -r $file) { - die "Fasta file \"$file\" not readable\n"; - } - open(IN, "<$file") or die "Can't open $file: $!"; - while () { - chomp; - if (/^\#/) { - next; - } elsif ($_ eq '') { - next; - } elsif (/^>/) { - # header looks like ">seqID startpos ..." - if (length($seq) > 0) { - #we've found a sequence, so store it - - my %read; - $read{id} = $seqID; - $read{st} = $startpos; - $read{seq} = $seq; - $read{len} = length($seq); - $read{end} = $startpos + $read{len} - 1; - #my @tmp = @head; - #$read{qs} = \@tmp; - push(@readList, \%read); - $seq = ''; - } - @head = split; - if (@head > 1) { - $seqID = shift @head; #$head[0]; - $seqID =~ s/>//; - $startpos = shift @head; #$head[1]; - } else { - print "Error - @head missing characters\n"; - } - } else { - #a sequence line - #remove spaces at beginning and end - s/^\s+//; - s/\s+$//; - $seq .= $_; - } - - } - #still have sequence in the queue, need to put the last one in - #this should be done in a better fashion... 
- my %read; - $read{id} = $seqID; - $read{st} = $startpos; - $read{seq} = $seq; - $read{len} = length($seq); - $read{end} = $startpos + $read{len} - 1; -# $read{qs} = \@head; - push(@readList, \%read); - - close IN; - return \@readList; -} - -sub compareToCons { - my $seq = shift; - my $cons = shift; - my @ans; - #die "$seq\n$cons\n" if (length($seq) != length($cons)); - my $minl = Nmin(length($seq), length($cons)); - #input: two strings of same length - #output: list of differences in format A90I - foreach my $i (1..$minl) { - my $c1 = substr($seq,$i-1,1); - my $c0 = substr($cons,$i-1,1); - if (!($c1 eq $c0)) { - push(@ans, $c0 . $i . $c1); - } - } - return \@ans; -} - -sub compareReadToCons { - # not really right, since compareToCons - # prints in local coordinates... - my $read = shift; - my $cons = shift; - #input a read %r = {id:AHAHA seq : ACGTAT, len : 6, start : 80} - #and a string - #output: %r = {id: AHAHA, ..., muts = join(',', compareToCons(read, cons) } - #my %r; - #$r{id} = $read->{id}; - #$r{len} = $read->{len}; - $read->{muts} = join(',', compareToCons($read->{seq}, substr($cons, $read->{st}, $read->{len}))); -} - -1; diff --git a/src/perl/Util.pm b/src/perl/Util.pm deleted file mode 100755 index 0b7b3a1..0000000 --- a/src/perl/Util.pm +++ /dev/null @@ -1,255 +0,0 @@ -package Util; -# various statistical utilities - -# Copyright 2007, 2008, 2009 -# Niko Beerenwinkel, -# Nicholas Eriksson, -# Moritz Gerstung, -# Lukas Geyrhofer, -# Osvaldo Zagordi, -# ETH Zurich - -# This file is part of ShoRAH. -# ShoRAH is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# ShoRAH is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with ShoRAH. If not, see . 
- -use strict; -use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION); -use Exporter; -use NikoUtil; - -$VERSION = 1.00; -@ISA = qw(Exporter); - -# autoexport: -@EXPORT = qw(dec2bin findProg Psystem average median countChar -normalize maxind makeRandomDist makeRandomDistMin binomial fact rand_perm -computeInDelMut); -# export on request: -@EXPORT_OK = qw(); - -sub dec2bin { - my $arg = shift; - my $N = shift; - substr(unpack("B32", pack('C', $arg)),-$N); -} - -sub findProg { - my $name = shift; - my $ans; - - my @pathList = split(':',$ENV{'PATH'}); - push(@pathList,"$ENV{'HOME'}/bin"); - push(@pathList,'.'); - - my $found = 0; - foreach my $dir (@pathList) { - if (-r "$dir/$name") { - $ans = "$dir/$name"; - $found = 1; - print "found $name as $ans\n"; - last; - } - } - if ($found == 0) { - print "error, can't find $name in PATH = @pathList\n"; - exit 1; - } - return $ans; -} - -sub Psystem { - my $s = shift; - print "exec : $s\n"; - system ("$s"); -} - -sub Fsystem { - my $s = shift; - print "$s\n"; -} - -sub average { - my $l = shift; - if (@$l == 0) { - return undef; - } - my $ave = 0; - foreach (@$l) { - $ave += $_; - } - $ave /= @$l; - return $ave; -} - -sub median { - #actually computes the mode - my $l = shift; - my %c; - foreach (@$l) { - $c{$_}++; - } - my $maxcnt = 0; - my $med; - foreach (keys(%c)) { - if ($c{$_} > $maxcnt) { - $maxcnt = $c{$_}; - $med = $_; - } - } - return $med; -} - -sub countChar { - #count occurances of $char in $s - # - my $s = shift; - my $char = shift; - #print "counting $char in $s\n"; - # this little trick doesn't work if char = '.' - #$_ = $s; - #return tr/$char/$char/; - - my $count = 0; - foreach (0.. length($s)) { - if (substr($s,$_,1) eq $char) { - $count++; - } - } - #print "get $count\n"; - return $count; -} - -sub normalize { - #input: list of positive integers - #output: list / sum - my $l = shift; - #print "Calling norm with @$l\n"; - my $sum = 0; - foreach my $a (@$l) { - if (defined $a) { - $sum += $a; - } - } - if ($sum > 0) { - foreach my $a (@$l) { - if (defined $a) { - $a = $a / $sum; - } else { - $a = 0; - } - } - } -} - -sub maxind { - my $l = shift; - my $max = -1e100; - my $maxind = 0; - foreach my $i (0..@$l-1) { - if ($l->[$i] > $max) { - $max = $l->[$i]; - $maxind = $i; - } - } - return $maxind; -} - -sub makeRandomDist { - my $n = int(shift); - return 0 if ($n < 1); - my @ans; - - foreach my $i (1 .. $n) { - push(@ans, int(rand(1000))); - } - normalize(\@ans); - return \@ans; -} - -sub makeRandomDistMin { - my $n = int(shift); - my $minFreq = shift; - return 0 if ($n < 1); - die "$n * $minFreq > 1\n" if ($n * $minFreq > 1); - my @ans; - - push @ans, 0; - foreach my $i (1 .. $n-1) { - push(@ans, int(rand(1000))); - } - # normalize so that sum @ans = 1 - $minFreq*$n - my $num = 1 - $minFreq*$n; - my $sum = 0; - $sum += $_ foreach (@ans); - $_ *= $num/$sum foreach (@ans); - $_ += $minFreq foreach (@ans); - return \@ans; -} - -sub binomial { - my ($n, $k) = round(@_); - if ($k > $n) { - return 0; - } - return (fact($n) / (fact($k) * fact($n - $k))); -} - -sub fact { - my $n = shift; - if ($n == 0) { - return 1; - } else { - my $ans = 1; - foreach (1..$n) { - $ans *= $_; - } - return $ans; - } -} - - -sub rand_perm { - - # returns a random permutation of the $N integers 0, ..., $N-1 - - my $N = shift; - - my @list = (0 .. 
$N-1); - my @perm; - - while (@list) { - my $r = random(0, $#list); - push @perm, splice(@list, $r, 1); - } - - return @perm; -} - -sub computeInDelMut { - my $a = shift; - my $b = shift; - my $del = countChar($a, '-'); - my $ins = countChar($b, '-'); - my $Mut = 0; - foreach my $i (0 .. length($a) - 1) { - my $x = substr($a,$i,1); - my $y = substr($b,$i,1); - if (($x ne $y) and ($x ne '-') and ($y ne '-')) { - $Mut++; - } - } - return ($ins, $del, $Mut); -} - -1; diff --git a/src/python/shorah_matching.py b/src/python/shorah_matching.py deleted file mode 100755 index 6b36ef8..0000000 --- a/src/python/shorah_matching.py +++ /dev/null @@ -1,81 +0,0 @@ -# Hopcroft-Karp bipartite max-cardinality matching and max independent set -# David Eppstein, UC Irvine, 27 Apr 2002 -# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/123641 - -# Example from Cormen, Leiserson, and Rivest, section 27.3: -#a = bipartiteMatch({ 0:[0], 1:[0,2], 2:[1,2,3], 3:[2], 4:[2] }) - - -def bipartiteMatch(graph): - '''Find maximum cardinality matching of a bipartite graph (U,V,E). - The input format is a dictionary mapping members of U to a list - of their neighbors in V. The output is a triple (M,A,B) where M is a - dictionary mapping members of V to their matches in U, A is the part - of the maximum independent set in U, and B is the part of the MIS in V. - The same object may occur in both U and V, and is treated as two - distinct vertices if this happens.''' - - # initialize greedy matching (redundant, but faster than full search) - matching = {} - for u in graph: - for v in graph[u]: - if v not in matching: - matching[v] = u - break - - while 1: - # structure residual graph into layers - # pred[u] gives the neighbor in the previous layer for u in U - # preds[v] gives a list of neighbors in the previous layer for v in V - # unmatched gives a list of unmatched vertices in final layer of V, - # and is also used as a flag value for pred[u] when u is in the first - # layer - preds = {} - unmatched = [] - pred = dict([(u, unmatched) for u in graph]) - for v in matching: - del pred[matching[v]] - layer = list(pred) - - # repeatedly extend layering structure by another pair of layers - while layer and not unmatched: - newLayer = {} - for u in layer: - for v in graph[u]: - if v not in preds: - newLayer.setdefault(v, []).append(u) - layer = [] - for v in newLayer: - preds[v] = newLayer[v] - if v in matching: - layer.append(matching[v]) - pred[matching[v]] = v - else: - unmatched.append(v) - - # did we finish layering without finding any alternating paths? - if not unmatched: - unlayered = {} - for u in graph: - for v in graph[u]: - if v not in preds: - unlayered[v] = None - return (matching, list(pred), list(unlayered)) - - # recursively search backward through layers to find alternating paths - # recursion returns true if found path, false otherwise - def recurse(v): - if v in preds: - L = preds[v] - del preds[v] - for u in L: - if u in pred: - pu = pred[u] - del pred[u] - if pu is unmatched or recurse(pu): - matching[v] = u - return 1 - return 0 - - for v in unmatched: - recurse(v) diff --git a/src/python/shorah_mm.py b/src/python/shorah_mm.py deleted file mode 100755 index 9944e4e..0000000 --- a/src/python/shorah_mm.py +++ /dev/null @@ -1,778 +0,0 @@ -# Copyright 2007, 2008, 2009 -# Niko Beerenwinkel, -# Nicholas Eriksson, -# Moritz Gerstung, -# Lukas Geyrhofer, -# Osvaldo Zagordi, -# Susana Posada Cespedes, -# ETH Zurich - -# This file is part of ShoRAH. 
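The removed shorah_matching.bipartiteMatch expects a dict mapping each vertex of U to its neighbours in V and returns the matching as a dict from V back to U. Its greedy initialisation step, run on the Cormen et al. example quoted in the deleted file, illustrates that interface; this is only the warm start, not the full Hopcroft-Karp augmentation:

    # U -> neighbours in V, as in the example quoted above
    graph = {0: [0], 1: [0, 2], 2: [1, 2, 3], 3: [2], 4: [2]}

    matching = {}                  # V -> U, the format bipartiteMatch returns
    for u in graph:
        for v in graph[u]:
            if v not in matching:  # greedily claim the first free neighbour
                matching[v] = u
                break

    print(matching)                # {0: 0, 2: 1, 1: 2} -- already maximum here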
-# ShoRAH is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# ShoRAH is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with ShoRAH. If not, see . - -''' - -usage: mm.py filename [max_haplos] - ------------- -Input: filename should consist of lines like -0 ACAGACGA -1 CAGACGA -4 AAAAAAAAAA - -where the first column is the start position and the second column is the read. -The start positions must be indexed from 0. - -Any alphabet is allowed for the reads. - -Duplicate reads or reads which are redundant might cause unexpected results. -For example, the reads -0 ACGT -1 CGT -are redundant (since the first contains the second exactly). - ------------- -Output: - the last 4 characters of filename are deleted and replaced with geno - so if input is file.read then output is file.geno ------------- -parameters (see the beginning of main() in the code to change these) - -MAXGENO = 10000 (can be changed with max_haplos on command line) - - maximum number of haplotypes we output - -MAXITER = 500 - - maximum number of times we rerun the matching algorithm to try to find new - haplotypes - -ONEPATH = True - - extend chains to just one path in the graph? If false, chains get extended - to all possible paths. - This can be a problem if the chain is small, you can get too many paths. - -TODO - ONEPATH should have an option to output up to MAXPATHS paths -''' - -from __future__ import print_function -import random -import shorah_matching - -MAXGENO_DEFAULT = 10000 -MAXITER_DEFAULT = 50 -MINOVERLAP_DEFAULT = 1 -ONEPATH_DEFAULT = True -OUTPUTGRAPH_DEFAULT = False - - -def parse_com_line(): - - import argparse - - parser = argparse.ArgumentParser(description="Global reconstruction: minimal set of haplotypes that\ - explains all reads in the dataset", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - opts = main.__defaults__ - - parser.add_argument("filename", nargs=1, - help="Input file containing starting position and sequences\ - for each of the reads") - - parser.add_argument("-m", "--maxhaplo", action="store", metavar='INT', type=int, - required=False, dest="MAXGENO", default=opts[0], - help="keep searching until we find at least MAXGENO \ - haplotypes") - - parser.add_argument("-i", "--maxiter", action="store", metavar='INT', type=int, - required=False, dest="MAXITER", default=opts[1], - help="but only look up to MAXITER rounds") - - parser.add_argument("-l", "--length", action="store", metavar='INT', type=int, - required=False, dest="MINOVERLAP", default=opts[2], - help="minimum overlap to include in graph") - - # logical options - parser.add_argument("-o", "--one", action="store_true", required=False, - dest="ONEPATH", default=ONEPATH_DEFAULT, - help="search one path only") - - parser.add_argument("-a", "--all", action="store_true", required=False, - dest="MULTIPATH", help="extend search to many paths") - - parser.add_argument("-g", "--graph", action="store_true", required=False, - dest="OUTPUTGRAPH", default=OUTPUTGRAPH_DEFAULT, - help="output a graph?") - - args = parser.parse_args() - if args.MULTIPATH: - args.ONEPATH = False - - return args - - 
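A minimal reader for the input format documented above (one read per line, "<0-based start> <sequence>"), mirroring the readFasta helper that follows; the function name and example filename are hypothetical:

    def read_reads(filename):
        """Return ([(start, sequence), ...], reference_length)."""
        reads, seq_len = [], 0
        with open(filename) as handle:
            for line in handle:
                if not line.strip() or line.startswith('#'):
                    continue                    # skip blanks and comments
                pos, seq = line.split()
                pos = int(pos)
                reads.append((pos, seq))
                seq_len = max(seq_len, pos + len(seq))
        return reads, seq_len

    # reads, seq_len = read_reads('sample_cor.rest')   # hypothetical file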
-def permuteList(l, perm): - '''apply a permutation to a list''' - return [perm[i] for i in l] - - -def permuteGraph(graph, perm): - '''apply a permutation to a graph''' - return dict([(perm[g], permuteList(child, perm)) - for (g, child) in graph.items()]) - - -def permute(graph, reads, descList): - ''' pick a random elt of S_{len(graph) - 2} and apply the permutation - (fixing the source and sink) to graph, reads, and descList - ''' - # build permutation in S_n - n = len(graph) - 2 - perm = list(range(n)) - random.shuffle(perm) - # and then make sure to fix the source and sink - perm.append(source) - perm.append(sink) - if verbose: - print('permuting with', perm) - newGraph = permuteGraph(graph, perm) - #newReads = [reads[i] for i in perm[:-2]] - # DOESN'T WORK - NEED INVERSE OF PERM - - newReads = [[] for i in range(n)] - for i in range(n): - newReads[perm[i]] = reads[i] - # permute labels in descList - tmpdescList = list( - map(lambda l: permuteList(l, perm), map(list, descList))) - # also permute places ### VIA INVERSE - newdescList = [[] for i in tmpdescList] - for i in range(n + 2): - newdescList[perm[i]] = tmpdescList[i] - return newGraph, newReads, newdescList - - -def readFasta(filename): - '''name says it''' - try: - f = open(filename, 'r') - except IOError: - return [], 0 - ans = [] - maxEnd = 0 - for line in f: - pos, seq = line.split() - pos = int(pos) - end = pos + len(seq) - if end > maxEnd: - maxEnd = end - ans.append([pos, seq]) - return ans, maxEnd - - -def printFasta(seqs, lineLen=80): - '''simple enough''' - start = 0 - totalLen = len(seqs[0]) - - while start < totalLen: - length = min(lineLen, totalLen - start) - print() - for i in seqs: - print(i[start:start + length]) - start += length - return 0 - - -def addSourceSink(graph, descList, reads, seqLen): - """ - adds a source and sink onto the graph - and into descList - """ - source = len(reads) - sink = len(reads) + 1 - maxstart = 0 - for r in reads: - if r[0] > maxstart: - maxstart = r[0] - # reads.append([-1,'']) - #reads.append([seqLen + 1,'']) - - # need edges source -> r whenever r has no parents - # and r -> sink whenever it has no children - - # find everything which is not a child - - allChildren = set([]) - list(map(allChildren.update, [graph[i] for i in graph])) - initialReads = set(list(range(len(graph)))).difference(allChildren) - graph.setdefault(source, list(initialReads)) - - # find everything without children and create edge to sink - for r in range(len(graph)): - if graph[r] == []: - graph[r].append(sink) - graph.setdefault(sink, []) - - # print(graph) - if OUTPUTGRAPH: - print('BEGIN') - for r in graph: - #print('node name', r) - #print('node start', reads[r][0]) - #print('children', graph[r]) - if r == sink: - print(1000, ':', r, ':', []) - if r == source: - print(-1, ':', r, ':', graph[r]) - if (r != sink and r != source): - print(reads[r][0], ':', r, ':', graph[r]) - print('END') - print(graph) - # fix up descList - # everything as sink as desc - for i in descList: - i.add(sink) - # push source and then sink on descList - # source has everthing as desc - sourceDesc = set(list(range(len(descList)))) - sourceDesc.add(sink) - sinkDesc = set([]) - descList.append(sourceDesc) - descList.append(sinkDesc) - - -def makeGraph(reads): - '''missing''' - def readsMatch(r, s): - '''check if two reads agree on their overlap - called with r starting before s - ''' - overlap = r[0] + len(r[1]) - s[0] - if overlap < MINOVERLAP: - return 0 - if overlap > len(s[1]): - overlap = len(s[1]) - #if overlap < 0: overlap = 
0 - return r[1][-overlap:] == s[1][:overlap] - - graph = {} - # initialize graph - for r in range(len(reads)): - graph.setdefault(r, []) - - # set edges between reads - for r in range(len(reads)): - if r % 100 == 0: - print(r) - for s in range(len(reads)): - if reads[r][0] < reads[s][0]: - if readsMatch(reads[r], reads[s]): - # edge from r to s - graph.setdefault(r, []).append(s) - return graph - - -def findDescendants(graph): - '''missing''' - descList = [] - for i in range(len(graph)): - descList.append(set([])) - - def getDesc(node): - '''missing''' - #print('getDesc called on', node) - # node is an int, - # descList is a list of lists of ints - # set descList[node] by: - # union of all descList[x] for x in graph[node] - if len(descList[node]) > 0: - #print(node, descList[node]) - return descList[node] - else: - #descList[node] = descList[node].union(set(graph[node])) - descList[node].update(set(graph[node])) - for x in graph[node]: - # doesn't speed it up - # if len(descList[x]) > 0: - # descList[node] = descList[node].union(descList[x]) - # else: - #descList[node] = descList[node].union(getDesc(x)) - descList[node].update(getDesc(x)) - return descList[node] - - # must call getDesc on the entire graph - # but most of the calls are trivial - for i in range(len(graph)): - getDesc(i) - #print('desc of', i, 'are', getDesc(i)) - return descList - - -def removeCycles(graph, descList): - '''missing''' - for i in graph.keys(): - ##print('checking the',len(graph[i]),'children of node',i) - # python is a pain if we modify graph[i] while looping over it - # so need a temporary variable - tmp = [] - tmp.extend(graph[i]) - for j in tmp: - for k in tmp: - if j != k: - # print('checking',j,k) - if j in descList[k]: - #print('removing node',j) - #print('graph[i] =',graph[i]) - graph[i].remove(j) - break - ###print('reduced to',len(graph[i]), 'children') - return graph - - -def matchToChain(match, n): - '''takes a matching and creates the - corresponding chain in the poset - Who knows how it works...''' - startpoints = set(list(range(n))).difference(list(match.values())) - allChains = [] - for i in startpoints: - curChain = [] - curChain.append(i) - tmp = i - while True: - if tmp in match.keys(): - tmp = match[tmp] - curChain.append(tmp) - else: - break - allChains.append(curChain) - return allChains - - -def onePath(x, y): - ''' find one path between x and y - using descList - currently unused, but might be nice - ''' - path = [x] - if y not in descList[x]: - return None - if x == y: - return path - done = 0 - while not done: - for child in graph[path[-1]]: - if y in descList[child]: - path.append(child) - break - if y == child: - path.append(y) - done = 1 - break - return path - - -def find_all_paths(GRAPH, start, end, path=[]): - '''from Guido's essay''' - path = path + [start] - if start == end: - return [path] -# if not GRAPH.has_key(start): - if start not in GRAPH: - return [] - paths = [] - for node in GRAPH[start]: - if node not in path: - newpaths = find_all_paths(GRAPH, node, end, path) - for newpath in newpaths: - paths.append(newpath) - return paths - - -def allPaths(x, y): - '''find all paths from x to y in the graph''' - ##print('allPaths', x, y) - paths = [[x]] - if y not in descList[x]: - return None - if x == y: - return paths - finishedpaths = [] - while len(paths) > 0: - # for last element of each list in paths - # loop over children, making new list of paths - newpaths = [] - # for every path - for i in range(len(paths)): - # try to extend it - for child in graph[paths[i][-1]]: - # 
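The edge test above (readsMatch) links two reads only when they agree on every overlapping position. A rough standalone sketch, with reads as (start, sequence) tuples and hypothetical names:

    def reads_match(r, s, min_overlap=1):
        """True if read r (starting first) and read s agree on their overlap."""
        overlap = r[0] + len(r[1]) - s[0]
        if overlap < min_overlap:
            return False                        # too little (or no) overlap
        overlap = min(overlap, len(s[1]))       # cap at the length of s
        return r[1][-overlap:] == s[1][:overlap]

    print(reads_match((0, 'ACGT'), (2, 'GTAA')))   # True:  'GT' == 'GT'
    print(reads_match((0, 'ACGT'), (2, 'CTAA')))   # False: 'GT' != 'CT'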
if we can extend along this child - if y in descList[child]: - # y is later, so create a new, longer path - # FIXME: test this, possibly just use find_all_paths - #tmppath = [] - # tmppath.extend(paths[i]) - # tmppath.append(child) - newpaths.append(paths[i] + [child]) - if y == child: - # done, push on final - tmppath = [] - tmppath.extend(paths[i]) - # don't include y - # tmppath.append(child) - finishedpaths.append(tmppath) - #print('newpaths = ', newpaths) - paths = newpaths - #print('returns', len(finishedpaths), 'paths') - return finishedpaths - - -def pathToGeno(path): - ''' - first get rid of sink and source - (this has the side effect of modifying the paths) - ''' - geno = '' - if len(path) == 0: - return None - if path[0] == source: - path.pop(0) - if len(path) == 0: - return None - if path[-1] == sink: - path.pop(-1) - for i in range(len(path) - 1): - r = path[i] - s = path[i + 1] - # nonoverlap is how much of read r occurs before read s starts - nonoverlap = reads[s][0] - reads[r][0] - geno = geno + reads[r][1][:nonoverlap] - # geno.append(reads[r][1][:nonoverlap] - # add the last read - geno = geno + reads[path[-1]][1] - # geno.append(reads[path[-1]][1]) - - # pad with .'s at beginning and end to make right length - # countBegin = 0 - # countEnd = 0 - - for i in range(reads[path[0]][0]): - # geno.append('.') - geno = '.' + geno - # countBegin += 1 - - offsetEnd = seqLen - reads[path[-1]][0] - len(reads[path[-1]][1]) - for i in range(offsetEnd): - geno = geno + '.' - # countEnd += 1 - -# print('path of length',len(path), 'has', countBegin, '. and', countEnd) -# if countBegin > .5 * len(geno): -# geno = None -# if countEnd > .5 * len(geno): -# geno = None - - return geno - - -def extendChainToOne(chain): - '''extend chain to ONE path in the graph - input: a chain (list of ints) - output: a list containing a path that extends the chain - ''' - ans = [] - if len(chain) == 0: - return ans - if chain[0] == source: - chain.pop(0) - if len(chain) == 0: - return None - if chain[-1] == sink: - chain.pop(-1) - # deal with paths from source to chain[0] but - # if chain started as [source, sink] then it is of - # length 0 now - if len(chain) == 0: - ans = onePath(source, sink) - else: - ans = onePath(source, chain[0]) - for i in range(len(chain)): - if i != len(chain) - 1: - nxt = chain[i + 1] - else: - nxt = sink - if nxt not in graph[chain[i]]: - ans.extend(onePath(chain[i], nxt)) - else: - ans.append(chain[i]) - return [ans] - - -def extendChain(chain): - '''extend chain to all possible paths in graph - input: a chain (list of ints) - output: a list of all paths that extend the chain - print 'input:',chain - ''' - ans = [[]] - newans = [] - if len(chain) == 0: - return ans - if chain[0] == source: - chain.pop(0) - if len(chain) == 0: - return None - if chain[-1] == sink: - chain.pop(-1) - # deal with paths from source to chain[0] but - # if chain started as [source, sink] then it is of - # length 0 now - if len(chain) == 0: - ans = allPaths(source, sink) - else: - ans = allPaths(source, chain[0]) - for i in range(len(chain)): - #print('currently have', len(ans),'chains') - if i != len(chain) - 1: - nxt = chain[i + 1] - else: - nxt = sink - if nxt not in graph[chain[i]]: - for p in allPaths(chain[i], nxt): - for j in ans: - newans.append(j + p) - ans = newans - newans = [] - else: - for j in ans: - j.append(chain[i]) - ##print('output:', ans) - return ans - - -def minimalCover(graph, reads, descList): - """ the main code: takes a graph and creates a matching - problem. 
then solves the matching to get chains in - the graph, which are extended to paths in all ways """ - - #print('making bipartite graph') - # make bipartiteGraph - biGraph = {} - for i in range(len(graph)): - biGraph.setdefault(i, list(descList[i])) - - print('...finding matching') - # find matching - a = shorah_matching.bipartiteMatch(biGraph) - # a[0] is the matching, a[1] and a[2] the parts of - # the maximal independent sets in the two parts of biGraph - print('...Reversing matching') - # reverse the matching - match = dict([(v, k) for (k, v) in a[0].items()]) - # free some memory, perhaps - del a - #print('matching =', match) - - # transform to chains in poset - chainList = matchToChain(match, len(graph)) - if OUTPUTGRAPH: - print('chains=', chainList) - maximalAntiChain = len(chainList) - print('...largest antichain of size', maximalAntiChain) - if verbose: - for zz in range(len(chainList)): - print(zz, '==', chainList[zz]) - - # now that we're done with the matching, add the source and sink into graph - # this is needed for the extendChain and pathToGeno functions - # now done before the matching - #addSourceSink(graph, descList, reads, seqLen) - - print('...transforming chains to paths') - # transform chains to paths in all possible ways - if ONEPATH: - paths = list(map(extendChainToOne, chainList)) - else: - paths = list(map(extendChain, chainList)) - - print(len(chainList), 'chains give', sum(map(len, paths)), - 'paths (', list(map(len, paths)), ')') - if verbose: - print('paths=', paths) - - print('...translating paths->genotypes') - genos = [] - for i in range(len(paths)): - genos.extend(map(pathToGeno, paths[i])) - del(reads) - - return genos, maximalAntiChain - - -def countPaths(graph, source, sink): - '''missing''' - # numPaths[i] = # of paths from i to the sink in graph - numPaths = [-1 for i in range(len(graph))] - - numPaths[sink] = 1 - - def getPaths(node): - ''' count paths from node to sink ''' - if numPaths[node] < 0: - numPaths[node] = sum(map(getPaths, graph[node])) - #print('node', node, 'has', numPaths[node], 'paths to sink') - return numPaths[node] - getPaths(source) - #print('paths =', numPaths) - return numPaths[source] - - -###########main code######### - -def main(readfile, maxhaplo=MAXGENO_DEFAULT, maxiter=MAXITER_DEFAULT, - minoverlap=MINOVERLAP_DEFAULT, onepath=ONEPATH_DEFAULT, - outputgraph_local=OUTPUTGRAPH_DEFAULT): - '''This can be called from shorah.py directly - ''' - # want to do outfile = readfile; outfile ~= s/rest/geno$/ - if readfile[-4:] == 'rest' or readfile[-4:] == 'read': - #outFileName = readfile[:-4] + str(MAXGENO) + '.geno' - outFileName = readfile[:-4] + 'geno' - else: - outFileName = readfile + '.geno' - - # This use of global variables should definitely be changed - global verbose, seqLen, descList, graph, reads, source, sink - global MAXGENO - MAXGENO = maxhaplo - - global MAXITER - MAXITER = maxiter - - global ONEPATH - ONEPATH = onepath - - global MINOVERLAP - MINOVERLAP = minoverlap - - global OUTPUTGRAPH - OUTPUTGRAPH = outputgraph_local - - print('inputfile =', readfile) - print('outputfile=', outFileName) - print('Creating up to', MAXGENO, 'haplotypes') - - print('Permuting up to', MAXITER, 'times') - print('ONEPATH = ', ONEPATH) - print('minimum overlap in graph:', MINOVERLAP) - - # get reads - reads, seqLen = readFasta(readfile) - if seqLen == 0: - print('No reads in file (or couldn\'t open file)') - return 0 - source = len(reads) - sink = len(reads) + 1 - if len(reads) < 51: - verbose = 1 - else: - verbose = 0 - 
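The "translating paths->genotypes" step relies on pathToGeno above: consecutive reads on a source-to-sink path are stitched together on their overlaps and the result is padded with '.' to the reference length. A rough sketch of that translation, with hypothetical names and reads as (start, sequence) tuples:

    def path_to_haplotype(path_reads, seq_len):
        """Stitch the ordered reads of one path into a padded haplotype."""
        geno = ''
        for (r_start, r_seq), (s_start, _seq) in zip(path_reads, path_reads[1:]):
            geno += r_seq[:s_start - r_start]   # part of r preceding read s
        last_start, last_seq = path_reads[-1]
        geno += last_seq                        # the last read goes in whole
        left_pad = '.' * path_reads[0][0]
        right_pad = '.' * (seq_len - last_start - len(last_seq))
        return left_pad + geno + right_pad

    print(path_to_haplotype([(0, 'ACGT'), (2, 'GTAA')], 8))   # 'ACGTAA..'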
print(len(reads), 'reads, sequence length =', seqLen) - if verbose: - for r in reads: - print(reads.index(r), r) - - # Build read graph - graph = makeGraph(reads) - if verbose: - print('graph = ', graph) - print('finished graph') - - # take transitive closure of graph - descList = findDescendants(graph) - if verbose: - print('desc =', list(map(list, descList))) - print('...done with transitive closure') - - print('...removing cycles') - graph = removeCycles(graph, descList) - if verbose: - print('graph \ cycles = ', graph) - - # add on a source and sink to the graph - addSourceSink(graph, descList, reads, seqLen) - if verbose: - print('graph with SS', graph) - print('new Desclist', list(map(list, descList))) - # count paths in the graph - - totalPaths = countPaths(graph, source, sink) - print('There are', totalPaths, 'paths in the graph') - - #global maximalAntiChain - - allGenos = set([]) - DONE = False - - iteration = 0 - if totalPaths < MAXGENO: - print('This is less than MAXGENO =', MAXGENO) - print('So matching is not run') - paths = extendChain([source, sink]) - genos = list(map(pathToGeno, paths)) - allGenos.update(genos) - DONE = True - genos, maximalAntiChain = minimalCover(graph, reads, descList) - else: - print('starting matching') - print() - while not DONE and (len(allGenos) < MAXGENO) and (iteration < MAXITER): - # Done with preproccessing. Now permute - # graph, reads, and descList and get a new set of genotypes - # then find a minimal chain decomposition of the graph - # and return all genotypes obtained by extending - # these chains - if iteration > 1: - print('...permuting') - graph, reads, descList = permute(graph, reads, descList) - genos, maximalAntiChain = minimalCover(graph, reads, descList) - sizeBefore = len(allGenos) - allGenos.update(genos) - sizeAfter = len(allGenos) - print('found', len(allGenos), 'genotypes out of', MAXGENO) - print('gained', sizeAfter - sizeBefore, 'in round', iteration) - print() - # printFasta(genos,1000) - iteration += 1 - - outfile = open(outFileName, 'w') - - for i in list(allGenos): - # check if i == None - if i: - outfile.write(i) - outfile.write('\n') - outfile.close() - if verbose: - printFasta(list(allGenos), 1000) - - # summary - print('graph has', totalPaths, 'total paths') - print('minimal path cover is of size', maximalAntiChain) - print('ran matching algorithm', iteration, 'times') - print('output', len(allGenos), 'haplotypes') - - print('% TOTALPATHS =', totalPaths) - print('% ANTICHAIN =', maximalAntiChain) - print('% HAPLO =', len(allGenos)) diff --git a/src/samtools/meson.build b/src/samtools/meson.build new file mode 100644 index 0000000..cb7b458 --- /dev/null +++ b/src/samtools/meson.build @@ -0,0 +1,45 @@ +bam_sources = files([ + 'bam.c', + 'bam_aux.c', + 'bam_cat.c', + 'bam_import.c', + 'bam_index.c', + 'bam_lpileup.c', + 'bam_md.c', + 'bam_pileup.c', + 'bam_reheader.c', + 'bam_sort.c', + 'bedidx.c', + 'bgzf.c', + 'faidx.c', + 'knetfile.c', + 'kprobaln.c', + 'kstring.c', + 'razf.c', + 'sam.c', + 'sam_header.c', + 'bam.h', + 'bam_endian.h', + 'bgzf.h', + 'faidx.h', + 'kaln.h', + 'khash.h', + 'knetfile.h', + 'kprobaln.h', + 'kseq.h', + 'ksort.h', + 'kstring.h', + 'razf.h', + 'sam.h', + 'sam_header.h']) + +zlib_dep = dependency('zlib') + +bam_library = static_library( + 'libbam', + bam_sources, + dependencies : zlib_dep, + c_args : ['-D_USE_KNETFILE'], + install : false) + +bam_inc = include_directories('.') diff --git a/src/scripts/dec.py.in b/src/scripts/dec.py.in deleted file mode 100755 index f75a24e..0000000 --- 
a/src/scripts/dec.py.in +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env @PYTHON@ - -from __future__ import print_function -import argparse -import os -import shorah_dec - -# parse command line -parser = argparse.ArgumentParser(description="Local haplotype recontruction: Dirichlet process mixture", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) -# set the defaults (see http://bit.ly/2hCTQl) -opts = shorah_dec.main.__defaults__ - -group1 = parser.add_argument_group('Input files', 'Required input') -group2 = parser.add_argument_group('Run options', - 'Parameters that can (maybe should) be changed \ - according to the needs') -group3 = parser.add_argument_group('More options', - 'Do you really want to change this?') - - -group1.add_argument("-b", "--bam", metavar='BAM', required=True, type=str, - dest="in_bam", help="file with aligned reads in .bam format") - -group1.add_argument("-f", "--fasta", metavar='REF', required=True, type=str, - dest="in_fasta", help="reference genome in fasta format") - -group2.add_argument("-w", "--windowsize", metavar='INT', required=False, type=int, - default=opts[0], dest="win_length", help="window size") - -group2.add_argument("-r", "--region", metavar='chrm:start-stop', required=False, type=str, - default=opts[2], dest="region", help="region in format 'chrm:start-stop', \ - e.g. 'ch3:1000-3000'") - -group2.add_argument("-a", "--alpha", metavar='FLOAT', required=False, type=float, - default=opts[4], dest="alpha", help="alpha in dpm sampling") - -group2.add_argument("-S", "--seed", metavar='INT', required=False, type=int, - default=opts[6], dest="seed", help="set seed for reproducible results") - -group3.add_argument("-s", "--winshifts", metavar='INT', required=False, type=int, - default=opts[1], dest="win_shifts", help="number of window shifts") - -group3.add_argument("-x", "--maxcov", metavar='INT', required=False, type=int, - default=opts[3], dest="max_coverage", help="approximate max coverage allowed") - -group3.add_argument("-k", "--keep_files", required=False, action='store_true', - default=opts[5], dest="keep_files", help="keep all intermediate files") - -args = parser.parse_args() - -supported_formats = { - 'bam': 'aligned reads', - 'fasta': 'reference genome' -} -# check the input file is in supported format -try: - tmp_filename = os.path.split(args.in_bam)[1] - [in_stem, in_format] = [tmp_filename.split('.')[0], - tmp_filename.split('.')[-1]] - t = supported_formats[in_format] -except IndexError: - shorah_dec.declog.error('The input file must be filestem.format') - print('The input file must be filestem.format') - print('Supported formats are') - for sf in supported_formats.items(): - print(sf[0], ':', sf[1]) - sys.exit() -except KeyError: - shorah_dec.declog.error('format unknown') - print(in_format, 'format unknown') - print('Supported formats are') - for sf in supported_formats.items(): - print(sf[0], ':', sf[1]) - sys.exit() - -shorah_dec.main(args.in_bam, args.in_fasta, args.win_length, args.win_shifts, - args.region, args.max_coverage, args.alpha, args.keep_files, args.seed) diff --git a/src/scripts/fas2read.pl.in b/src/scripts/fas2read.pl.in deleted file mode 100755 index f948849..0000000 --- a/src/scripts/fas2read.pl.in +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env perl -#translate a fasta file to read format - -use lib "@perldir@"; -use strict; -use Getopt::Long; -use DNA; -use ReadUtil; - -my ($HELP, $VERBOSE, $FASTAFILE); - -GetOptions( - "help" => \$HELP, - "fasta=s" => \$FASTAFILE, - ); -help() if (($HELP) or (! 
defined $FASTAFILE)); -my $outfile = $FASTAFILE; -$outfile =~ s/fas$/read/; -print "Reading $FASTAFILE\nOutput to $outfile\n"; -open OUT, ">$outfile"; -my $reads = readFastaFile($FASTAFILE); - -foreach my $rd (@$reads) { - print OUT "$rd->{st} $rd->{seq}\n"; -} -close OUT; - -sub help { - print <. -''' shorah.py is the program that performs the global reconstruction. - It runs error correction, global reconstruction and frequency estimation - by calling the relevant programs. Additionally, it performs SNV discovery - by calling the program snv.py -''' -from __future__ import division -import os -import os.path -import sys - -import logging -import logging.handlers - -# Make a global logging object. -sholog = logging.getLogger(__name__) - -dn = os.path.dirname(__file__) - - -def run_child(exe_name, arg_string): - '''use subrocess to run an external program with arguments''' - import subprocess - - if not arg_string.startswith(' '): - arg_string = ' ' + arg_string - - try: - sholog.debug("running %s" % (exe_name + arg_string)) - retcode = subprocess.call(exe_name + arg_string, shell=True) - if retcode < 0: - sholog.error(exe_name + arg_string) - sholog.error("Child %s terminated by signal" % exe_name, -retcode) - else: - sholog.debug("Child %s returned %i" % (exe_name, retcode)) - except OSError as ee: - sholog.error("Execution of %s failed: %s" % (exe_name, ee)) - - return retcode - - -if __name__ == "__main__": - - import argparse - import shutil - - from Bio import SeqIO - - import shorah_dec - import shorah_mm - - # parse command line - parser = argparse.ArgumentParser(description="ShoRAH: Short Read Assembly into Haplotypes", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - # First define all option groups - group1 = parser.add_argument_group('Input files', 'Required input') - group2 = parser.add_argument_group('Run options', - "Parameters that can (maybe should) be changed \ - according to the needs") - group3 = parser.add_argument_group( - 'More options', 'Do you really want to change this?') - - group1.add_argument("-b", "--bam", metavar='BAM', required=True, type=str, - dest="b", help="sorted bam format alignment file") - - group1.add_argument("-f", "--fasta", metavar='REF', required=True, type=str, - dest="f", help="reference genome in fasta format") - - group2.add_argument("-a", "--alpha", metavar='FLOAT', required=False, type=float, - dest="a", default=0.1, help="alpha in dpm sampling") - - group2.add_argument("-w", "--windowsize", metavar='INT', required=False, type=int, - dest="w", default=201, help="window size") - - group2.add_argument("-r", "--region", metavar='chrm:start-stop', required=False, type=str, - dest="r", default='', help="region in format 'chr:start-stop', e.g.\ - 'chrm:1000-3000'") - - group2.add_argument("-S", "--seed", metavar='INT', required=False, type=int, - dest="S", default=None, help="set seed for reproducible results") - - group3.add_argument("-s", "--winshifts", metavar='INT', required=False, type=int, - dest="s", default=3, help="number of window shifts") - - group3.add_argument("-i", "--sigma", metavar='FLOAT', required=False, type=float, - dest="i", default=0.01, help="value of sigma to use when calling SNVs") - - group3.add_argument("-x", "--maxcov", metavar='INT', required=False, type=int, - dest="x", default=10000, help="approximate max coverage allowed") - - group3.add_argument("-k", "--keep_files", required=False, action="store_true", default=True, - dest="k", help="keep intermediate files") - - args = parser.parse_args() - - # set 
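The run_child wrapper above is the pattern used throughout these scripts to drive the compiled helpers (b2w, diri_sampler, ...): build a command line, run it through the shell, and log the return code. A trimmed sketch of the same idea; the example invocation at the end is illustrative only:

    import logging
    import subprocess

    def run_child(exe_name, arg_string):
        """Run an external program and return its exit code."""
        cmd = exe_name + ' ' + arg_string.lstrip()
        logging.debug('running %s', cmd)
        retcode = subprocess.call(cmd, shell=True)
        if retcode < 0:
            logging.error('child %s terminated by signal %d', exe_name, -retcode)
        else:
            logging.debug('child %s returned %d', exe_name, retcode)
        return retcode

    # run_child('./diri_sampler', '-i window.reads.fas -j 20000 -a 0.1 -t 2000')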
logging level - sholog.setLevel(logging.DEBUG) - # This handler writes everything to a file. - LOG_FILENAME = './shorah.log' - h = logging.handlers.RotatingFileHandler(LOG_FILENAME, 'w', - maxBytes=100000, backupCount=5) - f = logging.Formatter("%(levelname)s %(asctime)s %(funcName)s\ - %(lineno)d %(message)s") - h.setFormatter(f) - sholog.addHandler(h) - sholog.info(' '.join(sys.argv)) - - in_stem = '.'.join(os.path.split(args.b)[1].split('.')[:-1]) - - # 1. run main from shorah_dec; it will also launch snv.py - if not os.path.exists('%s.cor.fas' % in_stem): - sholog.debug('running dec.py') - shorah_dec.main(in_bam=args.b, in_fasta=args.f, win_length=args.w, - win_shifts=args.s, max_coverage=args.x, region=args.r, - alpha=args.a, keep_files=args.k, seed=args.S) - - # 2. copy file and run fas2read to convert from fasta - shutil.move('%s.cor.fas' % in_stem, '%s_cor.fas' % in_stem) - sholog.debug('running fas2reads') - my_prog = 'perl -I %s %s' % (dn + '/perllib', - os.path.join(dn, 'fas2read.pl')) - my_arg = " -f %s_cor.fas" % in_stem - assert os.path.isfile("%s_cor.fas" % in_stem), \ - 'File %s_cor.fas not found' % in_stem - retcode_f2r = run_child(my_prog, my_arg) - if retcode_f2r: - sholog.error('fas2read did not return 0') - sys.exit('Something went wrong in fas2read') - else: - sholog.debug('fas2read exited successfully') - - # 3. run contain to eliminate redundant reads - # output: filein.rest - sholog.debug('running contain') - my_prog = os.path.join(dn, 'contain') - my_arg = " -f %s_cor" % in_stem - assert os.path.isfile('%s_cor.fas' % in_stem), \ - 'File %s_cor.fas not found' % in_stem - retcode_c = run_child(my_prog, my_arg) - if retcode_c: - sholog.error('contain did not return 0') - sys.exit('Something went wrong while running contain') - else: - sholog.debug('contain exited successfully') - - # 4. run main from mm.py to find minimal matching - sholog.debug('running mm.py') - shorah_mm.main('%s_cor.rest' % in_stem, maxhaplo=200) - - # 5. run EM freqEst output: sample.$nr.popl - sholog.debug('running freqEst') - my_prog = os.path.join(dn, 'freqEst') - my_arg = " -f %s_cor" % in_stem - assert os.path.isfile('%s_cor.rest' % in_stem), \ - 'File %s_cor.rest not found' % in_stem - retcode_em = run_child(my_prog, my_arg) - if retcode_em: - sholog.error('freqEst did not return 0') - sys.exit('Something went wrong in the EM step') - else: - sholog.debug('freqEst exited successfully') - - # 6. 
copy high frequency haps from .popl to global_haps.fasta - n_rest = len(list(open('%s_cor.rest' % in_stem))) - min_freq = 1.0 / n_rest - g_recs = [] - for s in SeqIO.parse('%s_cor.popl' % in_stem, 'fasta'): - f_here = float(s.id.split('_')[1]) - if f_here >= min_freq: - s.description = '| global_haplotype frequency=%f\n' % f_here - g_recs.append(s) - SeqIO.write(g_recs, '%s_global_haps.fasta' % in_stem, 'fasta') diff --git a/src/scripts/snv.py.in b/src/scripts/snv.py.in deleted file mode 100755 index 7fbdbed..0000000 --- a/src/scripts/snv.py.in +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env @PYTHON@ - -import argparse -import shorah_snv - -# parse command line -parser = argparse.ArgumentParser(description="Single nucleotide variant calling", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - -# set the defaults (see http://bit.ly/2hCTQl) -opts = shorah_snv.main.__defaults__ - -requiredNamed = parser.add_argument_group('required named arguments') - -requiredNamed.add_argument("-r", "--ref", metavar='REF', type=str, required=True, - dest="reference", help="reference file") - -requiredNamed.add_argument("-b", "--bam", metavar='BAM', type=str, required=True, - dest="bam_file", help="sorted bam format alignment file") - -parser.add_argument("-s", "--sigma", metavar='FLOAT', default=opts[0], type=float, - required=False, dest="sigma", help="value of sigma to use when calling\ - SNVs") - -parser.add_argument("-i", "--increment", metavar='INT', default=opts[1], type=int, - required=False, dest="increment", - help="value of increment to use when calling \ - SNVs (1 used by amplian.py)") - -parser.add_argument("-x", "--maxcov", metavar='INT', default=opts[2], type=int, - dest="max_coverage", help="Maximum coverage allowed") - -args = parser.parse_args() - -shorah_snv.main(args.reference, args.bam_file, args.sigma, - args.increment, args.max_coverage) diff --git a/src/shorah/__main__.py b/src/shorah/__main__.py new file mode 100644 index 0000000..42e6c65 --- /dev/null +++ b/src/shorah/__main__.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 + +"""Entrypoint module, in case you use `python -mshorah`. + +Why does this file exist, and why __main__? For more info, read: + +- https://www.python.org/dev/peps/pep-0338/ +- https://docs.python.org/2/using/cmdline.html#cmdoption-m +- https://docs.python.org/3/using/cmdline.html#cmdoption-m +""" +from shorah.cli import main + +if __name__ == "__main__": + main() diff --git a/src/scripts/amplian.py.in b/src/shorah/amplicon.py old mode 100755 new mode 100644 similarity index 67% rename from src/scripts/amplian.py.in rename to src/shorah/amplicon.py index 81e1365..aef34e5 --- a/src/scripts/amplian.py.in +++ b/src/shorah/amplicon.py @@ -1,6 +1,6 @@ -#!/usr/bin/env @PYTHON@ +#!/usr/bin/env python3 -# Copyright 2007-2012 +# Copyright 2007-2018 # Niko Beerenwinkel, # Nicholas Eriksson, # Moritz Gerstung, @@ -32,20 +32,30 @@ import os import os.path import sys +import shlex import logging import logging.handlers +from pkg_resources import resource_filename from Bio import SeqIO -# Make a global logging object. -amplog = logging.getLogger(__name__) +dn_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if __name__ == '__main__': + if __package__ is None: + os.sys.path.insert(1, dn_dir) + mod = __import__('shorah') + sys.modules["shorah"] = mod + import shorah_snv +else: + from . 
import shorah_snv + +diri_exe = resource_filename(__name__, 'bin/diri_sampler') +b2w_exe = resource_filename(__name__, 'bin/b2w') win_min_ext = 0.95 cutoff_depth = 10000 -dn = os.path.dirname(__file__) - def run_child(exe_name, arg_string): '''use subrocess to run an external program with arguments''' @@ -54,17 +64,17 @@ def run_child(exe_name, arg_string): if not arg_string.startswith(' '): arg_string = ' ' + arg_string - amplog.debug(exe_name + arg_string) + logging.debug(exe_name + arg_string) try: retcode = subprocess.call(exe_name + arg_string, shell=True) if retcode > 0: - amplog.error(exe_name + arg_string) - amplog.error("Child %s terminated by signal" % exe_name, retcode) + logging.error(exe_name + arg_string) + logging.error("Child %s terminated by signal %s", exe_name, retcode) else: - amplog.debug("Child %s returned %i" % (exe_name, retcode)) + logging.debug("Child %s returned %i", exe_name, retcode) except OSError as ee: - amplog.error("Execution of %s failed:" % exe_name, ee) + logging.error("Execution of %s failed: %s", exe_name, ee) return retcode @@ -103,15 +113,15 @@ def run_diagnostics(window_file, reads): untouched.append(unt) theta.append(the) gamma.append(gam) - del(it) + del it - amplog.info('sample has %d reads' % reads) + logging.info('sample has %d reads', reads) untouched_hst = untouched[-2000:] unt_mean = sum(untouched_hst) / len(untouched_hst) unt_ratio = 100 * unt_mean / q unt_msg = '%3.1f %% of untouched objects ' % \ unt_ratio - amplog.info(unt_msg) + logging.info(unt_msg) if unt_ratio < 90.0: warnings.warn(unt_msg) @@ -123,7 +133,7 @@ def matchremove(matchobj): c = int(match[1:]) else: c = 0 - del(c) + del c return '' @@ -145,7 +155,7 @@ def plot_entropy(pos_ent, pos_coords, ave_ent, win_coords): try: import matplotlib.pyplot as plt except ImportError: - amplog.error('could not import matplotlib, no pdf produced') + logging.error('could not import matplotlib, no pdf produced') return high_start, high_stop = win_coords @@ -173,7 +183,7 @@ def plot_entropy(pos_ent, pos_coords, ave_ent, win_coords): plt.title('chosen entropy window is %d-%d' % (high_start, high_stop)) plt.savefig('entropy.pdf') - + del fig def highest_entropy(bam_file, fasta_file, ent_sel='relative'): '''Parse reads to have their length distribution and compute the @@ -189,13 +199,13 @@ def highest_entropy(bam_file, fasta_file, ent_sel='relative'): os.remove('rl.txt') read_len = sorted(read_len) n_reads = len(read_len) - amplog.info('n_reads: %d' % n_reads) + logging.info('n_reads: %d', n_reads) # max_len = max(read_len) trimmed_mean = sum([read_len[i] for i in range(int(0.1 * n_reads), int(0.9 * n_reads))]) trimmed_mean /= (0.8 * n_reads) trimmed_mean = int(round(trimmed_mean, 0)) - amplog.info('trimmed_mean: %d' % trimmed_mean) + logging.info('trimmed_mean: %d', trimmed_mean) # Build the mpileup and compute the entropy per position ref_seq = list(SeqIO.parse(fasta_file, 'fasta'))[0] entropy = [None] * (len(ref_seq) + 1) @@ -232,7 +242,7 @@ def highest_entropy(bam_file, fasta_file, ent_sel='relative'): stop = i break stop = i - amplog.info('start: %d, stop: %d' % (start, stop)) + logging.info('start: %d, stop: %d', start, stop) # mean entropy ent_mean = [None] * (len(ref_seq) + 1) @@ -246,7 +256,7 @@ def highest_entropy(bam_file, fasta_file, ent_sel='relative'): if entropy[i] > max_ent_per_pos: max_ent_per_pos = entropy[i] highest_ent_pos = i - amplog.info('highest entropy found at position %d' % highest_ent_pos) + logging.info('highest entropy found at position %d', highest_ent_pos) # the 
window is chosen as the absolute max mean_entropy or as the # max mean entropy covering the position with max entropy @@ -293,26 +303,24 @@ def highest_entropy(bam_file, fasta_file, ent_sel='relative'): return high_ent_start, high_ent_stop -def main(in_bam, in_fasta, min_overlap=0.95, max_coverage=50000, - alpha=0.5, s=0.01, region='', diversity=False): +#def main(in_bam, in_fasta, min_overlap=0.95, max_coverage=50000, +# alpha=0.5, s=0.01, region='', diversity=False): +def main(args): ''' Performs the amplicon analysis, running diri_sampler and analyzing the result ''' - - import shorah_snv - - # set logging level - amplog.setLevel(logging.DEBUG) - # This handler writes everything to a file. - LOG_FILENAME = './amplian.log' - hl = logging.handlers.RotatingFileHandler(LOG_FILENAME, 'w', - maxBytes=100000, backupCount=5) - f = logging.Formatter("%(levelname)s %(asctime)s %(funcName)s\ - %(lineno)d %(message)s") - hl.setFormatter(f) - amplog.addHandler(hl) - amplog.info(' '.join(sys.argv)) + in_bam = args.b + in_fasta = args.f + region = args.r + max_coverage = args.max_coverage + alpha = args.a + seed = args.seed + sigma = args.sigma + diversity = args.diversity + min_overlap = args.min_overlap + + logging.info(' '.join(sys.argv)) # info on reference and region if given, or discover high entropy one ref_seq = list(SeqIO.parse(in_fasta, 'fasta'))[0] ref_name = ref_seq.id @@ -329,101 +337,29 @@ def main(in_bam, in_fasta, min_overlap=0.95, max_coverage=50000, ref_length = len(ref_seq) reg_stop = ref_length - amplog.info('analysing region from %d to %d' % (reg_start, reg_stop)) + logging.info('analysing region from %d to %d', reg_start, reg_stop) # output the reads, aligned to the amplicon - b2w_exe = os.path.join(dn, 'b2w') + b2w_args = ' -i 0 -w %d -m %d -x %d %s %s %s' % \ (ref_length, int(min_overlap * ref_length), max_coverage, in_bam, in_fasta, region) - ret_b2w = run_child(b2w_exe, b2w_args) - amplog.debug('b2w returned %d' % ret_b2w) + ret_b2w = run_child(shlex.quote(b2w_exe), b2w_args) + logging.debug('b2w returned %d', ret_b2w) # run diri_sampler on the aligned reads win_file = 'w-%s-%d-%d.reads.fas' % (ref_name, reg_start, reg_stop) h = list(open('coverage.txt'))[0] n_reads = int(h.split()[-1]) assert os.path.exists(win_file), 'window file %s not found' % win_file - diri_exe = os.path.join(dn, 'diri_sampler') + iterations = min(30000, n_reads * 20) diri_args = '-i %s -j %d -a %f -t 2000' % (win_file, iterations, alpha) - ret_diri = run_child(diri_exe, diri_args) - amplog.debug('diri_sampler returned %d' % ret_diri) + ret_diri = run_child(shlex.quote(diri_exe), diri_args) + logging.debug('diri_sampler returned %d', ret_diri) # diagnostics on the convergence run_diagnostics(win_file, n_reads) # run snv.py to parse single nucleotide variants - shorah_snv.main(reference=in_fasta, bam_file=in_bam, sigma=s, increment=1) - - -if __name__ == "__main__": - - import argparse - # parse command line - parser = argparse.ArgumentParser(description="Local haplotype reconstruction - amplicon mode", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - opts = main.__defaults__ # set the defaults (see http://bit.ly/2hCTQl) - - # First define all option groups - group1 = parser.add_argument_group("Input files", "Required input") - - group2 = parser.add_argument_group("Type of run", - "You can specify a region, or look for the highest diversity\ - region") - group3 = parser.add_argument_group("Run options", "Fine tuning") - group4 = parser.add_argument_group("More options", - "Do you really want 
to change this?") - - group1.add_argument("-b", "--bam", metavar='BAM', required=True, type=str, - dest="in_bam", help="file with aligned reads in .bam format") - - group1.add_argument("-f", "--fasta", metavar='REF', required=True, type=str, - dest="in_fasta", help="reference genome in fasta format") - - group2.add_argument("-r", "--region", metavar='chrm:start-stop', default=opts[4], type=str, - dest="region", help="region in format 'chrm:start-stop' e.g.\ - 'ch3:1000-1300'") - - group2.add_argument("-d", "--diversity", action="store_true", default=opts[5], - dest="diversity", help="if set, automatically detects the highest entropy\ - region and runs there") - - group3.add_argument("-m", "--min_overlap", metavar='FLOAT', default=opts[0], type=float, - dest="min_overlap", help="fraction of read overlap to be included") - - group3.add_argument("-a", "--alpha", metavar='FLOAT', default=opts[2], type=float, - dest="alpha", help="alpha in dpm sampling") - - group4.add_argument("-x", "--maxcov", metavar='INT', default=opts[1], type=int, - dest="max_coverage", help="approximate max coverage allowed") - - group4.add_argument("-s", "--sigma", metavar='FLOAT', default=opts[3], type=float, - dest="s", help="sigma value to use when calling SNVs") - - args = parser.parse_args() - - supported_formats = { - 'bam': 'aligned reads', - 'fasta': 'reference genome' - } - # check the input file is in supported format - try: - tmp_filename = os.path.split(args.in_bam)[1] - [in_stem, in_format] = [tmp_filename.split('.')[0], - tmp_filename.split('.')[-1]] - t = supported_formats[in_format] - except IndexError: - print('The input file must be filestem.format') - print('Supported formats are') - for sf in supported_formats.items(): - print(sf[0], ':', sf[1]) - sys.exit() - except KeyError: - print('usage: amplian.py -b bam_file -f fasta_reference') - sys.exit('Please run with -h for all options') - if args.diversity and args.region != '': - sys.exit('Either detect the highest entropy region, or specify one') - - main(args.in_bam, args.in_fasta, args.min_overlap, args.max_coverage, args.alpha, - args.s, args.region, args.diversity) + shorah_snv.main(reference=in_fasta, bam_file=in_bam, sigma=sigma, increment=1) diff --git a/src/shorah/bin/.gitignore b/src/shorah/bin/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/src/shorah/cli.py b/src/shorah/cli.py new file mode 100644 index 0000000..f2ab9a4 --- /dev/null +++ b/src/shorah/cli.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 + +# Copyright 2007-2018 +# Niko Beerenwinkel, +# Nicholas Eriksson, +# Moritz Gerstung, +# Lukas Geyrhofer, +# Kerensa McElroy, +# Osvaldo Zagordi, +# ETH Zurich + +# This file is part of ShoRAH. +# ShoRAH is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# ShoRAH is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with ShoRAH. If not, see . +""" +Module that contains the command line app. + +Why does this file exist, and why not put this in __main__? 
+ + You might be tempted to import things from __main__ later, but that will cause + problems: the code will get executed twice: + + - When you run `python -mminvar` python will execute + ``__main__.py`` as a script. That means there won't be any + ``minvar.__main__`` in ``sys.modules``. + - When you import __main__ it will get executed again (as a module) because + there's no ``minvar.__main__`` in ``sys.modules``. + + Also see (1) from http://click.pocoo.org/5/setuptools/#setuptools-integration +""" +import os +import sys + +from pkg_resources import (get_distribution, DistributionNotFound) + +try: + __version__ = get_distribution('shorah').version +except DistributionNotFound: + # probably installed using Autotools + with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), '.version'), 'r') as version_file: + __version__ = version_file.read() + +# manipulate path to import functions +parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if __name__ == '__main__': + if __package__ is None: + os.sys.path.insert(1, parent_dir) + mod = __import__('shorah') + sys.modules["shorah"] = mod + #from common import hcv_map, hiv_map, org_dict, wobbles + #from stats import (genome_coverage, start_stop_coverage) +#else: + #from .common import hcv_map, hiv_map, org_dict, wobbles + #from .stats import (genome_coverage, start_stop_coverage) + +# each subcommand takes one of these functions as default + +def shotgun_run(args): + """Default function for command line parser.""" + from shorah import shotgun + shotgun.main(args) + +def amplicon_run(args): + """Default function for command line parser.""" + from shorah import amplicon + amplicon.main(args) + +def main(): + """Parse command line, run default functions.""" + import argparse + # parse command line + # create the top-level parser + parser = argparse.ArgumentParser(usage='%(prog)s [options]', + epilog="Run `shorah subcommand -h` for more help") + parser.add_argument('-v', '--version', action='version', version=__version__) + + parser.add_argument("-b", "--bam", metavar='BAM', required=True, type=str, + dest="b", help="sorted bam format alignment file") + + parser.add_argument("-f", "--fasta", metavar='REF', required=True, type=str, + dest="f", help="reference genome in fasta format") + + parser.add_argument("-a", "--alpha", metavar='FLOAT', required=False, type=float, + dest="a", default=0.1, help="alpha in dpm sampling") + + parser.add_argument("-r", "--region", metavar='chrm:start-stop', required=False, type=str, + dest="r", default='', help="region in format 'chr:start-stop', e.g. 
'chrm:1000-3000'") + + parser.add_argument("-R", "--seed", metavar='INT', required=False, type=int, + dest="seed", default=None, help="set seed for reproducible results") + + parser.add_argument("-x", "--maxcov", metavar='INT', required=False, type=int, + default=10000, dest="max_coverage", help="approximate max coverage allowed") + + parser.add_argument("-S", "--sigma", metavar='FLOAT', default=0.01, type=float, + dest="sigma", help="sigma value to use when calling SNVs") + + subparsers = parser.add_subparsers(help='available sub-commands') + + # create the parser for command "shotgun" + parser_shotgun = subparsers.add_parser('shotgun', help='run local analysis in shotgun mode') + + parser_shotgun.add_argument("-w", "--windowsize", metavar='INT', required=False, type=int, + dest="w", default=201, help="window size") + + parser_shotgun.add_argument("-s", "--winshifts", metavar='INT', required=False, type=int, + default=3, dest="win_shifts", help="number of window shifts") + + parser_shotgun.add_argument("-k", "--keep_files", required=False, action='store_true', + default=True, dest="keep_files", help="keep all intermediate files") + + parser_shotgun.set_defaults(func=shotgun_run) + + parser_amplicon = subparsers.add_parser('amplicon', help='run local analysis in amplicon mode') + + parser_amplicon.add_argument("-d", "--diversity", action="store_true", default=False, + dest="diversity", help="detect the highest entropy region and run there") + + parser_amplicon.add_argument("-m", "--min_overlap", metavar='FLOAT', default=0.95, type=float, + dest="min_overlap", help="fraction of read overlap to be included") + + parser_amplicon.set_defaults(func=amplicon_run) + + # exit so that log file is not written + if len(sys.argv) == 1 or sys.argv[1] == '-h' or sys.argv[1] == '--help': + parser.print_help() + sys.exit() + + # logging configuration + import logging + import logging.handlers + logging.basicConfig(filename='shorah.log', level=logging.DEBUG, + format='%(levelname)s %(asctime)s %(filename)s: %(funcName)s() %(lineno)d: \t%(message)s', + datefmt='%Y/%m/%d %H:%M:%S') + + logging.info(' '.join(sys.argv)) + logging.info('shorah version:%s', __version__) + # parse the args + args = parser.parse_args() + args.func(args) + +if __name__ == "__main__": # and __package__ is None: + main() diff --git a/src/shorah/contain.cpp b/src/shorah/contain.cpp deleted file mode 100644 index 219d0f8..0000000 --- a/src/shorah/contain.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* - * contain.cpp -- eliminates redundant reads - * - * usage: contain -f basename - * input: basename.read - * output: basename.rest - * both in the format "start_pos read_string" - * - # Copyright 2007-2012 - # Niko Beerenwinkel, - # Nicholas Eriksson, - # Moritz Gerstung, - # Lukas Geyrhofer, - # Kerensa McElroy - # Osvaldo Zagordi, - # ETH Zurich - - # This file is part of ShoRAH. - # ShoRAH is free software: you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by - # the Free Software Foundation, either version 3 of the License, or - # (at your option) any later version. - - # ShoRAH is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. - - # You should have received a copy of the GNU General Public License - # along with ShoRAH. If not, see . 
-*/ - -#include -#include -#include -#include -#include -#include -#include - -struct Read -{ - unsigned int pos; - unsigned int len; - unsigned int end; - std::string seq; -}; - -void help(const int exit_code = EXIT_FAILURE) -{ - std::cout << "Usage: contain -f basename\n"; - std::cout << "Expects basename.read, outputs basename.rest\n"; - exit(exit_code); -} - -void getReads(std::istream& infile, std::vector& ans) -{ - /* - * get the reads from a file/stdin - * FIXME: this should be much more robust - FIXME: die if the file isn't readable!!! - */ - - Read r; - std::string t; - std::string tmp; - std::string throwaway; - - // line of file is "pos seq" - while (!std::getline(infile, tmp, ' ').eof()) { - if (tmp[0] == '#') { - std::getline(infile, throwaway); - std::cerr << "Comment : " << tmp << " " << throwaway << std::endl; - continue; - } - r.pos = atoi(tmp.c_str()); - if ((r.pos < 0)) { - std::cerr << "Error: " << r.pos << " is an invalid start position\n"; - exit(1); - } - - std::getline(infile, r.seq); - r.len = r.seq.length(); - r.end = r.pos + r.len - 1; - - if (r.len > 0) ans.push_back(r); - } -} - -int contain(std::vector& reads, int i, int j) -{ - // return i if read[i] contained in read[j] - // j if read[j] ............ read[i] - // -1 else - // - int tmp, overlap; - - if ((reads[i].pos > reads[j].end) || (reads[j].pos > reads[i].end)) { - // std::cout << "no overlap\n"; - return -1; - } - - // switch so that i starts first - if ((reads[i].pos > reads[j].pos) || - ((reads[i].pos == reads[j].pos) && (reads[i].len < reads[j].len))) { - tmp = j; - j = i; - i = tmp; - } - overlap = reads[i].pos + reads[i].len - reads[j].pos; - // std::cout << "read " << i << " at pos " << reads[i].pos << " length " << reads[i].len << - // "\n"; - // std::cout << "read " << j << " at pos " << reads[j].pos << " length " << reads[j].len << - // "\n"; - // std::cout << "overlap == " << overlap << "\n"; - if (overlap < reads[j].len) { - // std::cout << "not enough overlap\n"; - return -1; - } - if (overlap > reads[j].len) { - overlap = reads[j].len; - } - if (reads[i].seq.substr(reads[j].pos - reads[i].pos, overlap) == - reads[j].seq.substr(0, overlap)) { - // std::cout << "Agree\n"; - return j; - } else { - // std::cout << "Disagree\n"; - return -1; - } -} - -int main(int argc, char** argv) -{ - int c; - std::vector reads; - std::string basename; - std::string filename; - std::string outfilename; - int i, j; - - /* parse options - * -f file (expect file.read file.geno) - * -h (help) - */ - while (1) { - c = getopt(argc, argv, "f:h?"); - if (c == -1) break; - switch (c) { - case 'f': - basename = optarg; - break; - case '?': - help(); - break; - case 'h': - help(EXIT_SUCCESS); - break; - default: - help(); - } - } - if (basename == "") { - help(); - } - filename = basename + ".read"; - outfilename = basename + ".rest"; - - std::ifstream infile(filename.c_str()); - std::ofstream outfile(outfilename.c_str()); - std::map delReads; - - // pull all reads into file - getReads(infile, reads); - int idx; - int finalReads = 0; - int numDeleted = 0; - std::cout << "readfile read" << std::endl; - // do double loop over all reads. 
- // contain returns the index to delete: either i, j, or -1 - // should be able to speed this up by skipping i or j if - // delReads[i] or j is 1 - // but have to be careful - // - for (i = 0; i < reads.size(); ++i) { - // if reads[i] already deleted, then we will have already - // deleted everything reads[i] makes redundant - if (delReads[i]) { - continue; - } - if (i % 10 == 0) std::cout << "finished " << i << " reads, " << numDeleted << " deleted\n"; - - for (j = i + 1; j < reads.size(); ++j) { - if (delReads[j]) continue; - idx = contain(reads, i, j); - - if (idx == i) { - delReads[idx] = 1; - // skip to next iteration of i loop - j = reads.size(); - numDeleted++; - } else if (idx == j) { - delReads[idx] = 1; - numDeleted++; - } - /* - if (idx != -1) { - std::cout << "del " << idx << " (from " << i << " " << j << ")\n"; - } - */ - } - } - for (i = 0; i < reads.size(); ++i) { - if (delReads[i] != 1) { - finalReads++; - outfile << reads[i].pos << " " << reads[i].seq << "\n"; - } - } - std::cout << "Started with " << reads.size() << " reads.\n"; - std::cout << "Ended with " << finalReads << " reads.\n"; -} diff --git a/src/shorah/doxyfile b/src/shorah/doxyfile deleted file mode 100755 index 7ed8af0..0000000 --- a/src/shorah/doxyfile +++ /dev/null @@ -1,257 +0,0 @@ -# Doxyfile 1.6.1 - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- -DOXYFILE_ENCODING = UTF-8 -PROJECT_NAME = SHORAH-DPM -PROJECT_NUMBER = 0.3a -OUTPUT_DIRECTORY = /home/barnab/Work/doc/ -CREATE_SUBDIRS = NO -OUTPUT_LANGUAGE = English -BRIEF_MEMBER_DESC = YES -REPEAT_BRIEF = YES -ABBREVIATE_BRIEF = -ALWAYS_DETAILED_SEC = NO -INLINE_INHERITED_MEMB = NO -FULL_PATH_NAMES = YES -STRIP_FROM_PATH = -STRIP_FROM_INC_PATH = -SHORT_NAMES = NO -JAVADOC_AUTOBRIEF = NO -QT_AUTOBRIEF = NO -MULTILINE_CPP_IS_BRIEF = NO -INHERIT_DOCS = YES -SEPARATE_MEMBER_PAGES = NO -TAB_SIZE = 8 -ALIASES = -OPTIMIZE_OUTPUT_FOR_C = NO -OPTIMIZE_OUTPUT_JAVA = NO -OPTIMIZE_FOR_FORTRAN = NO -OPTIMIZE_OUTPUT_VHDL = NO -EXTENSION_MAPPING = -BUILTIN_STL_SUPPORT = NO -CPP_CLI_SUPPORT = NO -SIP_SUPPORT = NO -IDL_PROPERTY_SUPPORT = YES -DISTRIBUTE_GROUP_DOC = NO -SUBGROUPING = YES -TYPEDEF_HIDES_STRUCT = NO -SYMBOL_CACHE_SIZE = 0 -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- -EXTRACT_ALL = YES -EXTRACT_PRIVATE = NO -EXTRACT_STATIC = NO -EXTRACT_LOCAL_CLASSES = YES -EXTRACT_LOCAL_METHODS = NO -EXTRACT_ANON_NSPACES = NO -HIDE_UNDOC_MEMBERS = NO -HIDE_UNDOC_CLASSES = NO -HIDE_FRIEND_COMPOUNDS = NO -HIDE_IN_BODY_DOCS = NO -INTERNAL_DOCS = NO -CASE_SENSE_NAMES = NO -HIDE_SCOPE_NAMES = NO -SHOW_INCLUDE_FILES = YES -INLINE_INFO = YES -SORT_MEMBER_DOCS = YES -SORT_BRIEF_DOCS = NO -SORT_MEMBERS_CTORS_1ST = NO -SORT_GROUP_NAMES = NO -SORT_BY_SCOPE_NAME = NO -GENERATE_TODOLIST = YES -GENERATE_TESTLIST = YES -GENERATE_BUGLIST = YES -GENERATE_DEPRECATEDLIST= YES -ENABLED_SECTIONS = -MAX_INITIALIZER_LINES = 30 -SHOW_USED_FILES = YES -SHOW_DIRECTORIES = NO -SHOW_FILES = YES -SHOW_NAMESPACES = YES -FILE_VERSION_FILTER = -LAYOUT_FILE = -#--------------------------------------------------------------------------- -# configuration options related to warning and progress messages -#--------------------------------------------------------------------------- -QUIET = NO -WARNINGS = YES 
-WARN_IF_UNDOCUMENTED = YES -WARN_IF_DOC_ERROR = YES -WARN_NO_PARAMDOC = NO -WARN_FORMAT = "$file:$line: $text" -WARN_LOGFILE = -#--------------------------------------------------------------------------- -# configuration options related to the input files -#--------------------------------------------------------------------------- -INPUT = ./dpm_src -INPUT_ENCODING = UTF-8 -FILE_PATTERNS = *.cpp *.h -RECURSIVE = NO -EXCLUDE = -EXCLUDE_SYMLINKS = NO -EXCLUDE_PATTERNS = -EXCLUDE_SYMBOLS = -EXAMPLE_PATH = -EXAMPLE_PATTERNS = -EXAMPLE_RECURSIVE = NO -IMAGE_PATH = -INPUT_FILTER = -FILTER_PATTERNS = -FILTER_SOURCE_FILES = NO -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- -SOURCE_BROWSER = NO -INLINE_SOURCES = NO -STRIP_CODE_COMMENTS = YES -REFERENCED_BY_RELATION = NO -REFERENCES_RELATION = NO -REFERENCES_LINK_SOURCE = YES -USE_HTAGS = NO -VERBATIM_HEADERS = YES -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- -ALPHABETICAL_INDEX = NO -COLS_IN_ALPHA_INDEX = 5 -IGNORE_PREFIX = -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- -GENERATE_HTML = YES -HTML_OUTPUT = html -HTML_FILE_EXTENSION = .html -HTML_HEADER = -HTML_FOOTER = -HTML_STYLESHEET = -HTML_ALIGN_MEMBERS = YES -HTML_DYNAMIC_SECTIONS = NO -GENERATE_DOCSET = NO -DOCSET_FEEDNAME = "Doxygen generated docs" -DOCSET_BUNDLE_ID = org.doxygen.Project -GENERATE_HTMLHELP = NO -CHM_FILE = -HHC_LOCATION = -GENERATE_CHI = NO -CHM_INDEX_ENCODING = -BINARY_TOC = NO -TOC_EXPAND = NO -GENERATE_QHP = NO -QCH_FILE = -QHP_NAMESPACE = -QHP_VIRTUAL_FOLDER = doc -QHP_CUST_FILTER_NAME = -QHP_CUST_FILTER_ATTRS = -QHP_SECT_FILTER_ATTRS = -QHG_LOCATION = -DISABLE_INDEX = NO -ENUM_VALUES_PER_LINE = 4 -GENERATE_TREEVIEW = NO -USE_INLINE_TREES = NO -TREEVIEW_WIDTH = 250 -FORMULA_FONTSIZE = 10 -SEARCHENGINE = YES -#--------------------------------------------------------------------------- -# configuration options related to the LaTeX output -#--------------------------------------------------------------------------- -GENERATE_LATEX = YES -LATEX_OUTPUT = latex -LATEX_CMD_NAME = latex -MAKEINDEX_CMD_NAME = makeindex -COMPACT_LATEX = NO -PAPER_TYPE = a4wide -EXTRA_PACKAGES = -LATEX_HEADER = -PDF_HYPERLINKS = YES -USE_PDFLATEX = YES -LATEX_BATCHMODE = NO -LATEX_HIDE_INDICES = NO -LATEX_SOURCE_CODE = NO -#--------------------------------------------------------------------------- -# configuration options related to the RTF output -#--------------------------------------------------------------------------- -GENERATE_RTF = NO -RTF_OUTPUT = rtf -COMPACT_RTF = NO -RTF_HYPERLINKS = NO -RTF_STYLESHEET_FILE = -RTF_EXTENSIONS_FILE = -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- -GENERATE_MAN = NO -MAN_OUTPUT = man -MAN_EXTENSION = .3 -MAN_LINKS = NO -#--------------------------------------------------------------------------- -# configuration options related to the XML output 
-#--------------------------------------------------------------------------- -GENERATE_XML = NO -XML_OUTPUT = xml -XML_SCHEMA = -XML_DTD = -XML_PROGRAMLISTING = YES -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- -GENERATE_AUTOGEN_DEF = NO -#--------------------------------------------------------------------------- -# configuration options related to the Perl module output -#--------------------------------------------------------------------------- -GENERATE_PERLMOD = NO -PERLMOD_LATEX = NO -PERLMOD_PRETTY = YES -PERLMOD_MAKEVAR_PREFIX = -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- -ENABLE_PREPROCESSING = YES -MACRO_EXPANSION = NO -EXPAND_ONLY_PREDEF = NO -SEARCH_INCLUDES = YES -INCLUDE_PATH = -INCLUDE_FILE_PATTERNS = -PREDEFINED = -EXPAND_AS_DEFINED = -SKIP_FUNCTION_MACROS = YES -#--------------------------------------------------------------------------- -# Configuration::additions related to external references -#--------------------------------------------------------------------------- -TAGFILES = -GENERATE_TAGFILE = -ALLEXTERNALS = NO -EXTERNAL_GROUPS = YES -PERL_PATH = /usr/bin/perl -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- -CLASS_DIAGRAMS = YES -MSCGEN_PATH = -HIDE_UNDOC_RELATIONS = YES -HAVE_DOT = YES -DOT_FONTNAME = FreeSans -DOT_FONTSIZE = 10 -DOT_FONTPATH = -CLASS_GRAPH = YES -COLLABORATION_GRAPH = YES -GROUP_GRAPHS = YES -UML_LOOK = NO -TEMPLATE_RELATIONS = NO -INCLUDE_GRAPH = YES -INCLUDED_BY_GRAPH = YES -CALL_GRAPH = NO -CALLER_GRAPH = NO -GRAPHICAL_HIERARCHY = YES -DIRECTORY_GRAPH = YES -DOT_IMAGE_FORMAT = png -DOT_PATH = -DOTFILE_DIRS = -DOT_GRAPH_MAX_NODES = 50 -MAX_DOT_GRAPH_DEPTH = 0 -DOT_TRANSPARENT = NO -DOT_MULTI_TARGETS = NO -GENERATE_LEGEND = YES -DOT_CLEANUP = YES diff --git a/src/shorah/freqEst.cpp b/src/shorah/freqEst.cpp deleted file mode 100644 index f1bd291..0000000 --- a/src/shorah/freqEst.cpp +++ /dev/null @@ -1,548 +0,0 @@ -/* freqEst.cpp - runs the EM algorithm to estimate haplotype frequencies - * given reads and reconstructed haplotypes - - # Copyright 2007, 2008, 2009 - # Niko Beerenwinkel, - # Nicholas Eriksson, - # Moritz Gerstung, - # Lukas Geyrhofer, - # Osvaldo Zagordi, - # ETH Zurich - - # This file is part of ShoRAH. - # ShoRAH is free software: you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by - # the Free Software Foundation, either version 3 of the License, or - # (at your option) any later version. - - # ShoRAH is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. - - # You should have received a copy of the GNU General Public License - # along with ShoRAH. If not, see . 
-*/ - -#include -#include -#include -#include -#include -#include -#include -#include - -struct Read -{ - unsigned int pos; - unsigned int len; - std::string seq; -}; - -// needed to sort, by Osvaldo -typedef struct comp_class -{ - std::string* hap; - double freq; -} com_pair; - -bool comp_func(com_pair* pi, com_pair* pj) { return (pi->freq > pj->freq); } - -//#define SEEPROB - -int CORRECTPARAM = 1; // which definition of pr(r | h) do we use -int SEED = 1; // random seed -int ITERMAX = 5000; // maximum iterations in EM -double ACCURACY = .000001; // convergence for EM -double KILLP = 0; // kill all probabilities below this -int RUNS = 5; -int N, M; -int unmatchingReads = 0; - -static inline int readMatchesSomeHaplotype(Read& r, std::vector& haplotypes) -{ - for (std::vector::iterator g = haplotypes.begin(); g != haplotypes.end(); ++g) - if (r.seq == (*g).substr(r.pos, r.len)) return 1; - return 0; -} - -void getReads(std::istream& infile, std::vector& ans, std::vector& haplotypes) -{ - /* - * get the reads from a file/stdin - * FIXME: this should be much more robust - */ - - Read r; - std::string t; - std::string tmp; - std::string throwaway; - - // line of file is "pos seq" - while (!std::getline(infile, tmp, ' ').eof()) { - if (tmp[0] == '#') { - std::getline(infile, throwaway); - std::cerr << "Comment : " << tmp << " " << throwaway << std::endl; - continue; - } - r.pos = atoi(tmp.c_str()); - if ((r.pos < 0)) { - std::cerr << "Error: " << r.pos << " is an invalid start position\n"; - exit(1); - } - - std::getline(infile, r.seq); - r.len = r.seq.length(); - // std::cout << r.pos << " " << r.seq << std::endl; - - if (readMatchesSomeHaplotype(r, haplotypes)) { - ans.push_back(r); - } else { - unmatchingReads++; - } - } -} - -void getHaplotypes(std::istream& in, std::vector& genotypesFinal) -{ - /* read in the list of candidate haplotypes */ - std::string tmp; - genotypesFinal.clear(); - std::vector genotypes; - // int i = 0; - - // line of file is "pos seq" - while (!std::getline(in, tmp).eof()) { - if (tmp[0] == '#') { - std::cerr << "Comment : " << tmp << std::endl; - continue; - } - genotypes.push_back(tmp); - // std::cout << i++ << " =" << tmp << ".\n"; - } - if (genotypes.size() == 0) { - std::cout << "Error, no haplotypes\n"; - exit(1); - } - // now genotypes looks like - // PQITSLX - // .QITSSX - // PQITSS. - // So we build a consensus haplotype and fill in the .'s with the consensus - int seqLen = genotypes[0].size(); - char consChar = ' '; - std::string cons; - for (int pos = 0; pos < seqLen; ++pos) { - std::map col; - for (std::vector::iterator g = genotypes.begin(); g != genotypes.end(); ++g) { - col[(*g)[pos]]++; - } - int max = -1; - // don't want to fill in with a '.' - col['.'] = -100; - for (std::map::iterator base = col.begin(); base != col.end(); ++base) { - if (base->second > max) { - consChar = base->first; - max = base->second; - } - } - cons = cons + consChar; - } - std::cout << "consensus haplotype\n" << cons << std::endl; - - std::vector correctionCounts(genotypes.size(), 0); - int tmpCount; - for (std::vector::iterator g = genotypes.begin(); g != genotypes.end(); ++g) { - tmpCount = 0; - for (int pos = 0; pos < seqLen; ++pos) { - if ((*g)[pos] == '.') { - tmpCount++; - (*g)[pos] = cons[pos]; -#ifndef NDEBUG - std::cout << "correcting . 
to " << cons[pos] << " in sequence " << *g << std::endl; -#endif - } - } - correctionCounts.push_back(tmpCount); - } - - // for(std::vector::iterator a = correctionCounts.begin(); a != correctionCounts.end(); - // ++a) { - // std::cout << *a << " "; - //} - // std::cout << std::endl; - - // but the genotypes might not be unique right now - std::map uniq; - for (std::vector::iterator g = genotypes.begin(); g != genotypes.end(); ++g) { - uniq[*g]++; - } - for (std::map::iterator x = uniq.begin(); x != uniq.end(); ++x) { -#ifndef NDEBUG - std::cout << "Haplotype was repeated " << x->second << " times\n"; -#endif - genotypesFinal.push_back(x->first); - } -} - -inline int match(std::string geno, Read r) { return (r.seq == geno.substr(r.pos, r.len)); } - -inline int readsEqual(Read& r, Read& s) -{ - if (r.pos == s.pos) { - if (r.seq == s.seq) { - return 1; - } - } - return 0; -} - -std::vector countReads(std::vector& origReads, std::vector& reads) -{ - // take orig reads and do - // %hash = (rd -> number appearing in origReads - // u = values(hash) - // reads = keys(hash) - // - int tmp; - for (std::vector::iterator r = origReads.begin(); r != origReads.end(); ++r) { - tmp = 1; - for (std::vector::iterator s = origReads.begin(); s != r; ++s) { - if (readsEqual(*r, *s)) { - tmp = 0; - } - } - if (tmp) { - reads.push_back(*r); - } - } - - std::vector u(reads.size(), 0); - - // for(std::vector::iterator r = reads.begin(); r != reads.end(); ++r) { - for (unsigned int i = 0; i < reads.size(); ++i) { - for (std::vector::iterator s = origReads.begin(); s != origReads.end(); ++s) { - if (readsEqual(reads.at(i), *s)) { - u[i]++; - } - } - } -#ifndef NDEBUG - std::cout << "Started with " << origReads.size() << " reads.\n"; - std::cout << reads.size() << " are unique\n"; - std::cout << "Counts:\n"; - for (unsigned int i = 0; i < u.size(); ++i) { - std::cout << u[i] << " "; - } - std::cout << std::endl; -#endif - return u; -} - -void Estep(std::vector const& p, std::vector >& U, - std::vector > const& Z, std::vector const& u) -{ - // given p, fills U with expected frequencies - int i, j; - double ProbY; - -#ifndef NDEBUG - std::cout << "Estep input: " << std::endl; - for (i = 0; i < N; ++i) { - std::cout << p[i] << " "; - } - std::cout << std::endl; -#endif - - for (i = 0; i < M; ++i) { - ProbY = 0; - for (j = 0; j < N; ++j) { - ProbY += Z[i][j] * p[j]; - // std::cout << "ProbY = " << ProbY << std::endl; - } - for (j = 0; j < N; ++j) { - // std::cout << "U[i][j] = " << u[i] << " " << Z[i][j] << " " << p[j] << " " << - // ProbY - //<< - // std::endl; - U[i][j] = u[i] * ((Z[i][j] * p[j]) / ProbY); - } - } - // std::cout << "Estep output: " << std::endl; - // for (i = 0; i < M; ++i) { - // for (j = 0; j < N; ++j) { - // std::cout << U[i][j] << " "; - // } - // std::cout << std::endl; - // } -} - -void Mstep(std::vector& p, std::vector > const& U) -{ - std::vector v(N, 0); - double m = 0; - int i, j; - - for (j = 0; j < N; ++j) { - // std::cout << "." 
<< v[j] << ".\n"; - for (i = 0; i < M; ++i) { - // std::cout << U[i][j] << " \n"; - v[j] += U[i][j]; - } - m += v[j]; - } - - for (j = 0; j < N; ++j) { - p[j] = v[j] / m; - } - -#ifndef NDEBUG - for (j = 0; j < N; ++j) { - std::cout << p[j] << " "; - } - std::cout << std::endl; -#endif -} - -double logLike(std::vector& p, std::vector > const& Z, - std::vector const& u) -{ - int i, j; - - double ell = 0; - double Prob_Y; - for (i = 0; i < M; i++) { - Prob_Y = 0; - for (j = 0; j < N; j++) { - Prob_Y += Z[i][j] * p[j]; - } - if (Prob_Y > 0) { - ell += (u[i] * log(Prob_Y)); - } - } - return ell; -} - -void round(std::vector& p) -{ - for (std::vector::iterator i = p.begin(); i != p.end(); ++i) { - if ((*i) < KILLP) *i = 0; - } -} - -double EM(std::vector& newP, std::vector > const& Z, - std::vector const& u) -{ - double sum = 0; - double newEll = 0; - std::vector p(N, 0); - std::vector > U(M, std::vector(N, 0)); - double ell = 0; - int iter = 0; - int j; - - for (j = 0; j < N; ++j) { - p[j] = rand(); - sum += p[j]; - } - for (j = 0; j < N; ++j) { - p[j] = p[j] / sum; - } - -#ifndef NDEBUG - for (j = 0; j < N; ++j) { - std::cout << p[j] << " "; - } - std::cout << std::endl; -#endif - - while (((iter <= 2) || (std::abs(ell - newEll) > ACCURACY)) && (iter < ITERMAX)) { - if (iter > 0) { - round(newP); - p = newP; - ell = newEll; - } - - Estep(p, U, Z, u); // fills U - Mstep(newP, U); // fills p - newEll = logLike(newP, Z, u); - - // Print out some stats - if (iter % 100 == 50 || iter == 1) { - -#ifdef SEEPROB - printf("%4d\t%f\t", iter, newEll); - - for (j = 0; j < N; ++j) { - if (p[j] > 0.05) { - std::cout << j << ":" << p[j] << " "; - } - } - std::cout << std::endl; -#else - printf("%4d\t%f\n", iter, newEll); -#endif - } - // printf("%.3f %.3f %.3f ", newP[0], newP[1], newP[2]); - // printf("%.3f %.3f %.3f ", newP[3], newP[4], newP[5]); - // printf("%.3f %.3f %.3f\n", newP[6], newP[7], newP[8]); - iter++; - } - return newEll; -} -void help(const int exit_code = EXIT_FAILURE) -{ - std::cout << "Usage: freqEst -f basename [-p precision -i maxiter -r runs -h -k kill -?]\n"; - std::cout << "Expects basename.read and basename.geno\n"; - std::cout << "Outputs to basename.popl\n"; - exit(exit_code); -} - -int main(int argc, char** argv) -{ - int c; - std::vector haplotypes; - std::vector origReads, reads; - std::string basename; - std::string filename; - std::string genotypeFilename, outfilename; - int i, j; - - /* parse options - * -f file (expect file.read file.geno) - * -h (help) - */ - while (1) { - c = getopt(argc, argv, "f:h?p:i:r:k:s:"); - if (c == -1) break; - switch (c) { - case 'k': - KILLP = atof(optarg); - break; - case 's': - SEED = atoi(optarg); - break; - case 'f': - basename = optarg; - break; - case 'p': - ACCURACY = atof(optarg); - break; - case 'i': - ITERMAX = atoi(optarg); - break; - case 'r': - RUNS = atoi(optarg); - break; - case '?': - help(); - break; - case 'h': - help(EXIT_SUCCESS); - break; - default: - help(); - } - } - if (basename == "") { - help(); - } - filename = basename + ".read"; - genotypeFilename = basename + ".geno"; - outfilename = basename + ".popl"; - - srand(SEED); - - std::ifstream infile(filename.c_str()); - std::ifstream ginfile(genotypeFilename.c_str()); - if (ginfile.fail() || infile.fail()) { - std::cout << "ERROR -- can't read " << basename << ".geno or .read\n"; - exit(1); - } - std::ofstream outfile(outfilename.c_str()); - // outfile.open (outfilename.c_str()); - - getHaplotypes(ginfile, haplotypes); - - getReads(infile, origReads, haplotypes); - - 
// uniq reads - std::vector u; - u = countReads(origReads, reads); - - M = reads.size(); - N = haplotypes.size(); - - std::cout << N << " haplotypes\n" << M << " reads\n"; - std::cout << "Threw out " << unmatchingReads << " unexplained reads\n"; - - // count how many reads a haplotype is compat with - std::vector K(N, 0); - - // the M x N matrix of 0/1 incidences - std::vector > A(M, std::vector(N, 0)); - - for (i = 0; i < M; ++i) { - for (j = 0; j < N; ++j) { - if (match(haplotypes[j], reads[i])) { - A[i][j] = 1; - K[j]++; - } - } - } - - // my @Z; # Z[i][j] == Prob(Y = y_i | X = x_j) - std::vector > Z(M, std::vector(N, 0)); - for (i = 0; i < M; ++i) { - for (j = 0; j < N; ++j) { - if (A[i][j] == 1) { - if (CORRECTPARAM) { - Z[i][j] = 1.0 / 1000; - } else { - Z[i][j] = 1.0 / K[j]; - } - // std::cout << "Z[i][j] = "<< Z[i][j] << std::endl; - } - } - } - - std::vector prob(N, 0); - std::vector bestProb(N, 0); - double logL; - double bestL = -1e100; - - for (int run = 0; run < RUNS; ++run) { - std::cout << "run " << run << std::endl; - logL = EM(prob, Z, u); - if (logL > bestL) { - bestProb = prob; - bestL = logL; - } - } - - // sort the haplotypes by frequency before printing - // added by Osvaldo - std::string s6(10, 'x'); - com_pair** freq_hap; - freq_hap = (com_pair**)calloc(N, sizeof(com_pair*)); - for (int i = 0; i < N; ++i) { - freq_hap[i] = (com_pair*)calloc(1, sizeof(com_pair)); - freq_hap[i]->hap = &(haplotypes[i]); - freq_hap[i]->freq = bestProb[i]; - } - - std::sort(freq_hap, freq_hap + N, comp_func); - - for (int i = 0; i < N; ++i) { - if (freq_hap[i]->freq > 0) { - // outfile << ">HAP" << i << "_" << bestProb[i] << "\n" << haplotypes[i] << std::endl; - outfile << ">HAP" << i << "_" << freq_hap[i]->freq << "\n" - << *freq_hap[i]->hap << std::endl; - std::cout << i << "\t" << freq_hap[i]->freq << "\t" << *freq_hap[i]->hap << std::endl; - // std::cout << i << " " << freq_hap[i]->freq << std::endl; - } - } - // The output is now sorted - return 0; -} diff --git a/src/python/shorah_snv.py b/src/shorah/shorah_snv.py old mode 100755 new mode 100644 similarity index 91% rename from src/python/shorah_snv.py rename to src/shorah/shorah_snv.py index ec0c377..a8def08 --- a/src/python/shorah_snv.py +++ b/src/shorah/shorah_snv.py @@ -1,4 +1,6 @@ -# Copyright 2007-2014 +#!/usr/bin/env python3 + +# Copyright 2007-2018 # Niko Beerenwinkel, # Nicholas Eriksson, # Moritz Gerstung, @@ -38,12 +40,13 @@ import shutil import sys import warnings +import shlex import logging -import logging.handlers -# Make a global logging object. 
-snvlog = logging.getLogger(__name__) +from pkg_resources import resource_filename +fil_exe = resource_filename(__name__, 'bin/fil') + # used to parse variants from support files posterior_thresh = 0.9 @@ -57,7 +60,7 @@ def segments(incr): try: infile = open('coverage.txt') except IOError: - snvlog.error('Coverage file generated by b2w not found.') + logging.error('Coverage file generated by b2w not found.') sys.exit('Coverage file generated by b2w not found.') for f in infile: # window_file, reference, begin, end, value @@ -111,7 +114,7 @@ def parseWindow(line, ref1): else: window = open(filename, 'r') except IOError: - snvlog.error('File not found') + logging.error('File not found') return snp beg = int(beg) @@ -125,7 +128,7 @@ def parseWindow(line, ref1): post, av = float(match_obj.group(1)), float(match_obj.group(2)) if post > 1.0: warnings.warn('posterior = %4.3f > 1' % post) - snvlog.warning('posterior = %4.3f > 1' % post) + logging.warning('posterior = %4.3f > 1' % post) if post >= posterior_thresh: reads += av pos = beg @@ -143,7 +146,7 @@ def parseWindow(line, ref1): if tot_snv > max_snv: max_snv = tot_snv - snvlog.info('max number of snvs per sequence found: %d' % max_snv) + logging.info('max number of snvs per sequence found: %d', max_snv) # normalize for k, v in snp.items(): v[5] /= v[4] @@ -161,7 +164,7 @@ def getSNV(ref, segCov, incr): try: cov_file = open('coverage.txt') except IOError: - snvlog.error('Coverage file generated by b2w not found') + logging.error('Coverage file generated by b2w not found') sys.exit('Coverage file generated by b2w not found') # cycle over all windows reported in coverage.txt @@ -172,7 +175,7 @@ def getSNV(ref, segCov, incr): if incr == 1: incr = end - beg + 1 single_window = True - snvlog.info('working on single window as invoked by amplian') + logging.info('working on single window as invoked by amplian') key = list(snp.keys()) key.sort() for k in key: @@ -310,11 +313,11 @@ def sb_filter(in_bam, sigma, amplimode="", max_coverage=100000): """run strand bias filter calling the external program 'fil' """ import subprocess - dn = sys.path[0] - my_prog = os.path.join(dn, 'fil') + # dn = sys.path[0] + my_prog = shlex.quote(fil_exe) # os.path.join(dn, 'fil') my_arg = ' -b ' + in_bam + ' -v ' + str(sigma) + amplimode + ' -x ' \ + str(max_coverage) - snvlog.debug('running %s%s' % (my_prog, my_arg)) + logging.debug('running %s%s', my_prog, my_arg) retcode = subprocess.call(my_prog + my_arg, shell=True) return retcode @@ -348,32 +351,21 @@ def main(reference, bam_file, sigma=0.01, increment=1, max_coverage=100000): import csv import inspect - # set logging level - snvlog.setLevel(logging.DEBUG) - # This handler writes everything to a file. 
- LOG_FILENAME = './snv.log' - h = logging.handlers.RotatingFileHandler(LOG_FILENAME, 'w', - maxBytes=100000, backupCount=5) - fl = logging.Formatter("%(levelname)s %(asctime)s %(funcName)s\ - %(lineno)d %(message)s") - h.setFormatter(fl) - snvlog.addHandler(h) - - snvlog.info(str(inspect.getargspec(main))) + logging.info(str(inspect.getfullargspec(main))) ref_m = dict([[s.id, str(s.seq).upper()] for s in SeqIO.parse(reference, 'fasta')]) # number of windows per segment segCov_m = segments(increment) - snvlog.debug('coverage parsed') + logging.debug('coverage parsed') # snpD_m is the file with the 'consensus' SNVs (from different windows) - snvlog.debug('now parsing SNVs') + logging.debug('now parsing SNVs') if not os.path.isfile('snv/SNV.txt'): snpD_m = getSNV(ref_m, segCov_m, increment) printRaw(snpD_m, increment) else: - snvlog.debug('snv/SNV.txt found, moving to ./') + logging.debug('snv/SNV.txt found, moving to ./') shutil.move('snv/SNV.txt', './') # run strand bias filter, output in SNVs_%sigma.txt @@ -383,7 +375,7 @@ def main(reference, bam_file, sigma=0.01, increment=1, max_coverage=100000): else: retcode_m = sb_filter(bam_file, sigma, max_coverage=max_coverage) if retcode_m is not 0: - snvlog.error('sb_filter exited with error %d' % retcode_m) + logging.error('sb_filter exited with error %d', retcode_m) sys.exit() # parse the p values from SNVs*txt file diff --git a/src/python/shorah_dec.py b/src/shorah/shotgun.py old mode 100755 new mode 100644 similarity index 85% rename from src/python/shorah_dec.py rename to src/shorah/shotgun.py index d5b95a7..c957f88 --- a/src/python/shorah_dec.py +++ b/src/shorah/shotgun.py @@ -1,4 +1,6 @@ -# Copyright 2007-2012 +#!/usr/bin/env python3 + +# Copyright 2007-2018 # Niko Beerenwinkel, # Nicholas Eriksson, # Moritz Gerstung, @@ -27,17 +29,28 @@ from __future__ import division from __future__ import print_function -import numpy as np + import os import pipes import sys - +import shlex import logging -import logging.handlers +from pkg_resources import resource_filename -# Make a global logging object. -declog = logging.getLogger(__name__) +import numpy as np +dn_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if __name__ == '__main__': + if __package__ is None: + os.sys.path.insert(1, dn_dir) + mod = __import__('shorah') + sys.modules["shorah"] = mod + import shorah_snv +else: + from . 
import shorah_snv + +diri_exe = resource_filename(__name__, 'bin/diri_sampler') +b2w_exe = resource_filename(__name__, 'bin/b2w') ################################################# # a common user should not edit above this line # ################################################# @@ -60,13 +73,14 @@ # Dictionary storing by read ID (key) the posterior for each of window quality = {} -count = {} -count['A'] = 0 -count['C'] = 0 -count['G'] = 0 -count['T'] = 0 -count['X'] = 0 -count['-'] = 0 +count = { + 'A': 0, + 'C': 0, + 'G': 0, + 'T': 0, + 'X': 0, + '-': 0 +} clusters = [[]] untouched = [[]] @@ -91,18 +105,18 @@ def parse_aligned_reads(reads_file): out_reads = {} if not os.path.isfile(reads_file): - declog.error('There should be a file here: ' + reads_file) + logging.error('There should be a file here: ' + reads_file) sys.exit('There should be a file here: ' + reads_file) else: - declog.info('Using file ' + reads_file + ' of aligned reads') + logging.info('Using file ' + reads_file + ' of aligned reads') handle = open(reads_file) - declog.debug('Parsing aligned reads') + logging.debug('Parsing aligned reads') for h in handle: name, start, stop, mstart, mstop, this_m = h.rstrip().split('\t') if this_m == '': - declog.warning('parsing empty read: %s' % h) + logging.warning('parsing empty read: %s', h) out_reads[name] = [None, None, None, None, []] # start of the region of interest (0-based indexing) out_reads[name][0] = int(start) @@ -122,21 +136,22 @@ def windows(run_settings): """ import subprocess bam, fasta, w, i, m, x, reg = run_settings - dn = sys.path[0] - my_prog = os.path.join(dn, 'b2w') + #dn = sys.path[0] + my_prog = shlex.quote(b2w_exe) # os.path.join(dn, 'b2w') + my_arg = ' -w %i -i %i -m %i -x %i %s %s %s' % \ (w, i, m, x, bam, fasta, reg) try: retcode = subprocess.call(my_prog + my_arg, shell=True) if retcode > 0: - declog.error('%s %s' % (my_prog, my_arg)) - declog.error('b2w returned %i' % retcode) + logging.error('%s %s', my_prog, my_arg) + logging.error('b2w returned %i', retcode) else: - declog.debug('Finished making windows') - declog.debug('b2w returned %i' % retcode) + logging.debug('Finished making windows') + logging.debug('b2w returned %i', retcode) except OSError as ee: - declog.error('Execution of b2w failed: %s' % ee) + logging.error('Execution of b2w failed: %s', ee) return retcode @@ -153,7 +168,7 @@ def run_dpm(run_setting): stem = filein.split('.reads')[0] corgz = 'corrected/%s.reads-cor.fas.gz' % stem if os.path.exists(corgz): - declog.debug('file %s already analysed, skipping' % filein) + logging.debug('file %s already analysed, skipping', filein) return # if already run before, extract the read file @@ -164,8 +179,8 @@ def run_dpm(run_setting): shutil.move(fstgz, './') subprocess.check_call(["gunzip", "%s-reads.gz" % stem]) - dn = sys.path[0] - my_prog = os.path.join(dn, 'diri_sampler') + # dn = sys.path[0] + my_prog = shlex.quote(diri_exe) # os.path.join(dn, 'diri_sampler') my_arg = ' -i %s -j %i -t %i -a %f -K %d -R %d' % \ (pipes.quote(filein), j, int(j * hist_fraction), a, init_K, seed) @@ -174,18 +189,18 @@ def run_dpm(run_setting): # os.remove('./assignment.tmp') except OSError: pass - declog.debug(my_prog + my_arg) + logging.debug(my_prog + my_arg) # runs the gibbs sampler for the dirichlet process mixture try: retcode = subprocess.call(my_prog + my_arg, shell=True) if retcode < 0: - declog.error('%s %s' % (my_prog, my_arg)) - declog.error('Child %s terminated by SIG %d' % (my_prog, -retcode)) + logging.error('%s %s' % (my_prog, my_arg)) + 
logging.error('Child %s terminated by SIG %d' % (my_prog, -retcode)) else: - declog.debug('run %s finished' % my_arg) - declog.debug('Child %s returned %i' % (my_prog, retcode)) + logging.debug('run %s finished' % my_arg) + logging.debug('Child %s returned %i' % (my_prog, retcode)) except OSError as ee: - declog.error('Execution of %s failed: %s' % (my_prog, ee)) + logging.error('Execution of %s failed: %s' % (my_prog, ee)) return @@ -231,7 +246,7 @@ def correct_reads(chr_c, wstart, wend): handle.close() return except IOError: - declog.warning('No reads in window %s?' % wstart) + logging.warning('No reads in window %s?' % wstart) return @@ -350,8 +365,9 @@ def merge_corrected_reads(aligned_read): return(ID, merged_corrected_read) -def main(in_bam, in_fasta, win_length=201, win_shifts=3, region='', - max_coverage=10000, alpha=0.1, keep_files=True, seed=None): +#def main(in_bam, in_fasta, win_length=201, win_shifts=3, region='', +# max_coverage=10000, alpha=0.1, keep_files=True, seed=None): +def main(args): ''' Performs the error correction analysis, running diri_sampler and analyzing the result @@ -361,25 +377,25 @@ def main(in_bam, in_fasta, win_length=201, win_shifts=3, region='', import shutil import time - import shorah_snv + #import shorah_snv + + in_bam = args.b + in_fasta = args.f + win_length = args.w + win_shifts = args.win_shifts + region = args.r + max_coverage = args.max_coverage + alpha = args.a + keep_files = args.keep_files + seed = args.seed - # set logging level - declog.setLevel(logging.DEBUG) - # This handler writes everything to a file. - LOG_FILENAME = './dec.log' - hl = logging.handlers.RotatingFileHandler(LOG_FILENAME, 'w', - maxBytes=100000, backupCount=5) - f = logging.Formatter("%(levelname)s %(asctime)s\ - %(funcName)s %(lineno)d %(message)s") - hl.setFormatter(f) - declog.addHandler(hl) - declog.info(' '.join(sys.argv)) + logging.info(' '.join(sys.argv)) # check options if win_length % win_shifts != 0: sys.exit('Window size must be divisible by win_shifts') if win_min_ext < 1 / win_shifts: - declog.warning('Some bases might not be covered by any window') + logging.warning('Some bases might not be covered by any window') if max_coverage / win_length < 1: sys.exit('Please increase max_coverage') if not os.path.isfile(in_bam): @@ -406,14 +422,14 @@ def main(in_bam, in_fasta, win_length=201, win_shifts=3, region='', if win_length > gen_length: sys.exit('The window size must be smaller than the genome region') - declog.info('%s reads are being considered' % len(aligned_reads)) + logging.info('%s reads are being considered', len(aligned_reads)) ############################################ # Now the windows and the error correction # ############################################ runlist = win_to_run(alpha, seed) - declog.info('will run on %d windows' % len(runlist)) + logging.info('will run on %d windows', len(runlist)) # run diri_sampler on all available processors but one max_proc = max(cpu_count() - 1, 1) pool = Pool(processes=max_proc) @@ -434,24 +450,24 @@ def main(in_bam, in_fasta, win_length=201, win_shifts=3, region='', proposed = {} for i in runlist: winFile, j, a, s = i - del(a) # in future alpha might be different on each window + del a # in future alpha might be different on each window parts = winFile.split('.')[0].split('-') chrom = '-'.join(parts[1:-2]) beg = int(parts[-2]) end = int(parts[-1]) - declog.info('reading windows for start position %s' % beg) + logging.info('reading windows for start position %s', beg) # correct reads populates correction and 
quality, globally defined correct_reads(chrom, beg, end) stem = 'w-%s-%s-%s' % (chrom, beg, end) - declog.info('this is window %s' % stem) + logging.info('this is window %s' % stem) dbg_file = stem + '.dbg' # if os.path.exists(dbg_file): proposed[beg] = (get_prop(dbg_file), j) - declog.info('there were %s proposed' % str(proposed[beg][0])) + logging.info('there were %s proposed' % str(proposed[beg][0])) # (re)move intermediate files if not keep_all_files: - declog.info('removing intermediate files') + logging.info('removing intermediate files') tr_files = glob.glob('./w*reads.fas') tr_files.extend(glob.glob('./*.smp')) tr_files.extend(glob.glob('./w*.dbg')) @@ -538,7 +554,7 @@ def main(in_bam, in_fasta, win_length=201, win_shifts=3, region='', ## correction[read_id][wstart] = sequence ## ## quality[read_id][wstart] = posterior ## # ########################################## - declog.info('Merging windows of corrected reads') + logging.info('Merging windows of corrected reads') # Multi-threaded version params = list(aligned_reads.items()) pool = Pool(processes=max_proc) @@ -546,12 +562,12 @@ def main(in_bam, in_fasta, win_length=201, win_shifts=3, region='', pool.close() pool.join() - declog.info('All corrected reads have been merged') + logging.info('All corrected reads have been merged') ccx = {} cin_stem = '.'.join(os.path.split(in_bam)[1].split('.')[:-1]) fch = open('%s.cor.fas' % cin_stem, 'w') - declog.debug('writing to file %s.cor.fas' % cin_stem) + logging.debug('writing to file %s.cor.fas', cin_stem) for ID, seq_list in to_correct: cor_read = ''.join(seq_list) init_x = len(cor_read.lstrip('-')) - len(cor_read.lstrip('-X')) @@ -584,7 +600,7 @@ def main(in_bam, in_fasta, win_length=201, win_shifts=3, region='', (kp, proposed[kp][0] / proposed[kp][1])) ph.close() - declog.info('running snv.py') + logging.info('running snv.py') shorah_snv.main(reference=in_fasta, bam_file=in_bam, increment=win_length // win_shifts, max_coverage=max_coverage) @@ -597,4 +613,4 @@ def main(in_bam, in_fasta, win_length=201, win_shifts=3, region='', for snv_file in glob.glob('./SNV*'): shutil.move(snv_file, 'snv/') - declog.info('dec.py ends') + logging.info('shotgun run ends')
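
A minimal usage sketch, not part of the patch itself: with this refactoring, the sub-commands simply hand the parsed argparse namespace to `shotgun.main(args)` (or `amplicon.main(args)`), so the shotgun pipeline can also be driven programmatically by building an equivalent namespace. The attribute names below mirror the `dest` values defined in the new parser; `sample.bam` and `ref.fasta` are placeholder file names, and the sketch assumes the `shorah` package is importable (installed or on `sys.path`).

    # Hypothetical example: invoke the refactored shotgun entry point without the CLI.
    from argparse import Namespace

    from shorah import shotgun

    args = Namespace(
        b='sample.bam',        # sorted BAM alignment (placeholder), as for --bam
        f='ref.fasta',         # reference FASTA (placeholder), as for --fasta
        w=201,                 # window size (--windowsize)
        win_shifts=3,          # number of window shifts (--winshifts)
        r='',                  # optional region 'chr:start-stop' (--region)
        max_coverage=10000,    # approximate maximum coverage (--maxcov)
        a=0.1,                 # alpha of the Dirichlet process mixture (--alpha)
        keep_files=True,       # keep intermediate files (--keep_files)
        seed=None,             # seed for reproducible results (--seed)
    )

    shotgun.main(args)

The shell equivalent, assuming the `shorah` script is installed, would be along the lines of `shorah -b sample.bam -f ref.fasta shotgun`, since the global options are defined on the top-level parser and the sub-command follows them.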