Skip to content

Commit

Permalink
Merge pull request #76 from dib-lab/analysis/sim-family
Browse files Browse the repository at this point in the history
Analysis on simulated trio
  • Loading branch information
standage committed May 15, 2017
2 parents 22137ff + a28976c commit fc88503
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 0 deletions.
10 changes: 10 additions & 0 deletions notebook/random-genome/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
mother-genome.fa
father-genome.fa
proband-genome.fa
mother-reads-*
father-reads-*
proband-reads-*
*augfastq*
*augfasta*
bogus-genome.fa*
proband-cc*
210 changes: 210 additions & 0 deletions notebook/random-genome/SimulateFamily.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Remove any bogus-genome.fa left over from a previous run: the genome-building\n",
"# cell below appends to this file with `>>`, so stale content would otherwise\n",
"# be extended instead of replaced.\n",
"!rm -f bogus-genome.fa"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import random\n",
"import subprocess\n",
"import sys\n",
"\n",
"import kevlar"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Build the random reference genome, bogus-genome.fa: 5 chromosomes, each a\n",
"# single FASTA record assembled from 10 independently seeded random segments\n",
"# requested at 250000 bp apiece.\n",
"random.seed(42)\n",
"\n",
"# sys.maxint exists only on Python 2; on Python 3 the lookup raises\n",
"# AttributeError and sys.maxsize is used instead. Catch only that error so\n",
"# unrelated failures are not swallowed. `maxint` is reused by the\n",
"# read-simulation cells further down.\n",
"try:\n",
"    maxint = sys.maxint\n",
"except AttributeError:\n",
"    maxint = sys.maxsize\n",
"\n",
"for chrm in range(1, 6):\n",
"    # Open a new FASTA record for this chromosome.\n",
"    defline = '>bogus_chr{:d}'.format(chrm)\n",
"    cmd = 'echo \"{}\" >> bogus-genome.fa'.format(defline)\n",
"    subprocess.check_call(cmd, shell=True)\n",
"\n",
"    for i in range(1, 11):\n",
"        # Drop every line starting with '>' from the generator's output so the\n",
"        # segment's sequence is appended under the chromosome defline above.\n",
"        seed = random.randint(1, maxint)\n",
"        cmd = '../nullgraph/make-random-genome.py --length 250000 --seed {} | grep -v \"^>\" >> bogus-genome.fa'.format(seed)\n",
"        subprocess.check_call(cmd, shell=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[kevlar::mutate] loading mutations\n",
"[kevlar::mutate] mutating genome\n",
"[kevlar::mutate] loading mutations\n",
"[kevlar::mutate] mutating genome\n",
"[kevlar::mutate] loading mutations\n",
"[kevlar::mutate] mutating genome\n"
]
}
],
"source": [
"# Derive one genome per family member from the shared reference by running\n",
"# the kevlar `mutate` subcommand on that member's mutation list. Order is\n",
"# proband, mother, father.\n",
"for sample in ('proband', 'mother', 'father'):\n",
"    arglist = [\n",
"        'mutate',\n",
"        '-o', '{}-genome.fa'.format(sample),\n",
"        '{}-mutations.txt'.format(sample),\n",
"        'bogus-genome.fa',\n",
"    ]\n",
"    args = kevlar.cli.parser().parse_args(arglist)\n",
"    kevlar.mutate.main(args)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Simulate paired-end reads with sequencing errors for each family member:\n",
"# 1500000 pairs of 2 x 125 bp reads, base error rate 0.005, no extra\n",
"# mutations introduced by wgsim (-r 0.0).\n",
"random.seed(98765432)\n",
"\n",
"wgsim_template = (\n",
"    'wgsim -e 0.005 -r 0.0 -d 450 -s 50 -N 1500000 -1 125 -2 125 -S {seed} '\n",
"    '{sample}-genome.fa {sample}-reads-1.fq {sample}-reads-2.fq'\n",
")\n",
"\n",
"# The iteration order determines which seed each sample receives, so keep it\n",
"# fixed (proband, mother, father) to reproduce the same read sets.\n",
"for sample in ('proband', 'mother', 'father'):\n",
"    seed = random.randint(1, maxint)\n",
"    cmd = wgsim_template.format(seed=seed, sample=sample)\n",
"    # check_call rather than call: a failing wgsim run must stop the notebook\n",
"    # instead of being ignored while later cells consume incomplete reads.\n",
"    subprocess.check_call(cmd, shell=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Simulate error-free paired-end reads for each family member: same settings\n",
"# as the reads with errors (1500000 pairs of 2 x 125 bp) but with the base\n",
"# error rate set to 0.0, written to *-reads-noerror-{1,2}.fq.\n",
"random.seed(246813579)\n",
"\n",
"wgsim_template = (\n",
"    'wgsim -e 0.0 -r 0.0 -d 450 -s 50 -N 1500000 -1 125 -2 125 -S {seed} '\n",
"    '{sample}-genome.fa {sample}-reads-noerror-1.fq {sample}-reads-noerror-2.fq'\n",
")\n",
"\n",
"# The iteration order determines which seed each sample receives, so keep it\n",
"# fixed (proband, mother, father) to reproduce the same read sets.\n",
"for sample in ('proband', 'mother', 'father'):\n",
"    seed = random.randint(1, maxint)\n",
"    cmd = wgsim_template.format(seed=seed, sample=sample)\n",
"    # check_call rather than call: a failing wgsim run must stop the notebook\n",
"    # instead of being ignored while later cells consume incomplete reads.\n",
"    subprocess.check_call(cmd, shell=True)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"30.0"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Expected fold coverage of each simulated read set:\n",
"# (read pairs x bases per pair) / genome length.\n",
"read_pairs = 1.5e6  # wgsim -N 1500000\n",
"bases_per_pair = 125 + 125  # wgsim -1 125 -2 125\n",
"genome_length = 5 * 10 * 250000  # 5 chromosomes x 10 segments x 250000 bp\n",
"\n",
"coverage = read_pairs * bases_per_pair / genome_length\n",
"coverage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
10 changes: 10 additions & 0 deletions notebook/random-genome/father-mutations.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
bogus_chr3 2450469 inv 633
bogus_chr3 989982 snv 2
bogus_chr1 2208698 del 3
bogus_chr4 1345628 del 49
bogus_chr3 1569783 del 377
bogus_chr3 1519701 inv 73
bogus_chr4 1104418 ins AGCATTTGTGGGAATCTGGGTTGTCTCGCTACAGCGCTATAAAAACACATGTGTACCTCGACAAGGTCCCCGTGTCCTCGAACGTCAACTTGGAACGGCATTTCTCGCTATACTATCTAAGGGTCAAACCTGGCGGCTTCGACCCGGGGATGCGAACAATGTAATAGGTCGAAATGCAATTATCTCTGGCGC
bogus_chr2 2432245 del 11
bogus_chr1 2265339 del 1
bogus_chr2 2295658 ins TGAGCGCATGACTATC
10 changes: 10 additions & 0 deletions notebook/random-genome/mother-mutations.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
bogus_chr4 1967828 snv 3
bogus_chr1 1384544 inv 270
bogus_chr2 501069 ins TTCCAGGA
bogus_chr1 2026035 del 47
bogus_chr2 489093 del 7
bogus_chr2 360117 ins AATCGGTAACGTGAGTCTCAAGTTTTAGCGGTAGCGTAGTACATCACACGAGTCTGAAGAGTTGCTAGACGCGGTTAGTTTCGTCCAATCCGGGTTACAGTATATGGGCGACAAAGACATCGGATCCTTCGAAGAGTCATTCGAGCGGCACTTTGGGTCGACGCCACAACCCCATCAGTTTATATTCCTATGATACTAAGTACGCTTCTATCTACGACCAAGGGGAAGGCCTTCGCGTAAAGGCCGGATCACTTCCGGGCGTCGGGACTCAGCACGTCATATTAGATAAACGCCACTTGGTAATAATTGCAGCAAAACGGCCCTGTCAGTGCACCATGTGAGCCCGTTGTATGAAGAGTTGGAGGCATAGGTCACGTGGGTGGGTCGGCACGTCCTTGGGAGAACAGAAAGGGGCCCCTTTCATAATGTCTTTCCTTTCTGAGATAGTTCGGGAGGACCAGTGCGCCACGATCATACACGTTCACAAAACCTTTTGCTCCACGTTCCCTCAGGAGCGACTTTAGCGCCAAGACTTCAACCTTTCCGACCAAGTATGCGGGGGGGTAATCTCCCAGACGAATCGTTTAATTCCGTCCTCTTCCTATTGATCGGTATACATCTACACGCCTACGGGCGCCCAAGGAAAAAGCTGACAGCTACAGTTGGATAACCTGACGATCTTATTTATGATGCTGATGACCTCTGCTCAACATTACGAGATGTTGTAGTTTCCACAAGGGACATCGCGCTGT
bogus_chr3 414831 ins TGAGTGGCATTACAAGCCCTAAAAGGTGCTCTCCTAAGGCGCTAAAGTGTCCACAGCGCTATTTCATAGGCTTCAGATCCTGGG
bogus_chr3 329008 inv 4011
bogus_chr4 1327539 inv 816
bogus_chr1 1271095 del 119
25 changes: 25 additions & 0 deletions notebook/random-genome/proband-mutations.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Novel
bogus_chr2 1276563 ins ATTGGGGCGCATGTAACCTTACCA
bogus_chr2 2343128 snv 3
bogus_chr4 1079681 inv 510
bogus_chr5 2494953 inv 744
bogus_chr5 1534128 ins GCTTTTATCACTGTCCTTACAGGTGGATAGTCGCTTTGTAATAAAAGAGTTACACCCCGGTTTTTAGAAGTCTCGACTTTAAGGAAGTGGGCCTACGGCGGAAGCCGTCTCTAATGGACTCAAGGACCTGAATCCAACTAGAGGAGCTTGCCACGCCATGGAATGATGCCCCGGGCTCACCATCTTAAGGCATCCGATGC
bogus_chr1 1767266 del 27
bogus_chr2 1322245 snv 1
bogus_chr5 727550 snv 4
bogus_chr5 590004 del 187
bogus_chr3 1445649 snv 2

# Inherited from mother
bogus_chr4 1967828 snv 3
bogus_chr1 1271095 del 119
bogus_chr3 414831 ins TGAGTGGCATTACAAGCCCTAAAAGGTGCTCTCCTAAGGCGCTAAAGTGTCCACAGCGCTATTTCATAGGCTTCAGATCCTGGG
bogus_chr1 1384544 inv 270
bogus_chr4 1327539 inv 816
bogus_chr1 2026035 del 47

# Inherited from father
bogus_chr1 2265339 del 1
bogus_chr2 2432245 del 11
bogus_chr3 1569783 del 377
bogus_chr4 1345628 del 49

0 comments on commit fc88503

Please sign in to comment.