Skip to content

Commit

Permalink
updated index pipeline with latest index locations
Browse files Browse the repository at this point in the history
  • Loading branch information
Acribbs committed Jul 25, 2023
1 parent 683c4ee commit 3da74a7
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 44 deletions.
5 changes: 3 additions & 2 deletions tallytrin/pipeline_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,14 @@ def seperate_by_barcode(infile, outfile):
'''
Identify barcode and save to different samples
'''

primer = PARAMS['primer']
name = outfile.replace("seperate_samples.dir/", "")
name = name.replace(".fastq", "")

PYTHON_ROOT = os.path.join(os.path.dirname(__file__), "python/")

statement = '''python %(PYTHON_ROOT)s/identify_index.py --infile=%(infile)s --name=%(name)s'''
statement = '''python %(PYTHON_ROOT)s/identify_index.py --infile=%(infile)s --name=%(name)s
--primer=%(primer)s'''

P.run(statement)

Expand Down
4 changes: 3 additions & 1 deletion tallytrin/pipeline_index/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ copyright: cribbslab (2021)

split: 400000

job_options: -t 48:00:00
job_options: -t 48:00:00

primer: AAGCAGTGGTAT
99 changes: 58 additions & 41 deletions tallytrin/python/identify_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
help='infile fastq file')
parser.add_argument("--name", default=None, type=str,
help='name of sample')

parser.add_argument("--primer", default=None, type=str,
help='primer')
args = parser.parse_args()

L.info("args:")
Expand Down Expand Up @@ -113,15 +114,20 @@ def remove_indels(x, umi, first):
'CCCTTTCCCTTT': '12'}


# Complement lookup for DNA bases. Both cases are covered so that lowercase
# (e.g. soft-masked) input is complemented rather than passed through
# unchanged. Characters outside the table (such as N) are left as they are.
tab = str.maketrans("ACTGactg", "TGACtgac")


def reverse_complement(seq):
    """Return the reverse complement of the DNA string *seq*.

    Each of A, C, G and T (in either case) is replaced by its complementary
    base, preserving case, and the resulting string is reversed. Any other
    character is kept unchanged. An empty string yields an empty string.
    """
    return seq.translate(tab)[::-1]


with pysam.FastxFile(args.infile) as fh:

for record in fh:

seq_nano = record.sequence
primer = "(%s){e<=0}" % (args.primer)

m=regex.finditer("(AAGCAGTGGT){e<=1}", str(seq_nano))
m=regex.finditer(str(primer), str(seq_nano[:150]))


for i in m:
Expand Down Expand Up @@ -156,46 +162,57 @@ def remove_indels(x, umi, first):
fname = name + '' + "Unidentified.fastq"

if fname == name + '' + 'Unidentified.fastq':
m=regex.finditer("(ACCACTGCTT){e<=1}", str(seq_nano))
for i in m:
barcode = seq_nano[int(i.end()):int(i.end()+12)]
barcode = remove_point_mutations(barcode)[0]
barcode = barcode[::3]
if barcode == 'GCAT':
fname = name + '' + "Sample1.fastq"
elif barcode == 'CTGA':
fname = name + '' + "Sample2.fastq"
elif barcode == 'AGTC':
fname = name + '' + "Sample3.fastq"
elif barcode == 'TACG':
fname = name + '' + "Sample4.fastq"
elif barcode == 'TCGT':
fname = name + '' + "Sample5.fastq"
elif barcode == 'ATCA':
fname = name + '' + "Sample6.fastq"
elif barcode == 'CGAC':
fname = name + '' + "Sample7.fastq"
elif barcode == 'GATG':
fname = name + '' + "Sample8.fastq"
elif barcode == 'CTCT':
fname = name + '' + "Sample9.fastq"
elif barcode == 'TATA':
fname = name + '' + "Sample10.fastq"
elif barcode == 'GCGC':
fname = name + '' + "Sample11.fastq"
elif barcode == 'CTCT':
fname = name + '' + "Sample12.fastq"
else:
fname = name + '' + "Unidentified.fastq"
if os.path.exists('seperate_samples.dir/' + fname):

with open('seperate_samples.dir/'+ fname, "a") as myfile:
myfile.write("@%s\n%s\n+\n%s\n" % (record.name, record.sequence, record.quality))
else:
with iotools.open_file('seperate_samples.dir/' + fname, "w") as myfile:
myfile.write("@%s\n%s\n+\n%s\n" % (record.name, record.sequence, record.quality))
pass

if os.path.exists('seperate_samples.dir/' + fname):

with open('seperate_samples.dir/'+ fname, "a") as myfile:
myfile.write("@%s\n%s\n+\n%s\n" % (record.name, record.sequence, record.quality))
else:
with iotools.open_file('seperate_samples.dir/' + fname, "w") as myfile:
myfile.write("@%s\n%s\n+\n%s\n" % (record.name, record.sequence, record.quality))

primer2 = "(%s){e<=0}" % (reverse_complement(args.primer))

m=regex.finditer(str(primer2), str(seq_nano[:-150]))
for i in m:
barcode = seq_nano[int(i.end()):int(i.end()+12)]
barcode = remove_point_mutations(barcode)[0]
barcode = barcode[::3]
if barcode == 'GCAT':
fname = name + '' + "Sample1.fastq"
elif barcode == 'CTGA':
fname = name + '' + "Sample2.fastq"
elif barcode == 'AGTC':
fname = name + '' + "Sample3.fastq"
elif barcode == 'TACG':
fname = name + '' + "Sample4.fastq"
elif barcode == 'TCGT':
fname = name + '' + "Sample5.fastq"
elif barcode == 'ATCA':
fname = name + '' + "Sample6.fastq"
elif barcode == 'CGAC':
fname = name + '' + "Sample7.fastq"
elif barcode == 'GATG':
fname = name + '' + "Sample8.fastq"
elif barcode == 'CTCT':
fname = name + '' + "Sample9.fastq"
elif barcode == 'TATA':
fname = name + '' + "Sample10.fastq"
elif barcode == 'GCGC':
fname = name + '' + "Sample11.fastq"
elif barcode == 'CTCT':
fname = name + '' + "Sample12.fastq"
else:
fname = name + '' + "Unidentified.fastq"

if os.path.exists('seperate_samples.dir/' + fname):

with open('seperate_samples.dir/'+ fname, "a") as myfile:
myfile.write("@%s\n%s\n+\n%s\n" % (record.name, record.sequence, record.quality))
else:
break
with iotools.open_file('seperate_samples.dir/' + fname, "w") as myfile:
myfile.write("@%s\n%s\n+\n%s\n" % (record.name, record.sequence, record.quality))



0 comments on commit 3da74a7

Please sign in to comment.