# Merge probes for CTP-12 marker-gene DNA and RNA libraries

The probes come from 2 different sources:
1. TSS 150 probe library for cortex marker genes
2. the corresponding smFISH probes from Meng's M1 library, used in the paper: https://www.nature.com/articles/s41586-021-03705-x

In [1]:
%run "..\..\Startup_py3.py"
sys.path.append(r"..\..\..\..\Documents")

import ImageAnalysis3
from ImageAnalysis3 import get_img_info, visual_tools, corrections, library_tools

from ImageAnalysis3.library_tools import LibraryDesigner as ld
from ImageAnalysis3.library_tools import LibraryTools

%matplotlib notebook
print(os.getpid())

# biopython for SeqRecord
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# blast
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIXML 
import ImageAnalysis3.library_tools.quality_check as qc

25904


In [2]:
# Library directories
# root folder of the CTP-12 pool; every sub-directory is treated as a sub-library
pool_folder = r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-12_mop_markers'

# collect the full path of each entry of pool_folder that is a directory
sub_fds = []
for _entry in os.listdir(pool_folder):
    _entry_path = os.path.join(pool_folder, _entry)
    if os.path.isdir(_entry_path):
        sub_fds.append(_entry_path)


## scan probe files

In [3]:
# Probe-file names to look for in each sub-library folder, in order of
# preference: 'blast_full_probes.fasta' is taken when present, otherwise
# 'filtered_full_probes.fasta'. Folders containing neither are skipped.
_probe_basenames = ('blast_full_probes.fasta', 'filtered_full_probes.fasta')

files = []          # full path of the selected probe file, one per sub-library
library_names = []  # sub-library folder name with '_' replaced by '-'
for _fd in sub_fds:
    # list the folder once and reuse the result for every candidate name,
    # rather than re-listing the directory for each check
    _fd_contents = set(os.listdir(_fd))
    for _basename in _probe_basenames:
        if _basename in _fd_contents:
            files.append(os.path.join(_fd, _basename))
            library_names.append(os.path.basename(_fd).replace('_', '-'))
            break  # keep only the highest-priority file of this folder

print(files)
print(library_names)

['\\\\10.245.74.212\\Chromatin_NAS_2\\Chromatin_Libraries\\CTP-12_mop_markers\\smFISH_cell_class\\filtered_full_probes.fasta', '\\\\10.245.74.212\\Chromatin_NAS_2\\Chromatin_Libraries\\CTP-12_mop_markers\\marker_gene_TSS\\blast_full_probes.fasta']
['smFISH-cell-class', 'marker-gene-TSS']


## load sub-libraries

In [4]:
# probes shorter than target_len are padded at the 3' end with 'A's;
# probes of target_len or longer are left unchanged
target_len = 120
library_2_pbs = {}  # library name -> list of SeqRecord loaded from its fasta file

for _fl, _lib_name in zip(files, library_names):
    print(f"loading probes from file: {_fl}")
    _records = []
    with open(_fl, 'r') as _handle:
        for _record in SeqIO.parse(_handle, "fasta"):
            # tag the probe id with the library it came from
            _record.id = f"{_record.id}_library_{_lib_name}"
            # clear the remaining header fields
            _record.name = ''
            _record.description = ''
            # extend the sequence up to target_len when it is too short
            _num_missing = target_len - len(_record.seq)
            if _num_missing > 0:
                _record.seq = _record.seq + Seq('A' * _num_missing)

            _records.append(_record)

    library_2_pbs[_lib_name] = _records

loading probes from file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-12_mop_markers\smFISH_cell_class\filtered_full_probes.fasta
loading probes from file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-12_mop_markers\marker_gene_TSS\blast_full_probes.fasta


## merge and save

In [5]:
# concatenate the probes of every loaded library into a single list,
# in the insertion order of library_2_pbs
final_pool_records = []
for _lib_name in library_2_pbs:
    _records = library_2_pbs[_lib_name]
    print(f"appending {len(_records)} probes from library: {_lib_name}")
    final_pool_records += _records

appending 2260 probes from library: smFISH-cell-class
appending 3360 probes from library: marker-gene-TSS


In [6]:
# write the merged pool as one fasta file directly inside pool_folder
save_filename = os.path.join(pool_folder, 'merged_CTP12.fasta')
_num_to_save = len(final_pool_records)
# note: the count is reported before the file is actually written
print(f"number of probes saved: {_num_to_save}")
with open(save_filename, 'w') as _output_handle:
    SeqIO.write(final_pool_records, _output_handle, "fasta")

number of probes saved: 5620


# load and summarize primers

In [4]:
# reload the merged probe pool from the fasta file on disk
save_filename = os.path.join(pool_folder, 'merged_CTP12.fasta')
with open(save_filename, 'r') as _handle:
    # materialize the parser output as a list of SeqRecord objects
    full_records = list(SeqIO.parse(_handle, "fasta"))


In [6]:
# display the last record loaded into full_records
full_records[-1]

SeqRecord(seq=Seq('CGCAAACTGGTGCGGAAGGCAAGTCCGCGCAGGAACGTGAAGCAGCTCACAGAA...TGG'), id='loc_11:78489091-78509091_gene_Vtn_pb_191_pos_13894_strand_+_readouts_[Stv_119_u,Stv_119_u]_primers_[W1A07_primer_6,W1A12_primer_11]_library_marker-gene-TSS', name='loc_11:78489091-78509091_gene_Vtn_pb_191_pos_13894_strand_+_readouts_[Stv_119_u,Stv_119_u]_primers_[W1A07_primer_6,W1A12_primer_11]_library_marker-gene-TSS', description='loc_11:78489091-78509091_gene_Vtn_pb_191_pos_13894_strand_+_readouts_[Stv_119_u,Stv_119_u]_primers_[W1A07_primer_6,W1A12_primer_11]_library_marker-gene-TSS', dbxrefs=[])

In [5]:
# display the first record loaded into full_records
full_records[0]

SeqRecord(seq=Seq('CCCGCAATGGCTGACAACCGAGAGATTAGAGATGAGTTGGATGGATCCCACACC...AAA'), id='CTP11_MO_RNA_W1A03_2_RS0708_gene_Aqp4__ENSMUST00000079081__190__30__0.43333__69.0522__1_RS0708_W1A10_9_library_smFISH-cell-class', name='CTP11_MO_RNA_W1A03_2_RS0708_gene_Aqp4__ENSMUST00000079081__190__30__0.43333__69.0522__1_RS0708_W1A10_9_library_smFISH-cell-class', description='CTP11_MO_RNA_W1A03_2_RS0708_gene_Aqp4__ENSMUST00000079081__190__30__0.43333__69.0522__1_RS0708_W1A10_9_library_smFISH-cell-class', dbxrefs=[])

# Calculate concentration

In [7]:
# marker-gene RNA
# Compute the volume (ul) in which the whole stock must end up so that each
# individual probe is at final_conc.

# --- pool description ---
num_pb = 2260      # number of probes in the pool
pb_len = 120 - 11  # nt used for the molecular weight
                   # NOTE(review): reason for subtracting 11 nt is not stated here - confirm
final_conc = 10    # nM, target concentration per probe

# --- stock measurement ---
vol = 9e3                               # ul
conc = (93 + 92.4 + 90.3) / 3 * 1e-9    # g/ul, mean of three readings

# molecular weight of one probe
# presumably the usual ssDNA approximation (303.7 g/mol per nt, plus 79) - confirm
pb_mw = pb_len * 303.7 + 79  # g/mol

# nM -> mol/L (1e-9), then per litre -> per ul (1e-6)
target_conc = final_conc * 1e-9 * 1e-6  # mol/ul

# total mass -> mass of each probe -> moles of each probe
mass_total = vol * conc               # g
mass_per_pb = mass_total / num_pb     # g
mol_per_pb = mass_per_pb / pb_mw      # mol
print(mass_total, mass_per_pb, mol_per_pb, sep="\n")

# volume that holds mol_per_pb of each probe at target_conc
final_vol = mol_per_pb / target_conc
print("final in ul:", final_vol)

0.0008271
3.659734513274336e-07
1.1029176739630274e-11
final in ul: 1102.9176739630275
The history saving thread hit an unexpected error (OperationalError('database or disk is full')).History will not be written to the database.


In [8]:
# marker-gene DNA TSS
# Compute the volume (ul) in which the whole stock must end up so that each
# individual probe is at final_conc.

# --- pool description ---
num_pb = 3360      # number of probes in the pool
pb_len = 142 - 11  # nt used for the molecular weight
                   # NOTE(review): reason for subtracting 11 nt is not stated here - confirm
final_conc = 10    # nM, target concentration per probe

# --- stock measurement ---
vol = 9e3                                   # ul
conc = (165.8 + 191.3 + 174.5) / 3 * 1e-9   # g/ul, mean of three readings

# molecular weight of one probe
# presumably the usual ssDNA approximation (303.7 g/mol per nt, plus 79) - confirm
pb_mw = pb_len * 303.7 + 79  # g/mol

# nM -> mol/L (1e-9), then per litre -> per ul (1e-6)
target_conc = final_conc * 1e-9 * 1e-6  # mol/ul

# total mass -> mass of each probe -> moles of each probe
mass_total = vol * conc               # g
mass_per_pb = mass_total / num_pb     # g
mol_per_pb = mass_per_pb / pb_mw      # mol
print(mass_total, mass_per_pb, mol_per_pb, sep="\n")

# volume that holds mol_per_pb of each probe at target_conc
final_vol = mol_per_pb / target_conc
print("final in ul:", final_vol)

0.0015948000000000002
4.746428571428572e-07
1.1906643315669575e-11
final in ul: 1190.6643315669576
