In [1]:
# Import the RAP probe design module
import rpd

In [2]:
# Design 83-mer probes from the FASTA file sitting in the working directory.
# rap_probes hands back the probe table together with the output filename and
# the probe-set name, both of which later cells reuse.
# NOTE(review): the printed "Length (bp)" is 90 and every sequence starts with
# CAAGTCA -- presumably a 7 nt prefix is added to each 83-mer; confirm in rpd.
mujpx, filename, name = rpd.rap_probes(
    'MuJpx.fasta.txt',
    'MuJpx',
    probe_length=83,
)

# Preview the first five designed probes.
print(mujpx.head())

            Name                                           Sequence  \
0  MuJpx_83mer_1  CAAGTCACACGATTTGAGAATTTATTTTCACAGAGATGCAATCATC...   
1  MuJpx_83mer_2  CAAGTCAAATTGACCGGATTTCAAAACAAAACAAAACACTCATTCA...   
2  MuJpx_83mer_3  CAAGTCAAAAGAAACTTTATATGACATCTATGGAGAAAAAAAAAGG...   
3  MuJpx_83mer_4  CAAGTCAAGTTCTAGCTGGTAAATCTGGACTTCTCCACATGCATCA...   
4  MuJpx_83mer_5  CAAGTCAACTAGACATTTCAGGACCAGTTTTCCTATGGTCAGAGGA...   

   Length (bp)  Tm (°C) Nanomoles  
0           90     83.2      25nm  
1           90     78.6      25nm  
2           90     82.8      25nm  
3           90     85.4      25nm  
4           90     82.2      25nm  


In [3]:
# BLAT every probe using the resulting "fasta-input" file, then collect by
# hand the probe numbers that aligned poorly, i.e. those with a match longer
# than 25 bp somewhere else in the genome.
# The entries are the 1-based probe numbers from the Name column
# (e.g. 1, 2 and 5 are listed and MuJpx_83mer_1/_2/_5 vanish from the output).
bad_probes = [
    1, 10, 11, 12, 13, 14, 16, 17, 2, 20,
    23, 5, 9, 27, 28, 29, 30, 31, 32, 33,
    34, 35, 36, 37, 40,
]

# Remove the flagged probes from the table and preview what is left.
mujpx = rpd.probe_filter(bad_probes, mujpx, name)
print(mujpx.head())

            Name                                           Sequence  \
2  MuJpx_83mer_3  CAAGTCAAAAGAAACTTTATATGACATCTATGGAGAAAAAAAAAGG...   
3  MuJpx_83mer_4  CAAGTCAAGTTCTAGCTGGTAAATCTGGACTTCTCCACATGCATCA...   
5  MuJpx_83mer_6  CAAGTCAAGTCCTCCGTGTATTCATGTTTATTTTCATAAATAACAC...   
6  MuJpx_83mer_7  CAAGTCAATATACATGTATTTTCTAGGTTGCTGCTTAGTCCAGAAG...   
7  MuJpx_83mer_8  CAAGTCAACCTTCTAAATCTGTTTTTCAACAGGAAGTACCCAGGCT...   

   Length (bp)  Tm (°C) Nanomoles  
2           90     82.8      25nm  
3           90     85.4      25nm  
5           90     82.7      25nm  
6           90     84.0      25nm  
7           90     85.1      25nm  


In [4]:
# Feed the resulting 'blat-filtered_fasta-input' file to Repeat Masker and
# Tandem Repeat Finder, then list by hand any probes that contain repeats.
# None of the Jpx probes contained repeats, so the list stays empty here;
# the call is kept to show where this filtering step belongs.
bad_probes = []

mujpx = rpd.probe_filter(bad_probes, mujpx, name, filter_type='repeats')

# Preview the first five probes after the repeat filter.
print(mujpx.head())

            Name                                           Sequence  \
2  MuJpx_83mer_3  CAAGTCAAAAGAAACTTTATATGACATCTATGGAGAAAAAAAAAGG...   
3  MuJpx_83mer_4  CAAGTCAAGTTCTAGCTGGTAAATCTGGACTTCTCCACATGCATCA...   
5  MuJpx_83mer_6  CAAGTCAAGTCCTCCGTGTATTCATGTTTATTTTCATAAATAACAC...   
6  MuJpx_83mer_7  CAAGTCAATATACATGTATTTTCTAGGTTGCTGCTTAGTCCAGAAG...   
7  MuJpx_83mer_8  CAAGTCAACCTTCTAAATCTGTTTTTCAACAGGAAGTACCCAGGCT...   

   Length (bp)  Tm (°C) Nanomoles  
2           90     82.8      25nm  
3           90     85.4      25nm  
5           90     82.7      25nm  
6           90     84.0      25nm  
7           90     85.1      25nm  


In [5]:
# Renumber the surviving probes so their names run consecutively again.
# As the output below shows, only the Name column is renumbered
# (MuJpx_83mer_1, _2, ...); the DataFrame row labels keep their old values.
mujpx = rpd.reindex(mujpx, name)

# Preview the first five renumbered probes.
print(mujpx.head())

            Name                                           Sequence  \
2  MuJpx_83mer_1  CAAGTCAAAAGAAACTTTATATGACATCTATGGAGAAAAAAAAAGG...   
3  MuJpx_83mer_2  CAAGTCAAGTTCTAGCTGGTAAATCTGGACTTCTCCACATGCATCA...   
5  MuJpx_83mer_3  CAAGTCAAGTCCTCCGTGTATTCATGTTTATTTTCATAAATAACAC...   
6  MuJpx_83mer_4  CAAGTCAATATACATGTATTTTCTAGGTTGCTGCTTAGTCCAGAAG...   
7  MuJpx_83mer_5  CAAGTCAACCTTCTAAATCTGTTTTTCAACAGGAAGTACCCAGGCT...   

   Length (bp)  Tm (°C) Nanomoles  
2           90     82.8      25nm  
3           90     85.4      25nm  
5           90     82.7      25nm  
6           90     84.0      25nm  
7           90     85.1      25nm  


In [6]:
# Write the filtered, renumbered probe table to the filename that
# rap_probes returned, updating the final probe list.
mujpx.to_csv(path_or_buf=filename)