In [4]:
# Imports
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp as mt
import pandas as pd

In [1]:
def _read_fasta_sequence(file):
    '''Return the sequence stored in a single-record FASTA file as one string.

    Header lines (starting with '>') and blank lines are skipped, and
    surrounding whitespace is stripped from every sequence line.

    Raises:
        ValueError: if the file holds more than one record or no sequence.
    '''
    header_count = 0
    chunks = []
    with open(file) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('>'):
                header_count += 1
                if header_count > 1:
                    # Tiling across a record boundary would create chimeric probes
                    raise ValueError(
                        f'{file} contains more than one FASTA record; '
                        'provide one sequence per file'
                    )
                continue
            chunks.append(line)

    sequence = ''.join(chunks)
    if not sequence:
        raise ValueError(f'{file} does not contain any sequence')
    return sequence


def rap_probes(file, gene_name, probe_length = 50, coverage  = 1, adaptor = 'CAAGTCA', nmol = '25nm'):
    '''Tile a target sequence with antisense RNA affinity purification probes.

    Reads a single-record FASTA file, takes the reverse complement of the
    target, cuts it into windows of `probe_length` nucleotides, prepends
    `adaptor` to each window, and writes the resulting table to
    '<gene_name>_Probes.csv' in the current working directory.

    Requires the module-level imports `Seq` (Bio.Seq), `mt`
    (Bio.SeqUtils.MeltingTemp) and `pd` (pandas).

    Parameters:
        file: path to the FASTA file holding the target sequence.
        gene_name: prefix for probe names and for the output CSV file name.
        probe_length: length of the target-matching part of each probe (int).
        coverage: tiling density; consecutive probes start
            probe_length / coverage nucleotides apart (1 = end to end).
        adaptor: sequence placed at the 5' end of every probe.
        nmol: synthesis scale written verbatim into the 'Nanomoles' column.

    Returns:
        pandas.DataFrame with columns 'Name', 'Sequence', 'Length (bp)',
        'Tm (°C)' and 'Nanomoles', one row per probe. Probe 1 starts at the
        5' end of the antisense strand, i.e. it covers the 3' end of the target.

    Raises:
        ValueError: if probe_length or coverage is not positive, if the FASTA
            is empty or multi-record, or if the target is shorter than
            probe_length.
    '''
    if probe_length <= 0 or coverage <= 0:
        raise ValueError('probe_length and coverage must be positive')

    target = Seq(_read_fasta_sequence(file))
    if len(target) < probe_length:
        raise ValueError(
            f'target is {len(target)} nt long, shorter than probe_length ({probe_length})'
        )

    # Probes must be antisense AND written 5'->3' (the adaptor goes on the 5' end
    # and oligos are ordered 5'->3'), so the reverse complement is required;
    # a plain complement would read 3'->5'.
    antisense = target.reverse_complement()

    # Generate start indices, keeping only windows that fit entirely inside the
    # sequence so no probe is silently truncated when coverage > 1.
    step = probe_length / coverage
    last_start = len(antisense) - probe_length
    starts = []
    k = 0
    while int(k * step) <= last_start:
        start = int(k * step)
        # A step below one nucleotide would repeat the same index; keep it once
        if not starts or start != starts[-1]:
            starts.append(start)
        k += 1

    # Add a last, end-aligned probe only if more than a quarter probe remains
    # uncovered after the final tiled window.
    uncovered = len(antisense) - (starts[-1] + probe_length)
    if uncovered > probe_length / 4:
        starts.append(last_start)

    # Cut the probes and add the adaptor to their 5' end
    probes = [adaptor + str(antisense[s:s + probe_length]) for s in starts]

    # Double check probe lengths (adaptor included)
    lengths = [len(p) for p in probes]

    # Tm of the full oligo (adaptor included) using the salt/Tris values of the
    # hybridization buffer, rounded to one decimal.
    melts = [round(mt.Tm_NN(p, Na = 500, Tris = 10), 1) for p in probes]

    # Make appropriate labels for each probe, numbered from 1
    labels = [gene_name + '_' + str(n) for n in range(1, len(probes) + 1)]

    # Same synthesis scale to order from IDT for every probe
    idt_bulk = [nmol] * len(probes)

    # Put everything in a DataFrame
    probe_complete = pd.DataFrame({
        'Name':labels,
        'Sequence':probes,
        'Length (bp)': lengths,
        'Tm (°C)':melts,
        'Nanomoles':idt_bulk
    })

    # Save the table next to the notebook, named after the gene
    name = gene_name + '_Probes.csv'
    probe_complete.to_csv(name)

    return probe_complete

In [6]:
# Input FASTA files, one target transcript per file
fastas = [
    'MuTsix.fasta.txt',
    'MuXist_long.fasta.txt',
    'MuXist_short.fasta.txt',
    'U1.txt',
]

# Gene label for each FASTA above (paired by position)
names = [
    'MuTsix',
    'MuXist_long',
    'MuXist_short',
    'MuU1',
]

In [7]:
# Build and save a probe table for every (FASTA file, gene label) pair
for fasta_path, gene_label in zip(fastas, names):
    rap_probes(fasta_path, gene_label)