In [2]:
from Bio.Seq import Seq
import pyperclip
# Input DNA sequence (lowercase) whose base-wise complement is wanted
original_sequence = "atgccgtccaaggcggagaatctgcggccctccgagccggccccgcagccgccggaagggaggacgctcca......agcaaagaccccaacgagaagcgcgatcacatggtcctgctggagttcgtgaccgccgccgggatcactctcggcatggacgagctgtacaag"

# Wrap the text in a Biopython Seq so the complement can be computed
seq = Seq(original_sequence)

# Base-wise complement (the order of the bases is NOT reversed), forced to lowercase
complement_seq = seq.complement()
complement_seq_lower = complement_seq.lower()

# Show the result and put the same text on the system clipboard
clipboard_text = str(complement_seq_lower)
print(clipboard_text)
pyperclip.copy(clipboard_text)
print("Complemented sequence copied to clipboard.")

tacggcaggttccgcctcttagacgccgggaggctcggccggggcgtcggcggccttccctcctgcgaggt......tcgtttctggggttgctcttcgcgctagtgtaccaggacgacctcaagcactggcggcggccctagtgagagccgtacctgctcgacatgttc
Complemented sequence copied to clipboard.


In [491]:
from tabulate import tabulate

def print_primers_table(inserts):
    """Print the forward and reverse primer of every insert as a grid table.

    Args:
        inserts: Mapping of insert name -> record dict. The optional
            "FW_primer" / "RV_primer" entries are lists whose items are
            spread over the Sequence / Tm / GC Content / Start Position
            columns.

    Returns:
        None. The table is written to stdout.

    A primer entry that is missing, None or empty is shown as "Not found".
    """
    headers = ["Insert", "", "Primer Type", "Sequence 5'->3'", "Tm", "GC Content", "Start Position"]
    placeholder = ["Not found"]
    table_data = []

    for insert, data in inserts.items():
        # `dict.get(key, default)` only falls back when the key is absent, but
        # records are created with the key present and set to None, so use
        # `or` to cover both the missing and the not-yet-computed case.
        fw_primer = data.get("FW_primer") or placeholder
        rv_primer = data.get("RV_primer") or placeholder
        table_data.append([insert, "", "Forward Primer", *fw_primer])
        table_data.append(["", "", "Reverse Primer", *rv_primer])
        table_data.append(["", "", "", "", ""])  # Empty row for visual separation

    print(tabulate(table_data, headers=headers, tablefmt="grid"))


In [492]:
from tabulate import tabulate
import pandas as pd
def primers_to_df(inserts, file_path, format="html"):
    """Collect the forward and reverse primer of every insert in a DataFrame.

    Args:
        inserts: Mapping of insert name -> record dict. The optional
            "FW_primer" / "RV_primer" entries are lists whose items are
            spread over the Sequence / Tm / GC Content / Start Position
            columns.
        file_path: Currently unused; kept so existing calls keep working.
        format: Currently unused; kept so existing calls keep working.

    Returns:
        pandas.DataFrame with two rows per insert (forward, then reverse) and
        seven columns. Cells a primer entry does not provide are None.

    A primer entry that is missing, None or empty is shown as "Not found".
    """
    headers = ["Insert", "", "Primer Type", "Sequence 5'->3'", "Tm", "GC Content", "Start Position"]
    placeholder = ["Not found"]
    width = len(headers)

    def _padded_row(label_cells, primer):
        # Short primer entries (e.g. a one- or three-item "not found" marker)
        # are padded to the header width; otherwise pandas rejects the data
        # whenever no row happens to have the full number of columns.
        cells = [*label_cells, *primer]
        return cells + [None] * (width - len(cells))

    table_data = []
    for insert, data in inserts.items():
        # `or` also covers keys that exist but still hold None.
        fw_primer = data.get("FW_primer") or placeholder
        rv_primer = data.get("RV_primer") or placeholder
        table_data.append(_padded_row([insert, "", "Forward Primer"], fw_primer))
        table_data.append(_padded_row(["", "", "Reverse Primer"], rv_primer))

    return pd.DataFrame(table_data, columns=headers)
# Example usage:
# df = primers_to_df(inserts, 'primers_table.html')


In [493]:
def read_seqs(file_path):
    """Parse a FASTA-like text file into a dict of insert records.

    File format, as handled here:
      * lines starting with '#' are ignored,
      * a line starting with '>' starts a new record; the rest of the line
        (whitespace-trimmed) is the record name,
      * every other non-blank line is sequence text for the current record
        and is concatenated after trimming surrounding whitespace.

    Args:
        file_path: Path of the file to read.

    Returns:
        dict mapping record name -> {"Sequence": Seq, "FW_primer": None,
        "RV_primer": None}, in file order. Records whose name contains '*'
        have their sequence replaced by its reverse complement.

    Raises:
        ValueError: If sequence text appears before the first '>' header.
    """
    sequences = {}
    current_key = None
    current_chunks = []

    def _store(name, chunks):
        sequences[name] = {"Sequence": Seq("").join(chunks), "FW_primer": None, "RV_primer": None}

    with open(file_path, 'r') as file:
        for line in file:
            if line.startswith('#'):
                continue

            if line.startswith('>'):  # New sequence name detected
                # Save the previous record before starting the next one
                if current_key is not None:
                    _store(current_key, current_chunks)
                current_key = line[1:].strip()  # Remove '>' and trim any whitespace
                current_chunks = []
                continue

            chunk = line.strip()
            if not chunk:
                # Blank lines carry no sequence; skipping them also lets a
                # file start with empty lines.
                continue
            if current_key is None:
                raise ValueError(
                    f"Sequence data found before the first '>' header in {file_path!r}: {chunk[:30]!r}"
                )
            # Only surrounding whitespace is removed; other characters are kept as-is
            current_chunks.append(chunk)

    # Save the last record
    if current_key is not None:
        _store(current_key, current_chunks)

    # Names flagged with '*' are stored as their reverse complement
    for key, record in sequences.items():
        if '*' in key:
            record["Sequence"] = record["Sequence"].reverse_complement()

    return sequences
# Load every insert from the project file and list what was parsed
inserts = read_seqs('/Users/quillan/Documents/Lab/Thesis/Random stuff/primerstuff/QF_Pkd2l1_Inpp5e.md')
for insert_name in inserts:
    print(insert_name, inserts[insert_name])


Backbone {'Sequence': Seq('atatgattattacatttatcaaaagagttgatgaagttatacttggatgtagat...gat'), 'FW_primer': None, 'RV_primer': None}
*OPCM {'Sequence': Seq('ttacttgtacagctcgtccatgccgccggtggagtggcggccctcggcgcgttc...tag'), 'FW_primer': None, 'RV_primer': None}
PKD2L1 {'Sequence': Seq('tcatttcaccgaagccctctgtatcccaagataagacgacaccaaacaaatgtc...aca'), 'FW_primer': None, 'RV_primer': None}
INPP5E {'Sequence': Seq('atgccgtccaaggcggagaatctgcggccctccgagccggccccgcagccgccc...aag'), 'FW_primer': None, 'RV_primer': None}


In [494]:
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp as mt
from Bio.SeqUtils import gc_fraction

def find_primer(sequence, max_primer_start_pos, window_size, max_primer_length, m_t, min_gc):
    """Find the first primer near the 5' end that passes the Tm and GC thresholds.

    For every start index 0 .. max_primer_start_pos - 1, candidate primers of
    length window_size .. max_primer_length - 1 are tried shortest-first; the
    first one with Tm_GC > m_t and GC fraction > min_gc is returned.

    Args:
        sequence: Template read 5'->3' (Seq or str).
        max_primer_start_pos: Number of start indices to try.
        window_size: Shortest primer length considered.
        max_primer_length: Exclusive upper bound on the primer length.
        m_t: Melting temperature the primer has to exceed.
        min_gc: GC fraction the primer has to exceed.

    Returns:
        [primer, tm, gc_fraction, start_index] on success. When start_index is
        not 0 the primer is prefixed with the skipped leading bases and "-".
        ["Not found", "Not found", "Not found"] when nothing qualifies.
    """
    seq_length = len(sequence)

    for i in range(max_primer_start_pos):
        for pl in range(window_size, max_primer_length):
            if i + pl > seq_length:
                # The window would run past the end of the template. The slice
                # would silently be cut short (below window_size) or be empty,
                # and an empty primer has no defined melting temperature.
                break

            primer = sequence[i:i+pl]
            primer_tm = mt.Tm_GC(primer)
            gc_content = gc_fraction(primer)

            if primer_tm > m_t and gc_content > min_gc:
                if i != 0:
                    # Keep the skipped bases visible in front of the primer
                    primer = sequence[0:i] + "-" + primer
                return [primer, primer_tm, gc_content, i]

    return ["Not found", "Not found", "Not found"]
    


In [495]:
def _find_primer_relaxed(sequence, max_primer_start_pos, window_size, max_primer_length, m_t, min_gc):
    """Run find_primer, loosening the thresholds step by step until it succeeds.

    After every failed search the GC threshold drops by 0.1 while it is above
    0.1, and the Tm threshold drops by 0.1 while it is above 45. Once the Tm
    threshold can drop no further and the search still fails, ["Not Found."]
    is returned.
    """
    gc_adj = 0
    m_t_adj = 0
    while True:
        primer = find_primer(
            sequence, max_primer_start_pos, window_size, max_primer_length,
            m_t - m_t_adj, min_gc - gc_adj,
        )
        if "Not found" not in primer:
            return primer

        if min_gc - gc_adj > 0.1:
            gc_adj += 0.1

        if m_t - m_t_adj > 45:
            m_t_adj += 0.1
        else:
            # Thresholds are exhausted; give up on this strand.
            return ["Not Found."]


def get_primers(inserts, window_size=10, max_primer_length=30, max_primer_start_pos=10, min_gc=0.5, m_t=55):
    """Design a forward and a reverse primer for every insert, in place.

    The forward primer is searched on the stored sequence, the reverse primer
    on its reverse complement. Thresholds are relaxed gradually when no primer
    passes (see _find_primer_relaxed).

    Args:
        inserts: Mapping of insert name -> record dict with a "Sequence" entry
            that offers reverse_complement().
        window_size: Shortest primer length considered.
        max_primer_length: Exclusive upper bound on the primer length.
        max_primer_start_pos: Number of start indices tried from the 5' end.
        min_gc: Initial GC fraction a primer has to exceed.
        m_t: Initial melting temperature a primer has to exceed.

    Returns:
        The same `inserts` mapping, with "FW_primer" and "RV_primer" of every
        record set to the find_primer result, or to ["Not Found."] when no
        primer passes even the loosest thresholds.
    """
    search_args = (max_primer_start_pos, window_size, max_primer_length, m_t, min_gc)

    for insert, data in inserts.items():
        forward_strand = data['Sequence']
        # Computed once per insert instead of once per relaxation step
        reverse_strand = forward_strand.reverse_complement()

        data["FW_primer"] = _find_primer_relaxed(forward_strand, *search_args)
        data["RV_primer"] = _find_primer_relaxed(reverse_strand, *search_args)

    return inserts

In [542]:
# Default homology arm length; also read as a global by other cells.
homology_seq_length = 10
def add_homology(inserts, hom_length=10):
    """Prepend homology overhangs to the primers of every insert, in place.

    Inserts are treated as a circular assembly in dict order: the neighbour
    before the first insert is the last one, and vice versa.

      * Forward primer: gets the last `hom_length` bases of the previous
        insert's sequence, followed by "-".
      * Reverse primer: gets the complement of the first `hom_length` bases of
        the next insert's sequence, followed by "-".

    Args:
        inserts: Mapping of insert name -> record dict with "Sequence",
            "FW_primer" and "RV_primer" entries; item 0 of each primer list is
            the primer text that is extended.
        hom_length: Length of the homology overhang in bases (at least 1).

    Returns:
        The same `inserts` mapping, with the primer texts extended.

    Raises:
        ValueError: If hom_length is smaller than 1.
    """
    if hom_length < 1:
        # A slice like seq[-0:] would return the whole sequence.
        raise ValueError(f"hom_length must be at least 1, got {hom_length}")

    insert_names = list(inserts.keys())
    count = len(insert_names)

    for i, name in enumerate(insert_names):
        record = inserts[name]
        next_sequence = inserts[insert_names[(i + 1) % count]]["Sequence"]
        previous_sequence = inserts[insert_names[i - 1]]["Sequence"]  # index -1 wraps to the last insert

        # NOTE(review): this is the plain complement, not the reverse
        # complement, so the overhang reads 3'->5' while the primer table
        # header says "Sequence 5'->3'" - confirm the intended orientation
        # before ordering oligos.
        hom_seq = next_sequence.complement()[:hom_length]
        record["RV_primer"][0] = f'{hom_seq}-{record["RV_primer"][0]}'

        hom_seq = previous_sequence[-hom_length:]
        record["FW_primer"][0] = f'{hom_seq}-{record["FW_primer"][0]}'

    return inserts

In [543]:
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp as mt
from Bio.SeqUtils import gc_fraction
import pyperclip

# Design the binding primers, then prepend the homology overhangs to them
inserts = add_homology(get_primers(inserts))

# Tabulate the primers and render the table as an HTML fragment
df = primers_to_df(inserts, "Primers_SLIC.html")
html_df = df.to_html()


In [548]:
def export(inserts, hom_length=None):
    """Attach HTML snippets showing each insert with its primer overhangs.

    Inserts are treated as a circular assembly in dict order. For every insert
    two entries are added to its record:

      * "FW_sequence_woverhang": the forward overhang, the sequence, then
        `hom_length` dashes.
      * "RV_sequence_woverhang": `hom_length` dashes, the complement of the
        sequence, then the reverse overhang.

    The overhang is the text before the first "-" of the primer. It is wrapped
    in a <span> whose class is the two joined insert names (sorted, with "*"
    replaced by "rev"), so both sides of a junction share one class.

    Args:
        inserts: Mapping of insert name -> record dict with "Sequence",
            "FW_primer" and "RV_primer" entries.
        hom_length: Number of dashes used as padding. Defaults to the
            module-level `homology_seq_length`.

    Returns:
        The same `inserts` mapping, with the two HTML entries added.
    """
    if hom_length is None:
        hom_length = homology_seq_length

    insert_names = list(inserts.keys())
    count = len(insert_names)
    padding = "-" * hom_length

    def _junction_class(first, second):
        # Sorting makes the class identical from either side of the junction
        return "".join(sorted([first, second])).replace("*", "rev")

    for idx, poi in enumerate(insert_names):  # poi: part of interest
        record = inserts[poi]
        previous_name = insert_names[idx - 1]  # index -1 wraps to the last insert
        next_name = insert_names[(idx + 1) % count]

        fw_class = _junction_class(poi, previous_name)
        rv_class = _junction_class(poi, next_name)
        fw_overhang = record["FW_primer"][0].split("-")[0]
        rv_overhang = record["RV_primer"][0].split("-")[0]
        sequence = record["Sequence"]

        record["FW_sequence_woverhang"] = (
            f'<p><span class="{fw_class}">{fw_overhang}</span>{sequence}{padding}</p>'
        )
        record["RV_sequence_woverhang"] = (
            f'<p>{padding}{sequence.complement()}<span class="{rv_class}">{rv_overhang}</span></p>'
        )

    return inserts

# Build the annotated HTML snippets first; the entry printed below only
# exists once export() has run on the inserts.
export(inserts)
print(inserts["INPP5E"]["RV_sequence_woverhang"])


INPP5E
<p>----------tacggcaggttccgcctcttagacgccgggaggctcggccggggcgtcggcggga...taccaggacgacctcaagcactggcggcggccctagtgagagccgtacctgctcgacatgttc<span class="BackboneINPP5E">tatactaata</span></p>


In [549]:
# HTML fragment (a <script> followed by a <style>) that the report cell writes
# at the end of the output file.
#   * The script collects every CSS class name used in the document except
#     'dataframe' and gives all elements of each class one shared random
#     background colour, so matching overhang <span>s get the same colour.
#   * The style block sets the monospace look of <p>, table cells and <h2>.
# NOTE(review): class names are concatenated into the selector unescaped
# ('.' + className) - confirm insert names never contain characters that are
# invalid in a CSS class selector.
js = """
<script>
// Define a function to generate a random color
function getRandomColor() {
  const letters = '0123456789ABCDEF';
  let color = '#';
  for (let i = 0; i < 6; i++) {
    color += letters[Math.floor(Math.random() * 16)];
  }
  return color;
}

// Get all elements in the DOM
const allElements = document.querySelectorAll('*');

// Create a Set to store unique class names
const uniqueClasses = new Set();

// Iterate through all elements to collect unique class names
allElements.forEach(element => {
  const classes = element.classList;
  classes.forEach(className => {
    if (className != 'dataframe'){
    uniqueClasses.add(className);
    }
  });
});

// Assign a random color to all elements sharing each class
uniqueClasses.forEach(className => {
  const elementsWithClass = document.querySelectorAll('.' + className);
  const randomColor = getRandomColor();
  elementsWithClass.forEach(element => {
    element.style.backgroundColor = randomColor;
  });
});
</script>
<style>
p {
    font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace;
    font-size: 14px;
    line-height: 1.5;
    padding: 10px;
    background-color: #f4f4f4;
    border: 1px solid #ddd;
    border-radius: 5px;
    overflow-x: auto; /* Enable horizontal scrolling if needed */
    white-space: pre-wrap; /* Preserve line breaks */
}
/* Reset default table styles */
table {
  border-collapse: collapse;
  width: 100%;
}

/* Table header styles */
thead {
  background-color: #f2f2f2 !important;
}

/* Table cell styles */
td, th {
  font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace;
  border: 1px solid #dddddd;
  text-align: left;
  padding: 8px;
}

/* Alternate row color */


/* Hover effect */
tbody tr:hover {
  background-color: #f2f2f2 !important;
}

/* Modern table style */
.modern-table {
  border-radius: 8px;
  overflow: hidden;
  box-shadow: 0 0 20px rgba(0, 0, 0, 0.1);
}
h2{
font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace;
}



</style>
"""
# Heading and introductory sentence written at the top of the HTML report
Title = (
    "\n"
    "<h2>SLIC helper results</h2>\n"
    "Below are the primers found in [homology-ext-binding] format. The Tm is for the binding sequence only\n"
)

In [552]:
outname = "output"

# The assembled construct is every insert sequence concatenated in dict
# order. It is built here so the cell does not rely on a name that only
# exists inside another function.
finalconstruct = "".join(str(data["Sequence"]) for data in inserts.values())

# Report layout: title, primer table, one section per insert with its two
# annotated strands, the final construct, then the colouring script and CSS.
with open(outname + ".html", 'w') as file:
    file.write(Title)
    file.write(html_df)
    for insert, data in inserts.items():
        file.write(f'<h2>{insert}</h2>')
        file.write(f'{data["FW_sequence_woverhang"]}')
        file.write(f'{data["RV_sequence_woverhang"]}')
    file.write('<h2>Final construct</h2>')
    file.write(f'<p>{finalconstruct}</p>')
    # Written last so the script runs after the elements it colours exist
    file.write(js)

In [551]:
# Create final construct
def create_final_construct(inserts):
    """Concatenate all insert sequences and save the result as a text file.

    The sequences are joined in dict order. The output file is named after
    the concatenated insert names plus ".txt" and is written to the current
    working directory.

    Args:
        inserts: Mapping of insert name -> record dict with a "Sequence" entry.

    Returns:
        The concatenated sequence, so callers can reuse it (for example in
        the HTML report) without re-reading the file.
    """
    finalconstruct = Seq("")
    outname = ""
    for insert, record in inserts.items():
        outname += str(insert)
        finalconstruct += record["Sequence"]

    # NOTE(review): insert names are used verbatim in the file name; names
    # containing characters such as "*" may not be valid on every file system.
    with open(outname + ".txt", 'w') as file:
        file.write(str(finalconstruct))

    return finalconstruct