In [18]:
import nupack
import pandas as pd
import os
from nupack import Strand, Complex, Model, mfe

def find_closest_toehold(rate_constant, file_path):
    """Rank the toeholds in a CSV file by closeness to a target rate constant.

    The file is read without a header row and its columns are labelled
    "Toehold Sequence", "Category", "Energy" and "Rate Constant", in that
    order.

    Args:
        rate_constant: Target rate constant to compare each row against.
        file_path: Path of the CSV file to load.

    Returns:
        The loaded DataFrame with an extra "Difference" column holding
        ``abs(Rate Constant - rate_constant)``, sorted ascending by that
        column so the closest match comes first.
    """
    column_names = ["Toehold Sequence", "Category", "Energy", "Rate Constant"]

    # The file carries no header row, so label the columns explicitly.
    table = pd.read_csv(file_path, header=None)
    table.columns = column_names

    # Distance of every row's rate constant from the requested value.
    distance = (table["Rate Constant"] - rate_constant).abs()

    return table.assign(Difference=distance).sort_values(by="Difference")

def check_secondary_structure(sequence):
    """Report whether a single strand has any secondary structure.

    Builds a one-strand complex from ``sequence``, asks NUPACK for its
    minimum free energy (MFE) result under a DNA model at 25 degrees
    Celsius, and inspects the string form of the first returned structure.

    Args:
        sequence: Nucleotide sequence of the strand to analyse.

    Returns:
        False when the structure string is all dots (one per base of
        ``sequence``), True otherwise.
    """
    dna_model = Model(material='dna', celsius=25)

    # A complex made of just this one strand.
    single_strand = Complex([Strand(sequence, name='input')])

    # Take the first entry of the MFE result and render its structure as text.
    first_hit = mfe(single_strand, model=dna_model)[0]
    dot_parens = str(first_hit.structure)

    # An all-dots string means every base is unpaired.
    fully_unpaired = '.' * len(sequence)
    return dot_parens != fully_unpaired

# Dataset path, relative to the notebook's working directory.
file_path = "mismatch_data03.csv"

# Target rate constant; float() raises ValueError on non-numeric input.
user_input = float(input("Enter a rate constant: "))

# Candidates ordered from closest to farthest rate constant.
# find_closest_toehold always returns a DataFrame, so no None check is needed.
df_sorted = find_closest_toehold(user_input, file_path)

# Walk the candidates in order of closeness and stop at the first one that
# shows no secondary structure.
for _, row in df_sorted.iterrows():
    toehold = row["Toehold Sequence"]
    matched_rate = row["Rate Constant"]

    if check_secondary_structure(toehold):
        print(f"Toehold {toehold} has secondary structure (Rate Constant: {matched_rate}). Trying next closest...")
        continue

    print(f"Selected Toehold Sequence: {toehold} (Matched rate constant: {matched_rate})")
    print("No secondary structure detected.")
    break
else:
    # Reached only when the loop never hit `break`: the dataset was empty or
    # every candidate had secondary structure. Say so instead of ending silently.
    print(f"No toehold without secondary structure was found in {file_path}.")


Enter a rate constant:  3


Toehold ACTTATGAGC has secondary structure (Rate Constant: 2.985711261). Trying next closest...
Selected Toehold Sequence: CTCTAAGCGC (Matched rate constant: 2.92166856)
No secondary structure detected.
