<a href="https://colab.research.google.com/github/jjjung99/SD-design/blob/main/143%2C360%EC%83%98%ED%94%8C_%EC%B5%9C%EC%A2%85.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip -q install viennarna


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import os, csv, time
from itertools import product
from multiprocessing import Pool, cpu_count
import RNA
import matplotlib.pyplot as plt

# ===============================
# Fixed sequences (unchanged)
# ===============================
# Fixed upstream segment; fold_window() places it first in the folded construct.
UTR_DNA = "ATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCC"
# A/T-only segment inserted between UTR_DNA and the SD candidate.
AU_RICH_DNA = "TTAATTAA"
# Start codon; fold_window() places it directly after the spacer.
AUG_DNA = "ATG"

# Fixed downstream segment appended after AUG_DNA. The two adjacent literals
# are concatenated into a single string.
EMPTY_ORF_PREFIX = (
    "CTGCTGGGTGAGCTTTCTCCGTAAACTTAAAGGAAAAGATTCCGTTGAAAGATT"
    "CAAAGCTATCGTTCAGCGTATACAAGAGACTTCCTCCTGAGACTCGTGTTCCC"
)

# Reference ("WT") SD hexamer and anti-SD core used for the cross-duplex
# energies in evaluate().
# NOTE(review): WT_ASD_CORE_DNA is hard-coded rather than derived with
# revcomp_dna(WT_SD_DNA), which would give "CCTCCT" - presumably intentional; confirm.
WT_SD_DNA = "AGGAGG"
WT_ASD_CORE_DNA = "CTCCTT"

# ===============================
# Utilities
# ===============================
def dna_to_rna(seq):
    """Return ``seq`` with every uppercase ``T`` replaced by ``U``.

    Only the capital letter is substituted; lowercase ``t`` and all other
    characters pass through unchanged.
    """
    return seq.translate({ord("T"): "U"})
def revcomp_dna(seq):
    """Return the reverse complement of an uppercase A/T/C/G DNA string.

    Raises:
        KeyError: if ``seq`` contains any symbol other than A, T, C or G.
    """
    pairing = dict(zip("ATCG", "TAGC"))
    return "".join(pairing[base] for base in reversed(seq))

def gc_count(seq):
    """Return how many symbols of ``seq`` are an uppercase ``G`` or ``C``."""
    return len([base for base in seq if base in "GC"])

def duplex_dG(a,b):
    """Return the ``energy`` of ``RNA.duplexfold(a, b)`` converted to ``float``."""
    duplex = RNA.duplexfold(a, b)
    return float(duplex.energy)

def fold_window(sd, spacer):
    """Fold the full construct that carries ``sd`` and ``spacer``.

    The DNA is assembled in the fixed order
    ``UTR_DNA + AU_RICH_DNA + sd + spacer + AUG_DNA + EMPTY_ORF_PREFIX``,
    converted with ``dna_to_rna`` and handed to ``RNA.fold``.

    Returns:
        The ``(structure, mfe)`` pair unpacked from ``RNA.fold``.
    """
    segments = (UTR_DNA, AU_RICH_DNA, sd, spacer, AUG_DNA, EMPTY_ORF_PREFIX)
    structure, energy = RNA.fold(dna_to_rna("".join(segments)))
    return structure, energy

# ===============================
# SD / spacer generation
# ===============================
def generate_sd():
    """Enumerate every 6-mer over A/T/G/C whose G+C count is 3 or 4.

    Candidates keep the order in which ``itertools.product`` yields them
    for the alphabet ``("A", "T", "G", "C")``.

    Returns:
        List of 2240 six-letter DNA strings.
    """
    alphabet = ("A","T","G","C")
    hexamers = ("".join(letters) for letters in product(alphabet, repeat=6))
    return [hexamer for hexamer in hexamers if gc_count(hexamer) in (3, 4)]

def generate_spacer():
    """Enumerate all 64 six-letter strings over A/T in ``itertools.product`` order."""
    spacers = []
    for letters in product("AT", repeat=6):
        spacers.append("".join(letters))
    return spacers

# Build both candidate pools once at module level; evaluate() indexes into
# these lists by position.
SDS = generate_sd()
SPACERS = generate_spacer()

# Size of the full SD x spacer grid; evaluate() accepts flat indices 0..TOTAL-1.
TOTAL = len(SDS)*len(SPACERS)
print(f"Total SD–spacer combinations: {TOTAL:,}")

# ===============================
# Worker (index-based)
# ===============================
# RNA forms of the WT SD and anti-SD core, converted once at module level so
# evaluate() can reuse them for every cross-duplex energy.
WT_SD_RNA = dna_to_rna(WT_SD_DNA)
WT_ASD_CORE_RNA = dna_to_rna(WT_ASD_CORE_DNA)

def evaluate(i):
    """Score the SD/spacer pair addressed by flat index ``i``.

    ``i`` is decoded row-major: the quotient by ``len(SPACERS)`` selects the
    SD from ``SDS`` and the remainder selects the spacer from ``SPACERS``.

    Returns:
        A list in CSV column order: the SD, the spacer, the reverse
        complement of the SD, three duplex energies (SD vs. its own reverse
        complement, SD vs. the WT anti-SD core, WT SD vs. the reverse
        complement), the ``pass_main`` and ``pass_strict`` flags, the number
        of negative cross energies, and the MFE and structure returned by
        ``fold_window``.
    """
    sd_index, spacer_index = divmod(i, len(SPACERS))
    sd = SDS[sd_index]
    spacer = SPACERS[spacer_index]

    # The reverse complement serves both as duplex partner and as an output column.
    asd = revcomp_dna(sd)
    sd_rna = dna_to_rna(sd)
    asd_rna = dna_to_rna(asd)

    energy_own = duplex_dG(sd_rna, asd_rna)
    energy_sd_vs_wt = duplex_dG(sd_rna, WT_ASD_CORE_RNA)
    energy_wt_vs_asd = duplex_dG(WT_SD_RNA, asd_rna)

    # Main filter: own-duplex energy inside the closed window [-10, -8.5].
    in_window = energy_own >= -10 and energy_own <= -8.5
    # Strict filter: additionally require both cross energies to be strictly positive.
    cross_both_positive = energy_sd_vs_wt > 0 and energy_wt_vs_asd > 0
    strict = in_window and cross_both_positive
    # Number of cross energies (0, 1 or 2) that are strictly negative.
    negative_cross = sum(
        1 for energy in (energy_sd_vs_wt, energy_wt_vs_asd) if energy < 0
    )

    structure, mfe = fold_window(sd, spacer)

    return [
        sd, spacer, asd,
        energy_own, energy_sd_vs_wt, energy_wt_vs_asd,
        in_window, strict, negative_cross,
        mfe, structure,
    ]

# ===============================
# Run
# ===============================
# Output directory and CSV path; the directory is created if it does not exist.
OUTDIR="/content/final_SD_design"
os.makedirs(OUTDIR, exist_ok=True)
csv_path=f"{OUTDIR}/SD_design_full.csv"

# Column names, in the same order as the list returned by evaluate().
header=[
    "SD","Spacer","ASD(comp)",
    "dG_SD_ASD","dG_SD_WTASD","dG_WTSD_ASD",
    "pass_main","pass_strict","neg_cross",
    "mfe","structure"
]

t0=time.time()
with open(csv_path,"w",newline="") as f:
    w=csv.writer(f)
    w.writerow(header)
    # Use all but one CPU, with a floor of one worker process.
    with Pool(max(1,cpu_count()-1)) as p:
        # Ordered imap (not imap_unordered): results are yielded in index
        # order, so the CSV rows come out in the same order on every run
        # instead of depending on which worker chunk happens to finish first.
        for k,row in enumerate(p.imap(evaluate, range(TOTAL), chunksize=2000),1):
            w.writerow(row)
            # Report elapsed wall-clock minutes every 5000 rows written.
            if k%5000==0:
                print(f"[PROGRESS] {k}/{TOTAL}  {(time.time()-t0)/60:.1f} min")

print("Finished. CSV saved:", csv_path)


Total SD–spacer combinations: 143,360
[PROGRESS] 5000/143360  3.5 min
[PROGRESS] 10000/143360  5.8 min
[PROGRESS] 15000/143360  9.3 min
[PROGRESS] 20000/143360  11.6 min
[PROGRESS] 25000/143360  15.1 min
[PROGRESS] 30000/143360  17.3 min
[PROGRESS] 35000/143360  20.7 min
[PROGRESS] 40000/143360  22.9 min
[PROGRESS] 45000/143360  26.4 min
[PROGRESS] 50000/143360  28.6 min
[PROGRESS] 55000/143360  32.0 min
[PROGRESS] 60000/143360  34.3 min
[PROGRESS] 65000/143360  37.7 min
[PROGRESS] 70000/143360  40.0 min
[PROGRESS] 75000/143360  43.4 min
[PROGRESS] 80000/143360  45.7 min
[PROGRESS] 85000/143360  49.2 min
[PROGRESS] 90000/143360  51.5 min
[PROGRESS] 95000/143360  55.0 min
[PROGRESS] 100000/143360  57.3 min
[PROGRESS] 105000/143360  60.8 min
[PROGRESS] 110000/143360  63.1 min
[PROGRESS] 115000/143360  66.5 min
[PROGRESS] 120000/143360  68.9 min
[PROGRESS] 125000/143360  72.4 min
[PROGRESS] 130000/143360  74.7 min
[PROGRESS] 135000/143360  78.1 min
[PROGRESS] 140000/143360  80.3 min
Finished. CSV saved: /content/final_SD_design/SD_design_full.csv

In [3]:
import pandas as pd
# Reload the full result table written by the design run.
df = pd.read_csv("/content/final_SD_design/SD_design_full.csv")
# (rows, columns) of the subset flagged pass_main; the column is read back as
# booleans, so it is used directly as the mask instead of comparing to True.
df[df["pass_main"]].shape


(14144, 11)

In [4]:
# (rows, columns) of the subset flagged pass_strict; the boolean column is
# used directly as the mask instead of comparing to True.
df[df["pass_strict"]].shape


(4928, 11)